commit a8e0bd39e557d439d9a33c067932e86c201c91f4 Author: Edward Betts Date: Mon Mar 30 19:34:46 2026 +0100 Initial commit. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8057dbb --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +# Python +__pycache__/ +*.pyc +*.pyo +.mypy_cache/ +.ruff_cache/ +*.egg-info/ +dist/ +build/ +.venv/ +venv/ + +# App +cache/ + +# Pytest +.pytest_cache/ + +# Editors +.idea/ +.vscode/ +*.swp diff --git a/README.md b/README.md new file mode 100644 index 0000000..afe7910 --- /dev/null +++ b/README.md @@ -0,0 +1,53 @@ +# Bristol Eurostar + +Plan a trip from Bristol Temple Meads to Europe on Eurostar. + +Combines GWR trains (Bristol → Paddington) with Eurostar services (St Pancras → destination) and shows all valid same-day connections, filtering by journey time and minimum/maximum transfer window at Paddington/St Pancras. + +## Destinations + +- Paris Gare du Nord +- Brussels Midi +- Lille Europe +- Amsterdam Centraal + +## How it works + +Train times are fetched from two sources simultaneously: + +- **GWR** — scraped from [Realtime Trains](https://www.realtimetrains.co.uk/) using httpx +- **Eurostar** — scraped from the Eurostar timetable pages via the embedded `__NEXT_DATA__` JSON (no browser required) + +Results are cached to disk by date and destination. + +## Connection constraints + +| | | +|---|---| +| Minimum Paddington → St Pancras | 75 min | +| Maximum Paddington → St Pancras | 2h 20m | +| Maximum Bristol → Paddington | 1h 50m | + +## Setup + +```bash +pip install -e ".[dev]" +``` + +## Running + +```bash +flask --app app run +``` + +Then open http://localhost:5000. 
+ +## Tests + +```bash +pytest +``` + +## License + +MIT diff --git a/app.py b/app.py new file mode 100644 index 0000000..2329d42 --- /dev/null +++ b/app.py @@ -0,0 +1,96 @@ +import asyncio +from flask import Flask, render_template, redirect, url_for, request +from datetime import date, timedelta + +from cache import get_cached, set_cached +import scraper.eurostar as eurostar_scraper +import scraper.realtime_trains as rtt_scraper +from trip_planner import combine_trips + +app = Flask(__name__) + +DESTINATIONS = { + 'paris': 'Paris Gare du Nord', + 'brussels': 'Brussels Midi', + 'lille': 'Lille Europe', + 'amsterdam': 'Amsterdam Centraal', +} + + +async def _fetch_both(destination: str, travel_date: str, user_agent: str): + """Fetch GWR trains and Eurostar times simultaneously.""" + gwr, es = await asyncio.gather( + rtt_scraper.fetch(travel_date, user_agent), + eurostar_scraper.fetch(destination, travel_date, user_agent), + return_exceptions=True, + ) + return gwr, es + + +@app.route('/') +def index(): + today = date.today().isoformat() + return render_template('index.html', destinations=DESTINATIONS, today=today) + + +@app.route('/results//') +def results(slug, travel_date): + destination = DESTINATIONS.get(slug) + if not destination or not travel_date: + return redirect(url_for('index')) + + user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA) + + cache_key = f"{travel_date}_{destination}" + cached = get_cached(cache_key) + + error = None + if cached: + gwr_trains = cached['gwr'] + eurostar_trains = cached['eurostar'] + from_cache = True + else: + from_cache = False + gwr_result, es_result = asyncio.run(_fetch_both(destination, travel_date, user_agent)) + + if isinstance(gwr_result, Exception): + gwr_trains = [] + error = f"Could not fetch GWR trains: {gwr_result}" + else: + gwr_trains = gwr_result + + if isinstance(es_result, Exception): + eurostar_trains = [] + msg = f"Could not fetch Eurostar times: {es_result}" + error = f"{error}; {msg}" if 
error else msg + else: + eurostar_trains = es_result + + if gwr_trains or eurostar_trains: + set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains}) + + trips = combine_trips(gwr_trains, eurostar_trains, travel_date) + + dt = date.fromisoformat(travel_date) + prev_date = (dt - timedelta(days=1)).isoformat() + next_date = (dt + timedelta(days=1)).isoformat() + travel_date_display = dt.strftime('%A %-d %B %Y') + + return render_template( + 'results.html', + trips=trips, + destination=destination, + travel_date=travel_date, + slug=slug, + prev_date=prev_date, + next_date=next_date, + travel_date_display=travel_date_display, + gwr_count=len(gwr_trains), + eurostar_count=len(eurostar_trains), + from_cache=from_cache, + error=error, + ) + + +if __name__ == '__main__': + app.run(debug=True) diff --git a/cache.py b/cache.py new file mode 100644 index 0000000..668103f --- /dev/null +++ b/cache.py @@ -0,0 +1,23 @@ +import json +import os + +CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache') + + +def _cache_path(key: str) -> str: + safe_key = key.replace('/', '_').replace(' ', '_') + return os.path.join(CACHE_DIR, f"{safe_key}.json") + + +def get_cached(key: str): + path = _cache_path(key) + if not os.path.exists(path): + return None + with open(path) as f: + return json.load(f) + + +def set_cached(key: str, data) -> None: + os.makedirs(CACHE_DIR, exist_ok=True) + with open(_cache_path(key), 'w') as f: + json.dump(data, f, indent=2) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d1f9700 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.backends.legacy:build" + +[project] +name = "bristol-eurostar" +version = "0.1.0" +description = "Plan Bristol → Europe trips combining GWR and Eurostar services" +readme = "README.md" +license = "MIT" +authors = [ + { name = "Edward Betts" } +] +requires-python = ">=3.11" +dependencies = [ + "flask", + "httpx", 
+ "lxml", + "cssselect", +] + +[project.optional-dependencies] +dev = [ + "pytest", +] + +[tool.pytest.ini_options] +testpaths = ["tests"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9321377 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +flask +httpx +lxml +cssselect +pytest diff --git a/scraper/__init__.py b/scraper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scraper/eurostar.py b/scraper/eurostar.py new file mode 100644 index 0000000..3a5ef26 --- /dev/null +++ b/scraper/eurostar.py @@ -0,0 +1,90 @@ +""" +Scrape Eurostar timetable via httpx. + +The route-specific timetable pages are Next.js SSR — all departure data is +embedded in ', html, re.DOTALL) + if not m: + return [] + data = json.loads(m.group(1)) + departures = data['props']['pageProps']['pageData']['liveDepartures'] + services = [] + for dep in departures: + dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime']) + arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime']) + if dep_time and arr_time: + services.append({ + 'depart_st_pancras': dep_time, + 'arrive_destination': arr_time, + 'destination': destination, + }) + return sorted(services, key=lambda s: s['depart_st_pancras']) + + +async def fetch(destination: str, travel_date: str, + user_agent: str = DEFAULT_UA) -> list[dict]: + url = ROUTE_URLS[destination] + headers = { + 'User-Agent': user_agent, + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'en-GB,en;q=0.9', + } + async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client: + r = await client.get(url, params={'date': travel_date}) + r.raise_for_status() + return _parse(r.text, destination) + + +def get_eurostar_times(destination: str, travel_date: str, + user_agent: str = DEFAULT_UA) -> list[dict]: + """Synchronous wrapper for CLI/testing.""" + return asyncio.run(fetch(destination, travel_date, user_agent)) 
diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py new file mode 100644 index 0000000..8a8023d --- /dev/null +++ b/scraper/realtime_trains.py @@ -0,0 +1,96 @@ +""" +Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. + +Uses httpx (not Playwright) with browser-like headers. + +Two fetches run concurrently: + BRI/to/PAD → departure times from Bristol (div.time.plan.d) + PAD/from/BRI → arrival times at Paddington (div.time.plan.a) +Matched by train ID (div.tid). +""" +import asyncio +import re +import httpx +import lxml.html + +BRI_TO_PAD = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:BRI/to/gb-nr:PAD/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) +PAD_FROM_BRI = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) + +DEFAULT_UA = ( + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" +) + + +def _browser_headers(user_agent: str) -> dict: + return { + "User-Agent": user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", + "Accept-Language": "en-GB,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + } + + +def _fmt(hhmm: str) -> str: + """Convert '0830' → '08:30'.""" + hhmm = re.sub(r'[^0-9]', '', hhmm) + if len(hhmm) == 4: + return f"{hhmm[:2]}:{hhmm[2:]}" + return hhmm + + +def _parse_services(html: str, time_selector: str) -> dict[str, str]: + """Return {train_id: time_string} from a servicelist page.""" + root = lxml.html.fromstring(html) + sl = root.cssselect('div.servicelist') + if not sl: + return {} + result = {} + for svc in sl[0].cssselect('a.service'): + tid_els = svc.cssselect('div.tid') 
+ time_els = svc.cssselect(time_selector) + if tid_els and time_els: + tid = tid_els[0].text_content().strip() + time_text = time_els[0].text_content().strip() + if time_text: + result[tid] = _fmt(time_text) + return result + + +async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: + """Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}].""" + headers = _browser_headers(user_agent) + async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client: + r_bri, r_pad = await asyncio.gather( + client.get(BRI_TO_PAD.format(date=date)), + client.get(PAD_FROM_BRI.format(date=date)), + ) + + departures = _parse_services(r_bri.text, 'div.time.plan.d') + arrivals = _parse_services(r_pad.text, 'div.time.plan.a') + + trains = [ + {'depart_bristol': dep, 'arrive_paddington': arr} + for tid, dep in departures.items() + if (arr := arrivals.get(tid)) + ] + return sorted(trains, key=lambda t: t['depart_bristol']) + + +def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: + """Synchronous wrapper around fetch() for CLI/testing use.""" + return asyncio.run(fetch(date, user_agent)) diff --git a/templates/base.html b/templates/base.html new file mode 100644 index 0000000..8d6230d --- /dev/null +++ b/templates/base.html @@ -0,0 +1,61 @@ + + + + + + Bristol to Europe via Eurostar + + + +
+

Bristol to Europe via Eurostar

+

GWR to Paddington → St Pancras → Eurostar

+
+
+ {% block content %}{% endblock %} +
+ + diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..d9d0dc4 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,43 @@ +{% extends "base.html" %} +{% block content %} +
+

Plan your journey

+
+
+ + +
+ +
+ + +
+ + +
+
+ +{% endblock %} diff --git a/templates/results.html b/templates/results.html new file mode 100644 index 0000000..661c3f1 --- /dev/null +++ b/templates/results.html @@ -0,0 +1,92 @@ +{% extends "base.html" %} +{% block content %} + +

+ ← New search +

+ +
+

+ Bristol Temple Meads → {{ destination }} +

+
+ ← Prev + {{ travel_date_display }} + Next → +
+

+ {{ gwr_count }} GWR service{{ 's' if gwr_count != 1 }} +  ·  + {{ eurostar_count }} Eurostar service{{ 's' if eurostar_count != 1 }} + {% if from_cache %} +  ·  (cached) + {% endif %} +

+ {% if error %} +
+ Warning: {{ error }} +
+ {% endif %} +
+ +{% if trips %} +
+ + + + + + + + + + + + + {% for trip in trips %} + + + + + + + + + {% endfor %} + +
Depart BristolArrive PaddingtonTransferDepart St PancrasArrive {{ destination }}Total
{{ trip.depart_bristol }} + {{ trip.arrive_paddington }} + ({{ trip.gwr_duration }}) + + {{ trip.connection_duration }} + {{ trip.depart_st_pancras }}{{ trip.arrive_destination }}{{ trip.total_duration }}
+
+ +

+ Paddington → St Pancras connection: 75 min minimum, 2h 20m maximum. + Eurostar times are from the general timetable and may vary; always check + eurostar.com to book. +

+ +{% else %} +
+

No valid journeys found.

+

+ {% if gwr_count == 0 and eurostar_count == 0 %} + Could not retrieve train data. Check your network connection or try again. + {% elif gwr_count == 0 %} + No GWR trains found for this date. + {% elif eurostar_count == 0 %} + No Eurostar services found for {{ destination }} on this date. + {% else %} + No GWR + Eurostar combination allows a connection of between 75 minutes and 2h 20m at Paddington/St Pancras. + {% endif %} +

+
+{% endif %} + +{% endblock %} diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py new file mode 100644 index 0000000..ff2d4b1 --- /dev/null +++ b/tests/test_eurostar_scraper.py @@ -0,0 +1,82 @@ +import json +import pytest +from scraper.eurostar import _hhmm, _parse + + +# --------------------------------------------------------------------------- +# _hhmm +# --------------------------------------------------------------------------- + +def test_hhmm_parses_datetime_string(): + assert _hhmm('2026-03-30 09:34:00') == '09:34' + +def test_hhmm_none_input(): + assert _hhmm(None) is None + +def test_hhmm_empty_string(): + assert _hhmm('') is None + + +# --------------------------------------------------------------------------- +# _parse +# --------------------------------------------------------------------------- + +def _make_next_data(departures: list) -> str: + data = { + 'props': { + 'pageProps': { + 'pageData': { + 'liveDepartures': departures + } + } + } + } + return f'' + + +def _departure(dep_dt: str, arr_dt: str) -> dict: + return { + 'origin': {'model': {'scheduledDepartureDateTime': dep_dt}}, + 'destination': {'model': {'scheduledArrivalDateTime': arr_dt}}, + } + + +def test_parse_single_departure(): + html = _make_next_data([_departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')]) + services = _parse(html, 'Paris Gare du Nord') + assert len(services) == 1 + assert services[0] == { + 'depart_st_pancras': '06:01', + 'arrive_destination': '09:34', + 'destination': 'Paris Gare du Nord', + } + + +def test_parse_results_sorted_by_departure(): + html = _make_next_data([ + _departure('2026-03-30 10:00:00', '2026-03-30 13:00:00'), + _departure('2026-03-30 07:00:00', '2026-03-30 10:00:00'), + ]) + services = _parse(html, 'Paris Gare du Nord') + assert services[0]['depart_st_pancras'] == '07:00' + assert services[1]['depart_st_pancras'] == '10:00' + + +def test_parse_skips_entries_with_missing_times(): + html = _make_next_data([ + 
_departure(None, '2026-03-30 09:34:00'), + _departure('2026-03-30 08:00:00', None), + _departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'), + ]) + services = _parse(html, 'Paris Gare du Nord') + assert len(services) == 1 + assert services[0]['depart_st_pancras'] == '09:00' + + +def test_parse_no_next_data_returns_empty(): + assert _parse('nothing here', 'Paris Gare du Nord') == [] + + +def test_parse_empty_departures(): + html = _make_next_data([]) + assert _parse(html, 'Paris Gare du Nord') == [] diff --git a/tests/test_rtt_scraper.py b/tests/test_rtt_scraper.py new file mode 100644 index 0000000..7bceb61 --- /dev/null +++ b/tests/test_rtt_scraper.py @@ -0,0 +1,71 @@ +import pytest +from scraper.realtime_trains import _fmt, _parse_services + + +# --------------------------------------------------------------------------- +# _fmt +# --------------------------------------------------------------------------- + +def test_fmt_four_digits(): + assert _fmt('0830') == '08:30' + +def test_fmt_already_colon(): + assert _fmt('08:30') == '08:30' + +def test_fmt_strips_non_digits(): + assert _fmt('08h30') == '08:30' + + +# --------------------------------------------------------------------------- +# _parse_services +# --------------------------------------------------------------------------- + +def _make_html(services: list[tuple[str, str]], time_class: str) -> str: + """Build a minimal servicelist HTML with (train_id, time) pairs.""" + items = '' + for tid, time in services: + items += f''' + +
{tid}
+
{time}
+
''' + return f'
{items}
' + + +def test_parse_services_departures(): + html = _make_html([('1A23', '0700'), ('2B45', '0830')], 'd') + result = _parse_services(html, 'div.time.plan.d') + assert result == {'1A23': '07:00', '2B45': '08:30'} + + +def test_parse_services_arrivals(): + html = _make_html([('1A23', '0845')], 'a') + result = _parse_services(html, 'div.time.plan.a') + assert result == {'1A23': '08:45'} + + +def test_parse_services_no_servicelist(): + assert _parse_services('', 'div.time.plan.d') == {} + + +def test_parse_services_skips_missing_time(): + html = ''' +
+
1A23
+
2B45
0900
+
''' + result = _parse_services(html, 'div.time.plan.d') + assert '1A23' not in result + assert result == {'2B45': '09:00'} + + +def test_parse_services_skips_empty_time(): + html = ''' + ''' + result = _parse_services(html, 'div.time.plan.d') + assert result == {} diff --git a/tests/test_trip_planner.py b/tests/test_trip_planner.py new file mode 100644 index 0000000..2ea1e25 --- /dev/null +++ b/tests/test_trip_planner.py @@ -0,0 +1,131 @@ +import pytest +from trip_planner import combine_trips, _fmt_duration + +DATE = '2026-03-30' + + +# --------------------------------------------------------------------------- +# _fmt_duration +# --------------------------------------------------------------------------- + +def test_fmt_duration_hours_and_minutes(): + assert _fmt_duration(95) == '1h 35m' + +def test_fmt_duration_exact_hours(): + assert _fmt_duration(120) == '2h' + +def test_fmt_duration_minutes_only(): + assert _fmt_duration(45) == '45m' + + +# --------------------------------------------------------------------------- +# combine_trips — basic pairing +# --------------------------------------------------------------------------- + +GWR_FAST = {'depart_bristol': '07:00', 'arrive_paddington': '08:45'} # 1h 45m +GWR_SLOW = {'depart_bristol': '07:00', 'arrive_paddington': '09:26'} # 2h 26m — over limit + +ES_PARIS = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'} +ES_EARLY = {'depart_st_pancras': '09:00', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord'} + + +def test_valid_trip_is_returned(): + trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) + assert len(trips) == 1 + t = trips[0] + assert t['depart_bristol'] == '07:00' + assert t['arrive_paddington'] == '08:45' + assert t['depart_st_pancras'] == '10:01' + assert t['arrive_destination'] == '13:34' + assert t['destination'] == 'Paris Gare du Nord' + + +def test_gwr_too_slow_excluded(): + # 2h 26m GWR journey exceeds MAX_GWR_MINUTES (110) + trips 
= combine_trips([GWR_SLOW], [ES_PARIS], DATE) + assert trips == [] + + +def test_eurostar_too_early_excluded(): + # Eurostar departs before min connection time has elapsed + trips = combine_trips([GWR_FAST], [ES_EARLY], DATE) + assert trips == [] + + +def test_no_trains_returns_empty(): + assert combine_trips([], [], DATE) == [] + +def test_no_gwr_returns_empty(): + assert combine_trips([], [ES_PARIS], DATE) == [] + +def test_no_eurostar_returns_empty(): + assert combine_trips([GWR_FAST], [], DATE) == [] + + +# --------------------------------------------------------------------------- +# Connection window constraints +# --------------------------------------------------------------------------- + +def test_min_connection_enforced(): + # Arrive Paddington 08:45, need 75 min → earliest St Pancras 10:00 + # ES at 09:59 should be excluded, 10:00 should be included + es_too_close = {'depart_st_pancras': '09:59', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'} + es_ok = {'depart_st_pancras': '10:00', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'} + assert combine_trips([GWR_FAST], [es_too_close], DATE) == [] + trips = combine_trips([GWR_FAST], [es_ok], DATE) + assert len(trips) == 1 + + +def test_max_connection_enforced(): + # Arrive Paddington 08:45, max 140 min → latest St Pancras 11:05 + es_ok = {'depart_st_pancras': '11:05', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'} + es_too_late = {'depart_st_pancras': '11:06', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'} + trips = combine_trips([GWR_FAST], [es_ok], DATE) + assert len(trips) == 1 + assert combine_trips([GWR_FAST], [es_too_late], DATE) == [] + + +# --------------------------------------------------------------------------- +# Only earliest valid Eurostar per GWR departure +# --------------------------------------------------------------------------- + +def test_only_earliest_eurostar_per_gwr(): + es1 = {'depart_st_pancras': 
'10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'} + es2 = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'} + trips = combine_trips([GWR_FAST], [es1, es2], DATE) + assert len(trips) == 1 + assert trips[0]['depart_st_pancras'] == '10:01' + + +# --------------------------------------------------------------------------- +# Multiple GWR trains → multiple trips +# --------------------------------------------------------------------------- + +def test_multiple_gwr_trains(): + gwr2 = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'} + es = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'} + trips = combine_trips([GWR_FAST, gwr2], [es], DATE) + assert len(trips) == 2 + assert trips[0]['depart_bristol'] == '07:00' + assert trips[1]['depart_bristol'] == '08:00' + + +# --------------------------------------------------------------------------- +# Duration fields +# --------------------------------------------------------------------------- + +def test_gwr_duration_in_trip(): + trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) + assert trips[0]['gwr_duration'] == '1h 45m' + + +def test_total_duration_in_trip(): + # depart 07:00, arrive 13:34 → 6h 34m + trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) + assert trips[0]['total_duration'] == '6h 34m' + + +def test_connection_duration_in_trip(): + # arrive Paddington 08:45, depart St Pancras 10:01 → 1h 16m + trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) + assert trips[0]['connection_duration'] == '1h 16m' diff --git a/trip_planner.py b/trip_planner.py new file mode 100644 index 0000000..0e5c6ea --- /dev/null +++ b/trip_planner.py @@ -0,0 +1,88 @@ +""" +Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains. 
from datetime import datetime, timedelta

# Transfer window between arriving at Paddington and departing St Pancras.
MIN_CONNECTION_MINUTES = 75
MAX_CONNECTION_MINUTES = 140
# GWR services slower than this (Bristol → Paddington) are ignored.
MAX_GWR_MINUTES = 110
DATE_FMT = '%Y-%m-%d'
TIME_FMT = '%H:%M'


def _parse_dt(date: str, time: str) -> datetime:
    """Combine a YYYY-MM-DD date and an HH:MM time into a naive datetime.

    Raises:
        ValueError: if either part does not match DATE_FMT / TIME_FMT.
    """
    return datetime.strptime(f"{date} {time}", f"{DATE_FMT} {TIME_FMT}")


def _minutes_between(start: datetime, end: datetime) -> int:
    """Return the whole number of minutes from *start* to *end*."""
    return int((end - start).total_seconds() / 60)


def _fmt_duration(minutes: int) -> str:
    """Format a minute count as '1h 35m', '2h' or '45m'."""
    h, m = divmod(minutes, 60)
    if h and m:
        return f"{h}h {m}m"
    if h:
        return f"{h}h"
    return f"{m}m"


def _parse_eurostars(
    eurostar_trains: list[dict],
    travel_date: str,
) -> list[tuple[datetime, datetime, dict]]:
    """Parse Eurostar services once and return them sorted by departure.

    Entries with missing or malformed times are skipped. Each item is
    (depart_st_pancras, arrive_destination, original_dict).
    """
    parsed = []
    for es in eurostar_trains:
        try:
            dep_stp = _parse_dt(travel_date, es['depart_st_pancras'])
            arr_dest = _parse_dt(travel_date, es['arrive_destination'])
        except (ValueError, KeyError):
            continue
        # Arrival clock time earlier than departure means it lands next day.
        if arr_dest < dep_stp:
            arr_dest += timedelta(days=1)
        parsed.append((dep_stp, arr_dest, es))
    # Sorting here means "first match" really is the earliest service, no
    # matter what order the caller supplied. list.sort is stable, so services
    # with identical departure times keep their input order.
    parsed.sort(key=lambda item: item[0])
    return parsed


def combine_trips(
    gwr_trains: list[dict],
    eurostar_trains: list[dict],
    travel_date: str,
) -> list[dict]:
    """
    Return a list of valid combined trips, sorted by Bristol departure time.

    For every GWR train that takes at most MAX_GWR_MINUTES, the earliest
    Eurostar departing between MIN_CONNECTION_MINUTES and
    MAX_CONNECTION_MINUTES after the Paddington arrival is selected. GWR or
    Eurostar entries with missing/malformed times are skipped. The inputs
    may be in any order and are not modified.

    Args:
        gwr_trains: dicts with 'depart_bristol' and 'arrive_paddington' (HH:MM).
        eurostar_trains: dicts with 'depart_st_pancras', 'arrive_destination'
            (HH:MM) and 'destination'.
        travel_date: YYYY-MM-DD date that all clock times are anchored to.

    Each trip dict:
        depart_bristol      HH:MM
        arrive_paddington   HH:MM
        gwr_duration        str  (e.g. "1h 45m")
        connection_duration str
        depart_st_pancras   HH:MM
        arrive_destination  HH:MM
        total_duration      str  (e.g. "5h 30m")
        destination         str

    NOTE(review): total_duration is the difference between the timetable
    clock times as given; if destination times are local (e.g. CET) it
    includes the timezone offset — confirm whether that is intended.
    """
    eurostars = _parse_eurostars(eurostar_trains, travel_date)
    min_wait = timedelta(minutes=MIN_CONNECTION_MINUTES)
    max_wait = timedelta(minutes=MAX_CONNECTION_MINUTES)
    trips = []

    for gwr in gwr_trains:
        try:
            dep_bri = _parse_dt(travel_date, gwr['depart_bristol'])
            arr_pad = _parse_dt(travel_date, gwr['arrive_paddington'])
        except (ValueError, KeyError):
            continue

        # A service arriving after midnight must roll over to the next day;
        # otherwise its duration is negative and slips past the filter below.
        if arr_pad < dep_bri:
            arr_pad += timedelta(days=1)

        gwr_minutes = _minutes_between(dep_bri, arr_pad)
        if gwr_minutes > MAX_GWR_MINUTES:
            continue

        earliest_eurostar = arr_pad + min_wait
        latest_eurostar = arr_pad + max_wait

        for dep_stp, arr_dest, es in eurostars:
            if dep_stp < earliest_eurostar:
                continue
            if dep_stp > latest_eurostar:
                # Sorted by departure: every later service is also too late.
                break

            trips.append({
                'depart_bristol': gwr['depart_bristol'],
                'arrive_paddington': gwr['arrive_paddington'],
                'gwr_duration': _fmt_duration(gwr_minutes),
                'connection_duration': _fmt_duration(_minutes_between(arr_pad, dep_stp)),
                'depart_st_pancras': es['depart_st_pancras'],
                'arrive_destination': es['arrive_destination'],
                'total_duration': _fmt_duration(_minutes_between(dep_bri, arr_dest)),
                'destination': es['destination'],
            })
            break  # Only the earliest valid Eurostar per GWR departure

    trips.sort(key=lambda t: (t['depart_bristol'], t['depart_st_pancras']))
    return trips