From 60674fe663bad689f5f7fb5445fdc06cf1e5ed86 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 12:58:19 +0100 Subject: [PATCH 1/7] Add Circle Line timetable info. --- templates/results.html | 1 + trip_planner.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/templates/results.html b/templates/results.html index 7a7259f..5426995 100644 --- a/templates/results.html +++ b/templates/results.html @@ -150,6 +150,7 @@ {{ row.connection_duration }}{% if row.connection_minutes < 80 %} ⚠️{% endif %} +
Circle {{ row.circle_line_depart }} → STP {{ row.circle_arrive_checkin }} {{ row.depart_st_pancras }} diff --git a/trip_planner.py b/trip_planner.py index c7e3286..e732b93 100644 --- a/trip_planner.py +++ b/trip_planner.py @@ -9,6 +9,12 @@ MAX_GWR_MINUTES = 110 DATE_FMT = '%Y-%m-%d' TIME_FMT = '%H:%M' +# Circle Line: Paddington (H&C) → Kings Cross St Pancras +CIRCLE_LINE_MINUTES_PAST = [8, 18, 28, 38, 48, 58] # departures past each hour +CIRCLE_LINE_JOURNEY_MINUTES = 11 +PAD_WALK_TO_UNDERGROUND_MINUTES = 8 # GWR platform → Paddington Underground +KX_WALK_TO_CHECKIN_MINUTES = 10 # Kings Cross St Pancras platform → St Pancras check-in + # Bristol Temple Meads → London Paddington walk-on single fares. # Weekday restrictions (Mon–Fri only): @@ -45,6 +51,28 @@ def _parse_dt(date: str, time: str) -> datetime: return datetime.strptime(f"{date} {time}", f"{DATE_FMT} {TIME_FMT}") +def _next_circle_line(arrive_paddington: datetime) -> tuple[datetime, datetime]: + """ + Given GWR arrival at Paddington, return (circle_line_depart, arrive_checkin). + + Walk 10 min to Paddington Underground, catch next Circle Line at :08/:18/:28/:38/:48/:58, + 11 min journey to Kings Cross St Pancras, 10 min walk to St Pancras check-in. + """ + earliest_board = arrive_paddington + timedelta(minutes=PAD_WALK_TO_UNDERGROUND_MINUTES) + minute = earliest_board.minute + depart_minute = next((m for m in CIRCLE_LINE_MINUTES_PAST if m >= minute), None) + if depart_minute is None: + circle_depart = (earliest_board + timedelta(hours=1)).replace( + minute=CIRCLE_LINE_MINUTES_PAST[0], second=0, microsecond=0 + ) + else: + circle_depart = earliest_board.replace(minute=depart_minute, second=0, microsecond=0) + arrive_checkin = circle_depart + timedelta( + minutes=CIRCLE_LINE_JOURNEY_MINUTES + KX_WALK_TO_CHECKIN_MINUTES + ) + return circle_depart, arrive_checkin + + def _fmt_duration(minutes: int) -> str: h, m = divmod(minutes, 60) if h and m: @@ -122,6 +150,7 @@ def combine_trips( total_mins = int((arr_dest - dep_bri).total_seconds() / 60) ticket = cheapest_gwr_ticket(gwr['depart_bristol'], travel_date) + circle_depart, arrive_checkin = _next_circle_line(arr_pad) trips.append({ 'depart_bristol': gwr['depart_bristol'], 'arrive_paddington': gwr['arrive_paddington'], @@ -129,6 +158,8 @@ def combine_trips( 'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)), 'connection_minutes': int((dep_stp - arr_pad).total_seconds() / 60), 'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)), + 'circle_line_depart': circle_depart.strftime(TIME_FMT), + 'circle_arrive_checkin': arrive_checkin.strftime(TIME_FMT), 'depart_st_pancras': es['depart_st_pancras'], 'arrive_destination': es['arrive_destination'], 'train_number': es.get('train_number', ''), From c215456620f0fb9cf11c820707abd99401a44a8d Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 13:26:35 +0100 Subject: [PATCH 2/7] Use real Circle Line timetable; add Eurostar duration Parse Circle Line times from TransXChange XML (output_txc_01CIR_.xml) with separate weekday/Saturday/Sunday schedules, replacing the approximated every-10-minutes pattern. Subtract 1 hour timezone offset (CET/CEST vs GMT/BST) when computing Eurostar journey duration, shown for both viable and unreachable services. Co-Authored-By: Claude Sonnet 4.6 --- circle_line.py | 148 +++++++++++++++++++++++++++++++++++++++++ templates/results.html | 4 +- trip_planner.py | 53 ++++++++------- 3 files changed, 179 insertions(+), 26 deletions(-) create mode 100644 circle_line.py diff --git a/circle_line.py b/circle_line.py new file mode 100644 index 0000000..1fee867 --- /dev/null +++ b/circle_line.py @@ -0,0 +1,148 @@ +""" +Circle Line timetable: Paddington (H&C Line) → King's Cross St Pancras. + +Parses the TransXChange XML file on first use and caches the result in memory. +""" +import os +import re +import xml.etree.ElementTree as ET +from datetime import datetime, timedelta + +_PAD_STOP = '9400ZZLUPAH1' # Paddington (H&C Line) +_KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras + +_TXC_XML = os.path.join(os.path.dirname(__file__), 'output_txc_01CIR_.xml') +_NS = {'t': 'http://www.transxchange.org.uk/'} + +# Populated on first call to next_service(); maps day-type -> sorted list of +# (pad_depart_seconds, kxp_arrive_seconds) measured from midnight. +_timetable: dict[str, list[tuple[int, int]]] | None = None + + +def _parse_duration(s: str | None) -> int: + if not s: + return 0 + m = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', s) + return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0) + + +def _load_timetable() -> dict[str, list[tuple[int, int]]]: + tree = ET.parse(_TXC_XML) + root = tree.getroot() + + # Build JPS id -> [(from_stop, to_stop, runtime_secs, wait_secs)] + jps_map: dict[str, list[tuple]] = {} + for jps_el in root.find('t:JourneyPatternSections', _NS): + links = [] + for link in jps_el.findall('t:JourneyPatternTimingLink', _NS): + fr = link.find('t:From/t:StopPointRef', _NS) + to = link.find('t:To/t:StopPointRef', _NS) + rt = link.find('t:RunTime', _NS) + wait = link.find('t:From/t:WaitTime', _NS) + links.append(( + fr.text if fr is not None else None, + to.text if to is not None else None, + _parse_duration(rt.text if rt is not None else None), + _parse_duration(wait.text if wait is not None else None), + )) + jps_map[jps_el.get('id')] = links + + def _seconds_to_depart(links, stop): + """Seconds from journey start until departure from *stop*.""" + elapsed = 0 + for fr, to, rt, wait in links: + elapsed += wait + if fr == stop: + return elapsed + elapsed += rt + return None + + def _seconds_to_arrive(links, stop): + """Seconds from journey start until arrival at *stop*.""" + elapsed = 0 + for fr, to, rt, wait in links: + elapsed += wait + rt + if to == stop: + return elapsed + return None + + # Map JP id -> (pad_offset_secs, kxp_arrive_offset_secs) + jp_offsets: dict[str, tuple[int, int]] = {} + for svc in root.find('t:Services', _NS): + for jp in svc.findall('.//t:JourneyPattern', _NS): + jps_ref = jp.find('t:JourneyPatternSectionRefs', _NS) + if jps_ref is None: + continue + links = jps_map.get(jps_ref.text, []) + stops = [l[0] for l in links] + ([links[-1][1]] if links else []) + if ( + _PAD_STOP in stops + and _KXP_STOP in stops + and stops.index(_PAD_STOP) < stops.index(_KXP_STOP) + ): + pad_off = _seconds_to_depart(links, _PAD_STOP) + kxp_off = _seconds_to_arrive(links, _KXP_STOP) + if pad_off is not None and kxp_off is not None: + jp_offsets[jp.get('id')] = (pad_off, kxp_off) + + result: dict[str, list[tuple[int, int]]] = { + 'MondayToFriday': [], + 'Saturday': [], + 'Sunday': [], + } + + for vj in root.find('t:VehicleJourneys', _NS): + jp_ref = vj.find('t:JourneyPatternRef', _NS) + dep_time = vj.find('t:DepartureTime', _NS) + op = vj.find('t:OperatingProfile', _NS) + if jp_ref is None or dep_time is None or jp_ref.text not in jp_offsets: + continue + pad_off, kxp_off = jp_offsets[jp_ref.text] + h, m, s = map(int, dep_time.text.split(':')) + dep_secs = h * 3600 + m * 60 + s + rdt = op.find('.//t:DaysOfWeek', _NS) if op is not None else None + if rdt is None: + continue + for day_el in rdt: + day_type = day_el.tag.split('}')[-1] + if day_type in result: + result[day_type].append((dep_secs + pad_off, dep_secs + kxp_off)) + + for key in result: + result[key].sort() + return result + + +def _get_timetable() -> dict[str, list[tuple[int, int]]]: + global _timetable + if _timetable is None: + _timetable = _load_timetable() + return _timetable + + +def _day_type(weekday: int) -> str: + if weekday < 5: + return 'MondayToFriday' + return 'Saturday' if weekday == 5 else 'Sunday' + + +def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: + """ + Given the earliest time a passenger can board at Paddington (H&C Line), + return (circle_line_depart, arrive_kings_cross) as datetimes, or None if + no service is found before midnight. + + The caller is responsible for adding any walk time from the GWR platform + before passing *earliest_board*. + """ + timetable = _get_timetable()[_day_type(earliest_board.weekday())] + board_secs = ( + earliest_board.hour * 3600 + + earliest_board.minute * 60 + + earliest_board.second + ) + midnight = earliest_board.replace(hour=0, minute=0, second=0, microsecond=0) + for pad_secs, kxp_secs in timetable: + if pad_secs >= board_secs: + return midnight + timedelta(seconds=pad_secs), midnight + timedelta(seconds=kxp_secs) + return None diff --git a/templates/results.html b/templates/results.html index 5426995..403d01d 100644 --- a/templates/results.html +++ b/templates/results.html @@ -106,7 +106,7 @@ Paddington GWR Fare Transfer - Depart St Pancras + Depart STP {{ destination }} ES Std Total @@ -159,6 +159,7 @@ {{ row.arrive_destination }} (CET) + {% if row.eurostar_duration %}
({{ row.eurostar_duration }}){% endif %} {% if row.eurostar_price is not none %} @@ -191,6 +192,7 @@ {{ row.arrive_destination }} (CET) + {% if row.eurostar_duration %}
({{ row.eurostar_duration }}){% endif %} {% if row.eurostar_price is not none %} diff --git a/trip_planner.py b/trip_planner.py index e732b93..5a0b718 100644 --- a/trip_planner.py +++ b/trip_planner.py @@ -3,17 +3,16 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination t """ from datetime import datetime, timedelta, time as _time +import circle_line + MIN_CONNECTION_MINUTES = 50 MAX_CONNECTION_MINUTES = 110 MAX_GWR_MINUTES = 110 DATE_FMT = '%Y-%m-%d' TIME_FMT = '%H:%M' -# Circle Line: Paddington (H&C) → Kings Cross St Pancras -CIRCLE_LINE_MINUTES_PAST = [8, 18, 28, 38, 48, 58] # departures past each hour -CIRCLE_LINE_JOURNEY_MINUTES = 11 -PAD_WALK_TO_UNDERGROUND_MINUTES = 8 # GWR platform → Paddington Underground -KX_WALK_TO_CHECKIN_MINUTES = 10 # Kings Cross St Pancras platform → St Pancras check-in +PAD_WALK_TO_UNDERGROUND_MINUTES = 7 # GWR platform → Paddington (H&C Line) platform +KX_WALK_TO_CHECKIN_MINUTES = 8 # King's Cross St Pancras platform → St Pancras check-in # Bristol Temple Meads → London Paddington walk-on single fares. @@ -51,26 +50,22 @@ def _parse_dt(date: str, time: str) -> datetime: return datetime.strptime(f"{date} {time}", f"{DATE_FMT} {TIME_FMT}") -def _next_circle_line(arrive_paddington: datetime) -> tuple[datetime, datetime]: +def _circle_line_times(arrive_paddington: datetime) -> tuple[str, str] | tuple[None, None]: """ - Given GWR arrival at Paddington, return (circle_line_depart, arrive_checkin). + Given GWR arrival at Paddington, return (circle_line_depart_str, arrive_checkin_str). - Walk 10 min to Paddington Underground, catch next Circle Line at :08/:18/:28/:38/:48/:58, - 11 min journey to Kings Cross St Pancras, 10 min walk to St Pancras check-in. + Adds PAD_WALK_TO_UNDERGROUND_MINUTES to get the earliest boarding time, looks up + the next real Circle Line service from the timetable, then adds KX_WALK_TO_CHECKIN_MINUTES + to the Kings Cross arrival to give an estimated St Pancras check-in time. + Returns (None, None) if no service is found. """ earliest_board = arrive_paddington + timedelta(minutes=PAD_WALK_TO_UNDERGROUND_MINUTES) - minute = earliest_board.minute - depart_minute = next((m for m in CIRCLE_LINE_MINUTES_PAST if m >= minute), None) - if depart_minute is None: - circle_depart = (earliest_board + timedelta(hours=1)).replace( - minute=CIRCLE_LINE_MINUTES_PAST[0], second=0, microsecond=0 - ) - else: - circle_depart = earliest_board.replace(minute=depart_minute, second=0, microsecond=0) - arrive_checkin = circle_depart + timedelta( - minutes=CIRCLE_LINE_JOURNEY_MINUTES + KX_WALK_TO_CHECKIN_MINUTES - ) - return circle_depart, arrive_checkin + result = circle_line.next_service(earliest_board) + if result is None: + return None, None + circle_depart, arrive_kx = result + arrive_checkin = arrive_kx + timedelta(minutes=KX_WALK_TO_CHECKIN_MINUTES) + return circle_depart.strftime(TIME_FMT), arrive_checkin.strftime(TIME_FMT) def _fmt_duration(minutes: int) -> str: @@ -149,8 +144,10 @@ def combine_trips( dep_bri, arr_pad, dep_stp, arr_dest = connection total_mins = int((arr_dest - dep_bri).total_seconds() / 60) + # Destination time is CET/CEST, departure is GMT/BST; Europe is always 1h ahead. + eurostar_mins = int((arr_dest - dep_stp).total_seconds() / 60) - 60 ticket = cheapest_gwr_ticket(gwr['depart_bristol'], travel_date) - circle_depart, arrive_checkin = _next_circle_line(arr_pad) + circle_depart, arrive_checkin = _circle_line_times(arr_pad) trips.append({ 'depart_bristol': gwr['depart_bristol'], 'arrive_paddington': gwr['arrive_paddington'], @@ -158,10 +155,11 @@ def combine_trips( 'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)), 'connection_minutes': int((dep_stp - arr_pad).total_seconds() / 60), 'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)), - 'circle_line_depart': circle_depart.strftime(TIME_FMT), - 'circle_arrive_checkin': arrive_checkin.strftime(TIME_FMT), + 'circle_line_depart': circle_depart, + 'circle_arrive_checkin': arrive_checkin, 'depart_st_pancras': es['depart_st_pancras'], 'arrive_destination': es['arrive_destination'], + 'eurostar_duration': _fmt_duration(eurostar_mins), 'train_number': es.get('train_number', ''), 'total_duration': _fmt_duration(total_mins), 'total_minutes': total_mins, @@ -198,6 +196,11 @@ def find_unreachable_morning_eurostars( ): continue - unreachable.append(es) + dep_stp = _parse_dt(travel_date, es['depart_st_pancras']) + arr_dest = _parse_dt(travel_date, es['arrive_destination']) + if arr_dest < dep_stp: + arr_dest += timedelta(days=1) + eurostar_mins = int((arr_dest - dep_stp).total_seconds() / 60) - 60 + unreachable.append({**es, 'eurostar_duration': _fmt_duration(eurostar_mins)}) return sorted(unreachable, key=lambda s: s['depart_st_pancras']) From cd37f0619b8246badc48e7a723ad6d81943a0e49 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 13:37:44 +0100 Subject: [PATCH 3/7] Add config system with TFL_DATA_DIR and CACHE_DIR config/default.py holds defaults using ~/lib/data/tfl (expanduser, so safe to commit). app.py loads it then overlays config/local.py if present, pushing paths into cache and circle_line modules. config/local.py is gitignored for machine-specific absolute paths (e.g. on the server where www-data runs). Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + app.py | 12 +++++++++++- cache.py | 2 +- circle_line.py | 2 +- config/__init__.py | 0 config/default.py | 10 ++++++++++ 6 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 config/__init__.py create mode 100644 config/default.py diff --git a/.gitignore b/.gitignore index 8057dbb..b86fb2a 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ venv/ # App cache/ +config/local.py # Pytest .pytest_cache/ diff --git a/app.py b/app.py index 2d2a2e2..04c2adc 100644 --- a/app.py +++ b/app.py @@ -3,6 +3,7 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination t """ from flask import Flask, render_template, redirect, url_for, request from datetime import date, timedelta +import os from cache import get_cached, set_cached import scraper.eurostar as eurostar_scraper @@ -22,7 +23,16 @@ RTT_BRISTOL_URL = ( "?stp=WVS&show=pax-calls&order=wtt" ) -app = Flask(__name__) +app = Flask(__name__, instance_relative_config=False) +app.config.from_object('config.default') +_local = os.path.join(os.path.dirname(__file__), 'config', 'local.py') +if os.path.exists(_local): + app.config.from_pyfile(_local) + +import cache +import circle_line +cache.CACHE_DIR = app.config['CACHE_DIR'] +circle_line._TXC_XML = app.config['CIRCLE_LINE_XML'] DESTINATIONS = { 'paris': 'Paris Gare du Nord', diff --git a/cache.py b/cache.py index 2e82500..31fad05 100644 --- a/cache.py +++ b/cache.py @@ -2,7 +2,7 @@ import json import os import time -CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache') +from config.default import CACHE_DIR # overridden by app config after import def _cache_path(key: str) -> str: diff --git a/circle_line.py b/circle_line.py index 1fee867..5c942f9 100644 --- a/circle_line.py +++ b/circle_line.py @@ -11,7 +11,7 @@ from datetime import datetime, timedelta _PAD_STOP = '9400ZZLUPAH1' # Paddington (H&C Line) _KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras -_TXC_XML = os.path.join(os.path.dirname(__file__), 'output_txc_01CIR_.xml') +from config.default import CIRCLE_LINE_XML as _TXC_XML # overridden by app config after import _NS = {'t': 'http://www.transxchange.org.uk/'} # Populated on first call to next_service(); maps day-type -> sorted list of diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config/default.py b/config/default.py new file mode 100644 index 0000000..aff1369 --- /dev/null +++ b/config/default.py @@ -0,0 +1,10 @@ +import os + +# Directory containing TfL reference data (TransXChange XML files etc.) +TFL_DATA_DIR = os.path.expanduser('~/lib/data/tfl') + +# Directory for caching scraped train times +CACHE_DIR = os.path.expanduser('~/lib/data/tfl/cache') + +# TransXChange timetable file for the Circle Line +CIRCLE_LINE_XML = os.path.join(TFL_DATA_DIR, 'output_txc_01CIR_.xml') From 05eec29b7d60b5fd0aa268979a162c347bf5f2dd Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 14:12:54 +0100 Subject: [PATCH 4/7] Show Eurostar seat availability and no-prices notice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fetch_prices now returns {'price': ..., 'seats': ...} per departure. Seat count (labelled "N at this price") is shown below the fare — it reflects price-band depth rather than total remaining seats. A yellow notice is shown when the API returns journeys but all prices are null (tickets not yet on sale). Co-Authored-By: Claude Sonnet 4.6 --- app.py | 21 ++++++++++++++------- scraper/eurostar.py | 15 +++++++++------ templates/results.html | 13 ++++++++++++- tests/test_app.py | 6 +++--- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index 04c2adc..0ca44c7 100644 --- a/app.py +++ b/app.py @@ -146,14 +146,18 @@ def results(slug, travel_date): trips = combine_trips(gwr_trains, eurostar_trains, travel_date, min_connection, max_connection) - # Annotate each trip with Eurostar Standard price and total cost + # Annotate each trip with Eurostar Standard price, seats, and total cost for trip in trips: - es_price = eurostar_prices.get(trip['depart_st_pancras']) + es = eurostar_prices.get(trip['depart_st_pancras'], {}) + es_price = es.get('price') trip['eurostar_price'] = es_price - if es_price is not None: - trip['total_price'] = trip['ticket_price'] + es_price - else: - trip['total_price'] = None + trip['eurostar_seats'] = es.get('seats') + trip['total_price'] = trip['ticket_price'] + es_price if es_price is not None else None + + # If the API returned journeys but every price is None, tickets aren't on sale yet + no_prices_note = None + if eurostar_prices and all(v.get('price') is None for v in eurostar_prices.values()): + no_prices_note = 'Eurostar prices not yet available — tickets may not be on sale yet.' unreachable_morning_services = find_unreachable_morning_eurostars( gwr_trains, @@ -163,7 +167,9 @@ def results(slug, travel_date): max_connection, ) for svc in unreachable_morning_services: - svc['eurostar_price'] = eurostar_prices.get(svc['depart_st_pancras']) + es = eurostar_prices.get(svc['depart_st_pancras'], {}) + svc['eurostar_price'] = es.get('price') + svc['eurostar_seats'] = es.get('seats') result_rows = sorted( [{'row_type': 'trip', **trip} for trip in trips] @@ -196,6 +202,7 @@ def results(slug, travel_date): eurostar_count=len(eurostar_trains), from_cache=from_cache, error=error, + no_prices_note=no_prices_note, eurostar_url=eurostar_url, rtt_url=rtt_url, rtt_bristol_url=rtt_bristol_url, diff --git a/scraper/eurostar.py b/scraper/eurostar.py index 4bbdfd9..f12f283 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -162,12 +162,13 @@ def _generate_cid() -> str: return 'SRCH-' + ''.join(random.choices(chars, k=22)) -def fetch_prices(destination: str, travel_date: str) -> dict[str, int | None]: +def fetch_prices(destination: str, travel_date: str) -> dict[str, dict]: """ - Return Eurostar Standard prices for every departure on travel_date. + Return Eurostar Standard price and seat availability for every departure on travel_date. - Result: {depart_st_pancras: price_gbp_int_or_None} - None means the class is sold out or unavailable for that departure. + Result: {depart_st_pancras: {'price': int_or_None, 'seats': int_or_None}} + price is None when unavailable/not yet on sale; seats is the number of + Standard seats currently available for sale. """ dest_id = DESTINATION_STATION_IDS[destination] headers = { @@ -196,16 +197,18 @@ def fetch_prices(destination: str, travel_date: str) -> dict[str, int | None]: resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) resp.raise_for_status() data = resp.json() - prices: dict[str, int | None] = {} + prices: dict[str, dict] = {} journeys = data['data']['journeySearch']['outbound']['journeys'] for journey in journeys: dep = journey['timing']['departureTime'] price = None + seats = None for fare in journey['fares']: if fare['classOfService']['code'] == 'STANDARD': p = fare.get('prices') if p and p.get('displayPrice'): price = int(p['displayPrice']) + seats = fare.get('seats') break - prices[dep] = price + prices[dep] = {'price': price, 'seats': seats} return prices diff --git a/templates/results.html b/templates/results.html index 403d01d..634ede8 100644 --- a/templates/results.html +++ b/templates/results.html @@ -95,6 +95,11 @@ Warning: {{ error }} {% endif %} + {% if no_prices_note %} +
+ {{ no_prices_note }} +
+ {% endif %} {% if trips or unreachable_morning_services %} @@ -164,6 +169,9 @@ {% if row.eurostar_price is not none %} £{{ row.eurostar_price }} + {% if row.eurostar_seats is not none %} +
{{ row.eurostar_seats }} at this price + {% endif %} {% else %} {% endif %} @@ -197,12 +205,15 @@ {% if row.eurostar_price is not none %} £{{ row.eurostar_price }} + {% if row.eurostar_seats is not none %} +
{{ row.eurostar_seats }} at this price + {% endif %} {% else %} {% endif %} - Too early + Too early {% endif %} diff --git a/tests/test_app.py b/tests/test_app.py index d4de5c2..8806137 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -214,13 +214,13 @@ def test_results_shows_unreachable_morning_eurostar_services(monkeypatch): assert '2 Eurostar services unavailable from Bristol' in html assert '09:30' in html assert 'ES 9001' in html - assert 'Unavailable from Bristol' in html + assert 'Too early' in html assert html.index('09:30') < html.index('10:15') def test_results_shows_eurostar_price_and_total(monkeypatch): # 07:00 on Friday 2026-04-10 → Anytime £138.70 (weekday, 05:05–08:25 window) - _stub_data(monkeypatch, prices={'10:01': 59}) + _stub_data(monkeypatch, prices={'10:01': {'price': 59, 'seats': 42}}) client = _client() resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') @@ -268,4 +268,4 @@ def test_results_can_show_only_unreachable_morning_services(monkeypatch): assert 'No valid journeys found.' not in html assert '1 Eurostar service unavailable from Bristol' in html assert '09:30' in html - assert 'Unavailable from Bristol' in html + assert 'Too early' in html From c22a3ea0fc97f469c98137d94a0f94a4fed5ac3e Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 14:46:22 +0100 Subject: [PATCH 5/7] Consolidate to single GraphQL call; show indirect trains; fix price formatting Replace two-step Eurostar fetch (HTML timetable + GraphQL prices) with a single GraphQL call that returns timing, train numbers, prices, and seats. Support indirect services (e.g. Amsterdam) by joining multi-leg train numbers with ' + ' and keeping the earliest arrival per departure time. Fix half-pound prices by casting displayPrice to float instead of int. Wrap each train number segment in white-space:nowrap so 'ES 9132 + ER 9363' never breaks mid-segment. Format Eurostar prices with two decimal places. Co-Authored-By: Claude Sonnet 4.6 --- app.py | 30 ++--- scraper/eurostar.py | 224 ++++++++++++--------------------- templates/results.html | 8 +- tests/test_app.py | 99 +++------------ tests/test_eurostar_scraper.py | 141 ++++++++++----------- 5 files changed, 182 insertions(+), 320 deletions(-) diff --git a/app.py b/app.py index 0ca44c7..6425e79 100644 --- a/app.py +++ b/app.py @@ -9,7 +9,6 @@ from cache import get_cached, set_cached import scraper.eurostar as eurostar_scraper import scraper.realtime_trains as rtt_scraper from trip_planner import combine_trips, find_unreachable_morning_eurostars -from scraper.eurostar import fetch_prices as fetch_eurostar_prices RTT_PADDINGTON_URL = ( "https://www.realtimetrains.co.uk/search/detailed/" @@ -103,12 +102,10 @@ def results(slug, travel_date): rtt_cache_key = f"rtt_{travel_date}" es_cache_key = f"eurostar_{travel_date}_{destination}" - prices_cache_key = f"eurostar_prices_{travel_date}_{destination}" cached_rtt = get_cached(rtt_cache_key) - cached_es = get_cached(es_cache_key) - cached_prices = get_cached(prices_cache_key, ttl=24 * 3600) - from_cache = bool(cached_rtt and cached_es and cached_prices) + cached_es = get_cached(es_cache_key, ttl=24 * 3600) + from_cache = bool(cached_rtt and cached_es) error = None @@ -123,26 +120,21 @@ def results(slug, travel_date): error = f"Could not fetch GWR trains: {e}" if cached_es: - eurostar_trains = cached_es + eurostar_services = cached_es else: try: - eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent) - set_cached(es_cache_key, eurostar_trains) + eurostar_services = eurostar_scraper.fetch(destination, travel_date) + set_cached(es_cache_key, eurostar_services) except Exception as e: - eurostar_trains = [] + eurostar_services = [] msg = f"Could not fetch Eurostar times: {e}" error = f"{error}; {msg}" if error else msg - if cached_prices: - eurostar_prices = cached_prices - else: - try: - eurostar_prices = fetch_eurostar_prices(destination, travel_date) - set_cached(prices_cache_key, eurostar_prices) - except Exception as e: - eurostar_prices = {} - msg = f"Could not fetch Eurostar prices: {e}" - error = f"{error}; {msg}" if error else msg + eurostar_trains = eurostar_services + eurostar_prices = { + s['depart_st_pancras']: {'price': s.get('price'), 'seats': s.get('seats')} + for s in eurostar_services + } trips = combine_trips(gwr_trains, eurostar_trains, travel_date, min_connection, max_connection) diff --git a/scraper/eurostar.py b/scraper/eurostar.py index f12f283..ca489d9 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -1,29 +1,14 @@ """ -Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API. +Fetch Eurostar timetable, prices, and seat availability via the GraphQL API. -Timetable: route-specific pages are Next.js SSR — all departure data is -embedded in ', html, re.DOTALL) - if not m: - return [] - data = json.loads(m.group(1)) - departures = data['props']['pageProps']['pageData']['liveDepartures'] - services = [] - for dep in departures: - dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime']) - arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime']) - if dep_time and arr_time: - carrier = dep.get('model', {}).get('carrier', 'ES') - number = dep.get('model', {}).get('trainNumber', '') - services.append({ - 'depart_st_pancras': dep_time, - 'arrive_destination': arr_time, - 'destination': destination, - 'train_number': f"{carrier} {number}" if number else '', - }) - return sorted(services, key=lambda s: s['depart_st_pancras']) - - -def fetch(destination: str, travel_date: str, - user_agent: str = DEFAULT_UA) -> list[dict]: - url = timetable_url(destination) - headers = { - 'User-Agent': user_agent, - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Language': 'en-GB,en;q=0.9', - } - with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client: - r = client.get(url, params={'date': travel_date}) - r.raise_for_status() - return _parse(r.text, destination) - - -# --------------------------------------------------------------------------- -# Price fetching via site-api.eurostar.com GraphQL -# --------------------------------------------------------------------------- - _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' -# Minimal query requesting only timing + Eurostar Standard fare price. -# Variable names and inline argument names match what the site sends so the +# Query requesting timing, train identity, and Standard fare price + seats. +# Variable names and argument names match the site's own query so the # server-side query planner sees a familiar shape. -_GQL_PRICES = ( +_GQL_QUERY = ( "query NewBookingSearch(" "$origin:String!,$destination:String!,$outbound:String!," "$currency:Currency!,$adult:Int," @@ -141,74 +54,103 @@ _GQL_PRICES = ( " hideExternalCarrierTrains:true" " hideDirectExternalCarrierTrains:true" "){" - "timing{departureTime:departs __typename}" + "timing{departureTime:departs arrivalTime:arrives}" "fares(filteredClassesOfService:$filteredClassesOfService){" - "classOfService{code __typename}" - "prices{displayPrice __typename}" - "seats __typename" + "classOfService{code}" + "prices{displayPrice}" + "seats " + "legs{serviceName serviceType{code}}" "}" - "__typename" "}" - "__typename" "}" - "__typename" "}" "}" ) +def search_url(destination: str, travel_date: str) -> str: + dest_id = DESTINATION_STATION_IDS[destination] + return ( + f'https://www.eurostar.com/search/uk-en' + f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}' + ) + + def _generate_cid() -> str: chars = string.ascii_letters + string.digits return 'SRCH-' + ''.join(random.choices(chars, k=22)) -def fetch_prices(destination: str, travel_date: str) -> dict[str, dict]: +def _parse_graphql(data: dict, destination: str) -> list[dict]: """ - Return Eurostar Standard price and seat availability for every departure on travel_date. + Parse a NewBookingSearch GraphQL response into a list of service dicts. - Result: {depart_st_pancras: {'price': int_or_None, 'seats': int_or_None}} - price is None when unavailable/not yet on sale; seats is the number of - Standard seats currently available for sale. + Each dict contains: depart_st_pancras, arrive_destination, destination, + train_number, price (float or None), seats (int or None). + + The same St Pancras departure can appear multiple times (different + connecting trains); we keep the entry with the earliest arrival. + Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329'). + """ + best: dict[str, dict] = {} + journeys = data['data']['journeySearch']['outbound']['journeys'] + for journey in journeys: + dep = journey['timing']['departureTime'] + arr = journey['timing']['arrivalTime'] + for fare in journey['fares']: + if fare['classOfService']['code'] == 'STANDARD': + p = fare.get('prices') + price = float(p['displayPrice']) if p and p.get('displayPrice') else None + seats = fare.get('seats') + legs = fare.get('legs') or [] + train_number = ' + '.join( + f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}" + for leg in legs if leg.get('serviceName') + ) + if dep not in best or arr < best[dep]['arrive_destination']: + best[dep] = { + 'depart_st_pancras': dep, + 'arrive_destination': arr, + 'destination': destination, + 'train_number': train_number, + 'price': price, + 'seats': seats, + } + break + return sorted(best.values(), key=lambda s: s['depart_st_pancras']) + + +def fetch(destination: str, travel_date: str) -> list[dict]: + """ + Return all Eurostar services for destination on travel_date. + + Each dict contains timetable info (depart_st_pancras, arrive_destination, + train_number) plus pricing (price, seats) from a single GraphQL call. """ dest_id = DESTINATION_STATION_IDS[destination] headers = { - 'User-Agent': DEFAULT_UA, - 'Content-Type': 'application/json', - 'Accept': '*/*', - 'Accept-Language': 'en-GB', - 'Referer': 'https://www.eurostar.com/', - 'x-platform': 'web', - 'x-market-code': 'uk', - 'x-source-url': 'search-app/', - 'cid': _generate_cid(), + 'User-Agent': DEFAULT_UA, + 'Content-Type': 'application/json', + 'Accept': '*/*', + 'Accept-Language':'en-GB', + 'Referer': 'https://www.eurostar.com/', + 'x-platform': 'web', + 'x-market-code': 'uk', + 'x-source-url': 'search-app/', + 'cid': _generate_cid(), } payload = { 'operationName': 'NewBookingSearch', 'variables': { - 'origin': ORIGIN_STATION_ID, - 'destination': dest_id, - 'outbound': travel_date, - 'currency': 'GBP', - 'adult': 1, - 'filteredClassesOfService': ['STANDARD'], + 'origin': ORIGIN_STATION_ID, + 'destination': dest_id, + 'outbound': travel_date, + 'currency': 'GBP', + 'adult': 1, + 'filteredClassesOfService': ['STANDARD'], }, - 'query': _GQL_PRICES, + 'query': _GQL_QUERY, } resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) resp.raise_for_status() - data = resp.json() - prices: dict[str, dict] = {} - journeys = data['data']['journeySearch']['outbound']['journeys'] - for journey in journeys: - dep = journey['timing']['departureTime'] - price = None - seats = None - for fare in journey['fares']: - if fare['classOfService']['code'] == 'STANDARD': - p = fare.get('prices') - if p and p.get('displayPrice'): - price = int(p['displayPrice']) - seats = fare.get('seats') - break - prices[dep] = {'price': price, 'seats': seats} - return prices + return _parse_graphql(resp.json(), destination) diff --git a/templates/results.html b/templates/results.html index 634ede8..e4578ec 100644 --- a/templates/results.html +++ b/templates/results.html @@ -159,7 +159,7 @@ {{ row.depart_st_pancras }} - {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% if row.train_number %}
{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} {{ row.arrive_destination }} @@ -168,7 +168,7 @@ {% if row.eurostar_price is not none %} - £{{ row.eurostar_price }} + £{{ "%.2f"|format(row.eurostar_price) }} {% if row.eurostar_seats is not none %}
{{ row.eurostar_seats }} at this price {% endif %} @@ -195,7 +195,7 @@ n/a {{ row.depart_st_pancras }} - {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% if row.train_number %}
{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} {{ row.arrive_destination }} @@ -204,7 +204,7 @@ {% if row.eurostar_price is not none %} - £{{ row.eurostar_price }} + £{{ "%.2f"|format(row.eurostar_price) }} {% if row.eurostar_seats is not none %}
{{ row.eurostar_seats }} at this price {% endif %} diff --git a/tests/test_app.py b/tests/test_app.py index 8806137..fd06a8d 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -16,25 +16,21 @@ def _stub_data(monkeypatch, prices=None): {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, ], ) + p = (prices or {}).get('10:01', {}) monkeypatch.setattr( app_module.eurostar_scraper, 'fetch', - lambda destination, travel_date, user_agent: [ + lambda destination, travel_date: [ { 'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': destination, 'train_number': 'ES 9014', + 'price': p.get('price') if isinstance(p, dict) else None, + 'seats': p.get('seats') if isinstance(p, dict) else None, }, ], ) - monkeypatch.setattr( - app_module.eurostar_scraper, - 'timetable_url', - lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}', - ) - _prices = prices if prices is not None else {} - monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: _prices) def test_index_shows_fixed_departure_and_destination_radios(): @@ -96,7 +92,6 @@ def test_results_title_and_social_meta_include_destination(monkeypatch): def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypatch): monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {}) monkeypatch.setattr( app_module.rtt_scraper, 'fetch', @@ -111,44 +106,14 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa monkeypatch.setattr( app_module.eurostar_scraper, 'fetch', - lambda destination, travel_date, user_agent: [ - { - 'depart_st_pancras': '09:30', - 'arrive_destination': '11:50', - 'destination': destination, - 'train_number': 'ES 1001', - }, - { - 'depart_st_pancras': '09:40', - 'arrive_destination': '12:00', - 'destination': destination, - 'train_number': 'ES 1002', - }, - { - 'depart_st_pancras': '09:50', - 'arrive_destination': '12:20', - 'destination': destination, - 'train_number': 'ES 1003', - }, - { - 'depart_st_pancras': '10:00', - 'arrive_destination': '12:35', - 'destination': destination, - 'train_number': 'ES 1004', - }, - { - 'depart_st_pancras': '10:10', - 'arrive_destination': '12:45', - 'destination': destination, - 'train_number': 'ES 1005', - }, + lambda destination, travel_date: [ + {'depart_st_pancras': '09:30', 'arrive_destination': '11:50', 'destination': destination, 'train_number': 'ES 1001', 'price': None, 'seats': None}, + {'depart_st_pancras': '09:40', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 1002', 'price': None, 'seats': None}, + {'depart_st_pancras': '09:50', 'arrive_destination': '12:20', 'destination': destination, 'train_number': 'ES 1003', 'price': None, 'seats': None}, + {'depart_st_pancras': '10:00', 'arrive_destination': '12:35', 'destination': destination, 'train_number': 'ES 1004', 'price': None, 'seats': None}, + {'depart_st_pancras': '10:10', 'arrive_destination': '12:45', 'destination': destination, 'train_number': 'ES 1005', 'price': None, 'seats': None}, ], ) - monkeypatch.setattr( - app_module.eurostar_scraper, - 'timetable_url', - lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}', - ) client = _client() resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') @@ -168,7 +133,6 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa def test_results_shows_unreachable_morning_eurostar_services(monkeypatch): monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {}) monkeypatch.setattr( app_module.rtt_scraper, 'fetch', @@ -179,32 +143,12 @@ def test_results_shows_unreachable_morning_eurostar_services(monkeypatch): monkeypatch.setattr( app_module.eurostar_scraper, 'fetch', - lambda destination, travel_date, user_agent: [ - { - 'depart_st_pancras': '09:30', - 'arrive_destination': '12:00', - 'destination': destination, - 'train_number': 'ES 9001', - }, - { - 'depart_st_pancras': '10:15', - 'arrive_destination': '13:40', - 'destination': destination, - 'train_number': 'ES 9002', - }, - { - 'depart_st_pancras': '12:30', - 'arrive_destination': '15:55', - 'destination': destination, - 'train_number': 'ES 9003', - }, + lambda destination, travel_date: [ + {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None}, + {'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': destination, 'train_number': 'ES 9002', 'price': None, 'seats': None}, + {'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': destination, 'train_number': 'ES 9003', 'price': None, 'seats': None}, ], ) - monkeypatch.setattr( - app_module.eurostar_scraper, - 'timetable_url', - lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}', - ) client = _client() resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') @@ -234,7 +178,6 @@ def test_results_shows_eurostar_price_and_total(monkeypatch): def test_results_can_show_only_unreachable_morning_services(monkeypatch): monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {}) monkeypatch.setattr( app_module.rtt_scraper, 'fetch', @@ -245,20 +188,10 @@ def test_results_can_show_only_unreachable_morning_services(monkeypatch): monkeypatch.setattr( app_module.eurostar_scraper, 'fetch', - lambda destination, travel_date, user_agent: [ - { - 'depart_st_pancras': '09:30', - 'arrive_destination': '12:00', - 'destination': destination, - 'train_number': 'ES 9001', - }, + lambda destination, travel_date: [ + {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None}, ], ) - monkeypatch.setattr( - app_module.eurostar_scraper, - 'timetable_url', - lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}', - ) client = _client() resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py index 1015b8a..ed82358 100644 --- a/tests/test_eurostar_scraper.py +++ b/tests/test_eurostar_scraper.py @@ -1,97 +1,92 @@ -import json import pytest -from scraper.eurostar import _hhmm, _parse, timetable_url +from scraper.eurostar import _parse_graphql, search_url -# --------------------------------------------------------------------------- -# _hhmm -# --------------------------------------------------------------------------- - -def test_hhmm_parses_datetime_string(): - assert _hhmm('2026-03-30 09:34:00') == '09:34' - -def test_hhmm_none_input(): - assert _hhmm(None) is None - -def test_hhmm_empty_string(): - assert _hhmm('') is None +def _gql_response(journeys: list) -> dict: + return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}} -# --------------------------------------------------------------------------- -# _parse -# --------------------------------------------------------------------------- - -def _make_next_data(departures: list) -> str: - data = { - 'props': { - 'pageProps': { - 'pageData': { - 'liveDepartures': departures - } - } - } - } - return f'' - - -def _departure(dep_dt: str, arr_dt: str) -> dict: +def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES') -> dict: return { - 'origin': {'model': {'scheduledDepartureDateTime': dep_dt}}, - 'destination': {'model': {'scheduledArrivalDateTime': arr_dt}}, + 'timing': {'departureTime': departs, 'arrivalTime': arrives}, + 'fares': [{ + 'classOfService': {'code': 'STANDARD'}, + 'prices': {'displayPrice': price}, + 'seats': seats, + 'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}] + if service_name else [], + }], } -def test_parse_single_departure(): - html = _make_next_data([_departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')]) - services = _parse(html, 'Paris Gare du Nord') +# --------------------------------------------------------------------------- +# _parse_graphql +# --------------------------------------------------------------------------- + +def test_parse_graphql_single_journey(): + data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')]) + services = _parse_graphql(data, 'Paris Gare du Nord') assert len(services) == 1 - assert services[0] == { - 'depart_st_pancras': '06:01', - 'arrive_destination': '09:34', - 'destination': 'Paris Gare du Nord', - 'train_number': '', - } + s = services[0] + assert s['depart_st_pancras'] == '09:31' + assert s['arrive_destination'] == '12:55' + assert s['destination'] == 'Paris Gare du Nord' + assert s['train_number'] == 'ES 9014' + assert s['price'] == 156.0 + assert s['seats'] == 37 -def test_parse_results_sorted_by_departure(): - html = _make_next_data([ - _departure('2026-03-30 10:00:00', '2026-03-30 13:00:00'), - _departure('2026-03-30 07:00:00', '2026-03-30 10:00:00'), +def test_parse_graphql_half_pound_price(): + data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')]) + services = _parse_graphql(data, 'Amsterdam Centraal') + assert services[0]['price'] == 192.5 + + +def test_parse_graphql_null_price(): + data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)]) + services = _parse_graphql(data, 'Amsterdam Centraal') + assert services[0]['price'] is None + assert services[0]['seats'] == 0 + + +def test_parse_graphql_sorted_by_departure(): + data = _gql_response([ + _journey('10:31', '13:55'), + _journey('07:31', '10:59'), ]) - services = _parse(html, 'Paris Gare du Nord') - assert services[0]['depart_st_pancras'] == '07:00' - assert services[1]['depart_st_pancras'] == '10:00' + services = _parse_graphql(data, 'Paris Gare du Nord') + assert services[0]['depart_st_pancras'] == '07:31' + assert services[1]['depart_st_pancras'] == '10:31' -def test_parse_skips_entries_with_missing_times(): - html = _make_next_data([ - _departure(None, '2026-03-30 09:34:00'), - _departure('2026-03-30 08:00:00', None), - _departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'), +def test_parse_graphql_deduplicates_same_departure_time(): + data = _gql_response([ + _journey('06:16', '11:09', price=None, seats=0), + _journey('06:16', '11:09', price=None, seats=0), + _journey('06:16', '11:09', price=None, seats=0), ]) - services = _parse(html, 'Paris Gare du Nord') + services = _parse_graphql(data, 'Amsterdam Centraal') assert len(services) == 1 - assert services[0]['depart_st_pancras'] == '09:00' -def test_parse_no_next_data_returns_empty(): - assert _parse('nothing here', 'Paris Gare du Nord') == [] +def test_parse_graphql_no_legs_gives_empty_train_number(): + data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')]) + services = _parse_graphql(data, 'Paris Gare du Nord') + assert services[0]['train_number'] == '' -def test_parse_empty_departures(): - html = _make_next_data([]) - assert _parse(html, 'Paris Gare du Nord') == [] +def test_parse_graphql_empty_journeys(): + data = _gql_response([]) + assert _parse_graphql(data, 'Paris Gare du Nord') == [] -def test_timetable_url_uses_station_id_table(): - assert timetable_url('Paris Gare du Nord') == ( - 'https://www.eurostar.com/uk-en/travel-info/timetable/' - '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord' - ) - - -def test_timetable_url_slugifies_destination_name(): - assert timetable_url('Rotterdam Centraal') == ( - 'https://www.eurostar.com/uk-en/travel-info/timetable/' - '7015400/8400530/london-st-pancras-intl/rotterdam-centraal' +# --------------------------------------------------------------------------- +# search_url +# --------------------------------------------------------------------------- + +def test_search_url(): + url = search_url('Paris Gare du Nord', '2026-04-10') + assert url == ( + 'https://www.eurostar.com/search/uk-en' + '?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10' ) From e6f310f517bc34439a401b89c276111440d0f154 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 15:02:01 +0100 Subject: [PATCH 6/7] Add Cologne Hbf destination; use coin emoji for cheapest journey Co-Authored-By: Claude Sonnet 4.6 --- app.py | 1 + scraper/eurostar.py | 1 + templates/results.html | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app.py b/app.py index 6425e79..a5a60cb 100644 --- a/app.py +++ b/app.py @@ -39,6 +39,7 @@ DESTINATIONS = { 'lille': 'Lille Europe', 'amsterdam': 'Amsterdam Centraal', 'rotterdam': 'Rotterdam Centraal', + 'cologne': 'Cologne Hbf', } diff --git a/scraper/eurostar.py b/scraper/eurostar.py index ca489d9..8b9a881 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -24,6 +24,7 @@ DESTINATION_STATION_IDS = { 'Lille Europe': '8722326', 'Amsterdam Centraal': '8400058', 'Rotterdam Centraal': '8400530', + 'Cologne Hbf': '8015458', } _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' diff --git a/templates/results.html b/templates/results.html index e4578ec..6d3b2b9 100644 --- a/templates/results.html +++ b/templates/results.html @@ -185,7 +185,7 @@ {{ row.total_duration }} {% endif %} {% if row.total_price is not none %} -
£{{ "%.2f"|format(row.total_price) }}{% if min_price is defined and max_price is defined %}{% if row.total_price <= min_price + 10 %} 💰{% elif row.total_price >= max_price - 10 %} 💸{% endif %}{% endif %} +
£{{ "%.2f"|format(row.total_price) }}{% if min_price is defined and max_price is defined %}{% if row.total_price <= min_price + 10 %} 🪙{% elif row.total_price >= max_price - 10 %} 💸{% endif %}{% endif %} {% endif %} {% else %} From 71be0dd8cf867312e6cd4a21e93588309c5c684d Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 4 Apr 2026 15:39:04 +0100 Subject: [PATCH 7/7] Move inline styles to CSS classes; update README Extract repeated inline styles from templates into named CSS classes in base.html: layout helpers, buttons, form groups, alert boxes, results table rules, row highlight classes, typography utilities, and empty-state styles. Remove the per-page diff --git a/templates/index.html b/templates/index.html index 306fb90..8bdbb51 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,9 +1,9 @@ {% extends "base.html" %} {% block content %}
-

Plan your journey

+

Plan your journey

-
+
Departure point
Bristol Temple Meads @@ -11,7 +11,7 @@
-
+
Eurostar destination
{% for slug, name in destinations.items() %} @@ -33,7 +33,7 @@
-
+
@@ -42,7 +42,7 @@ class="form-control">
-
+
@@ -53,7 +53,7 @@
-
+
@@ -64,9 +64,7 @@
- diff --git a/templates/results.html b/templates/results.html index 6d3b2b9..8389b73 100644 --- a/templates/results.html +++ b/templates/results.html @@ -5,31 +5,24 @@ {% block twitter_title %}Bristol to {{ destination }} via Eurostar{% endblock %} {% block twitter_description %}Train options from Bristol Temple Meads to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} {% block content %} - -

+

-

+

Bristol Temple Meads → {{ destination }}

-
+
← Prev + class="btn-nav">← Prev {{ travel_date_display }} Next → + class="btn-nav">Next →
-
-
Switch destination for {{ travel_date_display }}
+
+
{% for destination_slug, destination_name in destinations.items() %} {% if destination_slug == slug %} @@ -43,26 +36,26 @@ {% endfor %}
-
+
-
-