Consolidate to single GraphQL call; show indirect trains; fix price formatting

Replace two-step Eurostar fetch (HTML timetable + GraphQL prices) with a
single GraphQL call that returns timing, train numbers, prices, and seats.
Support indirect services (e.g. Amsterdam) by joining multi-leg train numbers
with ' + ' and keeping the earliest arrival per departure time.
Fix half-pound prices by casting displayPrice to float instead of int.
Wrap each train number segment in white-space:nowrap so 'ES 9132 + ER 9363'
never breaks mid-segment.
Format Eurostar prices with two decimal places.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-04-04 14:46:22 +01:00
parent 05eec29b7d
commit c22a3ea0fc
5 changed files with 182 additions and 320 deletions

30
app.py
View file

@ -9,7 +9,6 @@ from cache import get_cached, set_cached
import scraper.eurostar as eurostar_scraper import scraper.eurostar as eurostar_scraper
import scraper.realtime_trains as rtt_scraper import scraper.realtime_trains as rtt_scraper
from trip_planner import combine_trips, find_unreachable_morning_eurostars from trip_planner import combine_trips, find_unreachable_morning_eurostars
from scraper.eurostar import fetch_prices as fetch_eurostar_prices
RTT_PADDINGTON_URL = ( RTT_PADDINGTON_URL = (
"https://www.realtimetrains.co.uk/search/detailed/" "https://www.realtimetrains.co.uk/search/detailed/"
@ -103,12 +102,10 @@ def results(slug, travel_date):
rtt_cache_key = f"rtt_{travel_date}" rtt_cache_key = f"rtt_{travel_date}"
es_cache_key = f"eurostar_{travel_date}_{destination}" es_cache_key = f"eurostar_{travel_date}_{destination}"
prices_cache_key = f"eurostar_prices_{travel_date}_{destination}"
cached_rtt = get_cached(rtt_cache_key) cached_rtt = get_cached(rtt_cache_key)
cached_es = get_cached(es_cache_key) cached_es = get_cached(es_cache_key, ttl=24 * 3600)
cached_prices = get_cached(prices_cache_key, ttl=24 * 3600) from_cache = bool(cached_rtt and cached_es)
from_cache = bool(cached_rtt and cached_es and cached_prices)
error = None error = None
@ -123,26 +120,21 @@ def results(slug, travel_date):
error = f"Could not fetch GWR trains: {e}" error = f"Could not fetch GWR trains: {e}"
if cached_es: if cached_es:
eurostar_trains = cached_es eurostar_services = cached_es
else: else:
try: try:
eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent) eurostar_services = eurostar_scraper.fetch(destination, travel_date)
set_cached(es_cache_key, eurostar_trains) set_cached(es_cache_key, eurostar_services)
except Exception as e: except Exception as e:
eurostar_trains = [] eurostar_services = []
msg = f"Could not fetch Eurostar times: {e}" msg = f"Could not fetch Eurostar times: {e}"
error = f"{error}; {msg}" if error else msg error = f"{error}; {msg}" if error else msg
if cached_prices: eurostar_trains = eurostar_services
eurostar_prices = cached_prices eurostar_prices = {
else: s['depart_st_pancras']: {'price': s.get('price'), 'seats': s.get('seats')}
try: for s in eurostar_services
eurostar_prices = fetch_eurostar_prices(destination, travel_date) }
set_cached(prices_cache_key, eurostar_prices)
except Exception as e:
eurostar_prices = {}
msg = f"Could not fetch Eurostar prices: {e}"
error = f"{error}; {msg}" if error else msg
trips = combine_trips(gwr_trains, eurostar_trains, travel_date, min_connection, max_connection) trips = combine_trips(gwr_trains, eurostar_trains, travel_date, min_connection, max_connection)

View file

@ -1,29 +1,14 @@
""" """
Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API. Fetch Eurostar timetable, prices, and seat availability via the GraphQL API.
Timetable: route-specific pages are Next.js SSR — all departure data is A single POST to https://site-api.eurostar.com/gateway (operationName
embedded in <script id="__NEXT_DATA__"> as JSON, so no browser / JS needed. NewBookingSearch) returns departure time, arrival time, train number,
Eurostar Standard fare price, and seats remaining at that price for every
URL pattern: service on the requested date.
https://www.eurostar.com/uk-en/travel-info/timetable/
{origin_id}/{dest_id}/{origin_slug}/{dest_slug}?date=YYYY-MM-DD
Data path: props.pageProps.pageData.liveDepartures[]
.origin.model.scheduledDepartureDateTime → London departure
.destination.model.scheduledArrivalDateTime → destination arrival
(already filtered to the requested stop, not the final stop)
Prices: POST https://site-api.eurostar.com/gateway (GraphQL, operationName
NewBookingSearch). The `journeys[].fares[]` array contains one entry per
class of service; we extract the Eurostar Standard (classOfService.code ==
"STANDARD") displayPrice for 1 adult, in GBP.
""" """
import json
import random import random
import re
import string import string
import httpx
import requests import requests
DEFAULT_UA = ( DEFAULT_UA = (
@ -32,93 +17,21 @@ DEFAULT_UA = (
) )
ORIGIN_STATION_ID = '7015400' ORIGIN_STATION_ID = '7015400'
ORIGIN_STATION_SLUG = 'london-st-pancras-intl'
TIMETABLE_BASE_URL = 'https://www.eurostar.com/uk-en/travel-info/timetable'
DESTINATION_STATION_IDS = { DESTINATION_STATION_IDS = {
'Paris Gare du Nord': '8727100', 'Paris Gare du Nord': '8727100',
'Brussels Midi': '8814001', 'Brussels Midi': '8814001',
'Lille Europe': '8722326', 'Lille Europe': '8722326',
'Amsterdam Centraal': '8400058', 'Amsterdam Centraal': '8400058',
'Rotterdam Centraal': '8400530', 'Rotterdam Centraal': '8400530',
} }
def _slugify_station_name(name: str) -> str:
return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
def search_url(destination: str, travel_date: str) -> str:
    """Build the eurostar.com search-results URL for one adult on travel_date.

    destination must be a key of DESTINATION_STATION_IDS; an unknown name
    raises KeyError. travel_date is interpolated unchanged into the
    `outbound` query parameter.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    return (
        'https://www.eurostar.com/search/uk-en'
        f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}'
    )
def timetable_url(destination: str) -> str:
    """Return the timetable page URL for destination, without a query string.

    The path is TIMETABLE_BASE_URL followed by the origin and destination
    station ids, then the origin slug and the slugified destination name.
    destination must be a key of DESTINATION_STATION_IDS (KeyError otherwise).
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    dest_slug = _slugify_station_name(destination)
    return (
        f'{TIMETABLE_BASE_URL}/{ORIGIN_STATION_ID}/{dest_id}/'
        f'{ORIGIN_STATION_SLUG}/{dest_slug}'
    )
def _hhmm(dt_str: str | None) -> str | None:
"""'2026-03-30 09:34:00''09:34'"""
if not dt_str:
return None
m = re.search(r'(\d{2}):(\d{2}):\d{2}$', dt_str)
return f"{m.group(1)}:{m.group(2)}" if m else None
def _parse(html: str, destination: str) -> list[dict]:
    """Parse a timetable page into service dicts sorted by departure time.

    The departures are read from the JSON embedded in the page's
    <script id="__NEXT_DATA__"> element, at
    props.pageProps.pageData.liveDepartures. An empty list is returned when
    that script element is absent. Departures lacking either a departure or
    an arrival time are skipped.

    Each dict has the keys depart_st_pancras, arrive_destination, destination
    and train_number ('' when the departure carries no train number).

    Raises KeyError if the embedded JSON does not have the expected shape,
    and json.JSONDecodeError if it is not valid JSON.
    """
    m = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
    if not m:
        return []
    data = json.loads(m.group(1))
    departures = data['props']['pageProps']['pageData']['liveDepartures']
    services = []
    for dep in departures:
        dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
        arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
        if dep_time and arr_time:
            # 'model' may be present but null; fall back to an empty dict so
            # the defaults below still apply instead of raising AttributeError.
            model = dep.get('model') or {}
            carrier = model.get('carrier', 'ES')
            number = model.get('trainNumber', '')
            services.append({
                'depart_st_pancras': dep_time,
                'arrive_destination': arr_time,
                'destination': destination,
                'train_number': f"{carrier} {number}" if number else '',
            })
    return sorted(services, key=lambda s: s['depart_st_pancras'])
def fetch(destination: str, travel_date: str,
          user_agent: str = DEFAULT_UA) -> list[dict]:
    """Download the timetable page for destination and parse its departures.

    travel_date is sent as the `date` query parameter and user_agent as the
    User-Agent header. Redirects are followed and the request times out
    after 20 seconds. A non-2xx response raises via raise_for_status().
    Returns whatever _parse extracts from the response body.
    """
    url = timetable_url(destination)
    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-GB,en;q=0.9',
    }
    with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client:
        r = client.get(url, params={'date': travel_date})
        r.raise_for_status()
        return _parse(r.text, destination)
# ---------------------------------------------------------------------------
# Price fetching via site-api.eurostar.com GraphQL
# ---------------------------------------------------------------------------
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway' _GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
# Minimal query requesting only timing + Eurostar Standard fare price. # Query requesting timing, train identity, and Standard fare price + seats.
# Variable names and inline argument names match what the site sends so the # Variable names and argument names match the site's own query so the
# server-side query planner sees a familiar shape. # server-side query planner sees a familiar shape.
_GQL_PRICES = ( _GQL_QUERY = (
"query NewBookingSearch(" "query NewBookingSearch("
"$origin:String!,$destination:String!,$outbound:String!," "$origin:String!,$destination:String!,$outbound:String!,"
"$currency:Currency!,$adult:Int," "$currency:Currency!,$adult:Int,"
@ -141,74 +54,103 @@ _GQL_PRICES = (
" hideExternalCarrierTrains:true" " hideExternalCarrierTrains:true"
" hideDirectExternalCarrierTrains:true" " hideDirectExternalCarrierTrains:true"
"){" "){"
"timing{departureTime:departs __typename}" "timing{departureTime:departs arrivalTime:arrives}"
"fares(filteredClassesOfService:$filteredClassesOfService){" "fares(filteredClassesOfService:$filteredClassesOfService){"
"classOfService{code __typename}" "classOfService{code}"
"prices{displayPrice __typename}" "prices{displayPrice}"
"seats __typename" "seats "
"legs{serviceName serviceType{code}}"
"}" "}"
"__typename"
"}" "}"
"__typename"
"}" "}"
"__typename"
"}" "}"
"}" "}"
) )
def search_url(destination: str, travel_date: str) -> str:
    """Build the eurostar.com search-results URL for one adult on travel_date.

    destination must be a key of DESTINATION_STATION_IDS; an unknown name
    raises KeyError. travel_date is interpolated unchanged into the
    `outbound` query parameter.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    return (
        'https://www.eurostar.com/search/uk-en'
        f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}'
    )
def _generate_cid() -> str: def _generate_cid() -> str:
chars = string.ascii_letters + string.digits chars = string.ascii_letters + string.digits
return 'SRCH-' + ''.join(random.choices(chars, k=22)) return 'SRCH-' + ''.join(random.choices(chars, k=22))
def fetch_prices(destination: str, travel_date: str) -> dict[str, dict]: def _parse_graphql(data: dict, destination: str) -> list[dict]:
""" """
Return Eurostar Standard price and seat availability for every departure on travel_date. Parse a NewBookingSearch GraphQL response into a list of service dicts.
Result: {depart_st_pancras: {'price': int_or_None, 'seats': int_or_None}} Each dict contains: depart_st_pancras, arrive_destination, destination,
price is None when unavailable/not yet on sale; seats is the number of train_number, price (float or None), seats (int or None).
Standard seats currently available for sale.
The same St Pancras departure can appear multiple times (different
connecting trains); we keep the entry with the earliest arrival.
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
"""
best: dict[str, dict] = {}
journeys = data['data']['journeySearch']['outbound']['journeys']
for journey in journeys:
dep = journey['timing']['departureTime']
arr = journey['timing']['arrivalTime']
for fare in journey['fares']:
if fare['classOfService']['code'] == 'STANDARD':
p = fare.get('prices')
price = float(p['displayPrice']) if p and p.get('displayPrice') else None
seats = fare.get('seats')
legs = fare.get('legs') or []
train_number = ' + '.join(
f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}"
for leg in legs if leg.get('serviceName')
)
if dep not in best or arr < best[dep]['arrive_destination']:
best[dep] = {
'depart_st_pancras': dep,
'arrive_destination': arr,
'destination': destination,
'train_number': train_number,
'price': price,
'seats': seats,
}
break
return sorted(best.values(), key=lambda s: s['depart_st_pancras'])
def fetch(destination: str, travel_date: str) -> list[dict]:
"""
Return all Eurostar services for destination on travel_date.
Each dict contains timetable info (depart_st_pancras, arrive_destination,
train_number) plus pricing (price, seats) from a single GraphQL call.
""" """
dest_id = DESTINATION_STATION_IDS[destination] dest_id = DESTINATION_STATION_IDS[destination]
headers = { headers = {
'User-Agent': DEFAULT_UA, 'User-Agent': DEFAULT_UA,
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Accept': '*/*', 'Accept': '*/*',
'Accept-Language': 'en-GB', 'Accept-Language':'en-GB',
'Referer': 'https://www.eurostar.com/', 'Referer': 'https://www.eurostar.com/',
'x-platform': 'web', 'x-platform': 'web',
'x-market-code': 'uk', 'x-market-code': 'uk',
'x-source-url': 'search-app/', 'x-source-url': 'search-app/',
'cid': _generate_cid(), 'cid': _generate_cid(),
} }
payload = { payload = {
'operationName': 'NewBookingSearch', 'operationName': 'NewBookingSearch',
'variables': { 'variables': {
'origin': ORIGIN_STATION_ID, 'origin': ORIGIN_STATION_ID,
'destination': dest_id, 'destination': dest_id,
'outbound': travel_date, 'outbound': travel_date,
'currency': 'GBP', 'currency': 'GBP',
'adult': 1, 'adult': 1,
'filteredClassesOfService': ['STANDARD'], 'filteredClassesOfService': ['STANDARD'],
}, },
'query': _GQL_PRICES, 'query': _GQL_QUERY,
} }
resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
resp.raise_for_status() resp.raise_for_status()
data = resp.json() return _parse_graphql(resp.json(), destination)
prices: dict[str, dict] = {}
journeys = data['data']['journeySearch']['outbound']['journeys']
for journey in journeys:
dep = journey['timing']['departureTime']
price = None
seats = None
for fare in journey['fares']:
if fare['classOfService']['code'] == 'STANDARD':
p = fare.get('prices')
if p and p.get('displayPrice'):
price = int(p['displayPrice'])
seats = fare.get('seats')
break
prices[dep] = {'price': price, 'seats': seats}
return prices

View file

@ -159,7 +159,7 @@
</td> </td>
<td style="padding:0.6rem 0.8rem;font-weight:600"> <td style="padding:0.6rem 0.8rem;font-weight:600">
{{ row.depart_st_pancras }} {{ row.depart_st_pancras }}
{% if row.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{{ row.train_number }}</span>{% endif %} {% if row.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{% for part in row.train_number.split(' + ') %}<span style="white-space:nowrap">{{ part }}</span>{% if not loop.last %} + {% endif %}{% endfor %}</span>{% endif %}
</td> </td>
<td style="padding:0.6rem 0.8rem"> <td style="padding:0.6rem 0.8rem">
{{ row.arrive_destination }} {{ row.arrive_destination }}
@ -168,7 +168,7 @@
</td> </td>
<td style="padding:0.6rem 0.8rem;white-space:nowrap"> <td style="padding:0.6rem 0.8rem;white-space:nowrap">
{% if row.eurostar_price is not none %} {% if row.eurostar_price is not none %}
£{{ row.eurostar_price }} £{{ "%.2f"|format(row.eurostar_price) }}
{% if row.eurostar_seats is not none %} {% if row.eurostar_seats is not none %}
<br><span style="font-size:0.75rem;color:#718096">{{ row.eurostar_seats }} at this price</span> <br><span style="font-size:0.75rem;color:#718096">{{ row.eurostar_seats }} at this price</span>
{% endif %} {% endif %}
@ -195,7 +195,7 @@
<td style="padding:0.6rem 0.8rem">n/a</td> <td style="padding:0.6rem 0.8rem">n/a</td>
<td style="padding:0.6rem 0.8rem;font-weight:600"> <td style="padding:0.6rem 0.8rem;font-weight:600">
{{ row.depart_st_pancras }} {{ row.depart_st_pancras }}
{% if row.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#a0aec0">{{ row.train_number }}</span>{% endif %} {% if row.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#a0aec0">{% for part in row.train_number.split(' + ') %}<span style="white-space:nowrap">{{ part }}</span>{% if not loop.last %} + {% endif %}{% endfor %}</span>{% endif %}
</td> </td>
<td style="padding:0.6rem 0.8rem"> <td style="padding:0.6rem 0.8rem">
{{ row.arrive_destination }} {{ row.arrive_destination }}
@ -204,7 +204,7 @@
</td> </td>
<td style="padding:0.6rem 0.8rem;white-space:nowrap"> <td style="padding:0.6rem 0.8rem;white-space:nowrap">
{% if row.eurostar_price is not none %} {% if row.eurostar_price is not none %}
<span style="color:#a0aec0">£{{ row.eurostar_price }}</span> <span style="color:#a0aec0">£{{ "%.2f"|format(row.eurostar_price) }}</span>
{% if row.eurostar_seats is not none %} {% if row.eurostar_seats is not none %}
<br><span style="font-size:0.75rem;color:#a0aec0">{{ row.eurostar_seats }} at this price</span> <br><span style="font-size:0.75rem;color:#a0aec0">{{ row.eurostar_seats }} at this price</span>
{% endif %} {% endif %}

View file

@ -16,25 +16,21 @@ def _stub_data(monkeypatch, prices=None):
{'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'},
], ],
) )
p = (prices or {}).get('10:01', {})
monkeypatch.setattr( monkeypatch.setattr(
app_module.eurostar_scraper, app_module.eurostar_scraper,
'fetch', 'fetch',
lambda destination, travel_date, user_agent: [ lambda destination, travel_date: [
{ {
'depart_st_pancras': '10:01', 'depart_st_pancras': '10:01',
'arrive_destination': '13:34', 'arrive_destination': '13:34',
'destination': destination, 'destination': destination,
'train_number': 'ES 9014', 'train_number': 'ES 9014',
'price': p.get('price') if isinstance(p, dict) else None,
'seats': p.get('seats') if isinstance(p, dict) else None,
}, },
], ],
) )
monkeypatch.setattr(
app_module.eurostar_scraper,
'timetable_url',
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
)
_prices = prices if prices is not None else {}
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: _prices)
def test_index_shows_fixed_departure_and_destination_radios(): def test_index_shows_fixed_departure_and_destination_radios():
@ -96,7 +92,6 @@ def test_results_title_and_social_meta_include_destination(monkeypatch):
def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypatch): def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypatch):
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
monkeypatch.setattr( monkeypatch.setattr(
app_module.rtt_scraper, app_module.rtt_scraper,
'fetch', 'fetch',
@ -111,44 +106,14 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa
monkeypatch.setattr( monkeypatch.setattr(
app_module.eurostar_scraper, app_module.eurostar_scraper,
'fetch', 'fetch',
lambda destination, travel_date, user_agent: [ lambda destination, travel_date: [
{ {'depart_st_pancras': '09:30', 'arrive_destination': '11:50', 'destination': destination, 'train_number': 'ES 1001', 'price': None, 'seats': None},
'depart_st_pancras': '09:30', {'depart_st_pancras': '09:40', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 1002', 'price': None, 'seats': None},
'arrive_destination': '11:50', {'depart_st_pancras': '09:50', 'arrive_destination': '12:20', 'destination': destination, 'train_number': 'ES 1003', 'price': None, 'seats': None},
'destination': destination, {'depart_st_pancras': '10:00', 'arrive_destination': '12:35', 'destination': destination, 'train_number': 'ES 1004', 'price': None, 'seats': None},
'train_number': 'ES 1001', {'depart_st_pancras': '10:10', 'arrive_destination': '12:45', 'destination': destination, 'train_number': 'ES 1005', 'price': None, 'seats': None},
},
{
'depart_st_pancras': '09:40',
'arrive_destination': '12:00',
'destination': destination,
'train_number': 'ES 1002',
},
{
'depart_st_pancras': '09:50',
'arrive_destination': '12:20',
'destination': destination,
'train_number': 'ES 1003',
},
{
'depart_st_pancras': '10:00',
'arrive_destination': '12:35',
'destination': destination,
'train_number': 'ES 1004',
},
{
'depart_st_pancras': '10:10',
'arrive_destination': '12:45',
'destination': destination,
'train_number': 'ES 1005',
},
], ],
) )
monkeypatch.setattr(
app_module.eurostar_scraper,
'timetable_url',
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
)
client = _client() client = _client()
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')
@ -168,7 +133,6 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa
def test_results_shows_unreachable_morning_eurostar_services(monkeypatch): def test_results_shows_unreachable_morning_eurostar_services(monkeypatch):
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
monkeypatch.setattr( monkeypatch.setattr(
app_module.rtt_scraper, app_module.rtt_scraper,
'fetch', 'fetch',
@ -179,32 +143,12 @@ def test_results_shows_unreachable_morning_eurostar_services(monkeypatch):
monkeypatch.setattr( monkeypatch.setattr(
app_module.eurostar_scraper, app_module.eurostar_scraper,
'fetch', 'fetch',
lambda destination, travel_date, user_agent: [ lambda destination, travel_date: [
{ {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None},
'depart_st_pancras': '09:30', {'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': destination, 'train_number': 'ES 9002', 'price': None, 'seats': None},
'arrive_destination': '12:00', {'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': destination, 'train_number': 'ES 9003', 'price': None, 'seats': None},
'destination': destination,
'train_number': 'ES 9001',
},
{
'depart_st_pancras': '10:15',
'arrive_destination': '13:40',
'destination': destination,
'train_number': 'ES 9002',
},
{
'depart_st_pancras': '12:30',
'arrive_destination': '15:55',
'destination': destination,
'train_number': 'ES 9003',
},
], ],
) )
monkeypatch.setattr(
app_module.eurostar_scraper,
'timetable_url',
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
)
client = _client() client = _client()
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')
@ -234,7 +178,6 @@ def test_results_shows_eurostar_price_and_total(monkeypatch):
def test_results_can_show_only_unreachable_morning_services(monkeypatch): def test_results_can_show_only_unreachable_morning_services(monkeypatch):
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
monkeypatch.setattr( monkeypatch.setattr(
app_module.rtt_scraper, app_module.rtt_scraper,
'fetch', 'fetch',
@ -245,20 +188,10 @@ def test_results_can_show_only_unreachable_morning_services(monkeypatch):
monkeypatch.setattr( monkeypatch.setattr(
app_module.eurostar_scraper, app_module.eurostar_scraper,
'fetch', 'fetch',
lambda destination, travel_date, user_agent: [ lambda destination, travel_date: [
{ {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None},
'depart_st_pancras': '09:30',
'arrive_destination': '12:00',
'destination': destination,
'train_number': 'ES 9001',
},
], ],
) )
monkeypatch.setattr(
app_module.eurostar_scraper,
'timetable_url',
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
)
client = _client() client = _client()
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120') resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')

View file

@ -1,97 +1,92 @@
import json
import pytest import pytest
from scraper.eurostar import _hhmm, _parse, timetable_url from scraper.eurostar import _parse_graphql, search_url
# --------------------------------------------------------------------------- def _gql_response(journeys: list) -> dict:
# _hhmm return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}}
# ---------------------------------------------------------------------------
def test_hhmm_parses_datetime_string():
    """_hhmm keeps only the HH:MM part of a 'YYYY-MM-DD HH:MM:SS' string."""
    assert _hhmm('2026-03-30 09:34:00') == '09:34'
def test_hhmm_none_input():
    """_hhmm passes None through instead of raising."""
    assert _hhmm(None) is None
def test_hhmm_empty_string():
    """_hhmm treats an empty string as a missing time."""
    assert _hhmm('') is None
# --------------------------------------------------------------------------- def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES') -> dict:
# _parse
# ---------------------------------------------------------------------------
def _make_next_data(departures: list) -> str:
data = {
'props': {
'pageProps': {
'pageData': {
'liveDepartures': departures
}
}
}
}
return f'<script id="__NEXT_DATA__" type="application/json">{json.dumps(data)}</script>'
def _departure(dep_dt: str, arr_dt: str) -> dict:
return { return {
'origin': {'model': {'scheduledDepartureDateTime': dep_dt}}, 'timing': {'departureTime': departs, 'arrivalTime': arrives},
'destination': {'model': {'scheduledArrivalDateTime': arr_dt}}, 'fares': [{
'classOfService': {'code': 'STANDARD'},
'prices': {'displayPrice': price},
'seats': seats,
'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}]
if service_name else [],
}],
} }
def test_parse_single_departure(): # ---------------------------------------------------------------------------
html = _make_next_data([_departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')]) # _parse_graphql
services = _parse(html, 'Paris Gare du Nord') # ---------------------------------------------------------------------------
def test_parse_graphql_single_journey():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert len(services) == 1 assert len(services) == 1
assert services[0] == { s = services[0]
'depart_st_pancras': '06:01', assert s['depart_st_pancras'] == '09:31'
'arrive_destination': '09:34', assert s['arrive_destination'] == '12:55'
'destination': 'Paris Gare du Nord', assert s['destination'] == 'Paris Gare du Nord'
'train_number': '', assert s['train_number'] == 'ES 9014'
} assert s['price'] == 156.0
assert s['seats'] == 37
def test_parse_results_sorted_by_departure(): def test_parse_graphql_half_pound_price():
html = _make_next_data([ data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')])
_departure('2026-03-30 10:00:00', '2026-03-30 13:00:00'), services = _parse_graphql(data, 'Amsterdam Centraal')
_departure('2026-03-30 07:00:00', '2026-03-30 10:00:00'), assert services[0]['price'] == 192.5
def test_parse_graphql_null_price():
data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)])
services = _parse_graphql(data, 'Amsterdam Centraal')
assert services[0]['price'] is None
assert services[0]['seats'] == 0
def test_parse_graphql_sorted_by_departure():
data = _gql_response([
_journey('10:31', '13:55'),
_journey('07:31', '10:59'),
]) ])
services = _parse(html, 'Paris Gare du Nord') services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['depart_st_pancras'] == '07:00' assert services[0]['depart_st_pancras'] == '07:31'
assert services[1]['depart_st_pancras'] == '10:00' assert services[1]['depart_st_pancras'] == '10:31'
def test_parse_skips_entries_with_missing_times(): def test_parse_graphql_deduplicates_same_departure_time():
html = _make_next_data([ data = _gql_response([
_departure(None, '2026-03-30 09:34:00'), _journey('06:16', '11:09', price=None, seats=0),
_departure('2026-03-30 08:00:00', None), _journey('06:16', '11:09', price=None, seats=0),
_departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'), _journey('06:16', '11:09', price=None, seats=0),
]) ])
services = _parse(html, 'Paris Gare du Nord') services = _parse_graphql(data, 'Amsterdam Centraal')
assert len(services) == 1 assert len(services) == 1
assert services[0]['depart_st_pancras'] == '09:00'
def test_parse_no_next_data_returns_empty(): def test_parse_graphql_no_legs_gives_empty_train_number():
assert _parse('<html><body>nothing here</body></html>', 'Paris Gare du Nord') == [] data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['train_number'] == ''
def test_parse_empty_departures(): def test_parse_graphql_empty_journeys():
html = _make_next_data([]) data = _gql_response([])
assert _parse(html, 'Paris Gare du Nord') == [] assert _parse_graphql(data, 'Paris Gare du Nord') == []
def test_timetable_url_uses_station_id_table(): # ---------------------------------------------------------------------------
assert timetable_url('Paris Gare du Nord') == ( # search_url
'https://www.eurostar.com/uk-en/travel-info/timetable/' # ---------------------------------------------------------------------------
'7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
) def test_search_url():
url = search_url('Paris Gare du Nord', '2026-04-10')
assert url == (
def test_timetable_url_slugifies_destination_name(): 'https://www.eurostar.com/search/uk-en'
assert timetable_url('Rotterdam Centraal') == ( '?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10'
'https://www.eurostar.com/uk-en/travel-info/timetable/'
'7015400/8400530/london-st-pancras-intl/rotterdam-centraal'
) )