Consolidate to single GraphQL call; show indirect trains; fix price formatting
Replace the two-step Eurostar fetch (HTML timetable + GraphQL prices) with a single GraphQL call that returns timing, train numbers, prices, and seats. Support indirect services (e.g. Amsterdam) by joining multi-leg train numbers with ' + ' and keeping the earliest arrival per departure time. Fix half-pound prices by casting displayPrice to float instead of int. Wrap each train number segment in white-space:nowrap so 'ES 9132 + ER 9363' never breaks mid-segment. Format Eurostar prices with two decimal places.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
05eec29b7d
commit
c22a3ea0fc
5 changed files with 182 additions and 320 deletions
|
|
@ -16,25 +16,21 @@ def _stub_data(monkeypatch, prices=None):
|
|||
{'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'},
|
||||
],
|
||||
)
|
||||
p = (prices or {}).get('10:01', {})
|
||||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'fetch',
|
||||
lambda destination, travel_date, user_agent: [
|
||||
lambda destination, travel_date: [
|
||||
{
|
||||
'depart_st_pancras': '10:01',
|
||||
'arrive_destination': '13:34',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 9014',
|
||||
'price': p.get('price') if isinstance(p, dict) else None,
|
||||
'seats': p.get('seats') if isinstance(p, dict) else None,
|
||||
},
|
||||
],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'timetable_url',
|
||||
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
|
||||
)
|
||||
_prices = prices if prices is not None else {}
|
||||
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: _prices)
|
||||
|
||||
|
||||
def test_index_shows_fixed_departure_and_destination_radios():
|
||||
|
|
@ -96,7 +92,6 @@ def test_results_title_and_social_meta_include_destination(monkeypatch):
|
|||
def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypatch):
|
||||
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
|
||||
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
|
||||
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
|
||||
monkeypatch.setattr(
|
||||
app_module.rtt_scraper,
|
||||
'fetch',
|
||||
|
|
@ -111,44 +106,14 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa
|
|||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'fetch',
|
||||
lambda destination, travel_date, user_agent: [
|
||||
{
|
||||
'depart_st_pancras': '09:30',
|
||||
'arrive_destination': '11:50',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 1001',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '09:40',
|
||||
'arrive_destination': '12:00',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 1002',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '09:50',
|
||||
'arrive_destination': '12:20',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 1003',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '10:00',
|
||||
'arrive_destination': '12:35',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 1004',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '10:10',
|
||||
'arrive_destination': '12:45',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 1005',
|
||||
},
|
||||
lambda destination, travel_date: [
|
||||
{'depart_st_pancras': '09:30', 'arrive_destination': '11:50', 'destination': destination, 'train_number': 'ES 1001', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '09:40', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 1002', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '09:50', 'arrive_destination': '12:20', 'destination': destination, 'train_number': 'ES 1003', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '10:00', 'arrive_destination': '12:35', 'destination': destination, 'train_number': 'ES 1004', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '10:10', 'arrive_destination': '12:45', 'destination': destination, 'train_number': 'ES 1005', 'price': None, 'seats': None},
|
||||
],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'timetable_url',
|
||||
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
|
||||
)
|
||||
client = _client()
|
||||
|
||||
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')
|
||||
|
|
@ -168,7 +133,6 @@ def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypa
|
|||
def test_results_shows_unreachable_morning_eurostar_services(monkeypatch):
|
||||
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
|
||||
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
|
||||
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
|
||||
monkeypatch.setattr(
|
||||
app_module.rtt_scraper,
|
||||
'fetch',
|
||||
|
|
@ -179,32 +143,12 @@ def test_results_shows_unreachable_morning_eurostar_services(monkeypatch):
|
|||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'fetch',
|
||||
lambda destination, travel_date, user_agent: [
|
||||
{
|
||||
'depart_st_pancras': '09:30',
|
||||
'arrive_destination': '12:00',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 9001',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '10:15',
|
||||
'arrive_destination': '13:40',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 9002',
|
||||
},
|
||||
{
|
||||
'depart_st_pancras': '12:30',
|
||||
'arrive_destination': '15:55',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 9003',
|
||||
},
|
||||
lambda destination, travel_date: [
|
||||
{'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': destination, 'train_number': 'ES 9002', 'price': None, 'seats': None},
|
||||
{'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': destination, 'train_number': 'ES 9003', 'price': None, 'seats': None},
|
||||
],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'timetable_url',
|
||||
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
|
||||
)
|
||||
client = _client()
|
||||
|
||||
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')
|
||||
|
|
@ -234,7 +178,6 @@ def test_results_shows_eurostar_price_and_total(monkeypatch):
|
|||
def test_results_can_show_only_unreachable_morning_services(monkeypatch):
|
||||
monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None)
|
||||
monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None)
|
||||
monkeypatch.setattr(app_module, 'fetch_eurostar_prices', lambda dest, date: {})
|
||||
monkeypatch.setattr(
|
||||
app_module.rtt_scraper,
|
||||
'fetch',
|
||||
|
|
@ -245,20 +188,10 @@ def test_results_can_show_only_unreachable_morning_services(monkeypatch):
|
|||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'fetch',
|
||||
lambda destination, travel_date, user_agent: [
|
||||
{
|
||||
'depart_st_pancras': '09:30',
|
||||
'arrive_destination': '12:00',
|
||||
'destination': destination,
|
||||
'train_number': 'ES 9001',
|
||||
},
|
||||
lambda destination, travel_date: [
|
||||
{'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None},
|
||||
],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
app_module.eurostar_scraper,
|
||||
'timetable_url',
|
||||
lambda destination: f'https://example.test/{destination.lower().replace(" ", "-")}',
|
||||
)
|
||||
client = _client()
|
||||
|
||||
resp = client.get('/results/paris/2026-04-10?min_connection=60&max_connection=120')
|
||||
|
|
|
|||
|
|
@ -1,97 +1,92 @@
|
|||
import json
|
||||
import pytest
|
||||
from scraper.eurostar import _hhmm, _parse, timetable_url
|
||||
from scraper.eurostar import _parse_graphql, search_url
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _hhmm
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_hhmm_parses_datetime_string():
|
||||
assert _hhmm('2026-03-30 09:34:00') == '09:34'
|
||||
|
||||
def test_hhmm_none_input():
|
||||
assert _hhmm(None) is None
|
||||
|
||||
def test_hhmm_empty_string():
|
||||
assert _hhmm('') is None
|
||||
def _gql_response(journeys: list) -> dict:
|
||||
return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_next_data(departures: list) -> str:
|
||||
data = {
|
||||
'props': {
|
||||
'pageProps': {
|
||||
'pageData': {
|
||||
'liveDepartures': departures
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return f'<script id="__NEXT_DATA__" type="application/json">{json.dumps(data)}</script>'
|
||||
|
||||
|
||||
def _departure(dep_dt: str, arr_dt: str) -> dict:
|
||||
def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES') -> dict:
|
||||
return {
|
||||
'origin': {'model': {'scheduledDepartureDateTime': dep_dt}},
|
||||
'destination': {'model': {'scheduledArrivalDateTime': arr_dt}},
|
||||
'timing': {'departureTime': departs, 'arrivalTime': arrives},
|
||||
'fares': [{
|
||||
'classOfService': {'code': 'STANDARD'},
|
||||
'prices': {'displayPrice': price},
|
||||
'seats': seats,
|
||||
'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}]
|
||||
if service_name else [],
|
||||
}],
|
||||
}
|
||||
|
||||
|
||||
def test_parse_single_departure():
|
||||
html = _make_next_data([_departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')])
|
||||
services = _parse(html, 'Paris Gare du Nord')
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse_graphql
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_parse_graphql_single_journey():
|
||||
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')])
|
||||
services = _parse_graphql(data, 'Paris Gare du Nord')
|
||||
assert len(services) == 1
|
||||
assert services[0] == {
|
||||
'depart_st_pancras': '06:01',
|
||||
'arrive_destination': '09:34',
|
||||
'destination': 'Paris Gare du Nord',
|
||||
'train_number': '',
|
||||
}
|
||||
s = services[0]
|
||||
assert s['depart_st_pancras'] == '09:31'
|
||||
assert s['arrive_destination'] == '12:55'
|
||||
assert s['destination'] == 'Paris Gare du Nord'
|
||||
assert s['train_number'] == 'ES 9014'
|
||||
assert s['price'] == 156.0
|
||||
assert s['seats'] == 37
|
||||
|
||||
|
||||
def test_parse_results_sorted_by_departure():
|
||||
html = _make_next_data([
|
||||
_departure('2026-03-30 10:00:00', '2026-03-30 13:00:00'),
|
||||
_departure('2026-03-30 07:00:00', '2026-03-30 10:00:00'),
|
||||
def test_parse_graphql_half_pound_price():
|
||||
data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')])
|
||||
services = _parse_graphql(data, 'Amsterdam Centraal')
|
||||
assert services[0]['price'] == 192.5
|
||||
|
||||
|
||||
def test_parse_graphql_null_price():
|
||||
data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)])
|
||||
services = _parse_graphql(data, 'Amsterdam Centraal')
|
||||
assert services[0]['price'] is None
|
||||
assert services[0]['seats'] == 0
|
||||
|
||||
|
||||
def test_parse_graphql_sorted_by_departure():
|
||||
data = _gql_response([
|
||||
_journey('10:31', '13:55'),
|
||||
_journey('07:31', '10:59'),
|
||||
])
|
||||
services = _parse(html, 'Paris Gare du Nord')
|
||||
assert services[0]['depart_st_pancras'] == '07:00'
|
||||
assert services[1]['depart_st_pancras'] == '10:00'
|
||||
services = _parse_graphql(data, 'Paris Gare du Nord')
|
||||
assert services[0]['depart_st_pancras'] == '07:31'
|
||||
assert services[1]['depart_st_pancras'] == '10:31'
|
||||
|
||||
|
||||
def test_parse_skips_entries_with_missing_times():
|
||||
html = _make_next_data([
|
||||
_departure(None, '2026-03-30 09:34:00'),
|
||||
_departure('2026-03-30 08:00:00', None),
|
||||
_departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'),
|
||||
def test_parse_graphql_deduplicates_same_departure_time():
|
||||
data = _gql_response([
|
||||
_journey('06:16', '11:09', price=None, seats=0),
|
||||
_journey('06:16', '11:09', price=None, seats=0),
|
||||
_journey('06:16', '11:09', price=None, seats=0),
|
||||
])
|
||||
services = _parse(html, 'Paris Gare du Nord')
|
||||
services = _parse_graphql(data, 'Amsterdam Centraal')
|
||||
assert len(services) == 1
|
||||
assert services[0]['depart_st_pancras'] == '09:00'
|
||||
|
||||
|
||||
def test_parse_no_next_data_returns_empty():
|
||||
assert _parse('<html><body>nothing here</body></html>', 'Paris Gare du Nord') == []
|
||||
def test_parse_graphql_no_legs_gives_empty_train_number():
|
||||
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')])
|
||||
services = _parse_graphql(data, 'Paris Gare du Nord')
|
||||
assert services[0]['train_number'] == ''
|
||||
|
||||
|
||||
def test_parse_empty_departures():
|
||||
html = _make_next_data([])
|
||||
assert _parse(html, 'Paris Gare du Nord') == []
|
||||
def test_parse_graphql_empty_journeys():
|
||||
data = _gql_response([])
|
||||
assert _parse_graphql(data, 'Paris Gare du Nord') == []
|
||||
|
||||
|
||||
def test_timetable_url_uses_station_id_table():
|
||||
assert timetable_url('Paris Gare du Nord') == (
|
||||
'https://www.eurostar.com/uk-en/travel-info/timetable/'
|
||||
'7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
|
||||
)
|
||||
|
||||
|
||||
def test_timetable_url_slugifies_destination_name():
|
||||
assert timetable_url('Rotterdam Centraal') == (
|
||||
'https://www.eurostar.com/uk-en/travel-info/timetable/'
|
||||
'7015400/8400530/london-st-pancras-intl/rotterdam-centraal'
|
||||
# ---------------------------------------------------------------------------
|
||||
# search_url
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_search_url():
|
||||
url = search_url('Paris Gare du Nord', '2026-04-10')
|
||||
assert url == (
|
||||
'https://www.eurostar.com/search/uk-en'
|
||||
'?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10'
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue