From 945d028c1367ebde4abeed6d920af7614e585bab Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Wed, 1 Apr 2026 12:17:50 +0100
Subject: [PATCH] Generate Eurostar timetable URLs from station IDs

---
Review notes (this section is not part of the commit message):
* Line breaks were restored so that every hunk matches its @@ line counts
  and the 40 insertion / 24 deletion totals below.
* Leading whitespace on context lines, and the position of the two blank
  context lines in the app.py hunk, are inferred from Python syntax --
  TODO confirm against the tree before applying.
* _slugify_station_name() calls re.sub() and this patch adds no import;
  presumably scraper/eurostar.py already imports re -- verify.
* timetable_url() indexes DESTINATION_STATION_IDS directly, so a
  destination missing from the table raises KeyError, the same as the
  ROUTE_URLS[destination] lookup it replaces.

 app.py                         |  2 +-
 scraper/eurostar.py            | 46 ++++++++++++++++++----------------
 tests/test_eurostar_scraper.py | 16 +++++++++++-
 3 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/app.py b/app.py
index 6e9e6fd..1a61c9c 100644
--- a/app.py
+++ b/app.py
@@ -115,7 +115,7 @@ def results(slug, travel_date):
     next_date = (dt + timedelta(days=1)).isoformat()
     travel_date_display = dt.strftime('%A %-d %B %Y')
 
-    eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}"
+    eurostar_url = eurostar_scraper.timetable_url(destination) + f"?date={travel_date}"
     rtt_url = RTT_PADDINGTON_URL.format(date=travel_date)
 
     return render_template(
diff --git a/scraper/eurostar.py b/scraper/eurostar.py
index 4307835..7e9f3e7 100644
--- a/scraper/eurostar.py
+++ b/scraper/eurostar.py
@@ -22,30 +22,32 @@ DEFAULT_UA = (
     "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
 )
 
-ROUTE_URLS = {
-    'Paris Gare du Nord': (
-        'https://www.eurostar.com/uk-en/travel-info/timetable/'
-        '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
-    ),
-    'Brussels Midi': (
-        'https://www.eurostar.com/uk-en/travel-info/timetable/'
-        '7015400/8814001/london-st-pancras-intl/brussels-midi'
-    ),
-    'Lille Europe': (
-        'https://www.eurostar.com/uk-en/travel-info/timetable/'
-        '7015400/8722326/london-st-pancras-intl/lille-europe'
-    ),
-    'Amsterdam Centraal': (
-        'https://www.eurostar.com/uk-en/travel-info/timetable/'
-        '7015400/8400058/london-st-pancras-intl/amsterdam-centraal'
-    ),
-    'Rotterdam Centraal': (
-        'https://www.eurostar.com/uk-en/travel-info/timetable/'
-        '7015400/8400530/london-st-pancras-intl/rotterdam-centraal'
-    ),
+ORIGIN_STATION_ID = '7015400'
+ORIGIN_STATION_SLUG = 'london-st-pancras-intl'
+TIMETABLE_BASE_URL = 'https://www.eurostar.com/uk-en/travel-info/timetable'
+
+DESTINATION_STATION_IDS = {
+    'Paris Gare du Nord': '8727100',
+    'Brussels Midi': '8814001',
+    'Lille Europe': '8722326',
+    'Amsterdam Centraal': '8400058',
+    'Rotterdam Centraal': '8400530',
 }
 
 
+def _slugify_station_name(name: str) -> str:
+    return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
+
+
+def timetable_url(destination: str) -> str:
+    dest_id = DESTINATION_STATION_IDS[destination]
+    dest_slug = _slugify_station_name(destination)
+    return (
+        f'{TIMETABLE_BASE_URL}/{ORIGIN_STATION_ID}/{dest_id}/'
+        f'{ORIGIN_STATION_SLUG}/{dest_slug}'
+    )
+
+
 def _hhmm(dt_str: str | None) -> str | None:
     """'2026-03-30 09:34:00' → '09:34'"""
     if not dt_str:
@@ -78,7 +80,7 @@ def _parse(html: str, destination: str) -> list[dict]:
 
 
 def fetch(destination: str, travel_date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
-    url = ROUTE_URLS[destination]
+    url = timetable_url(destination)
     headers = {
         'User-Agent': user_agent,
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py
index ff2d4b1..137cf71 100644
--- a/tests/test_eurostar_scraper.py
+++ b/tests/test_eurostar_scraper.py
@@ -1,6 +1,6 @@
 import json
 import pytest
-from scraper.eurostar import _hhmm, _parse
+from scraper.eurostar import _hhmm, _parse, timetable_url
 
 
 # ---------------------------------------------------------------------------
@@ -80,3 +80,17 @@ def test_parse_no_next_data_returns_empty():
 def test_parse_empty_departures():
     html = _make_next_data([])
     assert _parse(html, 'Paris Gare du Nord') == []
+
+
+def test_timetable_url_uses_station_id_table():
+    assert timetable_url('Paris Gare du Nord') == (
+        'https://www.eurostar.com/uk-en/travel-info/timetable/'
+        '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
+    )
+
+
+def test_timetable_url_slugifies_destination_name():
+    assert timetable_url('Rotterdam Centraal') == (
+        'https://www.eurostar.com/uk-en/travel-info/timetable/'
+        '7015400/8400530/london-st-pancras-intl/rotterdam-centraal'
+    )