diff --git a/app.py b/app.py index 1a61c9c..b33d900 100644 --- a/app.py +++ b/app.py @@ -22,7 +22,6 @@ DESTINATIONS = { 'brussels': 'Brussels Midi', 'lille': 'Lille Europe', 'amsterdam': 'Amsterdam Centraal', - 'rotterdam': 'Rotterdam Centraal', } @@ -115,7 +114,7 @@ def results(slug, travel_date): next_date = (dt + timedelta(days=1)).isoformat() travel_date_display = dt.strftime('%A %-d %B %Y') - eurostar_url = eurostar_scraper.timetable_url(destination) + f"?date={travel_date}" + eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}" rtt_url = RTT_PADDINGTON_URL.format(date=travel_date) return render_template( diff --git a/scraper/eurostar.py b/scraper/eurostar.py index 7e9f3e7..c2ef13e 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -22,32 +22,26 @@ DEFAULT_UA = ( "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" ) -ORIGIN_STATION_ID = '7015400' -ORIGIN_STATION_SLUG = 'london-st-pancras-intl' -TIMETABLE_BASE_URL = 'https://www.eurostar.com/uk-en/travel-info/timetable' - -DESTINATION_STATION_IDS = { - 'Paris Gare du Nord': '8727100', - 'Brussels Midi': '8814001', - 'Lille Europe': '8722326', - 'Amsterdam Centraal': '8400058', - 'Rotterdam Centraal': '8400530', +ROUTE_URLS = { + 'Paris Gare du Nord': ( + 'https://www.eurostar.com/uk-en/travel-info/timetable/' + '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord' + ), + 'Brussels Midi': ( + 'https://www.eurostar.com/uk-en/travel-info/timetable/' + '7015400/8814001/london-st-pancras-intl/brussels-midi' + ), + 'Lille Europe': ( + 'https://www.eurostar.com/uk-en/travel-info/timetable/' + '7015400/8722326/london-st-pancras-intl/lille-europe' + ), + 'Amsterdam Centraal': ( + 'https://www.eurostar.com/uk-en/travel-info/timetable/' + '7015400/8400058/london-st-pancras-intl/amsterdam-centraal' + ), } -def _slugify_station_name(name: str) -> str: - return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-') - - -def timetable_url(destination: str) -> str: - dest_id = DESTINATION_STATION_IDS[destination] - dest_slug = _slugify_station_name(destination) - return ( - f'{TIMETABLE_BASE_URL}/{ORIGIN_STATION_ID}/{dest_id}/' - f'{ORIGIN_STATION_SLUG}/{dest_slug}' - ) - - def _hhmm(dt_str: str | None) -> str | None: """'2026-03-30 09:34:00' → '09:34'""" if not dt_str: @@ -80,7 +74,7 @@ def _parse(html: str, destination: str) -> list[dict]: def fetch(destination: str, travel_date: str, user_agent: str = DEFAULT_UA) -> list[dict]: - url = timetable_url(destination) + url = ROUTE_URLS[destination] headers = { 'User-Agent': user_agent, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py index 137cf71..ff2d4b1 100644 --- a/tests/test_eurostar_scraper.py +++ b/tests/test_eurostar_scraper.py @@ -1,6 +1,6 @@ import json import pytest -from scraper.eurostar import _hhmm, _parse, timetable_url +from scraper.eurostar import _hhmm, _parse # --------------------------------------------------------------------------- @@ -80,17 +80,3 @@ def test_parse_no_next_data_returns_empty(): def test_parse_empty_departures(): html = _make_next_data([]) assert _parse(html, 'Paris Gare du Nord') == [] - - -def test_timetable_url_uses_station_id_table(): - assert timetable_url('Paris Gare du Nord') == ( - 'https://www.eurostar.com/uk-en/travel-info/timetable/' - '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord' - ) - - -def test_timetable_url_slugifies_destination_name(): - assert timetable_url('Rotterdam Centraal') == ( - 'https://www.eurostar.com/uk-en/travel-info/timetable/' - '7015400/8400530/london-st-pancras-intl/rotterdam-centraal' - )