"""Unit tests for the ``scraper.eurostar`` helpers ``_hhmm``, ``_parse`` and ``timetable_url``."""
import json
from typing import Optional

import pytest

from scraper.eurostar import _hhmm, _parse, timetable_url
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# _hhmm
# ---------------------------------------------------------------------------
|
|
|
|
def test_hhmm_parses_datetime_string():
    """A 'YYYY-MM-DD HH:MM:SS' sample is reduced to its 'HH:MM' part."""
    stamp = '2026-03-30 09:34:00'
    assert _hhmm(stamp) == '09:34'
|
|
|
|
def test_hhmm_none_input():
    """Passing ``None`` gives ``None`` back."""
    result = _hhmm(None)
    assert result is None
|
|
|
|
def test_hhmm_empty_string():
    """An empty string is treated as no value and yields ``None``."""
    result = _hhmm('')
    assert result is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# _parse
# ---------------------------------------------------------------------------
|
|
|
|
def _make_next_data(departures: list) -> str:
    """Build a ``__NEXT_DATA__`` script tag wrapping *departures*.

    The entries are nested under
    ``props.pageProps.pageData.liveDepartures`` and serialised as JSON inside
    a ``<script id="__NEXT_DATA__" type="application/json">`` element.

    Args:
        departures: JSON-serialisable departure entries.

    Returns:
        The HTML snippet as a string.
    """
    data = {
        'props': {
            'pageProps': {
                'pageData': {
                    'liveDepartures': departures,
                },
            },
        },
    }
    # Escape "</" so a string value such as "</script>" cannot terminate the
    # element early; "<\/" is a valid JSON escape that decodes to the same text.
    payload = json.dumps(data).replace('</', '<\\/')
    return f'<script id="__NEXT_DATA__" type="application/json">{payload}</script>'
|
|
|
|
|
|
def _departure(dep_dt: Optional[str], arr_dt: Optional[str]) -> dict:
    """Build a minimal live-departure entry for the fixture payload.

    Args:
        dep_dt: Scheduled departure date-time string (for example
            ``'2026-03-30 06:01:00'``), or ``None`` to simulate an entry
            with no departure time.
        arr_dt: Scheduled arrival date-time string, or ``None`` to simulate
            an entry with no arrival time.

    Returns:
        A dict holding ``dep_dt`` at
        ``origin.model.scheduledDepartureDateTime`` and ``arr_dt`` at
        ``destination.model.scheduledArrivalDateTime``.
    """
    return {
        'origin': {'model': {'scheduledDepartureDateTime': dep_dt}},
        'destination': {'model': {'scheduledArrivalDateTime': arr_dt}},
    }
|
|
|
|
|
|
def test_parse_single_departure():
    """One well-formed departure yields exactly one service record."""
    entry = _departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')
    expected = {
        'depart_st_pancras': '06:01',
        'arrive_destination': '09:34',
        'destination': 'Paris Gare du Nord',
    }

    services = _parse(_make_next_data([entry]), 'Paris Gare du Nord')

    assert len(services) == 1
    assert services[0] == expected
|
|
|
|
|
|
def test_parse_results_sorted_by_departure():
    """Services come back ordered by departure time, not by input order."""
    later = _departure('2026-03-30 10:00:00', '2026-03-30 13:00:00')
    earlier = _departure('2026-03-30 07:00:00', '2026-03-30 10:00:00')
    # The later train is listed first so the input order is deliberately wrong.
    html = _make_next_data([later, earlier])

    services = _parse(html, 'Paris Gare du Nord')

    first, second = services[0], services[1]
    assert first['depart_st_pancras'] == '07:00'
    assert second['depart_st_pancras'] == '10:00'
|
|
|
|
|
|
def test_parse_skips_entries_with_missing_times():
    """Entries lacking a departure or an arrival time are left out."""
    entries = [
        _departure(None, '2026-03-30 09:34:00'),  # no departure time
        _departure('2026-03-30 08:00:00', None),  # no arrival time
        _departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'),  # complete
    ]

    services = _parse(_make_next_data(entries), 'Paris Gare du Nord')

    # Only the complete entry should survive.
    assert len(services) == 1
    assert services[0]['depart_st_pancras'] == '09:00'
|
|
|
|
|
|
def test_parse_no_next_data_returns_empty():
    """HTML without a ``__NEXT_DATA__`` script parses to an empty list."""
    plain_page = '<html><body>nothing here</body></html>'
    services = _parse(plain_page, 'Paris Gare du Nord')
    assert services == []
|
|
|
|
|
|
def test_parse_empty_departures():
    """A payload whose departure list is empty parses to an empty list."""
    services = _parse(_make_next_data([]), 'Paris Gare du Nord')
    assert services == []
|
|
|
|
|
|
def test_timetable_url_uses_station_id_table():
    """The Paris Gare du Nord URL carries the expected numeric station IDs."""
    expected = (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
    )
    assert timetable_url('Paris Gare du Nord') == expected
|
|
|
|
|
|
def test_timetable_url_slugifies_destination_name():
    """'Rotterdam Centraal' appears in the URL as 'rotterdam-centraal'."""
    expected = (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8400530/london-st-pancras-intl/rotterdam-centraal'
    )
    assert timetable_url('Rotterdam Centraal') == expected
|