Initial commit.
This commit is contained in:
commit
a8e0bd39e5
16 changed files with 981 additions and 0 deletions
82
tests/test_eurostar_scraper.py
Normal file
82
tests/test_eurostar_scraper.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import json
|
||||
import pytest
|
||||
from scraper.eurostar import _hhmm, _parse
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _hhmm
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_hhmm_parses_datetime_string():
    """A full 'YYYY-MM-DD HH:MM:SS' string comes back as just 'HH:MM'."""
    stamp = '2026-03-30 09:34:00'
    assert _hhmm(stamp) == '09:34'
|
||||
|
||||
def test_hhmm_none_input():
    """None in, None out."""
    result = _hhmm(None)
    assert result is None
|
||||
|
||||
def test_hhmm_empty_string():
    """An empty string yields None rather than a formatted time."""
    result = _hhmm('')
    assert result is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_next_data(departures: list) -> str:
|
||||
data = {
|
||||
'props': {
|
||||
'pageProps': {
|
||||
'pageData': {
|
||||
'liveDepartures': departures
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return f'<script id="__NEXT_DATA__" type="application/json">{json.dumps(data)}</script>'
|
||||
|
||||
|
||||
def _departure(dep_dt: str, arr_dt: str) -> dict:
|
||||
return {
|
||||
'origin': {'model': {'scheduledDepartureDateTime': dep_dt}},
|
||||
'destination': {'model': {'scheduledArrivalDateTime': arr_dt}},
|
||||
}
|
||||
|
||||
|
||||
def test_parse_single_departure():
    """One departure in the payload yields exactly one fully populated service."""
    entry = _departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')
    services = _parse(_make_next_data([entry]), 'Paris Gare du Nord')
    expected = {
        'depart_st_pancras': '06:01',
        'arrive_destination': '09:34',
        'destination': 'Paris Gare du Nord',
    }
    assert len(services) == 1
    assert services[0] == expected
|
||||
|
||||
|
||||
def test_parse_results_sorted_by_departure():
    """Services come back ordered by departure time, not by payload order."""
    later = _departure('2026-03-30 10:00:00', '2026-03-30 13:00:00')
    earlier = _departure('2026-03-30 07:00:00', '2026-03-30 10:00:00')
    # Feed the later train first so the ordering has to come from _parse.
    page = _make_next_data([later, earlier])
    services = _parse(page, 'Paris Gare du Nord')
    assert services[0]['depart_st_pancras'] == '07:00'
    assert services[1]['depart_st_pancras'] == '10:00'
|
||||
|
||||
|
||||
def test_parse_skips_entries_with_missing_times():
    """Entries lacking a departure or an arrival time are dropped."""
    no_departure = _departure(None, '2026-03-30 09:34:00')
    no_arrival = _departure('2026-03-30 08:00:00', None)
    complete = _departure('2026-03-30 09:00:00', '2026-03-30 12:00:00')
    page = _make_next_data([no_departure, no_arrival, complete])
    services = _parse(page, 'Paris Gare du Nord')
    # Only the complete entry survives.
    assert len(services) == 1
    assert services[0]['depart_st_pancras'] == '09:00'
|
||||
|
||||
|
||||
def test_parse_no_next_data_returns_empty():
    """A page with no __NEXT_DATA__ script parses to an empty list."""
    page = '<html><body>nothing here</body></html>'
    services = _parse(page, 'Paris Gare du Nord')
    assert services == []
|
||||
|
||||
|
||||
def test_parse_empty_departures():
    """A payload whose liveDepartures list is empty parses to an empty list."""
    services = _parse(_make_next_data([]), 'Paris Gare du Nord')
    assert services == []
|
||||
71
tests/test_rtt_scraper.py
Normal file
71
tests/test_rtt_scraper.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import pytest
|
||||
from scraper.realtime_trains import _fmt, _parse_services
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _fmt
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_fmt_four_digits():
    """A bare four-digit time gains a colon between hours and minutes."""
    formatted = _fmt('0830')
    assert formatted == '08:30'
|
||||
|
||||
def test_fmt_already_colon():
    """A time that already contains a colon is returned unchanged."""
    formatted = _fmt('08:30')
    assert formatted == '08:30'
|
||||
|
||||
def test_fmt_strips_non_digits():
    """A non-digit separator such as 'h' is normalised to a colon."""
    formatted = _fmt('08h30')
    assert formatted == '08:30'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _parse_services
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_html(services: list[tuple[str, str]], time_class: str) -> str:
|
||||
"""Build a minimal servicelist HTML with (train_id, time) pairs."""
|
||||
items = ''
|
||||
for tid, time in services:
|
||||
items += f'''
|
||||
<a class="service">
|
||||
<div class="tid">{tid}</div>
|
||||
<div class="time plan {time_class}">{time}</div>
|
||||
</a>'''
|
||||
return f'<div class="servicelist">{items}</div>'
|
||||
|
||||
|
||||
def test_parse_services_departures():
    """Departure times are read per train id and formatted as HH:MM."""
    rows = [('1A23', '0700'), ('2B45', '0830')]
    parsed = _parse_services(_make_html(rows, 'd'), 'div.time.plan.d')
    expected = {'1A23': '07:00', '2B45': '08:30'}
    assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_services_arrivals():
    """The arrival selector picks up times marked with the 'a' class."""
    page = _make_html([('1A23', '0845')], 'a')
    parsed = _parse_services(page, 'div.time.plan.a')
    expected = {'1A23': '08:45'}
    assert parsed == expected
|
||||
|
||||
|
||||
def test_parse_services_no_servicelist():
    """A page without a servicelist element parses to an empty dict."""
    parsed = _parse_services('<html></html>', 'div.time.plan.d')
    assert parsed == {}
|
||||
|
||||
|
||||
def test_parse_services_skips_missing_time():
    """A service row with no time element is left out of the result."""
    # First row has a train id but no time div; second row is complete.
    page = '''
    <div class="servicelist">
        <a class="service"><div class="tid">1A23</div></a>
        <a class="service"><div class="tid">2B45</div><div class="time plan d">0900</div></a>
    </div>'''
    parsed = _parse_services(page, 'div.time.plan.d')
    assert '1A23' not in parsed
    assert parsed == {'2B45': '09:00'}
|
||||
|
||||
|
||||
def test_parse_services_skips_empty_time():
    """A service row whose time element holds only whitespace is left out."""
    page = '''
    <div class="servicelist">
        <a class="service">
            <div class="tid">1A23</div>
            <div class="time plan d"> </div>
        </a>
    </div>'''
    parsed = _parse_services(page, 'div.time.plan.d')
    assert parsed == {}
|
||||
131
tests/test_trip_planner.py
Normal file
131
tests/test_trip_planner.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import pytest
|
||||
from trip_planner import combine_trips, _fmt_duration
|
||||
|
||||
# Travel date passed as the third argument to combine_trips by the tests here.
DATE = '2026-03-30'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _fmt_duration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_fmt_duration_hours_and_minutes():
    """95 minutes renders with both an hour and a minute component."""
    rendered = _fmt_duration(95)
    assert rendered == '1h 35m'
|
||||
|
||||
def test_fmt_duration_exact_hours():
    """A whole number of hours renders without a minutes component."""
    rendered = _fmt_duration(120)
    assert rendered == '2h'
|
||||
|
||||
def test_fmt_duration_minutes_only():
    """Less than an hour renders as minutes alone."""
    rendered = _fmt_duration(45)
    assert rendered == '45m'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# combine_trips — basic pairing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# GWR legs: both leave Bristol at 07:00 and differ only in Paddington arrival.
GWR_FAST = {
    'depart_bristol': '07:00',
    'arrive_paddington': '08:45',  # 1h 45m
}
GWR_SLOW = {
    'depart_bristol': '07:00',
    'arrive_paddington': '09:26',  # 2h 26m — over limit
}

# Eurostar legs from St Pancras to Paris Gare du Nord.
ES_PARIS = {
    'depart_st_pancras': '10:01',
    'arrive_destination': '13:34',
    'destination': 'Paris Gare du Nord',
}
ES_EARLY = {
    'depart_st_pancras': '09:00',
    'arrive_destination': '12:00',
    'destination': 'Paris Gare du Nord',
}
|
||||
|
||||
|
||||
def test_valid_trip_is_returned():
    """A compatible GWR/Eurostar pair yields one trip carrying both legs' fields."""
    trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
    assert len(trips) == 1
    trip = trips[0]
    expected_fields = {
        'depart_bristol': '07:00',
        'arrive_paddington': '08:45',
        'depart_st_pancras': '10:01',
        'arrive_destination': '13:34',
        'destination': 'Paris Gare du Nord',
    }
    for key, value in expected_fields.items():
        assert trip[key] == value
|
||||
|
||||
|
||||
def test_gwr_too_slow_excluded():
    """A GWR leg longer than the allowed journey time produces no trip."""
    # 2h 26m GWR journey exceeds MAX_GWR_MINUTES (110)
    result = combine_trips([GWR_SLOW], [ES_PARIS], DATE)
    assert result == []
|
||||
|
||||
|
||||
def test_eurostar_too_early_excluded():
    """A Eurostar leaving inside the minimum connection window is rejected."""
    # Eurostar departs before min connection time has elapsed
    result = combine_trips([GWR_FAST], [ES_EARLY], DATE)
    assert result == []
|
||||
|
||||
|
||||
def test_no_trains_returns_empty():
    """With neither GWR nor Eurostar services there is nothing to combine."""
    result = combine_trips([], [], DATE)
    assert result == []
|
||||
|
||||
def test_no_gwr_returns_empty():
    """Eurostar services alone cannot form a trip."""
    result = combine_trips([], [ES_PARIS], DATE)
    assert result == []
|
||||
|
||||
def test_no_eurostar_returns_empty():
    """GWR services alone cannot form a trip."""
    result = combine_trips([GWR_FAST], [], DATE)
    assert result == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Connection window constraints
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_min_connection_enforced():
    """The minimum connection time is inclusive at its boundary."""
    # Arrive Paddington 08:45, need 75 min → earliest St Pancras 10:00
    # ES at 09:59 should be excluded, 10:00 should be included
    es_too_close = {
        'depart_st_pancras': '09:59',
        'arrive_destination': '13:00',
        'destination': 'Paris Gare du Nord',
    }
    es_ok = {
        'depart_st_pancras': '10:00',
        'arrive_destination': '13:00',
        'destination': 'Paris Gare du Nord',
    }
    rejected = combine_trips([GWR_FAST], [es_too_close], DATE)
    assert rejected == []
    accepted = combine_trips([GWR_FAST], [es_ok], DATE)
    assert len(accepted) == 1
|
||||
|
||||
|
||||
def test_max_connection_enforced():
    """The maximum connection time is inclusive at its boundary."""
    # Arrive Paddington 08:45, max 140 min → latest St Pancras 11:05
    es_ok = {
        'depart_st_pancras': '11:05',
        'arrive_destination': '14:00',
        'destination': 'Paris Gare du Nord',
    }
    es_too_late = {
        'depart_st_pancras': '11:06',
        'arrive_destination': '14:00',
        'destination': 'Paris Gare du Nord',
    }
    accepted = combine_trips([GWR_FAST], [es_ok], DATE)
    assert len(accepted) == 1
    rejected = combine_trips([GWR_FAST], [es_too_late], DATE)
    assert rejected == []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Only earliest valid Eurostar per GWR departure
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_only_earliest_eurostar_per_gwr():
    """When two Eurostars fit the window, only the earlier one is paired."""
    first_es = {
        'depart_st_pancras': '10:01',
        'arrive_destination': '13:34',
        'destination': 'Paris Gare du Nord',
    }
    second_es = {
        'depart_st_pancras': '11:01',
        'arrive_destination': '14:34',
        'destination': 'Paris Gare du Nord',
    }
    trips = combine_trips([GWR_FAST], [first_es, second_es], DATE)
    assert len(trips) == 1
    assert trips[0]['depart_st_pancras'] == '10:01'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Multiple GWR trains → multiple trips
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_multiple_gwr_trains():
    """Each GWR train that can reach the Eurostar gets its own trip, in order."""
    later_gwr = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'}
    eurostar = {
        'depart_st_pancras': '11:01',
        'arrive_destination': '14:34',
        'destination': 'Paris Gare du Nord',
    }
    trips = combine_trips([GWR_FAST, later_gwr], [eurostar], DATE)
    assert len(trips) == 2
    assert trips[0]['depart_bristol'] == '07:00'
    assert trips[1]['depart_bristol'] == '08:00'
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Duration fields
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_gwr_duration_in_trip():
    """The trip reports the GWR leg's journey time as a formatted string."""
    first_trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert first_trip['gwr_duration'] == '1h 45m'
|
||||
|
||||
|
||||
def test_total_duration_in_trip():
    """The trip reports door-to-door time from Bristol departure to arrival."""
    # depart 07:00, arrive 13:34 → 6h 34m
    first_trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert first_trip['total_duration'] == '6h 34m'
|
||||
|
||||
|
||||
def test_connection_duration_in_trip():
    """The trip reports the gap between Paddington arrival and St Pancras departure."""
    # arrive Paddington 08:45, depart St Pancras 10:01 → 1h 16m
    first_trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert first_trip['connection_duration'] == '1h 16m'
|
||||
Loading…
Add table
Add a link
Reference in a new issue