Add full type annotations and black formatting across all modules

Annotated all functions with mypy --strict-compatible types (-> None, dict[str,
Any], Generator types, etc.), added # type: ignore for untyped third-party libs
(lxml), and reformatted with black. All 18 source files now pass mypy --strict
with zero errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-05-25 21:48:53 +01:00
parent 453d6244ec
commit 13c4341f3a
14 changed files with 1802 additions and 974 deletions

File diff suppressed because it is too large. Load diff

View file

@ -1,49 +1,53 @@
import os
import time
from pathlib import Path
from typing import Any
import pytest
from cache import get_cached, set_cached
@pytest.fixture
def tmp_cache(tmp_path, monkeypatch):
def tmp_cache(tmp_path: Path, monkeypatch: Any) -> Path:
import cache as cache_module
monkeypatch.setattr(cache_module, 'CACHE_DIR', str(tmp_path))
monkeypatch.setattr(cache_module, "CACHE_DIR", str(tmp_path))
return tmp_path
def test_get_cached_returns_none_for_missing_key(tmp_cache):
assert get_cached('no_such_key') is None
def test_get_cached_returns_none_for_missing_key(tmp_cache: Path) -> None:
assert get_cached("no_such_key") is None
def test_set_and_get_cached_roundtrip(tmp_cache):
set_cached('my_key', {'a': 1})
assert get_cached('my_key') == {'a': 1}
def test_set_and_get_cached_roundtrip(tmp_cache: Path) -> None:
set_cached("my_key", {"a": 1})
assert get_cached("my_key") == {"a": 1}
def test_get_cached_no_ttl_never_expires(tmp_cache):
set_cached('k', [1, 2, 3])
def test_get_cached_no_ttl_never_expires(tmp_cache: Path) -> None:
set_cached("k", [1, 2, 3])
# Backdate the file by 2 days
path = tmp_cache / 'k.json'
path = tmp_cache / "k.json"
old = time.time() - 2 * 86400
os.utime(path, (old, old))
assert get_cached('k') == [1, 2, 3]
assert get_cached("k") == [1, 2, 3]
def test_get_cached_within_ttl(tmp_cache):
set_cached('k', 'fresh')
assert get_cached('k', ttl=3600) == 'fresh'
def test_get_cached_within_ttl(tmp_cache: Path) -> None:
set_cached("k", "fresh")
assert get_cached("k", ttl=3600) == "fresh"
def test_get_cached_expired_returns_none(tmp_cache):
set_cached('k', 'stale')
path = tmp_cache / 'k.json'
def test_get_cached_expired_returns_none(tmp_cache: Path) -> None:
set_cached("k", "stale")
path = tmp_cache / "k.json"
old = time.time() - 25 * 3600 # 25 hours ago
os.utime(path, (old, old))
assert get_cached('k', ttl=24 * 3600) is None
assert get_cached("k", ttl=24 * 3600) is None
def test_get_cached_invalid_json_returns_none(tmp_cache):
path = tmp_cache / 'broken.json'
def test_get_cached_invalid_json_returns_none(tmp_cache: Path) -> None:
path = tmp_cache / "broken.json"
path.write_text('{"not": "finished"')
assert get_cached('broken') is None
assert get_cached("broken") is None

View file

@ -1,30 +1,47 @@
from typing import Any
import pytest
from scraper.eurostar import _parse_graphql, _parse_graphql_leg, search_url
def _gql_response(journeys: list) -> dict:
return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}}
def _gql_response(journeys: list[dict[str, Any]]) -> dict[str, Any]:
return {"data": {"journeySearch": {"outbound": {"journeys": journeys}}}}
def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES',
plus_price=None, plus_seats=None) -> dict:
fares = [{
'classOfService': {'code': 'STANDARD'},
'prices': {'displayPrice': price},
'seats': seats,
'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}]
if service_name else [],
}]
def _journey(
departs: str,
arrives: str,
price: float | None = None,
seats: int | None = None,
service_name: str = "",
carrier: str = "ES",
plus_price: float | None = None,
plus_seats: int | None = None,
) -> dict[str, Any]:
fares: list[dict[str, Any]] = [
{
"classOfService": {"code": "STANDARD"},
"prices": {"displayPrice": price},
"seats": seats,
"legs": (
[{"serviceName": service_name, "serviceType": {"code": carrier}}]
if service_name
else []
),
}
]
if plus_price is not None or plus_seats is not None:
fares.append({
'classOfService': {'code': 'PLUS'},
'prices': {'displayPrice': plus_price},
'seats': plus_seats,
'legs': [],
})
fares.append(
{
"classOfService": {"code": "PLUS"},
"prices": {"displayPrice": plus_price},
"seats": plus_seats,
"legs": [],
}
)
return {
'timing': {'departureTime': departs, 'arrivalTime': arrives},
'fares': fares,
"timing": {"departureTime": departs, "arrivalTime": arrives},
"fares": fares,
}
@ -32,114 +49,149 @@ def _journey(departs: str, arrives: str, price=None, seats=None, service_name=''
# _parse_graphql
# ---------------------------------------------------------------------------
def test_parse_graphql_single_journey():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')])
services = _parse_graphql(data, 'Paris Gare du Nord')
def test_parse_graphql_single_journey() -> None:
data = _gql_response(
[_journey("09:31", "12:55", price=156, seats=37, service_name="9014")]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert len(services) == 1
s = services[0]
assert s['depart_st_pancras'] == '09:31'
assert s['arrive_destination'] == '12:55'
assert s['destination'] == 'Paris Gare du Nord'
assert s['train_number'] == 'ES 9014'
assert s['price'] == 156.0
assert s['seats'] == 37
assert s['plus_price'] is None
assert s['plus_seats'] is None
assert s["depart_st_pancras"] == "09:31"
assert s["arrive_destination"] == "12:55"
assert s["destination"] == "Paris Gare du Nord"
assert s["train_number"] == "ES 9014"
assert s["price"] == 156.0
assert s["seats"] == 37
assert s["plus_price"] is None
assert s["plus_seats"] is None
def test_parse_graphql_standard_premier_price():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014',
plus_price=220, plus_seats=12)])
services = _parse_graphql(data, 'Paris Gare du Nord')
def test_parse_graphql_standard_premier_price() -> None:
data = _gql_response(
[
_journey(
"09:31",
"12:55",
price=156,
seats=37,
service_name="9014",
plus_price=220,
plus_seats=12,
)
]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert len(services) == 1
s = services[0]
assert s['price'] == 156.0
assert s['seats'] == 37
assert s['plus_price'] == 220.0
assert s['plus_seats'] == 12
assert s["price"] == 156.0
assert s["seats"] == 37
assert s["plus_price"] == 220.0
assert s["plus_seats"] == 12
def test_parse_graphql_plus_price_none_when_not_returned():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37)])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['plus_price'] is None
assert services[0]['plus_seats'] is None
def test_parse_graphql_plus_price_none_when_not_returned() -> None:
data = _gql_response([_journey("09:31", "12:55", price=156, seats=37)])
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["plus_price"] is None
assert services[0]["plus_seats"] is None
def test_parse_graphql_half_pound_price():
data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')])
services = _parse_graphql(data, 'Amsterdam Centraal')
assert services[0]['price'] == 192.5
def test_parse_graphql_half_pound_price() -> None:
data = _gql_response(
[_journey("09:01", "14:20", price=192.5, seats=25, service_name="9116")]
)
services = _parse_graphql(data, "Amsterdam Centraal")
assert services[0]["price"] == 192.5
def test_parse_graphql_null_price():
data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)])
services = _parse_graphql(data, 'Amsterdam Centraal')
assert services[0]['price'] is None
assert services[0]['seats'] == 0
def test_parse_graphql_null_price() -> None:
data = _gql_response([_journey("06:16", "11:09", price=None, seats=0)])
services = _parse_graphql(data, "Amsterdam Centraal")
assert services[0]["price"] is None
assert services[0]["seats"] == 0
def test_parse_graphql_sorted_by_departure():
data = _gql_response([
_journey('10:31', '13:55'),
_journey('07:31', '10:59'),
])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['depart_st_pancras'] == '07:31'
assert services[1]['depart_st_pancras'] == '10:31'
def test_parse_graphql_sorted_by_departure() -> None:
data = _gql_response(
[
_journey("10:31", "13:55"),
_journey("07:31", "10:59"),
]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["depart_st_pancras"] == "07:31"
assert services[1]["depart_st_pancras"] == "10:31"
def test_parse_graphql_deduplicates_same_departure_time():
data = _gql_response([
_journey('06:16', '11:09', price=None, seats=0),
_journey('06:16', '11:09', price=None, seats=0),
_journey('06:16', '11:09', price=None, seats=0),
])
services = _parse_graphql(data, 'Amsterdam Centraal')
def test_parse_graphql_deduplicates_same_departure_time() -> None:
data = _gql_response(
[
_journey("06:16", "11:09", price=None, seats=0),
_journey("06:16", "11:09", price=None, seats=0),
_journey("06:16", "11:09", price=None, seats=0),
]
)
services = _parse_graphql(data, "Amsterdam Centraal")
assert len(services) == 1
def test_parse_graphql_no_legs_gives_empty_train_number():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['train_number'] == ''
def test_parse_graphql_no_legs_gives_empty_train_number() -> None:
data = _gql_response(
[_journey("09:31", "12:55", price=156, seats=37, service_name="")]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["train_number"] == ""
def test_parse_graphql_empty_journeys():
def test_parse_graphql_empty_journeys() -> None:
data = _gql_response([])
assert _parse_graphql(data, 'Paris Gare du Nord') == []
assert _parse_graphql(data, "Paris Gare du Nord") == []
def test_parse_graphql_inbound_leg():
data = {'data': {'journeySearch': {'inbound': {'journeys': [
_journey('17:12', '18:30', price=49, seats=43, service_name='9035')
]}}}}
services = _parse_graphql_leg(data, 'Paris Gare du Nord', 'inbound', 'inbound')
def test_parse_graphql_inbound_leg() -> None:
data: dict[str, Any] = {
"data": {
"journeySearch": {
"inbound": {
"journeys": [
_journey(
"17:12", "18:30", price=49, seats=43, service_name="9035"
)
]
}
}
}
}
services = _parse_graphql_leg(data, "Paris Gare du Nord", "inbound", "inbound")
assert services == [{
'depart_destination': '17:12',
'arrive_st_pancras': '18:30',
'destination': 'Paris Gare du Nord',
'train_number': 'ES 9035',
'price': 49.0,
'seats': 43,
'plus_price': None,
'plus_seats': None,
}]
assert services == [
{
"depart_destination": "17:12",
"arrive_st_pancras": "18:30",
"destination": "Paris Gare du Nord",
"train_number": "ES 9035",
"price": 49.0,
"seats": 43,
"plus_price": None,
"plus_seats": None,
}
]
# ---------------------------------------------------------------------------
# search_url
# ---------------------------------------------------------------------------
def test_search_url():
url = search_url('Paris Gare du Nord', '2026-04-10')
def test_search_url() -> None:
url = search_url("Paris Gare du Nord", "2026-04-10")
assert url == (
'https://www.eurostar.com/search/uk-en'
'?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10'
"https://www.eurostar.com/search/uk-en"
"?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10"
)
def test_search_url_return():
url = search_url('Paris Gare du Nord', '2026-04-10', return_date='2026-04-17')
assert url.endswith('&outbound=2026-04-10&inbound=2026-04-17')
def test_search_url_return() -> None:
url = search_url("Paris Gare du Nord", "2026-04-10", return_date="2026-04-17")
assert url.endswith("&outbound=2026-04-10&inbound=2026-04-17")

View file

@ -1,4 +1,5 @@
import threading
from typing import Any, Generator
import pytest
from werkzeug.serving import make_server
@ -8,12 +9,16 @@ import app as app_module
playwright_sync = pytest.importorskip("playwright.sync_api")
sync_playwright = playwright_sync.sync_playwright
rtt_scraper: Any = app_module.rtt_scraper # type: ignore[attr-defined]
gwr_fares_scraper: Any = app_module.gwr_fares_scraper # type: ignore[attr-defined]
eurostar_scraper: Any = app_module.eurostar_scraper # type: ignore[attr-defined]
def _stub_return_data(monkeypatch):
def _stub_return_data(monkeypatch: Any) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -24,7 +29,7 @@ def _stub_return_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch_from_paddington",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -35,7 +40,7 @@ def _stub_return_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date, direction="to_paddington": {
"07:00": {
@ -51,7 +56,11 @@ def _stub_return_data(monkeypatch):
},
)
def fake_advance_streaming(station_crs, travel_date, direction="to_paddington"):
def fake_advance_streaming(
station_crs: str,
travel_date: str,
direction: str = "to_paddington",
) -> Generator[dict[str, Any], None, None]:
if direction == "from_paddington":
yield {
"17:15": {
@ -84,18 +93,20 @@ def _stub_return_data(monkeypatch):
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
fake_advance_streaming,
)
def fake_advance(station_crs, travel_date, direction="to_paddington"):
def fake_advance(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, Any]:
pages = list(fake_advance_streaming(station_crs, travel_date, direction))
return pages[0] if pages else {}
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance", fake_advance)
monkeypatch.setattr(gwr_fares_scraper, "fetch_advance", fake_advance)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch_return",
lambda destination, outbound_date, return_date: {
"outbound": [
@ -126,11 +137,11 @@ def _stub_return_data(monkeypatch):
)
def _stub_single_data(monkeypatch):
def _stub_single_data(monkeypatch: Any) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -141,7 +152,7 @@ def _stub_single_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {
@ -151,7 +162,7 @@ def _stub_single_data(monkeypatch):
},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {
"ticket": "Advance Single",
@ -166,17 +177,17 @@ def _stub_single_data(monkeypatch):
},
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -194,7 +205,7 @@ def _stub_single_data(monkeypatch):
@pytest.fixture
def local_server(monkeypatch):
def local_server(monkeypatch: Any) -> Generator[str, None, None]:
_stub_return_data(monkeypatch)
app_module.app.config["TESTING"] = True
server = make_server("127.0.0.1", 0, app_module.app)
@ -208,7 +219,7 @@ def local_server(monkeypatch):
@pytest.fixture
def single_server(monkeypatch):
def single_server(monkeypatch: Any) -> Generator[str, None, None]:
_stub_single_data(monkeypatch)
app_module.app.config["TESTING"] = True
server = make_server("127.0.0.1", 0, app_module.app)
@ -221,14 +232,14 @@ def single_server(monkeypatch):
thread.join(timeout=5)
def _launch_browser(playwright):
def _launch_browser(playwright: Any) -> Any:
try:
return playwright.chromium.launch(headless=True)
except Exception as exc:
pytest.skip(f"Chromium browser unavailable for Playwright: {exc}")
def test_single_advance_standard_totals_after_click(single_server):
def test_single_advance_standard_totals_after_click(single_server: str) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -250,11 +261,13 @@ def test_single_advance_standard_totals_after_click(single_server):
browser.close()
def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
def test_single_next_date_advance_standard_labels_unreachable_rows(
monkeypatch: Any,
) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -265,7 +278,7 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {
@ -275,7 +288,7 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {
"ticket": "Advance Single",
@ -286,17 +299,17 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
},
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -352,7 +365,9 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
thread.join(timeout=5)
def test_single_advance_standard_premier_totals_on_initial_url(single_server):
def test_single_advance_standard_premier_totals_on_initial_url(
single_server: str,
) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -372,33 +387,47 @@ def test_single_advance_standard_premier_totals_on_initial_url(single_server):
browser.close()
def test_single_advance_first_falls_back_to_walkon_when_unavailable(monkeypatch):
def test_single_advance_first_falls_back_to_walkon_when_unavailable(
monkeypatch: Any,
) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{"depart_bristol": "07:00", "arrive_paddington": "08:45", "headcode": "1A23"},
{
"depart_bristol": "07:00",
"arrive_paddington": "08:45",
"headcode": "1A23",
},
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {"ticket": "Advance Single", "price": 50.0, "code": "ADV"},
"advance_1st": None,
},
}
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance", lambda station_crs, travel_date: advance_fares)
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance_streaming", lambda station_crs, travel_date: iter([advance_fares]))
monkeypatch.setattr(
app_module.eurostar_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -441,7 +470,7 @@ def test_single_advance_first_falls_back_to_walkon_when_unavailable(monkeypatch)
thread.join(timeout=5)
def test_return_advance_first_standard_premier_totals(local_server):
def test_return_advance_first_standard_premier_totals(local_server: str) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -477,7 +506,9 @@ def test_return_advance_first_standard_premier_totals(local_server):
browser.close()
def test_return_advance_first_standard_premier_totals_on_initial_url(local_server):
def test_return_advance_first_standard_premier_totals_on_initial_url(
local_server: str,
) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()

View file

@ -1,71 +1,74 @@
import pytest
from scraper.realtime_trains import _fmt, _parse_services
# ---------------------------------------------------------------------------
# _fmt
# ---------------------------------------------------------------------------
def test_fmt_four_digits():
assert _fmt('0830') == '08:30'
def test_fmt_already_colon():
assert _fmt('08:30') == '08:30'
def test_fmt_four_digits() -> None:
assert _fmt("0830") == "08:30"
def test_fmt_strips_non_digits():
assert _fmt('08h30') == '08:30'
def test_fmt_already_colon() -> None:
assert _fmt("08:30") == "08:30"
def test_fmt_strips_non_digits() -> None:
assert _fmt("08h30") == "08:30"
# ---------------------------------------------------------------------------
# _parse_services
# ---------------------------------------------------------------------------
def _make_html(services: list[tuple[str, str]], time_class: str) -> str:
"""Build a minimal servicelist HTML with (train_id, time) pairs."""
items = ''
items = ""
for tid, time in services:
items += f'''
items += f"""
<a class="service">
<div class="tid">{tid}</div>
<div class="time plan {time_class}">{time}</div>
</a>'''
</a>"""
return f'<div class="servicelist">{items}</div>'
def test_parse_services_departures():
html = _make_html([('1A23', '0700'), ('2B45', '0830')], 'd')
result = _parse_services(html, 'div.time.plan.d')
assert result == {'1A23': '07:00', '2B45': '08:30'}
def test_parse_services_departures() -> None:
html = _make_html([("1A23", "0700"), ("2B45", "0830")], "d")
result = _parse_services(html, "div.time.plan.d")
assert result == {"1A23": "07:00", "2B45": "08:30"}
def test_parse_services_arrivals():
html = _make_html([('1A23', '0845')], 'a')
result = _parse_services(html, 'div.time.plan.a')
assert result == {'1A23': '08:45'}
def test_parse_services_arrivals() -> None:
html = _make_html([("1A23", "0845")], "a")
result = _parse_services(html, "div.time.plan.a")
assert result == {"1A23": "08:45"}
def test_parse_services_no_servicelist():
assert _parse_services('<html></html>', 'div.time.plan.d') == {}
def test_parse_services_no_servicelist() -> None:
assert _parse_services("<html></html>", "div.time.plan.d") == {}
def test_parse_services_skips_missing_time():
html = '''
def test_parse_services_skips_missing_time() -> None:
html = """
<div class="servicelist">
<a class="service"><div class="tid">1A23</div></a>
<a class="service"><div class="tid">2B45</div><div class="time plan d">0900</div></a>
</div>'''
result = _parse_services(html, 'div.time.plan.d')
assert '1A23' not in result
assert result == {'2B45': '09:00'}
</div>"""
result = _parse_services(html, "div.time.plan.d")
assert "1A23" not in result
assert result == {"2B45": "09:00"}
def test_parse_services_skips_empty_time():
html = '''
def test_parse_services_skips_empty_time() -> None:
html = """
<div class="servicelist">
<a class="service">
<div class="tid">1A23</div>
<div class="time plan d"> </div>
</a>
</div>'''
result = _parse_services(html, 'div.time.plan.d')
</div>"""
result = _parse_services(html, "div.time.plan.d")
assert result == {}

View file

@ -6,64 +6,80 @@ from trip_planner import (
_fmt_duration,
)
DATE = '2026-03-30'
DATE = "2026-03-30"
# ---------------------------------------------------------------------------
# _fmt_duration
# ---------------------------------------------------------------------------
def test_fmt_duration_hours_and_minutes():
assert _fmt_duration(95) == '1h 35m'
def test_fmt_duration_exact_hours():
assert _fmt_duration(120) == '2h'
def test_fmt_duration_hours_and_minutes() -> None:
assert _fmt_duration(95) == "1h 35m"
def test_fmt_duration_minutes_only():
assert _fmt_duration(45) == '45m'
def test_fmt_duration_exact_hours() -> None:
assert _fmt_duration(120) == "2h"
def test_fmt_duration_minutes_only() -> None:
assert _fmt_duration(45) == "45m"
# ---------------------------------------------------------------------------
# combine_trips — basic pairing
# ---------------------------------------------------------------------------
GWR_FAST = {'depart_bristol': '07:00', 'arrive_paddington': '08:45'} # 1h 45m
GWR_SLOW = {'depart_bristol': '07:00', 'arrive_paddington': '09:26'} # 2h 26m — connection too short for ES_PARIS
GWR_FAST = {"depart_bristol": "07:00", "arrive_paddington": "08:45"} # 1h 45m
GWR_SLOW = {
"depart_bristol": "07:00",
"arrive_paddington": "09:26",
} # 2h 26m — connection too short for ES_PARIS
ES_PARIS = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
ES_EARLY = {'depart_st_pancras': '09:00', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord'}
ES_PARIS = {
"depart_st_pancras": "10:01",
"arrive_destination": "13:34",
"destination": "Paris Gare du Nord",
}
ES_EARLY = {
"depart_st_pancras": "09:00",
"arrive_destination": "12:00",
"destination": "Paris Gare du Nord",
}
def test_valid_trip_is_returned():
def test_valid_trip_is_returned() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert len(trips) == 1
t = trips[0]
assert t['depart_bristol'] == '07:00'
assert t['arrive_paddington'] == '08:45'
assert t['depart_st_pancras'] == '10:01'
assert t['arrive_destination'] == '13:34'
assert t['destination'] == 'Paris Gare du Nord'
assert t["depart_bristol"] == "07:00"
assert t["arrive_paddington"] == "08:45"
assert t["depart_st_pancras"] == "10:01"
assert t["arrive_destination"] == "13:34"
assert t["destination"] == "Paris Gare du Nord"
def test_gwr_too_slow_excluded():
def test_gwr_too_slow_excluded() -> None:
# arrive 09:26, Eurostar 10:01 → 35 min connection < 50 min minimum
trips = combine_trips([GWR_SLOW], [ES_PARIS], DATE)
assert trips == []
def test_eurostar_too_early_excluded():
def test_eurostar_too_early_excluded() -> None:
# Eurostar departs before min connection time has elapsed
trips = combine_trips([GWR_FAST], [ES_EARLY], DATE)
assert trips == []
def test_no_trains_returns_empty():
def test_no_trains_returns_empty() -> None:
assert combine_trips([], [], DATE) == []
def test_no_gwr_returns_empty():
def test_no_gwr_returns_empty() -> None:
assert combine_trips([], [ES_PARIS], DATE) == []
def test_no_eurostar_returns_empty():
def test_no_eurostar_returns_empty() -> None:
assert combine_trips([GWR_FAST], [], DATE) == []
@ -71,140 +87,211 @@ def test_no_eurostar_returns_empty():
# Connection window constraints
# ---------------------------------------------------------------------------
def test_min_connection_enforced():
def test_min_connection_enforced() -> None:
# Arrive Paddington 08:45, need 75 min → earliest St Pancras 10:00
# ES at 09:59 should be excluded, 10:00 should be included
es_too_close = {'depart_st_pancras': '09:59', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
es_ok = {'depart_st_pancras': '10:00', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
assert combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == []
es_too_close = {
"depart_st_pancras": "09:59",
"arrive_destination": "13:00",
"destination": "Paris Gare du Nord",
}
es_ok = {
"depart_st_pancras": "10:00",
"arrive_destination": "13:00",
"destination": "Paris Gare du Nord",
}
assert (
combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == []
)
trips = combine_trips([GWR_FAST], [es_ok], DATE, min_connection_minutes=75)
assert len(trips) == 1
def test_max_connection_enforced():
def test_max_connection_enforced() -> None:
# Arrive Paddington 08:45, max 140 min → latest St Pancras 11:05
es_ok = {'depart_st_pancras': '11:05', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
es_too_late = {'depart_st_pancras': '11:06', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
es_ok = {
"depart_st_pancras": "11:05",
"arrive_destination": "14:00",
"destination": "Paris Gare du Nord",
}
es_too_late = {
"depart_st_pancras": "11:06",
"arrive_destination": "14:00",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST], [es_ok], DATE, max_connection_minutes=140)
assert len(trips) == 1
assert combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == []
assert (
combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == []
)
# ---------------------------------------------------------------------------
# Only earliest valid Eurostar per GWR departure
# ---------------------------------------------------------------------------
def test_only_earliest_eurostar_per_gwr():
es1 = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
es2 = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
def test_only_earliest_eurostar_per_gwr() -> None:
es1 = {
"depart_st_pancras": "10:01",
"arrive_destination": "13:34",
"destination": "Paris Gare du Nord",
}
es2 = {
"depart_st_pancras": "11:01",
"arrive_destination": "14:34",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST], [es1, es2], DATE)
assert len(trips) == 1
assert trips[0]['depart_st_pancras'] == '10:01'
assert trips[0]["depart_st_pancras"] == "10:01"
# ---------------------------------------------------------------------------
# Multiple GWR trains → multiple trips
# ---------------------------------------------------------------------------
def test_multiple_gwr_trains():
gwr2 = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'}
es = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
def test_multiple_gwr_trains() -> None:
gwr2 = {"depart_bristol": "08:00", "arrive_paddington": "09:45"}
es = {
"depart_st_pancras": "11:01",
"arrive_destination": "14:34",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST, gwr2], [es], DATE, max_connection_minutes=140)
assert len(trips) == 2
assert trips[0]['depart_bristol'] == '07:00'
assert trips[1]['depart_bristol'] == '08:00'
assert trips[0]["depart_bristol"] == "07:00"
assert trips[1]["depart_bristol"] == "08:00"
# ---------------------------------------------------------------------------
# Duration fields
# ---------------------------------------------------------------------------
def test_gwr_duration_in_trip():
def test_gwr_duration_in_trip() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['gwr_duration'] == '1h 45m'
assert trips[0]["gwr_duration"] == "1h 45m"
def test_total_duration_in_trip():
def test_total_duration_in_trip() -> None:
# depart 07:00, arrive 13:34 → 6h 34m
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['total_duration'] == '6h 34m'
assert trips[0]["total_duration"] == "6h 34m"
def test_connection_duration_in_trip():
def test_connection_duration_in_trip() -> None:
# arrive Paddington 08:45, depart St Pancras 10:01 → 1h 16m
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['connection_duration'] == '1h 16m'
assert trips[0]["connection_duration"] == "1h 16m"
def test_find_unreachable_eurostars_excludes_connectable_services() -> None:
    """Only Eurostars outside the connection window are reported unreachable.

    Of three Eurostar departures, the one that the single GWR service can
    connect to (10:15) must be left out; the too-early (09:30) and too-late
    (12:30) departures must be returned, in that order.
    """
    # GWR arrives 08:45; default min=50/max=110 → viable window 09:35–10:35.
    # 09:30 too early, 10:15 connectable, 12:30 beyond max connection.
    gwr = [
        {"depart_bristol": "07:00", "arrive_paddington": "08:45"},
    ]
    eurostar = [
        {
            "depart_st_pancras": "09:30",
            "arrive_destination": "12:00",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9001",
        },
        {
            "depart_st_pancras": "10:15",
            "arrive_destination": "13:40",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9002",
        },
        {
            "depart_st_pancras": "12:30",
            "arrive_destination": "15:55",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9003",
        },
    ]
    unreachable = find_unreachable_morning_eurostars(gwr, eurostar, DATE)
    assert [s["depart_st_pancras"] for s in unreachable] == ["09:30", "12:30"]
def test_combine_trips_includes_ticket_fields() -> None:
    """Every combined trip carries the three ticket keys, even without fares."""
    trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
    assert len(trips) == 1
    t = trips[0]
    # Only presence of the keys is checked here; their values are covered by
    # the fare-specific tests.
    assert "ticket_name" in t
    assert "ticket_price" in t
    assert "ticket_code" in t
def test_combine_trips_uses_gwr_fares_when_provided() -> None:
    """Fares passed via ``gwr_fares`` are copied onto the matching trip.

    The fares mapping is keyed by a "HH:MM" time string; the "07:00" entry is
    expected to populate ``ticket_price`` and ``ticket_code`` of the trip.
    """
    fares = {
        "07:00": {"ticket": "Super Off-Peak Single", "price": 49.30, "code": "SSS"}
    }
    trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares=fares)
    assert len(trips) == 1
    assert trips[0]["ticket_price"] == 49.30
    assert trips[0]["ticket_code"] == "SSS"
def test_combine_trips_ticket_price_none_when_no_fares() -> None:
    """An empty ``gwr_fares`` mapping still yields a trip, with a ``None`` price."""
    trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares={})
    assert len(trips) == 1
    assert trips[0]["ticket_price"] is None
def test_find_unreachable_eurostars_returns_empty_when_all_connectable() -> None:
    """When the only Eurostar is connectable, nothing is reported unreachable."""
    gwr = [
        {"depart_bristol": "07:00", "arrive_paddington": "08:45"},
    ]
    eurostar = [
        {
            "depart_st_pancras": "10:15",
            "arrive_destination": "13:40",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9002",
        },
    ]
    assert find_unreachable_morning_eurostars(gwr, eurostar, DATE) == []
def test_combine_inbound_trips_pairs_eurostar_to_paddington_departure() -> None:
    """An inbound Eurostar is paired with an onward GWR service from Paddington.

    One Eurostar arriving at St Pancras at 16:30 and one GWR departure at
    17:15 are combined with a 30–120 minute connection window. The resulting
    trip must carry the times of both legs, the fare looked up under the
    "17:15" key, and a ``check_in_by`` time.
    """
    eurostar = [
        {
            "depart_destination": "15:12",
            "arrive_st_pancras": "16:30",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9035",
        }
    ]
    gwr = [
        {
            "depart_paddington": "17:15",
            "arrive_destination": "18:55",
            "headcode": "1B99",
        }
    ]
    fares = {"17:15": {"ticket": "Off-Peak Single", "price": 63.60, "code": "SVS"}}
    trips = combine_inbound_trips(
        eurostar,
        gwr,
        DATE,
        min_connection_minutes=30,
        max_connection_minutes=120,
        gwr_fares=fares,
    )
    assert len(trips) == 1
    assert trips[0]["depart_destination"] == "15:12"
    assert trips[0]["arrive_st_pancras"] == "16:30"
    assert trips[0]["depart_paddington"] == "17:15"
    # The GWR leg's "arrive_destination" is surfaced as "arrive_uk_station".
    assert trips[0]["arrive_uk_station"] == "18:55"
    assert trips[0]["ticket_price"] == 63.60
    # 14:42 is 30 minutes before the 15:12 Eurostar departure.
    assert trips[0]["check_in_by"] == "14:42"