Add full type annotations and black formatting across all modules

Annotated all functions with mypy --strict-compatible types (-> None, dict[str,
Any], Generator types, etc.), added # type: ignore for untyped third-party libs
(lxml), and reformatted with black. All 18 source files now pass mypy --strict
with zero errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-05-25 21:48:53 +01:00
parent 453d6244ec
commit 13c4341f3a
14 changed files with 1802 additions and 974 deletions

603
app.py

File diff suppressed because it is too large Load diff

View file

@ -2,16 +2,17 @@ import json
import os
import time
import uuid
from typing import Any
from config.default import CACHE_DIR # overridden by app config after import
def _cache_path(key: str) -> str:
safe_key = key.replace('/', '_').replace(' ', '_')
safe_key = key.replace("/", "_").replace(" ", "_")
return os.path.join(CACHE_DIR, f"{safe_key}.json")
def get_cached(key: str, ttl: int | None = None):
def get_cached(key: str, ttl: int | None = None) -> Any:
"""Return cached data, or None if missing or older than ttl seconds."""
path = _cache_path(key)
try:
@ -25,10 +26,10 @@ def get_cached(key: str, ttl: int | None = None):
return None
def set_cached(key: str, data) -> None:
def set_cached(key: str, data: Any) -> None:
os.makedirs(CACHE_DIR, exist_ok=True)
path = _cache_path(key)
tmp_path = f"{path}.{os.getpid()}.{uuid.uuid4().hex}.tmp"
with open(tmp_path, 'w') as f:
with open(tmp_path, "w") as f:
json.dump(data, f, indent=2)
os.replace(tmp_path, path)

View file

@ -3,16 +3,21 @@ Circle Line timetable between Paddington (H&C Line) and King's Cross St Pancras.
Parses the TransXChange XML file on first use and caches the result in memory.
"""
import os
import re
import xml.etree.ElementTree as ET
from datetime import datetime, timedelta
from typing import Any
_PAD_STOP = '9400ZZLUPAH1' # Paddington (H&C Line)
_KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras
_PAD_STOP = "9400ZZLUPAH1" # Paddington (H&C Line)
_KXP_STOP = "9400ZZLUKSX3" # King's Cross St Pancras
from config.default import CIRCLE_LINE_XML as _TXC_XML # overridden by app config after import
_NS = {'t': 'http://www.transxchange.org.uk/'}
from config.default import (
CIRCLE_LINE_XML as _TXC_XML,
) # overridden by app config after import
_NS = {"t": "http://www.transxchange.org.uk/"}
# Populated on first call to next_service(); maps direction -> day-type -> sorted
# list of (origin_depart_seconds, destination_arrive_seconds) measured from midnight.
@ -22,8 +27,11 @@ _timetable: dict[str, dict[str, list[tuple[int, int]]]] | None = None
def _parse_duration(s: str | None) -> int:
if not s:
return 0
m = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', s)
return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0)
m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", s)
assert m is not None
return (
int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0)
)
def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
@ -31,23 +39,31 @@ def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
root = tree.getroot()
# Build JPS id -> [(from_stop, to_stop, runtime_secs, wait_secs)]
jps_map: dict[str, list[tuple]] = {}
for jps_el in root.find('t:JourneyPatternSections', _NS):
jps_map: dict[str, list[tuple[str | None, str | None, int, int]]] = {}
jps_sections = root.find("t:JourneyPatternSections", _NS)
assert jps_sections is not None
for jps_el in jps_sections:
links = []
for link in jps_el.findall('t:JourneyPatternTimingLink', _NS):
fr = link.find('t:From/t:StopPointRef', _NS)
to = link.find('t:To/t:StopPointRef', _NS)
rt = link.find('t:RunTime', _NS)
wait = link.find('t:From/t:WaitTime', _NS)
links.append((
fr.text if fr is not None else None,
to.text if to is not None else None,
_parse_duration(rt.text if rt is not None else None),
_parse_duration(wait.text if wait is not None else None),
))
jps_map[jps_el.get('id')] = links
for link in jps_el.findall("t:JourneyPatternTimingLink", _NS):
fr = link.find("t:From/t:StopPointRef", _NS)
to = link.find("t:To/t:StopPointRef", _NS)
rt = link.find("t:RunTime", _NS)
wait = link.find("t:From/t:WaitTime", _NS)
links.append(
(
fr.text if fr is not None else None,
to.text if to is not None else None,
_parse_duration(rt.text if rt is not None else None),
_parse_duration(wait.text if wait is not None else None),
)
)
jps_id = jps_el.get("id")
assert jps_id is not None
jps_map[jps_id] = links
def _seconds_to_depart(links, stop):
def _seconds_to_depart(
links: list[tuple[str | None, str | None, int, int]], stop: str | None
) -> int | None:
"""Seconds from journey start until departure from *stop*."""
elapsed = 0
for fr, to, rt, wait in links:
@ -57,7 +73,9 @@ def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
elapsed += rt
return None
def _seconds_to_arrive(links, stop):
def _seconds_to_arrive(
links: list[tuple[str | None, str | None, int, int]], stop: str | None
) -> int | None:
"""Seconds from journey start until arrival at *stop*."""
elapsed = 0
for fr, to, rt, wait in links:
@ -68,12 +86,14 @@ def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
# Map JP id -> [(direction, origin_depart_offset_secs, destination_arrive_offset_secs)].
jp_offsets: dict[str, list[tuple[str, int, int]]] = {}
for svc in root.find('t:Services', _NS):
for jp in svc.findall('.//t:JourneyPattern', _NS):
jps_ref = jp.find('t:JourneyPatternSectionRefs', _NS)
services_el = root.find("t:Services", _NS)
assert services_el is not None
for svc in services_el:
for jp in svc.findall(".//t:JourneyPattern", _NS):
jps_ref = jp.find("t:JourneyPatternSectionRefs", _NS)
if jps_ref is None:
continue
links = jps_map.get(jps_ref.text, [])
links = jps_map.get(jps_ref.text or "", [])
stops = [l[0] for l in links] + ([links[-1][1]] if links else [])
offsets = []
if (
@ -84,7 +104,7 @@ def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
pad_off = _seconds_to_depart(links, _PAD_STOP)
kxp_off = _seconds_to_arrive(links, _KXP_STOP)
if pad_off is not None and kxp_off is not None:
offsets.append(('pad_to_kx', pad_off, kxp_off))
offsets.append(("pad_to_kx", pad_off, kxp_off))
if (
_PAD_STOP in stops
and _KXP_STOP in stops
@ -93,42 +113,50 @@ def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
kxp_off = _seconds_to_depart(links, _KXP_STOP)
pad_off = _seconds_to_arrive(links, _PAD_STOP)
if kxp_off is not None and pad_off is not None:
offsets.append(('kx_to_pad', kxp_off, pad_off))
offsets.append(("kx_to_pad", kxp_off, pad_off))
if offsets:
jp_offsets[jp.get('id')] = offsets
jp_id = jp.get("id")
assert jp_id is not None
jp_offsets[jp_id] = offsets
result: dict[str, dict[str, list[tuple[int, int]]]] = {
'pad_to_kx': {
'MondayToFriday': [],
'Saturday': [],
'Sunday': [],
"pad_to_kx": {
"MondayToFriday": [],
"Saturday": [],
"Sunday": [],
},
'kx_to_pad': {
'MondayToFriday': [],
'Saturday': [],
'Sunday': [],
"kx_to_pad": {
"MondayToFriday": [],
"Saturday": [],
"Sunday": [],
},
}
for vj in root.find('t:VehicleJourneys', _NS):
jp_ref = vj.find('t:JourneyPatternRef', _NS)
dep_time = vj.find('t:DepartureTime', _NS)
op = vj.find('t:OperatingProfile', _NS)
vehicle_journeys = root.find("t:VehicleJourneys", _NS)
assert vehicle_journeys is not None
for vj in vehicle_journeys:
jp_ref = vj.find("t:JourneyPatternRef", _NS)
dep_time = vj.find("t:DepartureTime", _NS)
op = vj.find("t:OperatingProfile", _NS)
if jp_ref is None or dep_time is None or jp_ref.text not in jp_offsets:
continue
h, m, s = map(int, dep_time.text.split(':'))
if dep_time.text is None:
continue
h, m, s = map(int, dep_time.text.split(":"))
dep_secs = h * 3600 + m * 60 + s
rdt = op.find('.//t:DaysOfWeek', _NS) if op is not None else None
rdt = op.find(".//t:DaysOfWeek", _NS) if op is not None else None
if rdt is None:
continue
for day_el in rdt:
day_type = day_el.tag.split('}')[-1]
day_type = day_el.tag.split("}")[-1]
for direction, origin_off, dest_off in jp_offsets[jp_ref.text]:
if day_type in result[direction]:
result[direction][day_type].append((
dep_secs + origin_off,
dep_secs + dest_off,
))
result[direction][day_type].append(
(
dep_secs + origin_off,
dep_secs + dest_off,
)
)
for direction in result:
for key in result[direction]:
@ -145,12 +173,12 @@ def _get_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]:
def _day_type(weekday: int) -> str:
if weekday < 5:
return 'MondayToFriday'
return 'Saturday' if weekday == 5 else 'Sunday'
return "MondayToFriday"
return "Saturday" if weekday == 5 else "Sunday"
def next_service(
earliest_board: datetime, direction: str = 'pad_to_kx'
earliest_board: datetime, direction: str = "pad_to_kx"
) -> tuple[datetime, datetime] | None:
"""
Given the earliest time a passenger can board at Paddington (H&C Line),
@ -167,7 +195,7 @@ def next_service(
def upcoming_services(
earliest_board: datetime,
count: int = 2,
direction: str = 'pad_to_kx',
direction: str = "pad_to_kx",
preceding: int = 0,
) -> list[tuple[datetime, datetime]]:
"""
@ -179,9 +207,7 @@ def upcoming_services(
"""
timetable = _get_timetable().get(direction, {})[_day_type(earliest_board.weekday())]
board_secs = (
earliest_board.hour * 3600
+ earliest_board.minute * 60
+ earliest_board.second
earliest_board.hour * 3600 + earliest_board.minute * 60 + earliest_board.second
)
midnight = earliest_board.replace(hour=0, minute=0, second=0, microsecond=0)
pre_results = []

View file

@ -1,13 +1,13 @@
import os
# Directory containing TfL reference data (TransXChange XML files etc.)
TFL_DATA_DIR = os.path.expanduser('~/lib/data/tfl')
TFL_DATA_DIR = os.path.expanduser("~/lib/data/tfl")
# Directory for caching scraped train times
CACHE_DIR = os.path.expanduser('~/lib/data/tfl/cache')
CACHE_DIR = os.path.expanduser("~/lib/data/tfl/cache")
# TransXChange timetable file for the Circle Line
CIRCLE_LINE_XML = os.path.join(TFL_DATA_DIR, 'output_txc_01CIR_.xml')
CIRCLE_LINE_XML = os.path.join(TFL_DATA_DIR, "output_txc_01CIR_.xml")
# Default connection window (minutes) between Paddington arrival and St Pancras departure
DEFAULT_MIN_CONNECTION = 70

View file

@ -6,8 +6,10 @@ NewBookingSearch) returns departure time, arrival time, train number,
Eurostar Standard fare price, and seats remaining at that price for every
service on the requested date.
"""
import random
import string
from typing import Any
import requests
@ -16,19 +18,19 @@ DEFAULT_UA = (
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
ST_PANCRAS_STATION_ID = '7015400'
ST_PANCRAS_STATION_ID = "7015400"
ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID
DESTINATION_STATION_IDS = {
'Paris Gare du Nord': '8727100',
'Brussels Midi': '8814001',
'Lille Europe': '8722326',
'Amsterdam Centraal': '8400058',
'Rotterdam Centraal': '8400530',
'Cologne Hbf': '8015458',
"Paris Gare du Nord": "8727100",
"Brussels Midi": "8814001",
"Lille Europe": "8722326",
"Amsterdam Centraal": "8400058",
"Rotterdam Centraal": "8400530",
"Cologne Hbf": "8015458",
}
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
_GATEWAY_URL = "https://site-api.eurostar.com/gateway"
# Query requesting timing, train identity, and Standard fare price + seats.
# Variable names and argument names match the site's own query so the
@ -42,7 +44,7 @@ _GQL_QUERY = (
"journeySearch("
"outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination"
" adults:$adult currency:$currency"
" productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
' productFamilies:["PUB"] contractCode:"EIL_ALL"'
" adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
" infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0"
" wheelchairCompanions:0 nonWheelchairCompanions:0"
@ -85,11 +87,16 @@ _GQL_QUERY = (
"}"
)
_STANDARD = 'STANDARD'
_STANDARD_PLUS = 'PLUS'
_STANDARD = "STANDARD"
_STANDARD_PLUS = "PLUS"
def search_url(destination: str, travel_date: str, direction: str = "outbound", return_date: str | None = None) -> str:
def search_url(
destination: str,
travel_date: str,
direction: str = "outbound",
return_date: str | None = None,
) -> str:
dest_id = DESTINATION_STATION_IDS[destination]
origin = ST_PANCRAS_STATION_ID
destination_id = dest_id
@ -99,18 +106,20 @@ def search_url(destination: str, travel_date: str, direction: str = "outbound",
origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
inbound = None
return (
f'https://www.eurostar.com/search/uk-en'
f'?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}'
+ (f'&inbound={inbound}' if inbound else '')
f"https://www.eurostar.com/search/uk-en"
f"?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}"
+ (f"&inbound={inbound}" if inbound else "")
)
def _generate_cid() -> str:
chars = string.ascii_letters + string.digits
return 'SRCH-' + ''.join(random.choices(chars, k=22))
return "SRCH-" + "".join(random.choices(chars, k=22))
def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> list[dict]:
def _parse_journeys(
journeys: list[dict[str, Any]], destination: str, direction: str
) -> list[dict[str, Any]]:
"""
Parse a NewBookingSearch GraphQL response into a list of service dicts.
@ -121,101 +130,108 @@ def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> l
connecting trains); we keep the entry with the earliest arrival.
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
"""
best: dict[str, dict] = {}
best: dict[str, dict[str, Any]] = {}
for journey in journeys:
dep = journey['timing']['departureTime']
arr = journey['timing']['arrivalTime']
dep = journey["timing"]["departureTime"]
arr = journey["timing"]["arrivalTime"]
std_price = std_seats = plus_price = plus_seats = None
train_number = ''
for fare in (journey.get('fares') or []):
cos = fare['classOfService']['code']
p = fare.get('prices')
price = float(p['displayPrice']) if p and p.get('displayPrice') else None
seats = fare.get('seats')
train_number = ""
for fare in journey.get("fares") or []:
cos = fare["classOfService"]["code"]
p = fare.get("prices")
price = float(p["displayPrice"]) if p and p.get("displayPrice") else None
seats = fare.get("seats")
if not train_number:
legs = fare.get('legs') or []
train_number = ' + '.join(
legs = fare.get("legs") or []
train_number = " + ".join(
f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}"
for leg in legs if leg.get('serviceName')
for leg in legs
if leg.get("serviceName")
)
if cos == _STANDARD:
std_price, std_seats = price, seats
elif cos == _STANDARD_PLUS:
plus_price, plus_seats = price, seats
if direction == 'inbound':
if direction == "inbound":
service = {
'depart_destination': dep,
'arrive_st_pancras': arr,
'destination': destination,
'train_number': train_number,
'price': std_price,
'seats': std_seats,
'plus_price': plus_price,
'plus_seats': plus_seats,
"depart_destination": dep,
"arrive_st_pancras": arr,
"destination": destination,
"train_number": train_number,
"price": std_price,
"seats": std_seats,
"plus_price": plus_price,
"plus_seats": plus_seats,
}
key = dep
arrive_key = 'arrive_st_pancras'
arrive_key = "arrive_st_pancras"
else:
service = {
'depart_st_pancras': dep,
'arrive_destination': arr,
'destination': destination,
'train_number': train_number,
'price': std_price,
'seats': std_seats,
'plus_price': plus_price,
'plus_seats': plus_seats,
"depart_st_pancras": dep,
"arrive_destination": arr,
"destination": destination,
"train_number": train_number,
"price": std_price,
"seats": std_seats,
"plus_price": plus_price,
"plus_seats": plus_seats,
}
key = dep
arrive_key = 'arrive_destination'
arrive_key = "arrive_destination"
if key not in best or arr < best[key][arrive_key]:
best[key] = service
sort_key = 'depart_destination' if direction == 'inbound' else 'depart_st_pancras'
sort_key = "depart_destination" if direction == "inbound" else "depart_st_pancras"
return sorted(best.values(), key=lambda s: s[sort_key])
def _parse_graphql(data: dict, destination: str) -> list[dict]:
journeys = data['data']['journeySearch']['outbound']['journeys']
return _parse_journeys(journeys, destination, 'outbound')
def _parse_graphql(data: dict[str, Any], destination: str) -> list[dict[str, Any]]:
journeys = data["data"]["journeySearch"]["outbound"]["journeys"]
return _parse_journeys(journeys, destination, "outbound")
def _parse_graphql_leg(data: dict, destination: str, leg: str, direction: str) -> list[dict]:
journeys = data['data']['journeySearch'][leg]['journeys']
def _parse_graphql_leg(
data: dict[str, Any], destination: str, leg: str, direction: str
) -> list[dict[str, Any]]:
journeys = data["data"]["journeySearch"][leg]["journeys"]
return _parse_journeys(journeys, destination, direction)
def _payload(origin: str, destination_id: str, outbound: str, inbound: str | None = None) -> dict:
variables = {
'origin': origin,
'destination': destination_id,
'outbound': outbound,
'inbound': inbound,
'currency': 'GBP',
'adult': 1,
'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS],
def _payload(
origin: str, destination_id: str, outbound: str, inbound: str | None = None
) -> dict[str, Any]:
variables: dict[str, Any] = {
"origin": origin,
"destination": destination_id,
"outbound": outbound,
"inbound": inbound,
"currency": "GBP",
"adult": 1,
"filteredClassesOfService": [_STANDARD, _STANDARD_PLUS],
}
return {
'operationName': 'NewBookingSearch',
'variables': variables,
'query': _GQL_QUERY,
"operationName": "NewBookingSearch",
"variables": variables,
"query": _GQL_QUERY,
}
def _headers() -> dict:
def _headers() -> dict[str, str]:
return {
'User-Agent': DEFAULT_UA,
'Content-Type': 'application/json',
'Accept': '*/*',
'Accept-Language':'en-GB',
'Referer': 'https://www.eurostar.com/',
'x-platform': 'web',
'x-market-code': 'uk',
'x-source-url': 'search-app/',
'cid': _generate_cid(),
"User-Agent": DEFAULT_UA,
"Content-Type": "application/json",
"Accept": "*/*",
"Accept-Language": "en-GB",
"Referer": "https://www.eurostar.com/",
"x-platform": "web",
"x-market-code": "uk",
"x-source-url": "search-app/",
"cid": _generate_cid(),
}
def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> list[dict]:
def fetch(
destination: str, travel_date: str, direction: str = "outbound"
) -> list[dict[str, Any]]:
"""
Return all Eurostar services for destination on travel_date.
@ -223,7 +239,7 @@ def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> li
train_number) plus pricing (price, seats) from a single GraphQL call.
"""
dest_id = DESTINATION_STATION_IDS[destination]
if direction == 'inbound':
if direction == "inbound":
origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
else:
origin, destination_id = ST_PANCRAS_STATION_ID, dest_id
@ -234,11 +250,13 @@ def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> li
timeout=20,
)
resp.raise_for_status()
leg_direction = 'inbound' if direction == 'inbound' else 'outbound'
return _parse_graphql_leg(resp.json(), destination, 'outbound', leg_direction)
leg_direction = "inbound" if direction == "inbound" else "outbound"
return _parse_graphql_leg(resp.json(), destination, "outbound", leg_direction)
def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict[str, list[dict]]:
def fetch_return(
destination: str, outbound_date: str, return_date: str
) -> dict[str, list[dict[str, Any]]]:
dest_id = DESTINATION_STATION_IDS[destination]
resp = requests.post(
_GATEWAY_URL,
@ -249,6 +267,6 @@ def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict
resp.raise_for_status()
data = resp.json()
return {
'outbound': _parse_graphql_leg(data, destination, 'outbound', 'outbound'),
'inbound': _parse_graphql_leg(data, destination, 'inbound', 'inbound'),
"outbound": _parse_graphql_leg(data, destination, "outbound", "outbound"),
"inbound": _parse_graphql_leg(data, destination, "inbound", "inbound"),
}

View file

@ -6,6 +6,8 @@ Returns per-train cheapest standard-class fare with restrictions already applied
Cache for 30 days — fares rarely change.
"""
from typing import Any, Generator
import httpx
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"
@ -16,7 +18,7 @@ _WALKON_CODES = {"SSS", "SVS", "SDS", "CDS"}
_MAX_PAGES = 20
def _headers() -> dict:
def _headers() -> dict[str, str]:
return {
"user-agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
@ -37,7 +39,7 @@ def _request_body(
travel_date: str,
conversation_token: str | None,
later: bool,
) -> dict:
) -> dict[str, Any]:
return {
"IsNextOutward": False,
"IsPreviousOutward": False,
@ -83,7 +85,7 @@ def _run_pages(
travel_date: str,
first_class: bool = False,
direction: str = "to_paddington",
):
) -> Generator[tuple[str, list[Any]], None, None]:
"""
Iterate all pages of GWR journey search results.
@ -96,7 +98,9 @@ def _run_pages(
later = False
from_code, to_code = _od_codes(station_crs, direction)
for _ in range(_MAX_PAGES):
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
body = _request_body(
from_code, to_code, travel_date, conversation_token, later
)
if first_class:
body["firstclass"] = True
body["standardclass"] = False
@ -121,7 +125,7 @@ def _run_pages_batched(
travel_date: str,
first_class: bool = False,
direction: str = "to_paddington",
):
) -> Generator[list[tuple[str, list[Any]]], None, None]:
"""
Like _run_pages but yields one list of (dep_time, fares_list) per API page call,
allowing callers to stream results a page at a time.
@ -132,7 +136,9 @@ def _run_pages_batched(
later = False
from_code, to_code = _od_codes(station_crs, direction)
for _ in range(_MAX_PAGES):
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
body = _request_body(
from_code, to_code, travel_date, conversation_token, later
)
if first_class:
body["firstclass"] = True
body["standardclass"] = False
@ -157,7 +163,7 @@ def _run_pages_batched(
def fetch(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict]:
) -> dict[str, dict[str, Any]]:
"""
Fetch GWR walk-on single fares for the selected Paddington direction.
@ -165,7 +171,7 @@ def fetch(
where price is in £ and only the cheapest available standard-class walk-on
ticket per departure (with restrictions already applied by GWR) is kept.
"""
result: dict[str, dict] = {}
result: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction):
cheapest = None
for fare in fares:
@ -193,7 +199,7 @@ def fetch(
def fetch_advance(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict]:
) -> dict[str, dict[str, Any]]:
"""
Fetch advance fares: cheapest standard advance and first-class advance per departure.
@ -201,7 +207,7 @@ def fetch_advance(
Returns {departure_time: {'advance_std': dict or None, 'advance_1st': dict or None}}
where each sub-dict has keys 'ticket', 'price', 'code'.
"""
std_advance: dict[str, dict] = {}
std_advance: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(
station_crs, travel_date, first_class=False, direction=direction
):
@ -227,7 +233,7 @@ def fetch_advance(
"code": cheapest["code"],
}
first_advance: dict[str, dict] = {}
first_advance: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(
station_crs, travel_date, first_class=True, direction=direction
):
@ -260,7 +266,7 @@ def fetch_advance(
def fetch_advance_streaming(
station_crs: str, travel_date: str, direction: str = "to_paddington"
):
) -> Generator[dict[str, dict[str, Any]], None, None]:
"""
Generator yielding partial advance fare dicts one GWR API page at a time.
@ -272,7 +278,7 @@ def fetch_advance_streaming(
for batch in _run_pages_batched(
station_crs, travel_date, first_class=False, direction=direction
):
page: dict[str, dict] = {}
page: dict[str, dict[str, Any]] = {}
for dep_time, fares in batch:
cheapest = None
for fare in fares:

View file

@ -7,9 +7,12 @@ Two fetches:
PAD/from/BRI arrival times at Paddington (div.time.plan.a)
Matched by train ID (div.tid).
"""
import re
from typing import Any
import httpx
import lxml.html
import lxml.html # type: ignore[import-untyped]
_TO_PAD_TMPL = (
"https://www.realtimetrains.co.uk/search/detailed/"
@ -38,7 +41,7 @@ DEFAULT_UA = (
)
def _browser_headers(user_agent: str) -> dict:
def _browser_headers(user_agent: str) -> dict[str, str]:
return {
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
@ -55,7 +58,7 @@ def _browser_headers(user_agent: str) -> dict:
def _fmt(hhmm: str) -> str:
"""Convert '0830' → '08:30'."""
hhmm = re.sub(r'[^0-9]', '', hhmm)
hhmm = re.sub(r"[^0-9]", "", hhmm)
if len(hhmm) == 4:
return f"{hhmm[:2]}:{hhmm[2:]}"
return hhmm
@ -64,12 +67,12 @@ def _fmt(hhmm: str) -> str:
def _parse_services(html: str, time_selector: str) -> dict[str, str]:
"""Return {train_id: time_string} from a servicelist page."""
root = lxml.html.fromstring(html)
sl = root.cssselect('div.servicelist')
sl = root.cssselect("div.servicelist")
if not sl:
return {}
result = {}
for svc in sl[0].cssselect('a.service'):
tid_els = svc.cssselect('div.tid')
for svc in sl[0].cssselect("a.service"):
tid_els = svc.cssselect("div.tid")
time_els = svc.cssselect(time_selector)
if tid_els and time_els:
tid = tid_els[0].text_content().strip()
@ -79,56 +82,58 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
return result
def _parse_arrivals(html: str) -> dict[str, dict]:
def _parse_arrivals(html: str) -> dict[str, dict[str, str]]:
"""Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page."""
root = lxml.html.fromstring(html)
sl = root.cssselect('div.servicelist')
sl = root.cssselect("div.servicelist")
if not sl:
return {}
result = {}
for svc in sl[0].cssselect('a.service'):
tid_els = svc.cssselect('div.tid')
time_els = svc.cssselect('div.time.plan.a')
for svc in sl[0].cssselect("a.service"):
tid_els = svc.cssselect("div.tid")
time_els = svc.cssselect("div.time.plan.a")
if not (tid_els and time_els):
continue
time_text = time_els[0].text_content().strip()
if not time_text:
continue
plat_els = svc.cssselect('div.platform')
platform = plat_els[0].text_content().strip() if plat_els else ''
plat_els = svc.cssselect("div.platform")
platform = plat_els[0].text_content().strip() if plat_els else ""
result[tid_els[0].text_content().strip()] = {
'time': _fmt(time_text),
'platform': platform,
"time": _fmt(time_text),
"platform": platform,
}
return result
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
def fetch(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from station_crs to PAD."""
headers = _browser_headers(user_agent)
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
r_pad = client.get(_PAD_FROM_TMPL.format(crs=station_crs, date=date))
departures = _parse_services(r_bri.text, 'div.time.plan.d')
arrivals = _parse_arrivals(r_pad.text)
departures = _parse_services(r_bri.text, "div.time.plan.d")
arrivals = _parse_arrivals(r_pad.text)
trains = [
{
'depart_bristol': dep,
'arrive_paddington': arrivals[tid]['time'],
'arrive_platform': arrivals[tid]['platform'],
'headcode': tid,
"depart_bristol": dep,
"arrive_paddington": arrivals[tid]["time"],
"arrive_platform": arrivals[tid]["platform"],
"headcode": tid,
}
for tid, dep in departures.items()
if tid in arrivals
]
return sorted(trains, key=lambda t: t['depart_bristol'])
return sorted(trains, key=lambda t: t["depart_bristol"])
def fetch_to_paddington(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from station_crs to PAD using generic field names."""
return [
{
@ -143,15 +148,15 @@ def fetch_to_paddington(
def fetch_from_paddington(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from PAD to station_crs."""
headers = _browser_headers(user_agent)
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date))
r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date))
departures = _parse_services(r_pad.text, 'div.time.plan.d')
departures = _parse_services(r_pad.text, "div.time.plan.d")
arrivals = _parse_arrivals(r_station.text)
trains = [

File diff suppressed because it is too large Load diff

View file

@ -1,49 +1,53 @@
import os
import time
from pathlib import Path
from typing import Any
import pytest
from cache import get_cached, set_cached
@pytest.fixture
def tmp_cache(tmp_path, monkeypatch):
def tmp_cache(tmp_path: Path, monkeypatch: Any) -> Path:
import cache as cache_module
monkeypatch.setattr(cache_module, 'CACHE_DIR', str(tmp_path))
monkeypatch.setattr(cache_module, "CACHE_DIR", str(tmp_path))
return tmp_path
def test_get_cached_returns_none_for_missing_key(tmp_cache):
assert get_cached('no_such_key') is None
def test_get_cached_returns_none_for_missing_key(tmp_cache: Path) -> None:
assert get_cached("no_such_key") is None
def test_set_and_get_cached_roundtrip(tmp_cache):
set_cached('my_key', {'a': 1})
assert get_cached('my_key') == {'a': 1}
def test_set_and_get_cached_roundtrip(tmp_cache: Path) -> None:
set_cached("my_key", {"a": 1})
assert get_cached("my_key") == {"a": 1}
def test_get_cached_no_ttl_never_expires(tmp_cache):
set_cached('k', [1, 2, 3])
def test_get_cached_no_ttl_never_expires(tmp_cache: Path) -> None:
set_cached("k", [1, 2, 3])
# Backdate the file by 2 days
path = tmp_cache / 'k.json'
path = tmp_cache / "k.json"
old = time.time() - 2 * 86400
os.utime(path, (old, old))
assert get_cached('k') == [1, 2, 3]
assert get_cached("k") == [1, 2, 3]
def test_get_cached_within_ttl(tmp_cache):
set_cached('k', 'fresh')
assert get_cached('k', ttl=3600) == 'fresh'
def test_get_cached_within_ttl(tmp_cache: Path) -> None:
set_cached("k", "fresh")
assert get_cached("k", ttl=3600) == "fresh"
def test_get_cached_expired_returns_none(tmp_cache):
set_cached('k', 'stale')
path = tmp_cache / 'k.json'
def test_get_cached_expired_returns_none(tmp_cache: Path) -> None:
set_cached("k", "stale")
path = tmp_cache / "k.json"
old = time.time() - 25 * 3600 # 25 hours ago
os.utime(path, (old, old))
assert get_cached('k', ttl=24 * 3600) is None
assert get_cached("k", ttl=24 * 3600) is None
def test_get_cached_invalid_json_returns_none(tmp_cache):
path = tmp_cache / 'broken.json'
def test_get_cached_invalid_json_returns_none(tmp_cache: Path) -> None:
path = tmp_cache / "broken.json"
path.write_text('{"not": "finished"')
assert get_cached('broken') is None
assert get_cached("broken") is None

View file

@ -1,30 +1,47 @@
from typing import Any
import pytest
from scraper.eurostar import _parse_graphql, _parse_graphql_leg, search_url
def _gql_response(journeys: list) -> dict:
return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}}
def _gql_response(journeys: list[dict[str, Any]]) -> dict[str, Any]:
return {"data": {"journeySearch": {"outbound": {"journeys": journeys}}}}
def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES',
plus_price=None, plus_seats=None) -> dict:
fares = [{
'classOfService': {'code': 'STANDARD'},
'prices': {'displayPrice': price},
'seats': seats,
'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}]
if service_name else [],
}]
def _journey(
departs: str,
arrives: str,
price: float | None = None,
seats: int | None = None,
service_name: str = "",
carrier: str = "ES",
plus_price: float | None = None,
plus_seats: int | None = None,
) -> dict[str, Any]:
fares: list[dict[str, Any]] = [
{
"classOfService": {"code": "STANDARD"},
"prices": {"displayPrice": price},
"seats": seats,
"legs": (
[{"serviceName": service_name, "serviceType": {"code": carrier}}]
if service_name
else []
),
}
]
if plus_price is not None or plus_seats is not None:
fares.append({
'classOfService': {'code': 'PLUS'},
'prices': {'displayPrice': plus_price},
'seats': plus_seats,
'legs': [],
})
fares.append(
{
"classOfService": {"code": "PLUS"},
"prices": {"displayPrice": plus_price},
"seats": plus_seats,
"legs": [],
}
)
return {
'timing': {'departureTime': departs, 'arrivalTime': arrives},
'fares': fares,
"timing": {"departureTime": departs, "arrivalTime": arrives},
"fares": fares,
}
@ -32,114 +49,149 @@ def _journey(departs: str, arrives: str, price=None, seats=None, service_name=''
# _parse_graphql
# ---------------------------------------------------------------------------
def test_parse_graphql_single_journey():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')])
services = _parse_graphql(data, 'Paris Gare du Nord')
def test_parse_graphql_single_journey() -> None:
data = _gql_response(
[_journey("09:31", "12:55", price=156, seats=37, service_name="9014")]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert len(services) == 1
s = services[0]
assert s['depart_st_pancras'] == '09:31'
assert s['arrive_destination'] == '12:55'
assert s['destination'] == 'Paris Gare du Nord'
assert s['train_number'] == 'ES 9014'
assert s['price'] == 156.0
assert s['seats'] == 37
assert s['plus_price'] is None
assert s['plus_seats'] is None
assert s["depart_st_pancras"] == "09:31"
assert s["arrive_destination"] == "12:55"
assert s["destination"] == "Paris Gare du Nord"
assert s["train_number"] == "ES 9014"
assert s["price"] == 156.0
assert s["seats"] == 37
assert s["plus_price"] is None
assert s["plus_seats"] is None
def test_parse_graphql_standard_premier_price():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014',
plus_price=220, plus_seats=12)])
services = _parse_graphql(data, 'Paris Gare du Nord')
def test_parse_graphql_standard_premier_price() -> None:
data = _gql_response(
[
_journey(
"09:31",
"12:55",
price=156,
seats=37,
service_name="9014",
plus_price=220,
plus_seats=12,
)
]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert len(services) == 1
s = services[0]
assert s['price'] == 156.0
assert s['seats'] == 37
assert s['plus_price'] == 220.0
assert s['plus_seats'] == 12
assert s["price"] == 156.0
assert s["seats"] == 37
assert s["plus_price"] == 220.0
assert s["plus_seats"] == 12
def test_parse_graphql_plus_price_none_when_not_returned():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37)])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['plus_price'] is None
assert services[0]['plus_seats'] is None
def test_parse_graphql_plus_price_none_when_not_returned() -> None:
data = _gql_response([_journey("09:31", "12:55", price=156, seats=37)])
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["plus_price"] is None
assert services[0]["plus_seats"] is None
def test_parse_graphql_half_pound_price():
data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')])
services = _parse_graphql(data, 'Amsterdam Centraal')
assert services[0]['price'] == 192.5
def test_parse_graphql_half_pound_price() -> None:
data = _gql_response(
[_journey("09:01", "14:20", price=192.5, seats=25, service_name="9116")]
)
services = _parse_graphql(data, "Amsterdam Centraal")
assert services[0]["price"] == 192.5
def test_parse_graphql_null_price():
data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)])
services = _parse_graphql(data, 'Amsterdam Centraal')
assert services[0]['price'] is None
assert services[0]['seats'] == 0
def test_parse_graphql_null_price() -> None:
data = _gql_response([_journey("06:16", "11:09", price=None, seats=0)])
services = _parse_graphql(data, "Amsterdam Centraal")
assert services[0]["price"] is None
assert services[0]["seats"] == 0
def test_parse_graphql_sorted_by_departure():
data = _gql_response([
_journey('10:31', '13:55'),
_journey('07:31', '10:59'),
])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['depart_st_pancras'] == '07:31'
assert services[1]['depart_st_pancras'] == '10:31'
def test_parse_graphql_sorted_by_departure() -> None:
data = _gql_response(
[
_journey("10:31", "13:55"),
_journey("07:31", "10:59"),
]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["depart_st_pancras"] == "07:31"
assert services[1]["depart_st_pancras"] == "10:31"
def test_parse_graphql_deduplicates_same_departure_time():
data = _gql_response([
_journey('06:16', '11:09', price=None, seats=0),
_journey('06:16', '11:09', price=None, seats=0),
_journey('06:16', '11:09', price=None, seats=0),
])
services = _parse_graphql(data, 'Amsterdam Centraal')
def test_parse_graphql_deduplicates_same_departure_time() -> None:
data = _gql_response(
[
_journey("06:16", "11:09", price=None, seats=0),
_journey("06:16", "11:09", price=None, seats=0),
_journey("06:16", "11:09", price=None, seats=0),
]
)
services = _parse_graphql(data, "Amsterdam Centraal")
assert len(services) == 1
def test_parse_graphql_no_legs_gives_empty_train_number():
data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')])
services = _parse_graphql(data, 'Paris Gare du Nord')
assert services[0]['train_number'] == ''
def test_parse_graphql_no_legs_gives_empty_train_number() -> None:
data = _gql_response(
[_journey("09:31", "12:55", price=156, seats=37, service_name="")]
)
services = _parse_graphql(data, "Paris Gare du Nord")
assert services[0]["train_number"] == ""
def test_parse_graphql_empty_journeys():
def test_parse_graphql_empty_journeys() -> None:
data = _gql_response([])
assert _parse_graphql(data, 'Paris Gare du Nord') == []
assert _parse_graphql(data, "Paris Gare du Nord") == []
def test_parse_graphql_inbound_leg():
data = {'data': {'journeySearch': {'inbound': {'journeys': [
_journey('17:12', '18:30', price=49, seats=43, service_name='9035')
]}}}}
services = _parse_graphql_leg(data, 'Paris Gare du Nord', 'inbound', 'inbound')
def test_parse_graphql_inbound_leg() -> None:
data: dict[str, Any] = {
"data": {
"journeySearch": {
"inbound": {
"journeys": [
_journey(
"17:12", "18:30", price=49, seats=43, service_name="9035"
)
]
}
}
}
}
services = _parse_graphql_leg(data, "Paris Gare du Nord", "inbound", "inbound")
assert services == [{
'depart_destination': '17:12',
'arrive_st_pancras': '18:30',
'destination': 'Paris Gare du Nord',
'train_number': 'ES 9035',
'price': 49.0,
'seats': 43,
'plus_price': None,
'plus_seats': None,
}]
assert services == [
{
"depart_destination": "17:12",
"arrive_st_pancras": "18:30",
"destination": "Paris Gare du Nord",
"train_number": "ES 9035",
"price": 49.0,
"seats": 43,
"plus_price": None,
"plus_seats": None,
}
]
# ---------------------------------------------------------------------------
# search_url
# ---------------------------------------------------------------------------
def test_search_url():
url = search_url('Paris Gare du Nord', '2026-04-10')
def test_search_url() -> None:
url = search_url("Paris Gare du Nord", "2026-04-10")
assert url == (
'https://www.eurostar.com/search/uk-en'
'?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10'
"https://www.eurostar.com/search/uk-en"
"?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10"
)
def test_search_url_return():
url = search_url('Paris Gare du Nord', '2026-04-10', return_date='2026-04-17')
assert url.endswith('&outbound=2026-04-10&inbound=2026-04-17')
def test_search_url_return() -> None:
url = search_url("Paris Gare du Nord", "2026-04-10", return_date="2026-04-17")
assert url.endswith("&outbound=2026-04-10&inbound=2026-04-17")

View file

@ -1,4 +1,5 @@
import threading
from typing import Any, Generator
import pytest
from werkzeug.serving import make_server
@ -8,12 +9,16 @@ import app as app_module
playwright_sync = pytest.importorskip("playwright.sync_api")
sync_playwright = playwright_sync.sync_playwright
rtt_scraper: Any = app_module.rtt_scraper # type: ignore[attr-defined]
gwr_fares_scraper: Any = app_module.gwr_fares_scraper # type: ignore[attr-defined]
eurostar_scraper: Any = app_module.eurostar_scraper # type: ignore[attr-defined]
def _stub_return_data(monkeypatch):
def _stub_return_data(monkeypatch: Any) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -24,7 +29,7 @@ def _stub_return_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch_from_paddington",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -35,7 +40,7 @@ def _stub_return_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date, direction="to_paddington": {
"07:00": {
@ -51,7 +56,11 @@ def _stub_return_data(monkeypatch):
},
)
def fake_advance_streaming(station_crs, travel_date, direction="to_paddington"):
def fake_advance_streaming(
station_crs: str,
travel_date: str,
direction: str = "to_paddington",
) -> Generator[dict[str, Any], None, None]:
if direction == "from_paddington":
yield {
"17:15": {
@ -84,18 +93,20 @@ def _stub_return_data(monkeypatch):
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
fake_advance_streaming,
)
def fake_advance(station_crs, travel_date, direction="to_paddington"):
def fake_advance(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, Any]:
pages = list(fake_advance_streaming(station_crs, travel_date, direction))
return pages[0] if pages else {}
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance", fake_advance)
monkeypatch.setattr(gwr_fares_scraper, "fetch_advance", fake_advance)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch_return",
lambda destination, outbound_date, return_date: {
"outbound": [
@ -126,11 +137,11 @@ def _stub_return_data(monkeypatch):
)
def _stub_single_data(monkeypatch):
def _stub_single_data(monkeypatch: Any) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -141,7 +152,7 @@ def _stub_single_data(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {
@ -151,7 +162,7 @@ def _stub_single_data(monkeypatch):
},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {
"ticket": "Advance Single",
@ -166,17 +177,17 @@ def _stub_single_data(monkeypatch):
},
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -194,7 +205,7 @@ def _stub_single_data(monkeypatch):
@pytest.fixture
def local_server(monkeypatch):
def local_server(monkeypatch: Any) -> Generator[str, None, None]:
_stub_return_data(monkeypatch)
app_module.app.config["TESTING"] = True
server = make_server("127.0.0.1", 0, app_module.app)
@ -208,7 +219,7 @@ def local_server(monkeypatch):
@pytest.fixture
def single_server(monkeypatch):
def single_server(monkeypatch: Any) -> Generator[str, None, None]:
_stub_single_data(monkeypatch)
app_module.app.config["TESTING"] = True
server = make_server("127.0.0.1", 0, app_module.app)
@ -221,14 +232,14 @@ def single_server(monkeypatch):
thread.join(timeout=5)
def _launch_browser(playwright):
def _launch_browser(playwright: Any) -> Any:
try:
return playwright.chromium.launch(headless=True)
except Exception as exc:
pytest.skip(f"Chromium browser unavailable for Playwright: {exc}")
def test_single_advance_standard_totals_after_click(single_server):
def test_single_advance_standard_totals_after_click(single_server: str) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -250,11 +261,13 @@ def test_single_advance_standard_totals_after_click(single_server):
browser.close()
def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
def test_single_next_date_advance_standard_labels_unreachable_rows(
monkeypatch: Any,
) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{
@ -265,7 +278,7 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {
@ -275,7 +288,7 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {
"ticket": "Advance Single",
@ -286,17 +299,17 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
},
}
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
app_module.eurostar_scraper,
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -352,7 +365,9 @@ def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch):
thread.join(timeout=5)
def test_single_advance_standard_premier_totals_on_initial_url(single_server):
def test_single_advance_standard_premier_totals_on_initial_url(
single_server: str,
) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -372,33 +387,47 @@ def test_single_advance_standard_premier_totals_on_initial_url(single_server):
browser.close()
def test_single_advance_first_falls_back_to_walkon_when_unavailable(monkeypatch):
def test_single_advance_first_falls_back_to_walkon_when_unavailable(
monkeypatch: Any,
) -> None:
monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None)
monkeypatch.setattr(app_module, "set_cached", lambda key, data: None)
monkeypatch.setattr(
app_module.rtt_scraper,
rtt_scraper,
"fetch",
lambda travel_date, user_agent, station_crs="BRI": [
{"depart_bristol": "07:00", "arrive_paddington": "08:45", "headcode": "1A23"},
{
"depart_bristol": "07:00",
"arrive_paddington": "08:45",
"headcode": "1A23",
},
],
)
monkeypatch.setattr(
app_module.gwr_fares_scraper,
gwr_fares_scraper,
"fetch",
lambda station_crs, travel_date: {
"07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"},
},
)
advance_fares = {
advance_fares: dict[str, Any] = {
"07:00": {
"advance_std": {"ticket": "Advance Single", "price": 50.0, "code": "ADV"},
"advance_1st": None,
},
}
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance", lambda station_crs, travel_date: advance_fares)
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance_streaming", lambda station_crs, travel_date: iter([advance_fares]))
monkeypatch.setattr(
app_module.eurostar_scraper,
gwr_fares_scraper,
"fetch_advance",
lambda station_crs, travel_date: advance_fares,
)
monkeypatch.setattr(
gwr_fares_scraper,
"fetch_advance_streaming",
lambda station_crs, travel_date: iter([advance_fares]),
)
monkeypatch.setattr(
eurostar_scraper,
"fetch",
lambda destination, travel_date: [
{
@ -441,7 +470,7 @@ def test_single_advance_first_falls_back_to_walkon_when_unavailable(monkeypatch)
thread.join(timeout=5)
def test_return_advance_first_standard_premier_totals(local_server):
def test_return_advance_first_standard_premier_totals(local_server: str) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()
@ -477,7 +506,9 @@ def test_return_advance_first_standard_premier_totals(local_server):
browser.close()
def test_return_advance_first_standard_premier_totals_on_initial_url(local_server):
def test_return_advance_first_standard_premier_totals_on_initial_url(
local_server: str,
) -> None:
with sync_playwright() as p:
browser = _launch_browser(p)
page = browser.new_page()

View file

@ -1,71 +1,74 @@
import pytest
from scraper.realtime_trains import _fmt, _parse_services
# ---------------------------------------------------------------------------
# _fmt
# ---------------------------------------------------------------------------
def test_fmt_four_digits():
assert _fmt('0830') == '08:30'
def test_fmt_already_colon():
assert _fmt('08:30') == '08:30'
def test_fmt_four_digits() -> None:
assert _fmt("0830") == "08:30"
def test_fmt_strips_non_digits():
assert _fmt('08h30') == '08:30'
def test_fmt_already_colon() -> None:
assert _fmt("08:30") == "08:30"
def test_fmt_strips_non_digits() -> None:
assert _fmt("08h30") == "08:30"
# ---------------------------------------------------------------------------
# _parse_services
# ---------------------------------------------------------------------------
def _make_html(services: list[tuple[str, str]], time_class: str) -> str:
"""Build a minimal servicelist HTML with (train_id, time) pairs."""
items = ''
items = ""
for tid, time in services:
items += f'''
items += f"""
<a class="service">
<div class="tid">{tid}</div>
<div class="time plan {time_class}">{time}</div>
</a>'''
</a>"""
return f'<div class="servicelist">{items}</div>'
def test_parse_services_departures():
html = _make_html([('1A23', '0700'), ('2B45', '0830')], 'd')
result = _parse_services(html, 'div.time.plan.d')
assert result == {'1A23': '07:00', '2B45': '08:30'}
def test_parse_services_departures() -> None:
html = _make_html([("1A23", "0700"), ("2B45", "0830")], "d")
result = _parse_services(html, "div.time.plan.d")
assert result == {"1A23": "07:00", "2B45": "08:30"}
def test_parse_services_arrivals():
html = _make_html([('1A23', '0845')], 'a')
result = _parse_services(html, 'div.time.plan.a')
assert result == {'1A23': '08:45'}
def test_parse_services_arrivals() -> None:
html = _make_html([("1A23", "0845")], "a")
result = _parse_services(html, "div.time.plan.a")
assert result == {"1A23": "08:45"}
def test_parse_services_no_servicelist():
assert _parse_services('<html></html>', 'div.time.plan.d') == {}
def test_parse_services_no_servicelist() -> None:
assert _parse_services("<html></html>", "div.time.plan.d") == {}
def test_parse_services_skips_missing_time():
html = '''
def test_parse_services_skips_missing_time() -> None:
html = """
<div class="servicelist">
<a class="service"><div class="tid">1A23</div></a>
<a class="service"><div class="tid">2B45</div><div class="time plan d">0900</div></a>
</div>'''
result = _parse_services(html, 'div.time.plan.d')
assert '1A23' not in result
assert result == {'2B45': '09:00'}
</div>"""
result = _parse_services(html, "div.time.plan.d")
assert "1A23" not in result
assert result == {"2B45": "09:00"}
def test_parse_services_skips_empty_time():
html = '''
def test_parse_services_skips_empty_time() -> None:
html = """
<div class="servicelist">
<a class="service">
<div class="tid">1A23</div>
<div class="time plan d"> </div>
</a>
</div>'''
result = _parse_services(html, 'div.time.plan.d')
</div>"""
result = _parse_services(html, "div.time.plan.d")
assert result == {}

View file

@ -6,64 +6,80 @@ from trip_planner import (
_fmt_duration,
)
DATE = '2026-03-30'
DATE = "2026-03-30"
# ---------------------------------------------------------------------------
# _fmt_duration
# ---------------------------------------------------------------------------
def test_fmt_duration_hours_and_minutes():
assert _fmt_duration(95) == '1h 35m'
def test_fmt_duration_exact_hours():
assert _fmt_duration(120) == '2h'
def test_fmt_duration_hours_and_minutes() -> None:
assert _fmt_duration(95) == "1h 35m"
def test_fmt_duration_minutes_only():
assert _fmt_duration(45) == '45m'
def test_fmt_duration_exact_hours() -> None:
assert _fmt_duration(120) == "2h"
def test_fmt_duration_minutes_only() -> None:
assert _fmt_duration(45) == "45m"
# ---------------------------------------------------------------------------
# combine_trips — basic pairing
# ---------------------------------------------------------------------------
GWR_FAST = {'depart_bristol': '07:00', 'arrive_paddington': '08:45'} # 1h 45m
GWR_SLOW = {'depart_bristol': '07:00', 'arrive_paddington': '09:26'} # 2h 26m — connection too short for ES_PARIS
GWR_FAST = {"depart_bristol": "07:00", "arrive_paddington": "08:45"} # 1h 45m
GWR_SLOW = {
"depart_bristol": "07:00",
"arrive_paddington": "09:26",
} # 2h 26m — connection too short for ES_PARIS
ES_PARIS = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
ES_EARLY = {'depart_st_pancras': '09:00', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord'}
ES_PARIS = {
"depart_st_pancras": "10:01",
"arrive_destination": "13:34",
"destination": "Paris Gare du Nord",
}
ES_EARLY = {
"depart_st_pancras": "09:00",
"arrive_destination": "12:00",
"destination": "Paris Gare du Nord",
}
def test_valid_trip_is_returned():
def test_valid_trip_is_returned() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert len(trips) == 1
t = trips[0]
assert t['depart_bristol'] == '07:00'
assert t['arrive_paddington'] == '08:45'
assert t['depart_st_pancras'] == '10:01'
assert t['arrive_destination'] == '13:34'
assert t['destination'] == 'Paris Gare du Nord'
assert t["depart_bristol"] == "07:00"
assert t["arrive_paddington"] == "08:45"
assert t["depart_st_pancras"] == "10:01"
assert t["arrive_destination"] == "13:34"
assert t["destination"] == "Paris Gare du Nord"
def test_gwr_too_slow_excluded():
def test_gwr_too_slow_excluded() -> None:
# arrive 09:26, Eurostar 10:01 → 35 min connection < 50 min minimum
trips = combine_trips([GWR_SLOW], [ES_PARIS], DATE)
assert trips == []
def test_eurostar_too_early_excluded():
def test_eurostar_too_early_excluded() -> None:
# Eurostar departs before min connection time has elapsed
trips = combine_trips([GWR_FAST], [ES_EARLY], DATE)
assert trips == []
def test_no_trains_returns_empty():
def test_no_trains_returns_empty() -> None:
assert combine_trips([], [], DATE) == []
def test_no_gwr_returns_empty():
def test_no_gwr_returns_empty() -> None:
assert combine_trips([], [ES_PARIS], DATE) == []
def test_no_eurostar_returns_empty():
def test_no_eurostar_returns_empty() -> None:
assert combine_trips([GWR_FAST], [], DATE) == []
@ -71,140 +87,211 @@ def test_no_eurostar_returns_empty():
# Connection window constraints
# ---------------------------------------------------------------------------
def test_min_connection_enforced():
def test_min_connection_enforced() -> None:
# Arrive Paddington 08:45, need 75 min → earliest St Pancras 10:00
# ES at 09:59 should be excluded, 10:00 should be included
es_too_close = {'depart_st_pancras': '09:59', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
es_ok = {'depart_st_pancras': '10:00', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
assert combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == []
es_too_close = {
"depart_st_pancras": "09:59",
"arrive_destination": "13:00",
"destination": "Paris Gare du Nord",
}
es_ok = {
"depart_st_pancras": "10:00",
"arrive_destination": "13:00",
"destination": "Paris Gare du Nord",
}
assert (
combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == []
)
trips = combine_trips([GWR_FAST], [es_ok], DATE, min_connection_minutes=75)
assert len(trips) == 1
def test_max_connection_enforced():
def test_max_connection_enforced() -> None:
# Arrive Paddington 08:45, max 140 min → latest St Pancras 11:05
es_ok = {'depart_st_pancras': '11:05', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
es_too_late = {'depart_st_pancras': '11:06', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
es_ok = {
"depart_st_pancras": "11:05",
"arrive_destination": "14:00",
"destination": "Paris Gare du Nord",
}
es_too_late = {
"depart_st_pancras": "11:06",
"arrive_destination": "14:00",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST], [es_ok], DATE, max_connection_minutes=140)
assert len(trips) == 1
assert combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == []
assert (
combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == []
)
# ---------------------------------------------------------------------------
# Only earliest valid Eurostar per GWR departure
# ---------------------------------------------------------------------------
def test_only_earliest_eurostar_per_gwr():
es1 = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
es2 = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
def test_only_earliest_eurostar_per_gwr() -> None:
es1 = {
"depart_st_pancras": "10:01",
"arrive_destination": "13:34",
"destination": "Paris Gare du Nord",
}
es2 = {
"depart_st_pancras": "11:01",
"arrive_destination": "14:34",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST], [es1, es2], DATE)
assert len(trips) == 1
assert trips[0]['depart_st_pancras'] == '10:01'
assert trips[0]["depart_st_pancras"] == "10:01"
# ---------------------------------------------------------------------------
# Multiple GWR trains → multiple trips
# ---------------------------------------------------------------------------
def test_multiple_gwr_trains():
gwr2 = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'}
es = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
def test_multiple_gwr_trains() -> None:
gwr2 = {"depart_bristol": "08:00", "arrive_paddington": "09:45"}
es = {
"depart_st_pancras": "11:01",
"arrive_destination": "14:34",
"destination": "Paris Gare du Nord",
}
trips = combine_trips([GWR_FAST, gwr2], [es], DATE, max_connection_minutes=140)
assert len(trips) == 2
assert trips[0]['depart_bristol'] == '07:00'
assert trips[1]['depart_bristol'] == '08:00'
assert trips[0]["depart_bristol"] == "07:00"
assert trips[1]["depart_bristol"] == "08:00"
# ---------------------------------------------------------------------------
# Duration fields
# ---------------------------------------------------------------------------
def test_gwr_duration_in_trip():
def test_gwr_duration_in_trip() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['gwr_duration'] == '1h 45m'
assert trips[0]["gwr_duration"] == "1h 45m"
def test_total_duration_in_trip():
def test_total_duration_in_trip() -> None:
# depart 07:00, arrive 13:34 → 6h 34m
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['total_duration'] == '6h 34m'
assert trips[0]["total_duration"] == "6h 34m"
def test_connection_duration_in_trip():
def test_connection_duration_in_trip() -> None:
# arrive Paddington 08:45, depart St Pancras 10:01 → 1h 16m
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert trips[0]['connection_duration'] == '1h 16m'
assert trips[0]["connection_duration"] == "1h 16m"
def test_find_unreachable_eurostars_excludes_connectable_services():
def test_find_unreachable_eurostars_excludes_connectable_services() -> None:
# GWR arrives 08:45; default min=50/max=110 → viable window 09:35–10:35.
# 09:30 too early, 10:15 connectable, 12:30 beyond max connection.
gwr = [
{'depart_bristol': '07:00', 'arrive_paddington': '08:45'},
{"depart_bristol": "07:00", "arrive_paddington": "08:45"},
]
eurostar = [
{'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9001'},
{'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9002'},
{'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9003'},
{
"depart_st_pancras": "09:30",
"arrive_destination": "12:00",
"destination": "Paris Gare du Nord",
"train_number": "ES 9001",
},
{
"depart_st_pancras": "10:15",
"arrive_destination": "13:40",
"destination": "Paris Gare du Nord",
"train_number": "ES 9002",
},
{
"depart_st_pancras": "12:30",
"arrive_destination": "15:55",
"destination": "Paris Gare du Nord",
"train_number": "ES 9003",
},
]
unreachable = find_unreachable_morning_eurostars(gwr, eurostar, DATE)
assert [s['depart_st_pancras'] for s in unreachable] == ['09:30', '12:30']
assert [s["depart_st_pancras"] for s in unreachable] == ["09:30", "12:30"]
def test_combine_trips_includes_ticket_fields():
def test_combine_trips_includes_ticket_fields() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
assert len(trips) == 1
t = trips[0]
assert 'ticket_name' in t
assert 'ticket_price' in t
assert 'ticket_code' in t
assert "ticket_name" in t
assert "ticket_price" in t
assert "ticket_code" in t
def test_combine_trips_uses_gwr_fares_when_provided():
fares = {'07:00': {'ticket': 'Super Off-Peak Single', 'price': 49.30, 'code': 'SSS'}}
def test_combine_trips_uses_gwr_fares_when_provided() -> None:
fares = {
"07:00": {"ticket": "Super Off-Peak Single", "price": 49.30, "code": "SSS"}
}
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares=fares)
assert len(trips) == 1
assert trips[0]['ticket_price'] == 49.30
assert trips[0]['ticket_code'] == 'SSS'
assert trips[0]["ticket_price"] == 49.30
assert trips[0]["ticket_code"] == "SSS"
def test_combine_trips_ticket_price_none_when_no_fares():
def test_combine_trips_ticket_price_none_when_no_fares() -> None:
trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares={})
assert len(trips) == 1
assert trips[0]['ticket_price'] is None
assert trips[0]["ticket_price"] is None
def test_find_unreachable_eurostars_returns_empty_when_all_connectable():
def test_find_unreachable_eurostars_returns_empty_when_all_connectable() -> None:
gwr = [
{'depart_bristol': '07:00', 'arrive_paddington': '08:45'},
{"depart_bristol": "07:00", "arrive_paddington": "08:45"},
]
eurostar = [
{'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9002'},
{
"depart_st_pancras": "10:15",
"arrive_destination": "13:40",
"destination": "Paris Gare du Nord",
"train_number": "ES 9002",
},
]
assert find_unreachable_morning_eurostars(gwr, eurostar, DATE) == []
def test_combine_inbound_trips_pairs_eurostar_to_paddington_departure() -> None:
    """Check that one inbound Eurostar pairs with one later Paddington departure.

    The Eurostar reaches St Pancras at 16:30 and the GWR train leaves
    Paddington at 17:15 (45 minutes apart); the test passes a 30-120 minute
    connection window and expects exactly one combined trip.
    """
    eurostar = [
        {
            "depart_destination": "15:12",
            "arrive_st_pancras": "16:30",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9035",
        }
    ]
    gwr = [
        {
            "depart_paddington": "17:15",
            "arrive_destination": "18:55",
            "headcode": "1B99",
        }
    ]
    fares = {"17:15": {"ticket": "Off-Peak Single", "price": 63.60, "code": "SVS"}}
    trips = combine_inbound_trips(
        eurostar,
        gwr,
        DATE,
        min_connection_minutes=30,
        max_connection_minutes=120,
        gwr_fares=fares,
    )
    assert len(trips) == 1
    assert trips[0]["depart_destination"] == "15:12"
    assert trips[0]["arrive_st_pancras"] == "16:30"
    assert trips[0]["depart_paddington"] == "17:15"
    assert trips[0]["arrive_uk_station"] == "18:55"
    assert trips[0]["ticket_price"] == 63.60
    # Check-in deadline is 30 minutes before the 15:12 Eurostar departure.
    assert trips[0]["check_in_by"] == "14:42"

View file

@ -3,6 +3,7 @@ Combine GWR station→Paddington trains with Eurostar St Pancras→destination t
"""
from datetime import datetime, timedelta
from typing import Any
import circle_line
from tfl_fare import circle_line_fare
@ -15,14 +16,16 @@ DATE_FMT = "%Y-%m-%d"
TIME_FMT = "%H:%M"

# Walking allowances, in minutes, added before boarding the Underground.
PAD_WALK_TO_UNDERGROUND_MINUTES = 8  # GWR platform → Paddington (H&C Line) platform
# St Pancras arrivals → King's Cross St Pancras Underground
KX_WALK_TO_UNDERGROUND_MINUTES = 10
def _parse_dt(date: str, time: str) -> datetime:
    """Parse a date string and a time string into one datetime.

    The two parts are joined with a single space and parsed using the
    module's DATE_FMT and TIME_FMT patterns joined the same way.
    """
    stamp = f"{date} {time}"
    pattern = f"{DATE_FMT} {TIME_FMT}"
    return datetime.strptime(stamp, pattern)
def _circle_line_services(arrive_paddington: datetime) -> list[dict]:
def _circle_line_services(arrive_paddington: datetime) -> list[dict[str, Any]]:
"""
Given GWR arrival at Paddington, return up to 2 upcoming Circle line services
as [{'depart': 'HH:MM', 'arrive_kx': 'HH:MM'}, ...].
@ -33,7 +36,9 @@ def _circle_line_services(arrive_paddington: datetime) -> list[dict]:
earliest_board = arrive_paddington + timedelta(
minutes=PAD_WALK_TO_UNDERGROUND_MINUTES
)
services = circle_line.upcoming_services(earliest_board, count=2, direction='pad_to_kx')
services = circle_line.upcoming_services(
earliest_board, count=2, direction="pad_to_kx"
)
return [
{
"depart": dep.strftime(TIME_FMT),
@ -44,24 +49,32 @@ def _circle_line_services(arrive_paddington: datetime) -> list[dict]:
]
# Circle line platform → GWR platform at Paddington
PAD_WALK_FROM_UNDERGROUND_MINUTES = 5
# Minimum-connection threshold at or above which the platform walk buffer is applied
INBOUND_COMFORTABLE_MIN_CONN = 40
def _circle_line_services_to_paddington(
arrive_st_pancras: datetime,
dep_paddington: datetime | None = None,
min_conn_minutes: int = INBOUND_MIN_CONNECTION_MINUTES,
) -> list[dict]:
) -> list[dict[str, Any]]:
earliest_board = arrive_st_pancras + timedelta(
minutes=KX_WALK_TO_UNDERGROUND_MINUTES
)
if min_conn_minutes >= INBOUND_COMFORTABLE_MIN_CONN and dep_paddington is not None:
cutoff = dep_paddington - timedelta(minutes=PAD_WALK_FROM_UNDERGROUND_MINUTES)
candidates = circle_line.upcoming_services(earliest_board, count=4, direction='kx_to_pad')
candidates = circle_line.upcoming_services(
earliest_board, count=4, direction="kx_to_pad"
)
services = [(dep, arr) for dep, arr in candidates if arr <= cutoff][:2]
else:
services = circle_line.upcoming_services(earliest_board, count=1, direction='kx_to_pad', preceding=1)
services = circle_line.upcoming_services(
earliest_board, count=1, direction="kx_to_pad", preceding=1
)
return [
{
"depart": dep.strftime(TIME_FMT),
@ -82,8 +95,8 @@ def _fmt_duration(minutes: int) -> str:
def _is_viable_connection(
gwr: dict,
eurostar: dict,
gwr: dict[str, Any],
eurostar: dict[str, Any],
travel_date: str,
min_connection_minutes: int,
max_connection_minutes: int,
@ -112,8 +125,8 @@ def _is_viable_connection(
def _is_viable_inbound_connection(
eurostar: dict,
gwr: dict,
eurostar: dict[str, Any],
gwr: dict[str, Any],
travel_date: str,
min_connection_minutes: int,
max_connection_minutes: int,
@ -143,13 +156,13 @@ def _is_viable_inbound_connection(
def combine_trips(
gwr_trains: list[dict],
eurostar_trains: list[dict],
gwr_trains: list[dict[str, Any]],
eurostar_trains: list[dict[str, Any]],
travel_date: str,
min_connection_minutes: int = MIN_CONNECTION_MINUTES,
max_connection_minutes: int = MAX_CONNECTION_MINUTES,
gwr_fares: dict | None = None,
) -> list[dict]:
gwr_fares: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
"""
Return a list of valid combined trips, sorted by Bristol departure time.
@ -217,13 +230,13 @@ def combine_trips(
def combine_inbound_trips(
eurostar_trains: list[dict],
gwr_trains: list[dict],
eurostar_trains: list[dict[str, Any]],
gwr_trains: list[dict[str, Any]],
travel_date: str,
min_connection_minutes: int = INBOUND_MIN_CONNECTION_MINUTES,
max_connection_minutes: int = INBOUND_MAX_CONNECTION_MINUTES,
gwr_fares: dict | None = None,
) -> list[dict]:
gwr_fares: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
"""Return valid continent→UK combined trips."""
trips = []
@ -243,12 +256,16 @@ def combine_inbound_trips(
total_mins = int((arr_station - dep_dest).total_seconds() / 60) + 60
eurostar_mins = int((arr_stp - dep_dest).total_seconds() / 60) + 60
fare = (gwr_fares or {}).get(gwr["depart_paddington"])
circle_svcs = _circle_line_services_to_paddington(arr_stp, dep_pad, min_connection_minutes)
circle_svcs = _circle_line_services_to_paddington(
arr_stp, dep_pad, min_connection_minutes
)
trips.append(
{
"direction": "inbound",
"depart_destination": es["depart_destination"],
"check_in_by": (dep_dest - timedelta(minutes=30)).strftime(TIME_FMT),
"check_in_by": (dep_dest - timedelta(minutes=30)).strftime(
TIME_FMT
),
"arrive_st_pancras": es["arrive_st_pancras"],
"depart_paddington": gwr["depart_paddington"],
"arrive_uk_station": gwr["arrive_destination"],
@ -279,12 +296,12 @@ def combine_inbound_trips(
def find_unreachable_morning_eurostars(
gwr_trains: list[dict],
eurostar_trains: list[dict],
gwr_trains: list[dict[str, Any]],
eurostar_trains: list[dict[str, Any]],
travel_date: str,
min_connection_minutes: int = MIN_CONNECTION_MINUTES,
max_connection_minutes: int = MAX_CONNECTION_MINUTES,
) -> list[dict]:
) -> list[dict[str, Any]]:
unreachable = []
for es in eurostar_trains:
@ -311,12 +328,12 @@ def find_unreachable_morning_eurostars(
def find_unreachable_inbound_eurostars(
eurostar_trains: list[dict],
gwr_trains: list[dict],
eurostar_trains: list[dict[str, Any]],
gwr_trains: list[dict[str, Any]],
travel_date: str,
min_connection_minutes: int = INBOUND_MIN_CONNECTION_MINUTES,
max_connection_minutes: int = INBOUND_MAX_CONNECTION_MINUTES,
) -> list[dict]:
) -> list[dict[str, Any]]:
unreachable = []
for es in eurostar_trains: