Add full type annotations and black formatting across all modules

Annotated all functions with mypy --strict-compatible types (-> None, dict[str,
Any], Generator types, etc.), added # type: ignore for untyped third-party libs
(lxml), and reformatted with black. All 18 source files now pass mypy --strict
with zero errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-05-25 21:48:53 +01:00
parent 453d6244ec
commit 13c4341f3a
14 changed files with 1802 additions and 974 deletions

View file

@ -6,8 +6,10 @@ NewBookingSearch) returns departure time, arrival time, train number,
Eurostar Standard fare price, and seats remaining at that price for every
service on the requested date.
"""
import random
import string
from typing import Any
import requests
@ -16,19 +18,19 @@ DEFAULT_UA = (
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
ST_PANCRAS_STATION_ID = '7015400'
ST_PANCRAS_STATION_ID = "7015400"
ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID
DESTINATION_STATION_IDS = {
'Paris Gare du Nord': '8727100',
'Brussels Midi': '8814001',
'Lille Europe': '8722326',
'Amsterdam Centraal': '8400058',
'Rotterdam Centraal': '8400530',
'Cologne Hbf': '8015458',
"Paris Gare du Nord": "8727100",
"Brussels Midi": "8814001",
"Lille Europe": "8722326",
"Amsterdam Centraal": "8400058",
"Rotterdam Centraal": "8400530",
"Cologne Hbf": "8015458",
}
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
_GATEWAY_URL = "https://site-api.eurostar.com/gateway"
# Query requesting timing, train identity, and Standard fare price + seats.
# Variable names and argument names match the site's own query so the
@ -42,7 +44,7 @@ _GQL_QUERY = (
"journeySearch("
"outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination"
" adults:$adult currency:$currency"
" productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
' productFamilies:["PUB"] contractCode:"EIL_ALL"'
" adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
" infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0"
" wheelchairCompanions:0 nonWheelchairCompanions:0"
@ -85,11 +87,16 @@ _GQL_QUERY = (
"}"
)
_STANDARD = 'STANDARD'
_STANDARD_PLUS = 'PLUS'
_STANDARD = "STANDARD"
_STANDARD_PLUS = "PLUS"
def search_url(destination: str, travel_date: str, direction: str = "outbound", return_date: str | None = None) -> str:
def search_url(
destination: str,
travel_date: str,
direction: str = "outbound",
return_date: str | None = None,
) -> str:
dest_id = DESTINATION_STATION_IDS[destination]
origin = ST_PANCRAS_STATION_ID
destination_id = dest_id
@ -99,18 +106,20 @@ def search_url(destination: str, travel_date: str, direction: str = "outbound",
origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
inbound = None
return (
f'https://www.eurostar.com/search/uk-en'
f'?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}'
+ (f'&inbound={inbound}' if inbound else '')
f"https://www.eurostar.com/search/uk-en"
f"?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}"
+ (f"&inbound={inbound}" if inbound else "")
)
def _generate_cid() -> str:
chars = string.ascii_letters + string.digits
return 'SRCH-' + ''.join(random.choices(chars, k=22))
return "SRCH-" + "".join(random.choices(chars, k=22))
def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> list[dict]:
def _parse_journeys(
journeys: list[dict[str, Any]], destination: str, direction: str
) -> list[dict[str, Any]]:
"""
Parse a NewBookingSearch GraphQL response into a list of service dicts.
@ -121,101 +130,108 @@ def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> l
connecting trains); we keep the entry with the earliest arrival.
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
"""
best: dict[str, dict] = {}
best: dict[str, dict[str, Any]] = {}
for journey in journeys:
dep = journey['timing']['departureTime']
arr = journey['timing']['arrivalTime']
dep = journey["timing"]["departureTime"]
arr = journey["timing"]["arrivalTime"]
std_price = std_seats = plus_price = plus_seats = None
train_number = ''
for fare in (journey.get('fares') or []):
cos = fare['classOfService']['code']
p = fare.get('prices')
price = float(p['displayPrice']) if p and p.get('displayPrice') else None
seats = fare.get('seats')
train_number = ""
for fare in journey.get("fares") or []:
cos = fare["classOfService"]["code"]
p = fare.get("prices")
price = float(p["displayPrice"]) if p and p.get("displayPrice") else None
seats = fare.get("seats")
if not train_number:
legs = fare.get('legs') or []
train_number = ' + '.join(
legs = fare.get("legs") or []
train_number = " + ".join(
f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}"
for leg in legs if leg.get('serviceName')
for leg in legs
if leg.get("serviceName")
)
if cos == _STANDARD:
std_price, std_seats = price, seats
elif cos == _STANDARD_PLUS:
plus_price, plus_seats = price, seats
if direction == 'inbound':
if direction == "inbound":
service = {
'depart_destination': dep,
'arrive_st_pancras': arr,
'destination': destination,
'train_number': train_number,
'price': std_price,
'seats': std_seats,
'plus_price': plus_price,
'plus_seats': plus_seats,
"depart_destination": dep,
"arrive_st_pancras": arr,
"destination": destination,
"train_number": train_number,
"price": std_price,
"seats": std_seats,
"plus_price": plus_price,
"plus_seats": plus_seats,
}
key = dep
arrive_key = 'arrive_st_pancras'
arrive_key = "arrive_st_pancras"
else:
service = {
'depart_st_pancras': dep,
'arrive_destination': arr,
'destination': destination,
'train_number': train_number,
'price': std_price,
'seats': std_seats,
'plus_price': plus_price,
'plus_seats': plus_seats,
"depart_st_pancras": dep,
"arrive_destination": arr,
"destination": destination,
"train_number": train_number,
"price": std_price,
"seats": std_seats,
"plus_price": plus_price,
"plus_seats": plus_seats,
}
key = dep
arrive_key = 'arrive_destination'
arrive_key = "arrive_destination"
if key not in best or arr < best[key][arrive_key]:
best[key] = service
sort_key = 'depart_destination' if direction == 'inbound' else 'depart_st_pancras'
sort_key = "depart_destination" if direction == "inbound" else "depart_st_pancras"
return sorted(best.values(), key=lambda s: s[sort_key])
def _parse_graphql(data: dict, destination: str) -> list[dict]:
journeys = data['data']['journeySearch']['outbound']['journeys']
return _parse_journeys(journeys, destination, 'outbound')
def _parse_graphql(data: dict[str, Any], destination: str) -> list[dict[str, Any]]:
journeys = data["data"]["journeySearch"]["outbound"]["journeys"]
return _parse_journeys(journeys, destination, "outbound")
def _parse_graphql_leg(data: dict, destination: str, leg: str, direction: str) -> list[dict]:
journeys = data['data']['journeySearch'][leg]['journeys']
def _parse_graphql_leg(
data: dict[str, Any], destination: str, leg: str, direction: str
) -> list[dict[str, Any]]:
journeys = data["data"]["journeySearch"][leg]["journeys"]
return _parse_journeys(journeys, destination, direction)
def _payload(origin: str, destination_id: str, outbound: str, inbound: str | None = None) -> dict:
variables = {
'origin': origin,
'destination': destination_id,
'outbound': outbound,
'inbound': inbound,
'currency': 'GBP',
'adult': 1,
'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS],
def _payload(
origin: str, destination_id: str, outbound: str, inbound: str | None = None
) -> dict[str, Any]:
variables: dict[str, Any] = {
"origin": origin,
"destination": destination_id,
"outbound": outbound,
"inbound": inbound,
"currency": "GBP",
"adult": 1,
"filteredClassesOfService": [_STANDARD, _STANDARD_PLUS],
}
return {
'operationName': 'NewBookingSearch',
'variables': variables,
'query': _GQL_QUERY,
"operationName": "NewBookingSearch",
"variables": variables,
"query": _GQL_QUERY,
}
def _headers() -> dict:
def _headers() -> dict[str, str]:
return {
'User-Agent': DEFAULT_UA,
'Content-Type': 'application/json',
'Accept': '*/*',
'Accept-Language':'en-GB',
'Referer': 'https://www.eurostar.com/',
'x-platform': 'web',
'x-market-code': 'uk',
'x-source-url': 'search-app/',
'cid': _generate_cid(),
"User-Agent": DEFAULT_UA,
"Content-Type": "application/json",
"Accept": "*/*",
"Accept-Language": "en-GB",
"Referer": "https://www.eurostar.com/",
"x-platform": "web",
"x-market-code": "uk",
"x-source-url": "search-app/",
"cid": _generate_cid(),
}
def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> list[dict]:
def fetch(
destination: str, travel_date: str, direction: str = "outbound"
) -> list[dict[str, Any]]:
"""
Return all Eurostar services for destination on travel_date.
@ -223,7 +239,7 @@ def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> li
train_number) plus pricing (price, seats) from a single GraphQL call.
"""
dest_id = DESTINATION_STATION_IDS[destination]
if direction == 'inbound':
if direction == "inbound":
origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
else:
origin, destination_id = ST_PANCRAS_STATION_ID, dest_id
@ -234,11 +250,13 @@ def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> li
timeout=20,
)
resp.raise_for_status()
leg_direction = 'inbound' if direction == 'inbound' else 'outbound'
return _parse_graphql_leg(resp.json(), destination, 'outbound', leg_direction)
leg_direction = "inbound" if direction == "inbound" else "outbound"
return _parse_graphql_leg(resp.json(), destination, "outbound", leg_direction)
def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict[str, list[dict]]:
def fetch_return(
destination: str, outbound_date: str, return_date: str
) -> dict[str, list[dict[str, Any]]]:
dest_id = DESTINATION_STATION_IDS[destination]
resp = requests.post(
_GATEWAY_URL,
@ -249,6 +267,6 @@ def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict
resp.raise_for_status()
data = resp.json()
return {
'outbound': _parse_graphql_leg(data, destination, 'outbound', 'outbound'),
'inbound': _parse_graphql_leg(data, destination, 'inbound', 'inbound'),
"outbound": _parse_graphql_leg(data, destination, "outbound", "outbound"),
"inbound": _parse_graphql_leg(data, destination, "inbound", "inbound"),
}

View file

@ -6,6 +6,8 @@ Returns per-train cheapest standard-class fare with restrictions already applied
Cache for 30 days — fares rarely change.
"""
from typing import Any, Generator
import httpx
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"
@ -16,7 +18,7 @@ _WALKON_CODES = {"SSS", "SVS", "SDS", "CDS"}
_MAX_PAGES = 20
def _headers() -> dict:
def _headers() -> dict[str, str]:
return {
"user-agent": (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
@ -37,7 +39,7 @@ def _request_body(
travel_date: str,
conversation_token: str | None,
later: bool,
) -> dict:
) -> dict[str, Any]:
return {
"IsNextOutward": False,
"IsPreviousOutward": False,
@ -83,7 +85,7 @@ def _run_pages(
travel_date: str,
first_class: bool = False,
direction: str = "to_paddington",
):
) -> Generator[tuple[str, list[Any]], None, None]:
"""
Iterate all pages of GWR journey search results.
@ -96,7 +98,9 @@ def _run_pages(
later = False
from_code, to_code = _od_codes(station_crs, direction)
for _ in range(_MAX_PAGES):
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
body = _request_body(
from_code, to_code, travel_date, conversation_token, later
)
if first_class:
body["firstclass"] = True
body["standardclass"] = False
@ -121,7 +125,7 @@ def _run_pages_batched(
travel_date: str,
first_class: bool = False,
direction: str = "to_paddington",
):
) -> Generator[list[tuple[str, list[Any]]], None, None]:
"""
Like _run_pages but yields one list of (dep_time, fares_list) per API page call,
allowing callers to stream results a page at a time.
@ -132,7 +136,9 @@ def _run_pages_batched(
later = False
from_code, to_code = _od_codes(station_crs, direction)
for _ in range(_MAX_PAGES):
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
body = _request_body(
from_code, to_code, travel_date, conversation_token, later
)
if first_class:
body["firstclass"] = True
body["standardclass"] = False
@ -157,7 +163,7 @@ def _run_pages_batched(
def fetch(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict]:
) -> dict[str, dict[str, Any]]:
"""
Fetch GWR walk-on single fares for the selected Paddington direction.
@ -165,7 +171,7 @@ def fetch(
where price is in £ and only the cheapest available standard-class walk-on
ticket per departure (with restrictions already applied by GWR) is kept.
"""
result: dict[str, dict] = {}
result: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction):
cheapest = None
for fare in fares:
@ -193,7 +199,7 @@ def fetch(
def fetch_advance(
station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict]:
) -> dict[str, dict[str, Any]]:
"""
Fetch advance fares: cheapest standard advance and first-class advance per departure.
@ -201,7 +207,7 @@ def fetch_advance(
Returns {departure_time: {'advance_std': dict or None, 'advance_1st': dict or None}}
where each sub-dict has keys 'ticket', 'price', 'code'.
"""
std_advance: dict[str, dict] = {}
std_advance: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(
station_crs, travel_date, first_class=False, direction=direction
):
@ -227,7 +233,7 @@ def fetch_advance(
"code": cheapest["code"],
}
first_advance: dict[str, dict] = {}
first_advance: dict[str, dict[str, Any]] = {}
for dep_time, fares in _run_pages(
station_crs, travel_date, first_class=True, direction=direction
):
@ -260,7 +266,7 @@ def fetch_advance(
def fetch_advance_streaming(
station_crs: str, travel_date: str, direction: str = "to_paddington"
):
) -> Generator[dict[str, dict[str, Any]], None, None]:
"""
Generator yielding partial advance fare dicts one GWR API page at a time.
@ -272,7 +278,7 @@ def fetch_advance_streaming(
for batch in _run_pages_batched(
station_crs, travel_date, first_class=False, direction=direction
):
page: dict[str, dict] = {}
page: dict[str, dict[str, Any]] = {}
for dep_time, fares in batch:
cheapest = None
for fare in fares:

View file

@ -7,9 +7,12 @@ Two fetches:
PAD/from/BRI arrival times at Paddington (div.time.plan.a)
Matched by train ID (div.tid).
"""
import re
from typing import Any
import httpx
import lxml.html
import lxml.html # type: ignore[import-untyped]
_TO_PAD_TMPL = (
"https://www.realtimetrains.co.uk/search/detailed/"
@ -38,7 +41,7 @@ DEFAULT_UA = (
)
def _browser_headers(user_agent: str) -> dict:
def _browser_headers(user_agent: str) -> dict[str, str]:
return {
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
@ -55,7 +58,7 @@ def _browser_headers(user_agent: str) -> dict:
def _fmt(hhmm: str) -> str:
"""Convert '0830' → '08:30'."""
hhmm = re.sub(r'[^0-9]', '', hhmm)
hhmm = re.sub(r"[^0-9]", "", hhmm)
if len(hhmm) == 4:
return f"{hhmm[:2]}:{hhmm[2:]}"
return hhmm
@ -64,12 +67,12 @@ def _fmt(hhmm: str) -> str:
def _parse_services(html: str, time_selector: str) -> dict[str, str]:
"""Return {train_id: time_string} from a servicelist page."""
root = lxml.html.fromstring(html)
sl = root.cssselect('div.servicelist')
sl = root.cssselect("div.servicelist")
if not sl:
return {}
result = {}
for svc in sl[0].cssselect('a.service'):
tid_els = svc.cssselect('div.tid')
for svc in sl[0].cssselect("a.service"):
tid_els = svc.cssselect("div.tid")
time_els = svc.cssselect(time_selector)
if tid_els and time_els:
tid = tid_els[0].text_content().strip()
@ -79,56 +82,58 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
return result
def _parse_arrivals(html: str) -> dict[str, dict]:
def _parse_arrivals(html: str) -> dict[str, dict[str, str]]:
"""Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page."""
root = lxml.html.fromstring(html)
sl = root.cssselect('div.servicelist')
sl = root.cssselect("div.servicelist")
if not sl:
return {}
result = {}
for svc in sl[0].cssselect('a.service'):
tid_els = svc.cssselect('div.tid')
time_els = svc.cssselect('div.time.plan.a')
for svc in sl[0].cssselect("a.service"):
tid_els = svc.cssselect("div.tid")
time_els = svc.cssselect("div.time.plan.a")
if not (tid_els and time_els):
continue
time_text = time_els[0].text_content().strip()
if not time_text:
continue
plat_els = svc.cssselect('div.platform')
platform = plat_els[0].text_content().strip() if plat_els else ''
plat_els = svc.cssselect("div.platform")
platform = plat_els[0].text_content().strip() if plat_els else ""
result[tid_els[0].text_content().strip()] = {
'time': _fmt(time_text),
'platform': platform,
"time": _fmt(time_text),
"platform": platform,
}
return result
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
def fetch(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from station_crs to PAD."""
headers = _browser_headers(user_agent)
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
r_pad = client.get(_PAD_FROM_TMPL.format(crs=station_crs, date=date))
departures = _parse_services(r_bri.text, 'div.time.plan.d')
arrivals = _parse_arrivals(r_pad.text)
departures = _parse_services(r_bri.text, "div.time.plan.d")
arrivals = _parse_arrivals(r_pad.text)
trains = [
{
'depart_bristol': dep,
'arrive_paddington': arrivals[tid]['time'],
'arrive_platform': arrivals[tid]['platform'],
'headcode': tid,
"depart_bristol": dep,
"arrive_paddington": arrivals[tid]["time"],
"arrive_platform": arrivals[tid]["platform"],
"headcode": tid,
}
for tid, dep in departures.items()
if tid in arrivals
]
return sorted(trains, key=lambda t: t['depart_bristol'])
return sorted(trains, key=lambda t: t["depart_bristol"])
def fetch_to_paddington(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from station_crs to PAD using generic field names."""
return [
{
@ -143,15 +148,15 @@ def fetch_to_paddington(
def fetch_from_paddington(
date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI"
) -> list[dict[str, Any]]:
"""Fetch trains from PAD to station_crs."""
headers = _browser_headers(user_agent)
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date))
r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date))
departures = _parse_services(r_pad.text, 'div.time.plan.d')
departures = _parse_services(r_pad.text, "div.time.plan.d")
arrivals = _parse_arrivals(r_station.text)
trains = [