Annotated all functions with mypy --strict-compatible types (-> None, dict[str, Any], Generator types, etc.), added # type: ignore for untyped third-party libs (lxml), and reformatted with black. All 18 source files now pass mypy --strict with zero errors. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
336 lines
12 KiB
Python
336 lines
12 KiB
Python
"""
|
|
Fetch GWR single fares between any station and London Paddington (either direction).

Uses the GWR journey search API (same API as www.gwr.com ticket search).
Returns per-train cheapest standard-class walk-on fare with restrictions already
applied, and the cheapest standard and first-class advance fares per train.
Cache for 30 days — fares rarely change.
|
|
"""
|
|
|
|
from typing import Any, Generator
|
|
|
|
import httpx
|
|
|
|
# --- GWR journey-search endpoint and credentials ---------------------------

# Endpoint that every journey-search request in this module is POSTed to.
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"

# API key is embedded in the GWR web app (appvalues.prod.json)
_API_KEY = "OgovGqAlLp4gWAhL7DQLo7pMCt8GHi2U4SPFiZgG"

# --- Location and ticket codes ---------------------------------------------

# London Paddington cluster code as used by GWR website
_PAD_CODE = "GBQQP"

# Ticket-type codes this module treats as walk-on fares: the walk-on lookup
# keeps only these, while the standard-class advance lookups skip them.
_WALKON_CODES = {"SSS", "SVS", "SDS", "CDS"}

# --- Pagination -------------------------------------------------------------

# Upper bound on the number of API requests made for a single search.
_MAX_PAGES = 20
|
|
|
|
|
|
def _headers() -> dict[str, str]:
    """
    Build the HTTP headers sent with every GWR journey-search request.

    The values present the client as a desktop Chrome browser coming from
    www.gwr.com (user agent, origin, referer) and carry the API key in the
    ``apikey`` header.
    """
    browser_user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )
    headers: dict[str, str] = {
        "user-agent": browser_user_agent,
        "accept": "application/json, text/plain, */*",
        "channel": "WEB",
        "content-type": "application/json",
        "apikey": _API_KEY,
        "origin": "https://www.gwr.com",
        "referer": "https://www.gwr.com/",
    }
    return headers
|
|
|
|
|
|
def _request_body(
|
|
from_code: str,
|
|
to_code: str,
|
|
travel_date: str,
|
|
conversation_token: str | None,
|
|
later: bool,
|
|
) -> dict[str, Any]:
|
|
return {
|
|
"IsNextOutward": False,
|
|
"IsPreviousOutward": False,
|
|
"IsNextReturn": False,
|
|
"IsPreviousReturn": False,
|
|
"campaignCode": "",
|
|
"validationCode": "",
|
|
"locfrom": from_code,
|
|
"locto": to_code,
|
|
"datetimedepart": f"{travel_date}T00:00:00",
|
|
"outwarddepartafter": True,
|
|
"datetimereturn": None,
|
|
"returndepartafter": False,
|
|
"directServicesOnly": False,
|
|
"firstclass": False,
|
|
"standardclass": True,
|
|
"adults": 1,
|
|
"children": 0,
|
|
"openreturn": False,
|
|
"via": None,
|
|
"avoid": None,
|
|
"isEarlierSearch": False,
|
|
"isLaterSearch": later,
|
|
"isEarlierSearchReturn": False,
|
|
"isLaterSearchReturn": False,
|
|
"railcards": [],
|
|
"conversationToken": conversation_token,
|
|
}
|
|
|
|
|
|
def _station_code(station_crs: str) -> str:
    """
    Turn a station code into the location code sent to the GWR API.

    The API-side code is simply the given code with a ``GB`` prefix.
    """
    country_prefix = "GB"
    return f"{country_prefix}{station_crs}"
|
|
|
|
|
|
def _od_codes(station_crs: str, direction: str) -> tuple[str, str]:
    """
    Return the (origin, destination) location codes for a Paddington journey.

    direction == "from_paddington" puts Paddington first; any other value is
    treated as travelling towards Paddington.
    """
    station = _station_code(station_crs)
    leaving_paddington = direction == "from_paddington"
    return (_PAD_CODE, station) if leaving_paddington else (station, _PAD_CODE)
|
|
|
|
|
|
def _run_pages(
    station_crs: str,
    travel_date: str,
    first_class: bool = False,
    direction: str = "to_paddington",
) -> Generator[tuple[str, list[Any]], None, None]:
    """
    Iterate all pages of GWR journey search results.

    Yields (dep_time, fares_list) for each unique departure time seen, in the
    order the API returns them. dep_time is the "HH:MM" part of the journey's
    departure timestamp.
    first_class=True switches the request to first class fares.
    direction selects which end of the journey is Paddington.

    Any exception raised by the underlying HTTP calls propagates to the
    consumer of the generator.
    """
    # Paging, de-duplication and the page cap all live in _run_pages_batched;
    # flattening its per-page batches yields the same sequence without keeping
    # a second copy of that loop in sync.
    for batch in _run_pages_batched(
        station_crs, travel_date, first_class=first_class, direction=direction
    ):
        yield from batch
|
|
|
|
|
|
def _run_pages_batched(
    station_crs: str,
    travel_date: str,
    first_class: bool = False,
    direction: str = "to_paddington",
) -> Generator[list[tuple[str, list[Any]]], None, None]:
    """
    Like _run_pages but yields one list of (dep_time, fares_list) per API page call,
    allowing callers to stream results a page at a time.

    dep_time is the "HH:MM" slice of each journey's ``departureTime``; a
    departure time already seen on an earlier page (or earlier in the same
    page) is dropped, and pages that contribute nothing new are not yielded.
    first_class=True switches the request to first class fares.

    Paging stops when a response does not set ``showLaterOutward`` or after
    _MAX_PAGES requests, whichever comes first.

    Raises whatever ``raise_for_status()`` / the HTTP client raise for a failed
    request (httpx.HTTPStatusError for non-success responses).
    """
    seen: set[str] = set()
    from_code, to_code = _od_codes(station_crs, direction)
    conversation_token: str | None = None
    later = False
    with httpx.Client(headers=_headers(), timeout=30) as client:
        for _ in range(_MAX_PAGES):
            body = _request_body(
                from_code, to_code, travel_date, conversation_token, later
            )
            if first_class:
                body["firstclass"] = True
                body["standardclass"] = False
            resp = client.post(_API_URL, json=body)
            resp.raise_for_status()
            # dict.get's default only covers a *missing* key; the API can also
            # send an explicit null, so every container read below is
            # normalised with ``or`` before it is iterated or sliced.
            data = resp.json().get("data") or {}
            # The token from each response is echoed back on the next request.
            conversation_token = data.get("conversationToken")
            batch: list[tuple[str, list[Any]]] = []
            for journey in data.get("outwardOpenPureReturnFare") or []:
                dep_iso = journey.get("departureTime") or ""
                dep_time = dep_iso[11:16]  # "HH:MM" from "2026-04-10T09:08:00"
                if not dep_time or dep_time in seen:
                    continue
                seen.add(dep_time)
                batch.append((dep_time, journey.get("journeyFareDetails") or []))
            if batch:
                yield batch
            if not data.get("showLaterOutward", False):
                break
            later = True
|
|
|
|
|
|
def fetch(
    station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict[str, Any]]:
    """
    Fetch GWR walk-on single fares for the selected Paddington direction.

    Returns {departure_time: {'ticket': name, 'price': float, 'code': code}}
    where price is in £ and only the cheapest available standard-class walk-on
    ticket per departure (with restrictions already applied by GWR) is kept.
    Departures with no qualifying ticket are left out of the result.

    A fare record without a price is ignored rather than being counted as
    £0.00, so it can never be reported as the cheapest ticket.

    Exceptions raised by the underlying HTTP requests propagate to the caller.
    """
    result: dict[str, dict[str, Any]] = {}
    for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction):
        candidates = [
            fare
            for fare in fares
            if fare.get("ticketTypeCode") in _WALKON_CODES
            and fare.get("isStandardClass")
            and fare.get("fare") is not None
        ]
        if not candidates:
            continue
        # min() returns the first of equally-priced fares, i.e. API order wins ties.
        best = min(candidates, key=lambda fare: fare["fare"])
        result[dep_time] = {
            "ticket": best.get("ticketType", ""),
            "price": best["fare"] / 100,  # API prices are in pence
            "code": best["ticketTypeCode"],
        }
    return result
|
|
|
|
|
|
def fetch_advance(
    station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> dict[str, dict[str, Any]]:
    """
    Fetch advance fares: cheapest standard advance and first-class advance per departure.

    Makes two sets of paginated API calls (standard class, then first class).
    Returns {departure_time: {'advance_std': dict or None, 'advance_1st': dict or None}}
    where each sub-dict has keys 'ticket', 'price', 'code' and price is in £.
    Entries are ordered by departure time ("HH:MM").

    Exceptions raised by the underlying HTTP requests propagate to the caller.
    """
    std_advance = _cheapest_advance_by_departure(
        station_crs, travel_date, direction, first_class=False
    )
    first_advance = _cheapest_advance_by_departure(
        station_crs, travel_date, direction, first_class=True
    )
    return {
        dep_time: {
            "advance_std": std_advance.get(dep_time),
            "advance_1st": first_advance.get(dep_time),
        }
        for dep_time in sorted(std_advance.keys() | first_advance.keys())
    }


def _cheapest_advance_by_departure(
    station_crs: str,
    travel_date: str,
    direction: str,
    *,
    first_class: bool,
) -> dict[str, dict[str, Any]]:
    """
    Run one paginated search and keep the cheapest qualifying fare per departure.

    For the standard-class search, walk-on ticket codes and fares not flagged
    ``isStandardClass`` are skipped. For the first-class search every returned
    fare qualifies.
    NOTE(review): the first-class pass has no ticket-type filter, so a
    first-class walk-on fare could be picked — confirm that is intended.

    Fares without a price are ignored instead of being treated as £0.00.
    Returns {departure_time: {'ticket': name, 'price': float, 'code': code}}.
    """
    cheapest_by_time: dict[str, dict[str, Any]] = {}
    for dep_time, fares in _run_pages(
        station_crs, travel_date, first_class=first_class, direction=direction
    ):
        best: dict[str, Any] | None = None
        for fare in fares:
            if not first_class:
                if fare.get("ticketTypeCode") in _WALKON_CODES:
                    continue  # skip walk-on fares
                if not fare.get("isStandardClass"):
                    continue
            price_pence = fare.get("fare")
            if price_pence is None:
                continue
            # Strict "<" keeps the first of equally-priced fares.
            if best is None or price_pence < best["fare"]:
                best = fare
        if best is not None:
            cheapest_by_time[dep_time] = {
                "ticket": best.get("ticketType", ""),
                "price": best["fare"] / 100,  # API prices are in pence
                "code": best.get("ticketTypeCode"),
            }
    return cheapest_by_time
|
|
|
|
|
|
def fetch_advance_streaming(
    station_crs: str, travel_date: str, direction: str = "to_paddington"
) -> Generator[dict[str, dict[str, Any]], None, None]:
    """
    Generator yielding partial advance fare dicts one GWR API page at a time.

    Each yield is {dep_time: {'advance_std': dict|None, 'advance_1st': dict|None}}.
    Two passes are made (standard class then first class); each page of results is
    yielded immediately so callers can stream prices to clients as they arrive.

    Pages from the first pass only fill 'advance_std' and pages from the second
    pass only fill 'advance_1st'; the other key is None, so callers that want a
    combined view must merge entries for the same dep_time themselves.

    Exceptions raised by the underlying HTTP requests propagate to the consumer.
    """
    # Pass 1: standard class advance fares
    yield from _stream_advance_pass(
        station_crs, travel_date, direction, first_class=False
    )
    # Pass 2: first class advance fares
    yield from _stream_advance_pass(
        station_crs, travel_date, direction, first_class=True
    )


def _stream_advance_pass(
    station_crs: str,
    travel_date: str,
    direction: str,
    *,
    first_class: bool,
) -> Generator[dict[str, dict[str, Any]], None, None]:
    """
    Yield one {dep_time: entry} dict per API page for a single fare class.

    The standard-class pass skips walk-on ticket codes and fares not flagged
    ``isStandardClass``; the first-class pass accepts every returned fare.
    NOTE(review): the first-class pass has no ticket-type filter, so a
    first-class walk-on fare could be picked — confirm that is intended.

    Fares without a price are ignored instead of being treated as £0.00, and
    pages that produce no entries are not yielded.
    """
    slot = "advance_1st" if first_class else "advance_std"
    for batch in _run_pages_batched(
        station_crs, travel_date, first_class=first_class, direction=direction
    ):
        page: dict[str, dict[str, Any]] = {}
        for dep_time, fares in batch:
            best: dict[str, Any] | None = None
            for fare in fares:
                if not first_class:
                    if fare.get("ticketTypeCode") in _WALKON_CODES:
                        continue
                    if not fare.get("isStandardClass"):
                        continue
                price_pence = fare.get("fare")
                if price_pence is None:
                    continue
                # Strict "<" keeps the first of equally-priced fares.
                if best is None or price_pence < best["fare"]:
                    best = fare
            if best is None:
                continue
            # Both keys are always present; only this pass's slot is filled.
            entry: dict[str, Any] = {"advance_std": None, "advance_1st": None}
            entry[slot] = {
                "ticket": best.get("ticketType", ""),
                "price": best["fare"] / 100,  # API prices are in pence
                "code": best.get("ticketTypeCode"),
            }
            page[dep_time] = entry
        if page:
            yield page
|