Add multi-station support, GWR fares API, and Circle line improvements

- Support any station with direct trains to Paddington; station CRS code
  is now part of the URL (/results/<crs>/<slug>/<date>)
- Load station list from data/direct_to_paddington.tsv; show dropdown on
  index page; 404 for unknown station codes
- Fetch live GWR walk-on fares via api.gwr.com for all stations (SSS/SVS/SDS
  with restrictions already applied per train); cache 30 days
- Scrape Paddington arrival platform numbers from RTT
- Show unreachable morning Eurostars (before first reachable service only)
- Circle line: show actual KX St Pancras arrival times (not check-in estimate)
  and add a second backup service in the transfer column
- Widen page max-width to 1100px for longer station names

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-04-06 20:22:44 +01:00
parent 71be0dd8cf
commit 3c787b33d3
12 changed files with 810 additions and 262 deletions

125
scraper/gwr_fares.py Normal file
View file

@ -0,0 +1,125 @@
"""
Fetch GWR walk-on single fares from any station to London Paddington.
Uses the GWR journey search API (same API as www.gwr.com ticket search).
Returns per-train cheapest standard-class fare with restrictions already applied.
Cache results for 30 days; fares rarely change.
"""
import httpx
# Journey-search endpoint that fetch() POSTs its requests to.
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"
# API key is embedded in the GWR web app (appvalues.prod.json)
_API_KEY = "OgovGqAlLp4gWAhL7DQLo7pMCt8GHi2U4SPFiZgG"
_PAD_CODE = "GBQQP"  # London Paddington cluster code as used by GWR website
# Ticket type codes fetch() will consider; a fare with any other
# ticketTypeCode is ignored.
# NOTE(review): presumably the walk-on single ticket types (super off-peak,
# off-peak, anytime day) - confirm against the fares data.
_WANTED_CODES = {"SSS", "SVS", "SDS"}
# Upper bound on the number of result pages fetch() requests in one call.
_MAX_PAGES = 20
def _headers() -> dict:
    """Build the HTTP headers sent with every GWR API request.

    The values present the request as coming from the GWR website in a
    desktop Chrome browser (user agent, origin and referer) and carry the
    API key in the ``apikey`` header.  A new dict is returned on each call.
    """
    browser_user_agent = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )

    headers: dict = {}
    headers["user-agent"] = browser_user_agent
    headers["accept"] = "application/json, text/plain, */*"
    headers["channel"] = "WEB"
    headers["content-type"] = "application/json"
    headers["apikey"] = _API_KEY
    headers["origin"] = "https://www.gwr.com"
    headers["referer"] = "https://www.gwr.com/"
    return headers
def _request_body(
    station_crs: str,
    travel_date: str,
    conversation_token: str | None,
    later: bool,
) -> dict:
    """Assemble the JSON payload for one journey-search request.

    The search is always for one adult, standard class, single journey from
    ``GB<station_crs>`` to London Paddington, departing after midnight at the
    start of ``travel_date``.

    ``conversation_token`` is passed through unchanged as
    ``conversationToken`` and ``later`` as ``isLaterSearch``; fetch() uses
    the pair to ask for the page of results following the previous response.
    """
    origin = f"GB{station_crs}"
    depart_at = f"{travel_date}T00:00:00"

    # None of the next/previous navigation flags is ever set.
    payload: dict = dict.fromkeys(
        (
            "IsNextOutward",
            "IsPreviousOutward",
            "IsNextReturn",
            "IsPreviousReturn",
        ),
        False,
    )
    # Merge the remaining fields; insertion order matches the key order the
    # payload has always been sent with.
    payload |= {
        "campaignCode": "",
        "validationCode": "",
        "locfrom": origin,
        "locto": _PAD_CODE,
        "datetimedepart": depart_at,
        "outwarddepartafter": True,
        "datetimereturn": None,
        "returndepartafter": False,
        "directServicesOnly": False,
        "firstclass": False,
        "standardclass": True,
        "adults": 1,
        "children": 0,
        "openreturn": False,
        "via": None,
        "avoid": None,
        "isEarlierSearch": False,
        "isLaterSearch": later,
        "isEarlierSearchReturn": False,
        "isLaterSearchReturn": False,
        "railcards": [],
        "conversationToken": conversation_token,
    }
    return payload
def _cheapest_fare(journey: dict) -> dict | None:
    """Return the cheapest wanted standard-class fare offered for *journey*.

    Only fares whose ``ticketTypeCode`` is in ``_WANTED_CODES`` and whose
    ``isStandardClass`` flag is truthy are considered.  A fare entry with no
    price is skipped: defaulting it to 0 would make it win as a bogus £0.00
    "cheapest" ticket.

    Returns ``None`` when no fare qualifies, otherwise
    ``{'ticket': name, 'price': float, 'code': code}`` with the price
    converted from the API's integer amount by dividing by 100.
    """
    best = None
    best_pence = None
    for fare in journey.get("journeyFareDetails") or []:
        code = fare.get("ticketTypeCode")
        if code not in _WANTED_CODES or not fare.get("isStandardClass"):
            continue
        price_pence = fare.get("fare")
        if price_pence is None:
            continue
        if best_pence is None or price_pence < best_pence:
            best_pence = price_pence
            best = {
                "ticket": fare.get("ticketType", ""),
                "price": price_pence / 100,
                "code": code,
            }
    return best


def fetch(station_crs: str, travel_date: str) -> dict[str, dict]:
    """
    Fetch GWR single fares from station_crs to London Paddington on travel_date.

    Returns {departure_time: {'ticket': name, 'price': float, 'code': code}}
    where departure_time is "HH:MM", price is in £ and only the cheapest
    available standard-class ticket per departure (with restrictions already
    applied by GWR) is kept.  Departures with no qualifying fare are omitted.

    travel_date must be an ISO date ("YYYY-MM-DD"); it is used both to build
    the search request and to discard journeys the API returns for any other
    day, since the result is keyed by time of day only.

    Results are paged: up to _MAX_PAGES requests are made, stopping early
    when the API reports no later services or the results run past
    travel_date.

    Raises httpx.HTTPStatusError if any request returns an error status.
    """
    result: dict[str, dict] = {}
    with httpx.Client(headers=_headers(), timeout=30) as client:
        conversation_token = None
        later = False
        for _ in range(_MAX_PAGES):
            body = _request_body(station_crs, travel_date, conversation_token, later)
            resp = client.post(_API_URL, json=body)
            resp.raise_for_status()
            # `or {}` / `or []` also cover keys that are present but null.
            data = resp.json().get("data") or {}
            conversation_token = data.get("conversationToken")

            past_travel_date = False
            for journey in data.get("outwardOpenPureReturnFare") or []:
                # e.g. "2026-04-10T09:08:00" -> "2026-04-10", "09:08"
                dep_iso = journey.get("departureTime") or ""
                dep_date, dep_time = dep_iso[:10], dep_iso[11:16]
                if dep_date > travel_date:
                    # ISO dates sort lexicographically; later pages can only
                    # move further away from travel_date.
                    past_travel_date = True
                if dep_date != travel_date:
                    continue
                # First occurrence of a departure time wins.
                if not dep_time or dep_time in result:
                    continue
                cheapest = _cheapest_fare(journey)
                if cheapest:
                    result[dep_time] = cheapest

            if past_travel_date or not data.get("showLaterOutward", False):
                break
            later = True
    return result