Add multi-station support, GWR fares API, and Circle line improvements
- Support any station with direct trains to Paddington; station CRS code is now part of the URL (/results/<crs>/<slug>/<date>)
- Load station list from data/direct_to_paddington.tsv; show dropdown on index page; 404 for unknown station codes
- Fetch live GWR walk-on fares via api.gwr.com for all stations (SSS/SVS/SDS with restrictions already applied per train); cache 30 days
- Scrape Paddington arrival platform numbers from RTT
- Show unreachable morning Eurostars (before first reachable service only)
- Circle line: show actual KX St Pancras arrival times (not check-in estimate) and add a second backup service in the transfer column
- Widen page max-width to 1100px for longer station names

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
71be0dd8cf
commit
3c787b33d3
12 changed files with 810 additions and 262 deletions
125
scraper/gwr_fares.py
Normal file
125
scraper/gwr_fares.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
"""
|
||||
Fetch GWR walk-on single fares from any station to London Paddington.
|
||||
|
||||
Uses the GWR journey search API (same API as www.gwr.com ticket search).
|
||||
Returns per-train cheapest standard-class fare with restrictions already applied.
|
||||
Cache for 30 days — fares rarely change.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
|
||||
# Journey-search endpoint that every request in this module is POSTed to.
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"

# API key is embedded in the GWR web app (appvalues.prod.json)
_API_KEY = "OgovGqAlLp4gWAhL7DQLo7pMCt8GHi2U4SPFiZgG"

# London Paddington cluster code as used by GWR website
_PAD_CODE = "GBQQP"

# Ticket type codes that fetch() keeps; fares with any other code are ignored.
_WANTED_CODES = {
    "SSS",
    "SVS",
    "SDS",
}

# Upper bound on the number of result pages fetch() requests per call.
_MAX_PAGES = 20
|
||||
|
||||
|
||||
def _headers() -> dict:
    """
    Build the HTTP headers sent with every journey-search request.

    The headers present the client as a desktop Chrome browser coming from
    www.gwr.com and carry the API key in the ``apikey`` header.
    """
    browser_ua = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )
    headers: dict = {"user-agent": browser_ua}
    headers.update(
        {
            "accept": "application/json, text/plain, */*",
            "channel": "WEB",
            "content-type": "application/json",
            "apikey": _API_KEY,
            "origin": "https://www.gwr.com",
            "referer": "https://www.gwr.com/",
        }
    )
    return headers
|
||||
|
||||
|
||||
def _request_body(
    station_crs: str,
    travel_date: str,
    conversation_token: str | None,
    later: bool,
) -> dict:
    """
    Build the JSON payload for one journey-search request.

    station_crs is prefixed with "GB" to form the origin code; the destination
    is always _PAD_CODE. The search starts at 00:00:00 on travel_date.
    conversation_token and later are passed through as "conversationToken"
    and "isLaterSearch"; all remaining fields are fixed values describing a
    one-way, standard-class search for one adult with no railcards.
    """
    origin_code = f"GB{station_crs}"
    depart_from = f"{travel_date}T00:00:00"

    payload = {
        "IsNextOutward": False,
        "IsPreviousOutward": False,
        "IsNextReturn": False,
        "IsPreviousReturn": False,
        "campaignCode": "",
        "validationCode": "",
        "locfrom": origin_code,
        "locto": _PAD_CODE,
        "datetimedepart": depart_from,
        "outwarddepartafter": True,
        "datetimereturn": None,
        "returndepartafter": False,
        "directServicesOnly": False,
        "firstclass": False,
        "standardclass": True,
        "adults": 1,
        "children": 0,
        "openreturn": False,
        "via": None,
        "avoid": None,
        "isEarlierSearch": False,
        "isLaterSearch": later,
        "isEarlierSearchReturn": False,
        "isLaterSearchReturn": False,
        "railcards": [],
        "conversationToken": conversation_token,
    }
    return payload
|
||||
|
||||
|
||||
def fetch(station_crs: str, travel_date: str) -> dict[str, dict]:
    """
    Fetch GWR single fares from station_crs to London Paddington on travel_date.

    Pages through the journey-search API, issuing at most _MAX_PAGES requests,
    and stops as soon as a page does not set "showLaterOutward".

    Args:
        station_crs: CRS code of the origin station (sent as "GB" + code).
        travel_date: Travel date in "YYYY-MM-DD" form.

    Returns:
        {departure_time: {'ticket': name, 'price': float, 'code': code}}
        where departure_time is "HH:MM", price is in £ and only the cheapest
        available standard-class ticket per departure (with restrictions
        already applied by GWR) is kept. Departures with no wanted fare are
        omitted.

    Raises:
        httpx.HTTPStatusError: if any page returns an error status.
    """
    result: dict[str, dict] = {}
    travel_day = str(travel_date)

    with httpx.Client(headers=_headers(), timeout=30) as client:
        conversation_token = None
        later = False

        for _ in range(_MAX_PAGES):
            body = _request_body(station_crs, travel_date, conversation_token, later)
            resp = client.post(_API_URL, json=body)
            resp.raise_for_status()
            # "data" may be missing or null; treat both as an empty page.
            data = resp.json().get("data") or {}

            conversation_token = data.get("conversationToken")

            for journey in data.get("outwardOpenPureReturnFare") or []:
                dep_iso = journey.get("departureTime") or ""
                # Results are keyed by HH:MM only, so a journey on any other
                # day (e.g. reached by paging past midnight) must not be
                # recorded as if it ran on travel_date.
                if dep_iso[:10] != travel_day:
                    continue
                dep_time = dep_iso[11:16]  # "HH:MM" from "2026-04-10T09:08:00"
                if not dep_time or dep_time in result:
                    continue

                cheapest = _cheapest_wanted_fare(journey)
                if cheapest is not None:
                    result[dep_time] = cheapest

            if not data.get("showLaterOutward", False):
                break
            later = True

    return result


def _cheapest_wanted_fare(journey: dict) -> dict | None:
    """
    Pick the cheapest standard-class fare in _WANTED_CODES for one journey.

    Returns {'ticket': name, 'price': float, 'code': code}, or None when the
    journey has no usable fare.
    """
    best = None
    best_pence = None
    for fare in journey.get("journeyFareDetails") or []:
        code = fare.get("ticketTypeCode")
        if code not in _WANTED_CODES or not fare.get("isStandardClass"):
            continue
        pence = fare.get("fare")
        # A missing or non-numeric price must not be treated as a free ticket
        # (and would raise on comparison if it were None).
        if isinstance(pence, bool) or not isinstance(pence, (int, float)):
            continue
        if best_pence is None or pence < best_pence:
            best_pence = pence
            best = {
                "ticket": fare.get("ticketType", ""),
                "price": pence / 100,
                "code": code,
            }
    return best
|
||||
Loading…
Add table
Add a link
Reference in a new issue