Add multi-station support, GWR fares API, and Circle line improvements
- Support any station with direct trains to Paddington; station CRS code is
  now part of the URL (/results/<crs>/<slug>/<date>)
- Load station list from data/direct_to_paddington.tsv; show dropdown on
  index page; 404 for unknown station codes
- Fetch live GWR walk-on fares via api.gwr.com for all stations (SSS/SVS/SDS
  with restrictions already applied per train); cache 30 days
- Scrape Paddington arrival platform numbers from RTT
- Show unreachable morning Eurostars (before first reachable service only)
- Circle line: show actual KX St Pancras arrival times (not check-in
  estimate) and add a second backup service in the transfer column
- Widen page max-width to 1100px for longer station names

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
71be0dd8cf
commit
3c787b33d3
12 changed files with 810 additions and 262 deletions
125
scraper/gwr_fares.py
Normal file
125
scraper/gwr_fares.py
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
"""
|
||||
Fetch GWR walk-on single fares from any station to London Paddington.
|
||||
|
||||
Uses the GWR journey search API (same API as www.gwr.com ticket search).
|
||||
Returns per-train cheapest standard-class fare with restrictions already applied.
|
||||
Cache for 30 days — fares rarely change.
|
||||
"""
|
||||
|
||||
import httpx
|
||||
|
||||
_API_URL = "https://api.gwr.com/api/shopping/journeysearch"
|
||||
# API key is embedded in the GWR web app (appvalues.prod.json)
|
||||
_API_KEY = "OgovGqAlLp4gWAhL7DQLo7pMCt8GHi2U4SPFiZgG"
|
||||
_PAD_CODE = "GBQQP" # London Paddington cluster code as used by GWR website
|
||||
_WANTED_CODES = {"SSS", "SVS", "SDS"}
|
||||
_MAX_PAGES = 20
|
||||
|
||||
|
||||
def _headers() -> dict:
    """Build the HTTP headers attached to every GWR journey-search request.

    Returns:
        A fresh dict containing a desktop-Chrome user agent, JSON content
        negotiation headers, the ``channel``/``apikey`` pair and the
        www.gwr.com ``origin``/``referer`` values.
    """
    browser_ua = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    )

    # Built step by step; insertion order matches the original literal.
    headers = {"user-agent": browser_ua}
    headers["accept"] = "application/json, text/plain, */*"
    headers["channel"] = "WEB"
    headers["content-type"] = "application/json"
    headers["apikey"] = _API_KEY
    headers["origin"] = "https://www.gwr.com"
    headers["referer"] = "https://www.gwr.com/"
    return headers
|
||||
|
||||
|
||||
def _request_body(
    station_crs: str,
    travel_date: str,
    conversation_token: str | None,
    later: bool,
) -> dict:
    """Assemble the JSON payload for one page of the journey search.

    Args:
        station_crs: Origin station code; sent as ``GB<station_crs>``.
        travel_date: Date string placed in front of ``T00:00:00`` to form the
            departure timestamp (presumably ``YYYY-MM-DD`` — confirm with
            callers).
        conversation_token: Token echoed back to the API, or ``None`` when no
            token is available yet.
        later: Value sent as ``isLaterSearch``.

    Returns:
        A new dict describing a one-adult, standard-class, outward-only
        search to ``_PAD_CODE`` with no railcards.
    """
    origin = f"GB{station_crs}"
    depart_at = f"{travel_date}T00:00:00"

    # Paging flags of the web form; this scraper never sets any of them.
    body: dict = dict.fromkeys(
        ("IsNextOutward", "IsPreviousOutward", "IsNextReturn", "IsPreviousReturn"),
        False,
    )
    body.update(campaignCode="", validationCode="")

    # Route and timing of the outward leg; no return leg is requested.
    body.update(
        locfrom=origin,
        locto=_PAD_CODE,
        datetimedepart=depart_at,
        outwarddepartafter=True,
        datetimereturn=None,
        returndepartafter=False,
        directServicesOnly=False,
    )

    # Passenger and ticket-class selection.
    body.update(
        firstclass=False,
        standardclass=True,
        adults=1,
        children=0,
        openreturn=False,
        via=None,
        avoid=None,
    )

    # Pagination state: only the "later outward" direction is ever used.
    body.update(
        isEarlierSearch=False,
        isLaterSearch=later,
        isEarlierSearchReturn=False,
        isLaterSearchReturn=False,
        railcards=[],
        conversationToken=conversation_token,
    )
    return body
|
||||
|
||||
|
||||
def _cheapest_walk_on_fare(journey: dict) -> dict | None:
    """Return the cheapest wanted standard-class fare of one journey, or None.

    Only fares whose ``ticketTypeCode`` is in ``_WANTED_CODES`` and whose
    ``isStandardClass`` flag is truthy are considered. Entries without a
    numeric ``fare`` are skipped so that a missing price is never reported as
    a free (0 pence) ticket.
    """
    best_pence = None
    best_fare = None
    # `or []` also covers an explicit JSON null for the fare list.
    for fare in journey.get("journeyFareDetails") or []:
        if fare.get("ticketTypeCode") not in _WANTED_CODES:
            continue
        if not fare.get("isStandardClass"):
            continue
        pence = fare.get("fare")
        # bool is an int subclass; reject it along with None/str/etc.
        if isinstance(pence, bool) or not isinstance(pence, (int, float)):
            continue
        if best_pence is None or pence < best_pence:
            best_pence = pence
            best_fare = fare

    if best_fare is None:
        return None
    return {
        "ticket": best_fare.get("ticketType", ""),
        "price": best_pence / 100,  # API value is in pence
        "code": best_fare.get("ticketTypeCode"),
    }


def fetch(station_crs: str, travel_date: str) -> dict[str, dict]:
    """
    Fetch GWR single fares from station_crs to London Paddington on travel_date.

    Pages through the journey-search API (at most ``_MAX_PAGES`` requests),
    following the "later outward" results until the API stops reporting
    ``showLaterOutward``.

    Returns {departure_time: {'ticket': name, 'price': float, 'code': code}}
    where departure_time is "HH:MM", price is in £, and only the cheapest
    available standard-class ticket per departure (with restrictions already
    applied by GWR) is kept. The first journey seen for a given departure
    time wins; departures without a usable fare are omitted.

    Raises:
        httpx.HTTPStatusError: if any page request returns an error status.
    """
    result: dict[str, dict] = {}

    with httpx.Client(headers=_headers(), timeout=30) as client:
        conversation_token = None
        later = False

        for _ in range(_MAX_PAGES):
            body = _request_body(station_crs, travel_date, conversation_token, later)
            resp = client.post(_API_URL, json=body)
            resp.raise_for_status()
            # `or {}` guards against {"data": null} as well as a missing key.
            data = resp.json().get("data") or {}

            conversation_token = data.get("conversationToken")

            for journey in data.get("outwardOpenPureReturnFare") or []:
                # "HH:MM" from "2026-04-10T09:08:00"; tolerate a null timestamp.
                dep_time = (journey.get("departureTime") or "")[11:16]
                if not dep_time or dep_time in result:
                    continue

                cheapest = _cheapest_walk_on_fare(journey)
                if cheapest is not None:
                    result[dep_time] = cheapest

            if not data.get("showLaterOutward", False):
                break
            later = True

    return result
|
||||
|
|
@ -10,14 +10,14 @@ import re
|
|||
import httpx
|
||||
import lxml.html
|
||||
|
||||
BRI_TO_PAD = (
|
||||
_TO_PAD_TMPL = (
|
||||
"https://www.realtimetrains.co.uk/search/detailed/"
|
||||
"gb-nr:BRI/to/gb-nr:PAD/{date}/0000-2359"
|
||||
"gb-nr:{crs}/to/gb-nr:PAD/{date}/0000-2359"
|
||||
"?stp=WVS&show=pax-calls&order=wtt"
|
||||
)
|
||||
PAD_FROM_BRI = (
|
||||
_PAD_FROM_TMPL = (
|
||||
"https://www.realtimetrains.co.uk/search/detailed/"
|
||||
"gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359"
|
||||
"gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
|
||||
"?stp=WVS&show=pax-calls&order=wtt"
|
||||
)
|
||||
|
||||
|
|
@ -68,19 +68,48 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
|
|||
return result
|
||||
|
||||
|
||||
def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
|
||||
"""Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
|
||||
def _parse_arrivals(html: str) -> dict[str, dict]:
    """Extract planned arrival time and platform per train from an arrivals page.

    Looks at the first ``div.servicelist`` element and, for each ``a.service``
    inside it, reads the ``div.tid`` text as the train id, the
    ``div.time.plan.a`` text as the planned arrival and the ``div.platform``
    text as the platform.

    Returns:
        ``{train_id: {'time': ..., 'platform': ...}}``. Services lacking an id
        element or a non-empty planned arrival are skipped; a missing platform
        element yields ``''``. An empty dict is returned when the page has no
        service list.
    """
    document = lxml.html.fromstring(html)
    service_lists = document.cssselect('div.servicelist')
    if not service_lists:
        return {}

    arrivals = {}
    for service in service_lists[0].cssselect('a.service'):
        id_nodes = service.cssselect('div.tid')
        time_nodes = service.cssselect('div.time.plan.a')
        if not id_nodes or not time_nodes:
            continue

        planned = time_nodes[0].text_content().strip()
        if not planned:
            continue

        platform_nodes = service.cssselect('div.platform')
        if platform_nodes:
            platform = platform_nodes[0].text_content().strip()
        else:
            platform = ''

        train_id = id_nodes[0].text_content().strip()
        arrivals[train_id] = {'time': _fmt(planned), 'platform': platform}
    return arrivals
|
||||
|
||||
|
||||
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
    """Fetch trains from station_crs to PAD.

    Requests two search pages (departures from ``station_crs`` towards PAD,
    and arrivals at PAD from ``station_crs``) and joins them on train id.

    Args:
        date: Date string substituted into the search URLs.
        user_agent: User-agent string passed to ``_browser_headers``.
        station_crs: Origin station code substituted into the search URLs;
            defaults to ``'BRI'``.

    Returns:
        A list of ``{'depart_bristol', 'arrive_paddington', 'arrive_platform',
        'headcode'}`` dicts sorted by departure time. The ``depart_bristol``
        key keeps its historical name for every origin station so existing
        callers continue to work. Trains without a matching PAD arrival are
        dropped.
    """
    headers = _browser_headers(user_agent)
    with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
        r_dep = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
        r_pad = client.get(_PAD_FROM_TMPL.format(crs=station_crs, date=date))

        departures = _parse_services(r_dep.text, 'div.time.plan.d')
        arrivals = _parse_arrivals(r_pad.text)

    trains = [
        {
            'depart_bristol': dep,
            'arrive_paddington': arr['time'],
            'arrive_platform': arr['platform'],
            'headcode': tid,
        }
        for tid, dep in departures.items()
        # Single lookup per train id; entries are non-empty dicts, so a
        # truthiness test is equivalent to `tid in arrivals`.
        if (arr := arrivals.get(tid))
    ]
    return sorted(trains, key=lambda t: t['depart_bristol'])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue