Add multi-station support, GWR fares API, and Circle line improvements

- Support any station with direct trains to Paddington; station CRS code
  is now part of the URL (/results/<crs>/<slug>/<date>)
- Load station list from data/direct_to_paddington.tsv; show dropdown on
  index page; 404 for unknown station codes
- Fetch live GWR walk-on fares via api.gwr.com for all stations (SSS/SVS/SDS
  with restrictions already applied per train); cache 30 days
- Scrape Paddington arrival platform numbers from RTT
- Show unreachable morning Eurostars (before first reachable service only)
- Circle line: show actual KX St Pancras arrival times (not check-in estimate)
  and add a second backup service in the transfer column
- Widen page max-width to 1100px for longer station names

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-04-06 20:22:44 +01:00
parent 71be0dd8cf
commit 3c787b33d3
12 changed files with 810 additions and 262 deletions

View file

@ -10,14 +10,14 @@ import re
import httpx
import lxml.html
# Real Time Trains "detailed search" URL templates.
#
# Both templates take two str.format fields:
#   crs  - CRS code of the non-Paddington station
#   date - interpolated verbatim into the URL path
# NOTE(review): the "0000-2359" segment presumably selects the whole day;
# confirm against the RTT URL scheme.

# Services calling at {crs} that go on to PAD.
_TO_PAD_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:{crs}/to/gb-nr:PAD/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)

# Services calling at PAD that came from {crs}.
_PAD_FROM_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)

# Bristol-only names from before the templates were parameterised by station.
# Filling {date} with the literal text "{date}" leaves that placeholder in
# place, so these are still templates that accept .format(date=...).
BRI_TO_PAD = _TO_PAD_TMPL.format(crs="BRI", date="{date}")
PAD_FROM_BRI = _PAD_FROM_TMPL.format(crs="BRI", date="{date}")
@ -68,19 +68,48 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
return result
def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
"""Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
def _parse_arrivals(html: str) -> dict[str, dict]:
    """Map each train id on a PAD arrivals page to its arrival time and platform.

    Only the first ``div.servicelist`` in the document is read; when the page
    has none, an empty dict is returned.  Within it, every ``a.service`` entry
    contributes ``{train_id: {'time': ..., 'platform': ...}}`` where:

    * ``train_id`` is the stripped text of the entry's first ``div.tid``;
    * ``'time'`` is ``_fmt`` applied to the stripped text of its first
      ``div.time.plan.a``;
    * ``'platform'`` is the stripped text of its first ``div.platform``, or
      ``''`` when the entry has no such element.

    Entries without a train id element, without a time element, or whose time
    text is blank are skipped.  If a train id appears more than once, the
    last entry wins.
    """
    document = lxml.html.fromstring(html)
    service_lists = document.cssselect('div.servicelist')
    if not service_lists:
        return {}

    arrivals = {}
    for link in service_lists[0].cssselect('a.service'):
        id_nodes = link.cssselect('div.tid')
        time_nodes = link.cssselect('div.time.plan.a')
        if not id_nodes or not time_nodes:
            continue

        planned = time_nodes[0].text_content().strip()
        if not planned:
            continue

        platform_nodes = link.cssselect('div.platform')
        if platform_nodes:
            platform = platform_nodes[0].text_content().strip()
        else:
            platform = ''

        train_id = id_nodes[0].text_content().strip()
        arrivals[train_id] = {'time': _fmt(planned), 'platform': platform}
    return arrivals
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
    """Fetch trains from station_crs to PAD.

    Two Real Time Trains pages are requested with the same HTTP client: the
    departures from ``station_crs`` towards PAD and the arrivals at PAD from
    ``station_crs``.  The two listings are joined on train id; a train that
    appears in the departures page but not in the arrivals page is dropped.

    Args:
        date: Interpolated verbatim into both request URLs.
        user_agent: Passed to ``_browser_headers`` to build the request headers.
        station_crs: CRS code of the origin station.

    Returns:
        A list of dicts sorted by ``'depart_bristol'``, each with the keys
        ``'depart_bristol'``, ``'arrive_paddington'``, ``'arrive_platform'``
        and ``'headcode'``.  The ``'depart_bristol'`` key keeps its name
        whatever ``station_crs`` is.
    """
    headers = _browser_headers(user_agent)
    with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
        r_dep = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
        r_arr = client.get(_PAD_FROM_TMPL.format(crs=station_crs, date=date))

    # NOTE(review): the HTTP status of the two responses is not checked, so
    # whatever body came back is handed to the parsers as-is.
    departures = _parse_services(r_dep.text, 'div.time.plan.d')
    arrivals = _parse_arrivals(r_pad_text := r_arr.text)

    trains = [
        {
            'depart_bristol': dep,
            'arrive_paddington': arrivals[tid]['time'],
            'arrive_platform': arrivals[tid]['platform'],
            'headcode': tid,
        }
        for tid, dep in departures.items()
        if tid in arrivals
    ]
    return sorted(trains, key=lambda t: t['depart_bristol'])