Add return and inbound journey support

This commit is contained in:
Edward Betts 2026-05-21 08:46:35 +01:00
parent 6ba71447ef
commit 9691632f65
12 changed files with 1687 additions and 486 deletions

View file

@ -1,5 +1,6 @@
"""
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
Scrape direct trains between a selected station and London Paddington using
Realtime Trains.
Two fetches:
BRI/to/PAD departure times from Bristol (div.time.plan.d)
@ -20,6 +21,16 @@ _PAD_FROM_TMPL = (
"gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
"?stp=WVS&show=pax-calls&order=wtt"
)
_PAD_TO_TMPL = (
"https://www.realtimetrains.co.uk/search/detailed/"
"gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359"
"?stp=WVS&show=pax-calls&order=wtt"
)
_FROM_PAD_TMPL = (
"https://www.realtimetrains.co.uk/search/detailed/"
"gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359"
"?stp=WVS&show=pax-calls&order=wtt"
)
DEFAULT_UA = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
@ -69,7 +80,7 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
def _parse_arrivals(html: str) -> dict[str, dict]:
"""Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page."""
"""Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page."""
root = lxml.html.fromstring(html)
sl = root.cssselect('div.servicelist')
if not sl:
@ -93,7 +104,7 @@ def _parse_arrivals(html: str) -> dict[str, dict]:
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
"""Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}]."""
"""Fetch trains from station_crs to PAD."""
headers = _browser_headers(user_agent)
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
@ -113,3 +124,44 @@ def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') ->
if tid in arrivals
]
return sorted(trains, key=lambda t: t['depart_bristol'])
def fetch_to_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """Fetch trains from station_crs to PAD using generic field names.

    Thin wrapper around ``fetch``: every record it returns is copied and
    given a station-neutral ``depart_origin`` key mirroring
    ``depart_bristol``. The original keys are kept, so existing consumers of
    ``depart_bristol`` keep working.

    Args:
        date: Travel date, passed straight through to ``fetch``.
        user_agent: User-Agent string, passed straight through to ``fetch``.
        station_crs: CRS code of the origin station.

    Returns:
        One dict per train, in the order ``fetch`` produced them. Each has at
        least ``depart_origin``, ``arrive_paddington``, ``arrive_platform``
        and ``headcode``; the last two default to ``""`` when ``fetch`` did
        not supply them.

    Raises:
        KeyError: If a record from ``fetch`` lacks ``depart_bristol`` or
            ``arrive_paddington``.
    """
    generic_rows = []
    for row in fetch(date, user_agent, station_crs):
        # Work on a copy so the dicts produced by fetch() are not mutated.
        record = dict(row)
        record["depart_origin"] = row["depart_bristol"]
        record["arrive_paddington"] = row["arrive_paddington"]
        record["arrive_platform"] = row.get("arrive_platform", "")
        record["headcode"] = row.get("headcode", "")
        generic_rows.append(record)
    return generic_rows
def fetch_from_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """Fetch trains from PAD to station_crs.

    Two pages are requested over one HTTP client: the PAD page listing
    services towards ``station_crs`` (source of departure times) and the
    ``station_crs`` page listing services from PAD (source of arrival times
    and platforms). The two are joined on the service identifier used as the
    key by both parsers.

    Args:
        date: Travel date, substituted into the search URLs as-is.
        user_agent: User-Agent string used to build the request headers.
        station_crs: CRS code of the destination station.

    Returns:
        A list of dicts with keys ``depart_paddington``,
        ``arrive_destination``, ``arrive_platform`` and ``headcode``, sorted
        by ``depart_paddington``. Services that appear on only one of the two
        pages are omitted.
    """
    request_headers = _browser_headers(user_agent)
    with httpx.Client(
        headers=request_headers, follow_redirects=True, timeout=30
    ) as session:
        pad_page = session.get(
            _PAD_TO_TMPL.format(crs=station_crs, date=date)
        )
        station_page = session.get(
            _FROM_PAD_TMPL.format(crs=station_crs, date=date)
        )

    departures = _parse_services(pad_page.text, 'div.time.plan.d')
    arrivals = _parse_arrivals(station_page.text)

    journeys = []
    for service_id, departs_at in departures.items():
        # Only keep services seen on both pages; without an arrival record
        # there is no arrival time or platform to report.
        if service_id not in arrivals:
            continue
        arrival = arrivals[service_id]
        journeys.append(
            {
                "depart_paddington": departs_at,
                "arrive_destination": arrival["time"],
                "arrive_platform": arrival["platform"],
                "headcode": service_id,
            }
        )

    # list.sort is stable, matching the ordering sorted() would give.
    journeys.sort(key=lambda journey: journey["depart_paddington"])
    return journeys