Add return and inbound journey support
This commit is contained in:
parent
6ba71447ef
commit
9691632f65
12 changed files with 1687 additions and 486 deletions
|
|
@ -1,5 +1,6 @@
|
|||
"""
|
||||
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
|
||||
Scrape direct trains between a selected station and London Paddington using
|
||||
Realtime Trains.
|
||||
|
||||
Two fetches:
|
||||
BRI/to/PAD → departure times from Bristol (div.time.plan.d)
|
||||
|
|
@ -20,6 +21,16 @@ _PAD_FROM_TMPL = (
|
|||
"gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
|
||||
"?stp=WVS&show=pax-calls&order=wtt"
|
||||
)
|
||||
# Realtime Trains detailed-search URL for London Paddington (PAD), filtered to
# services going to {crs}. fetch_from_paddington() fills {crs} and {date} via
# str.format and reads the departure times from this page.
# NOTE(review): "0000-2359" presumably selects the whole day, and {date} is
# presumably ISO "YYYY-MM-DD" -- confirm against the caller.
_PAD_TO_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)
|
||||
# Realtime Trains detailed-search URL for station {crs}, filtered to services
# coming from London Paddington (PAD). fetch_from_paddington() fills {crs} and
# {date} via str.format and reads arrival time and platform from this page.
# NOTE(review): "0000-2359" presumably selects the whole day -- confirm.
_FROM_PAD_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)
|
||||
|
||||
DEFAULT_UA = (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
|
|
@ -69,7 +80,7 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
|
|||
|
||||
|
||||
def _parse_arrivals(html: str) -> dict[str, dict]:
|
||||
"""Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page."""
|
||||
"""Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page."""
|
||||
root = lxml.html.fromstring(html)
|
||||
sl = root.cssselect('div.servicelist')
|
||||
if not sl:
|
||||
|
|
@ -93,7 +104,7 @@ def _parse_arrivals(html: str) -> dict[str, dict]:
|
|||
|
||||
|
||||
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
|
||||
"""Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}]."""
|
||||
"""Fetch trains from station_crs to PAD."""
|
||||
headers = _browser_headers(user_agent)
|
||||
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
|
||||
r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
|
||||
|
|
@ -113,3 +124,44 @@ def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') ->
|
|||
if tid in arrivals
|
||||
]
|
||||
return sorted(trains, key=lambda t: t['depart_bristol'])
|
||||
|
||||
|
||||
def fetch_to_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """Fetch trains from station_crs to PAD, exposing generic field names.

    Thin adapter over fetch(): each returned dict is a copy of the
    corresponding fetch() result (so legacy keys such as ``depart_bristol``
    are retained) with these keys guaranteed to be present:

    * ``depart_origin``     -- copy of ``depart_bristol``
    * ``arrive_paddington`` -- unchanged
    * ``arrive_platform``   -- ``""`` when fetch() did not supply one
    * ``headcode``          -- ``""`` when fetch() did not supply one

    Args:
        date: Service date, passed through to fetch() unchanged.
        user_agent: User-Agent string, passed through to fetch().
        station_crs: CRS code of the origin station, passed through to fetch().

    Returns:
        One dict per train, in the order produced by fetch().

    Raises:
        KeyError: If a fetch() result lacks ``depart_bristol`` or
            ``arrive_paddington``.
    """
    normalised = []
    for service in fetch(date, user_agent, station_crs):
        # Start from a shallow copy so the legacy keys keep their position.
        row = dict(service)
        row["depart_origin"] = service["depart_bristol"]
        row["arrive_paddington"] = service["arrive_paddington"]
        row["arrive_platform"] = service.get("arrive_platform", "")
        row["headcode"] = service.get("headcode", "")
        normalised.append(row)
    return normalised
|
||||
|
||||
|
||||
def fetch_from_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """Fetch trains from PAD to station_crs.

    Two pages are requested with one HTTP client: the Paddington page
    (``_PAD_TO_TMPL``) supplies departure times, and the destination-station
    page (``_FROM_PAD_TMPL``) supplies arrival time and platform. The two are
    joined on the train id; a service that appears on only one page is
    dropped.

    Args:
        date: Service date substituted into both URL templates.
        user_agent: User-Agent string handed to _browser_headers().
        station_crs: CRS code of the destination station.

    Returns:
        A list of dicts with keys ``depart_paddington``,
        ``arrive_destination``, ``arrive_platform`` and ``headcode``,
        sorted by ``depart_paddington``.
    """
    with httpx.Client(
        headers=_browser_headers(user_agent),
        follow_redirects=True,
        timeout=30,
    ) as session:
        pad_page = session.get(_PAD_TO_TMPL.format(crs=station_crs, date=date))
        station_page = session.get(
            _FROM_PAD_TMPL.format(crs=station_crs, date=date)
        )

    # NOTE(review): HTTP status is not checked here, so an error page is
    # handed to the parsers like any other response -- confirm this is the
    # intended best-effort behaviour.
    departures_by_id = _parse_services(pad_page.text, 'div.time.plan.d')
    arrivals_by_id = _parse_arrivals(station_page.text)

    matched = []
    for train_id, departs in departures_by_id.items():
        if train_id not in arrivals_by_id:
            # Seen leaving Paddington but not listed at the destination page.
            continue
        arrival = arrivals_by_id[train_id]
        matched.append(
            {
                "depart_paddington": departs,
                "arrive_destination": arrival["time"],
                "arrive_platform": arrival["platform"],
                "headcode": train_id,
            }
        )

    # list.sort is stable, so ties keep the order of the departures page.
    matched.sort(key=lambda row: row["depart_paddington"])
    return matched
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue