Various improvements

2026-03-31 10:42:30 +01:00 · 2026-03-31 10:42:30 +01:00 · 876eb6a759
commit 876eb6a759
parent 2090268754
5 changed files with 98 additions and 72 deletions
--- a/scraper/realtime_trains.py
+++ b/scraper/realtime_trains.py
@@ -1,14 +1,11 @@
 """
 Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.

-Uses httpx (not Playwright) with browser-like headers.
-
-Two fetches run concurrently:
+Two fetches:
  BRI/to/PAD  → departure times from Bristol (div.time.plan.d)
  PAD/from/BRI → arrival times at Paddington (div.time.plan.a)
 Matched by train ID (div.tid).
 """
-import asyncio
 import re
 import httpx
 import lxml.html
@@ -71,26 +68,19 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
    return result


-async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
-    """Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}]."""
+def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
+    """Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
    headers = _browser_headers(user_agent)
-    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client:
-        r_bri, r_pad = await asyncio.gather(
-            client.get(BRI_TO_PAD.format(date=date)),
-            client.get(PAD_FROM_BRI.format(date=date)),
-        )
+    with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
+        r_bri = client.get(BRI_TO_PAD.format(date=date))
+        r_pad = client.get(PAD_FROM_BRI.format(date=date))

    departures = _parse_services(r_bri.text, 'div.time.plan.d')
    arrivals   = _parse_services(r_pad.text, 'div.time.plan.a')

    trains = [
-        {'depart_bristol': dep, 'arrive_paddington': arr}
+        {'depart_bristol': dep, 'arrive_paddington': arr, 'headcode': tid}
        for tid, dep in departures.items()
        if (arr := arrivals.get(tid))
    ]
    return sorted(trains, key=lambda t: t['depart_bristol'])
-
-
-def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
-    """Synchronous wrapper around fetch() for CLI/testing use."""
-    return asyncio.run(fetch(date, user_agent))