diff --git a/app.py b/app.py
index 7fef309..075f801 100644
--- a/app.py
+++ b/app.py
@@ -1,4 +1,6 @@
-import asyncio
+"""
+Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.
+"""
 from flask import Flask, render_template, redirect, url_for, request
 from datetime import date, timedelta
 
@@ -7,6 +9,12 @@ import scraper.eurostar as eurostar_scraper
 import scraper.realtime_trains as rtt_scraper
 from trip_planner import combine_trips
 
+RTT_PADDINGTON_URL = (
+    "https://www.realtimetrains.co.uk/search/detailed/"
+    "gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359"
+    "?stp=WVS&show=pax-calls&order=wtt"
+)
+
 app = Flask(__name__)
 
 DESTINATIONS = {
@@ -17,16 +25,6 @@ DESTINATIONS = {
 }
 
 
-async def _fetch_both(destination: str, travel_date: str, user_agent: str):
-    """Fetch GWR trains and Eurostar times simultaneously."""
-    gwr, es = await asyncio.gather(
-        rtt_scraper.fetch(travel_date, user_agent),
-        eurostar_scraper.fetch(destination, travel_date, user_agent),
-        return_exceptions=True,
-    )
-    return gwr, es
-
-
 @app.route('/')
 def index():
     today = date.today().isoformat()
@@ -50,33 +48,35 @@ def results(slug, travel_date):
 
     user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA)
 
-    cache_key = f"{travel_date}_{destination}"
-    cached = get_cached(cache_key)
+    rtt_cache_key = f"rtt_{travel_date}"
+    es_cache_key = f"eurostar_{travel_date}_{destination}"
+
+    cached_rtt = get_cached(rtt_cache_key)
+    cached_es = get_cached(es_cache_key)
+    from_cache = bool(cached_rtt and cached_es)
 
     error = None
-    if cached:
-        gwr_trains = cached['gwr']
-        eurostar_trains = cached['eurostar']
-        from_cache = True
+
+    if cached_rtt:
+        gwr_trains = cached_rtt
     else:
-        from_cache = False
-        gwr_result, es_result = asyncio.run(_fetch_both(destination, travel_date, user_agent))
-
-        if isinstance(gwr_result, Exception):
+        try:
+            gwr_trains = rtt_scraper.fetch(travel_date, user_agent)
+            set_cached(rtt_cache_key, gwr_trains)
+        except Exception as e:
             gwr_trains = []
-            error = f"Could not fetch GWR trains: {gwr_result}"
-        else:
-            gwr_trains = gwr_result
+            error = f"Could not fetch GWR trains: {e}"
 
-        if isinstance(es_result, Exception):
+    if cached_es:
+        eurostar_trains = cached_es
+    else:
+        try:
+            eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent)
+            set_cached(es_cache_key, eurostar_trains)
+        except Exception as e:
             eurostar_trains = []
-            msg = f"Could not fetch Eurostar times: {es_result}"
+            msg = f"Could not fetch Eurostar times: {e}"
             error = f"{error}; {msg}" if error else msg
-        else:
-            eurostar_trains = es_result
-
-        if gwr_trains or eurostar_trains:
-            set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains})
 
     trips = combine_trips(gwr_trains, eurostar_trains, travel_date)
 
@@ -85,6 +85,9 @@ def results(slug, travel_date):
     next_date = (dt + timedelta(days=1)).isoformat()
     travel_date_display = dt.strftime('%A %-d %B %Y')
 
+    eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}"
+    rtt_url = RTT_PADDINGTON_URL.format(date=travel_date)
+
     return render_template(
         'results.html',
         trips=trips,
@@ -98,6 +101,8 @@ def results(slug, travel_date):
         eurostar_count=len(eurostar_trains),
         from_cache=from_cache,
         error=error,
+        eurostar_url=eurostar_url,
+        rtt_url=rtt_url,
     )
 
 
diff --git a/scraper/eurostar.py b/scraper/eurostar.py
index 3a5ef26..c2ef13e 100644
--- a/scraper/eurostar.py
+++ b/scraper/eurostar.py
@@ -13,7 +13,6 @@ Data path: props.pageProps.pageData.liveDepartures[]
     .destination.model.scheduledArrivalDateTime → destination arrival
       (already filtered to the requested stop, not the final stop)
 """
-import asyncio
 import json
 import re
 import httpx
@@ -62,29 +61,26 @@ def _parse(html: str, destination: str) -> list[dict]:
         dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
         arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
         if dep_time and arr_time:
+            carrier = dep.get('model', {}).get('carrier', 'ES')
+            number = dep.get('model', {}).get('trainNumber', '')
             services.append({
                 'depart_st_pancras': dep_time,
                 'arrive_destination': arr_time,
                 'destination': destination,
+                'train_number': f"{carrier} {number}" if number else '',
             })
     return sorted(services, key=lambda s: s['depart_st_pancras'])
 
 
-async def fetch(destination: str, travel_date: str,
-                user_agent: str = DEFAULT_UA) -> list[dict]:
+def fetch(destination: str, travel_date: str,
+          user_agent: str = DEFAULT_UA) -> list[dict]:
     url = ROUTE_URLS[destination]
     headers = {
         'User-Agent': user_agent,
         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
         'Accept-Language': 'en-GB,en;q=0.9',
     }
-    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client:
-        r = await client.get(url, params={'date': travel_date})
+    with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client:
+        r = client.get(url, params={'date': travel_date})
     r.raise_for_status()
     return _parse(r.text, destination)
-
-
-def get_eurostar_times(destination: str, travel_date: str,
-                       user_agent: str = DEFAULT_UA) -> list[dict]:
-    """Synchronous wrapper for CLI/testing."""
-    return asyncio.run(fetch(destination, travel_date, user_agent))
diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py
index 8a8023d..acd8203 100644
--- a/scraper/realtime_trains.py
+++ b/scraper/realtime_trains.py
@@ -1,14 +1,11 @@
 """
 Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
 
-Uses httpx (not Playwright) with browser-like headers.
-
-Two fetches run concurrently:
+Two fetches:
   BRI/to/PAD   → departure times from Bristol (div.time.plan.d)
   PAD/from/BRI → arrival times at Paddington (div.time.plan.a)
 Matched by train ID (div.tid).
 """
-import asyncio
 import re
 import httpx
 import lxml.html
@@ -71,26 +68,19 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
     return result
 
 
-async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
-    """Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}]."""
+def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
+    """Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
     headers = _browser_headers(user_agent)
-    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client:
-        r_bri, r_pad = await asyncio.gather(
-            client.get(BRI_TO_PAD.format(date=date)),
-            client.get(PAD_FROM_BRI.format(date=date)),
-        )
+    with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
+        r_bri = client.get(BRI_TO_PAD.format(date=date))
+        r_pad = client.get(PAD_FROM_BRI.format(date=date))
 
     departures = _parse_services(r_bri.text, 'div.time.plan.d')
     arrivals = _parse_services(r_pad.text, 'div.time.plan.a')
 
     trains = [
-        {'depart_bristol': dep, 'arrive_paddington': arr}
+        {'depart_bristol': dep, 'arrive_paddington': arr, 'headcode': tid}
         for tid, dep in departures.items()
         if (arr := arrivals.get(tid))
     ]
     return sorted(trains, key=lambda t: t['depart_bristol'])
-
-
-def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
-    """Synchronous wrapper around fetch() for CLI/testing use."""
-    return asyncio.run(fetch(date, user_agent))
diff --git a/templates/results.html b/templates/results.html
index 5f6d440..f86bfa9 100644
--- a/templates/results.html
+++ b/templates/results.html
@@ -38,18 +38,33 @@
| Depart Bristol | -Arrive Paddington | +Bristol | +Paddington | Transfer | Depart St Pancras | -Arrive {{ destination }} | +{{ destination }} + | Total |
|---|---|---|---|---|---|---|---|---|
| {{ trip.depart_bristol }} | + {% if trip.total_minutes == best_mins and trips | length > 1 %} + {% set row_bg = 'background:#f0fff4' %} + {% elif trip.total_minutes == worst_mins and trips | length > 1 %} + {% set row_bg = 'background:#fff5f5' %} + {% elif loop.index is odd %} + {% set row_bg = 'background:#f7fafc' %} + {% else %} + {% set row_bg = '' %} + {% endif %} +||||||||
|
+ {{ trip.depart_bristol }}
+ {% if trip.headcode %} {{ trip.headcode }}{% endif %} + |
{{ trip.arrive_paddington }} ({{ trip.gwr_duration }}) @@ -57,9 +72,23 @@ | {{ trip.connection_duration }} | -{{ trip.depart_st_pancras }} | -{{ trip.arrive_destination }} | -{{ trip.total_duration }} | +
+ {{ trip.depart_st_pancras }}
+ {% if trip.train_number %} {{ trip.train_number }}{% endif %} + |
+ + {{ trip.arrive_destination }} + (CET) + | ++ {% if trip.total_minutes == best_mins and trips | length > 1 %} + {{ trip.total_duration }} ⚡ + {% elif trip.total_minutes == worst_mins and trips | length > 1 %} + {{ trip.total_duration }} 🐢 + {% else %} + {{ trip.total_duration }} + {% endif %} + |