diff --git a/app.py b/app.py index 075f801..2329d42 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,4 @@ -""" -Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains. -""" +import asyncio from flask import Flask, render_template, redirect, url_for, request from datetime import date, timedelta @@ -9,12 +7,6 @@ import scraper.eurostar as eurostar_scraper import scraper.realtime_trains as rtt_scraper from trip_planner import combine_trips -RTT_PADDINGTON_URL = ( - "https://www.realtimetrains.co.uk/search/detailed/" - "gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359" - "?stp=WVS&show=pax-calls&order=wtt" -) - app = Flask(__name__) DESTINATIONS = { @@ -25,21 +17,22 @@ DESTINATIONS = { } +async def _fetch_both(destination: str, travel_date: str, user_agent: str): + """Fetch GWR trains and Eurostar times simultaneously.""" + gwr, es = await asyncio.gather( + rtt_scraper.fetch(travel_date, user_agent), + eurostar_scraper.fetch(destination, travel_date, user_agent), + return_exceptions=True, + ) + return gwr, es + + @app.route('/') def index(): today = date.today().isoformat() return render_template('index.html', destinations=DESTINATIONS, today=today) -@app.route('/search') -def search(): - slug = request.args.get('destination', '') - travel_date = request.args.get('travel_date', '') - if slug in DESTINATIONS and travel_date: - return redirect(url_for('results', slug=slug, travel_date=travel_date)) - return redirect(url_for('index')) - - @app.route('/results//') def results(slug, travel_date): destination = DESTINATIONS.get(slug) @@ -48,35 +41,33 @@ def results(slug, travel_date): user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA) - rtt_cache_key = f"rtt_{travel_date}" - es_cache_key = f"eurostar_{travel_date}_{destination}" - - cached_rtt = get_cached(rtt_cache_key) - cached_es = get_cached(es_cache_key) - from_cache = bool(cached_rtt and cached_es) + cache_key = f"{travel_date}_{destination}" + cached = get_cached(cache_key) error = None - - if cached_rtt: - gwr_trains = cached_rtt + if cached: + gwr_trains = cached['gwr'] + eurostar_trains = cached['eurostar'] + from_cache = True else: - try: - gwr_trains = rtt_scraper.fetch(travel_date, user_agent) - set_cached(rtt_cache_key, gwr_trains) - except Exception as e: + from_cache = False + gwr_result, es_result = asyncio.run(_fetch_both(destination, travel_date, user_agent)) + + if isinstance(gwr_result, Exception): gwr_trains = [] - error = f"Could not fetch GWR trains: {e}" + error = f"Could not fetch GWR trains: {gwr_result}" + else: + gwr_trains = gwr_result - if cached_es: - eurostar_trains = cached_es - else: - try: - eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent) - set_cached(es_cache_key, eurostar_trains) - except Exception as e: + if isinstance(es_result, Exception): eurostar_trains = [] - msg = f"Could not fetch Eurostar times: {e}" + msg = f"Could not fetch Eurostar times: {es_result}" error = f"{error}; {msg}" if error else msg + else: + eurostar_trains = es_result + + if gwr_trains or eurostar_trains: + set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains}) trips = combine_trips(gwr_trains, eurostar_trains, travel_date) @@ -85,9 +76,6 @@ def results(slug, travel_date): next_date = (dt + timedelta(days=1)).isoformat() travel_date_display = dt.strftime('%A %-d %B %Y') - eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}" - rtt_url = RTT_PADDINGTON_URL.format(date=travel_date) - return render_template( 'results.html', trips=trips, @@ -101,8 +89,6 @@ def results(slug, travel_date): eurostar_count=len(eurostar_trains), from_cache=from_cache, error=error, - eurostar_url=eurostar_url, - rtt_url=rtt_url, ) diff --git a/scraper/eurostar.py b/scraper/eurostar.py index c2ef13e..3a5ef26 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -13,6 +13,7 @@ Data path: props.pageProps.pageData.liveDepartures[] .destination.model.scheduledArrivalDateTime → destination arrival (already filtered to the requested stop, not the final stop) """ +import asyncio import json import re import httpx @@ -61,26 +62,29 @@ def _parse(html: str, destination: str) -> list[dict]: dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime']) arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime']) if dep_time and arr_time: - carrier = dep.get('model', {}).get('carrier', 'ES') - number = dep.get('model', {}).get('trainNumber', '') services.append({ 'depart_st_pancras': dep_time, 'arrive_destination': arr_time, 'destination': destination, - 'train_number': f"{carrier} {number}" if number else '', }) return sorted(services, key=lambda s: s['depart_st_pancras']) -def fetch(destination: str, travel_date: str, - user_agent: str = DEFAULT_UA) -> list[dict]: +async def fetch(destination: str, travel_date: str, + user_agent: str = DEFAULT_UA) -> list[dict]: url = ROUTE_URLS[destination] headers = { 'User-Agent': user_agent, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-GB,en;q=0.9', } - with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client: - r = client.get(url, params={'date': travel_date}) + async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client: + r = await client.get(url, params={'date': travel_date}) r.raise_for_status() return _parse(r.text, destination) + + +def get_eurostar_times(destination: str, travel_date: str, + user_agent: str = DEFAULT_UA) -> list[dict]: + """Synchronous wrapper for CLI/testing.""" + return asyncio.run(fetch(destination, travel_date, user_agent)) diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py index acd8203..8a8023d 100644 --- a/scraper/realtime_trains.py +++ b/scraper/realtime_trains.py @@ -1,11 +1,14 @@ """ Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. -Two fetches: +Uses httpx (not Playwright) with browser-like headers. + +Two fetches run concurrently: BRI/to/PAD → departure times from Bristol (div.time.plan.d) PAD/from/BRI → arrival times at Paddington (div.time.plan.a) Matched by train ID (div.tid). """ +import asyncio import re import httpx import lxml.html @@ -68,19 +71,26 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]: return result -def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: - """Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}].""" +async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: + """Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}].""" headers = _browser_headers(user_agent) - with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: - r_bri = client.get(BRI_TO_PAD.format(date=date)) - r_pad = client.get(PAD_FROM_BRI.format(date=date)) + async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client: + r_bri, r_pad = await asyncio.gather( + client.get(BRI_TO_PAD.format(date=date)), + client.get(PAD_FROM_BRI.format(date=date)), + ) departures = _parse_services(r_bri.text, 'div.time.plan.d') arrivals = _parse_services(r_pad.text, 'div.time.plan.a') trains = [ - {'depart_bristol': dep, 'arrive_paddington': arr, 'headcode': tid} + {'depart_bristol': dep, 'arrive_paddington': arr} for tid, dep in departures.items() if (arr := arrivals.get(tid)) ] return sorted(trains, key=lambda t: t['depart_bristol']) + + +def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: + """Synchronous wrapper around fetch() for CLI/testing use.""" + return asyncio.run(fetch(date, user_agent)) diff --git a/templates/index.html b/templates/index.html index 5f92c52..d9d0dc4 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,7 +2,7 @@ {% block content %}

Plan your journey

-
+
+ {% endblock %} diff --git a/templates/results.html b/templates/results.html index f86bfa9..661c3f1 100644 --- a/templates/results.html +++ b/templates/results.html @@ -2,7 +2,7 @@ {% block content %}

- ← New search + ← New search

@@ -10,11 +10,11 @@ Bristol Temple Meads → {{ destination }}
- ← Prev {{ travel_date_display }} - Next →
@@ -38,33 +38,18 @@ - - + + - + - {% set best_mins = trips | map(attribute='total_minutes') | min %} - {% set worst_mins = trips | map(attribute='total_minutes') | max %} {% for trip in trips %} - {% if trip.total_minutes == best_mins and trips | length > 1 %} - {% set row_bg = 'background:#f0fff4' %} - {% elif trip.total_minutes == worst_mins and trips | length > 1 %} - {% set row_bg = 'background:#fff5f5' %} - {% elif loop.index is odd %} - {% set row_bg = 'background:#f7fafc' %} - {% else %} - {% set row_bg = '' %} - {% endif %} - - + + - - - + + + {% endfor %} @@ -96,11 +67,9 @@

- Paddington → St Pancras connection: 60 min minimum, 2h maximum. + Paddington → St Pancras connection: 75 min minimum, 2h 20m maximum. Eurostar times are from the general timetable and may vary; always check - eurostar.com to book. -  ·  - Paddington arrivals on RTT + eurostar.com to book.

{% else %} diff --git a/trip_planner.py b/trip_planner.py index aa0be7e..0e5c6ea 100644 --- a/trip_planner.py +++ b/trip_planner.py @@ -3,8 +3,8 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination t """ from datetime import datetime, timedelta -MIN_CONNECTION_MINUTES = 50 -MAX_CONNECTION_MINUTES = 110 +MIN_CONNECTION_MINUTES = 75 +MAX_CONNECTION_MINUTES = 140 MAX_GWR_MINUTES = 110 DATE_FMT = '%Y-%m-%d' TIME_FMT = '%H:%M' @@ -72,18 +72,14 @@ def combine_trips( if (dep_stp - arr_pad).total_seconds() / 60 > MAX_CONNECTION_MINUTES: continue - total_mins = int((arr_dest - dep_bri).total_seconds() / 60) trips.append({ 'depart_bristol': gwr['depart_bristol'], 'arrive_paddington': gwr['arrive_paddington'], - 'headcode': gwr.get('headcode', ''), 'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)), 'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)), 'depart_st_pancras': es['depart_st_pancras'], 'arrive_destination': es['arrive_destination'], - 'train_number': es.get('train_number', ''), - 'total_duration': _fmt_duration(total_mins), - 'total_minutes': total_mins, + 'total_duration': _fmt_duration(int((arr_dest - dep_bri).total_seconds() / 60)), 'destination': es['destination'], }) break # Only the earliest valid Eurostar per GWR departure
BristolPaddingtonDepart BristolArrive Paddington Transfer Depart St Pancras{{ destination }} - Arrive {{ destination }} Total
- {{ trip.depart_bristol }} - {% if trip.headcode %}
{{ trip.headcode }}{% endif %} -
{{ trip.depart_bristol }} {{ trip.arrive_paddington }} ({{ trip.gwr_duration }}) @@ -72,23 +57,9 @@ {{ trip.connection_duration }} - {{ trip.depart_st_pancras }} - {% if trip.train_number %}
{{ trip.train_number }}{% endif %} -
- {{ trip.arrive_destination }} - (CET) - - {% if trip.total_minutes == best_mins and trips | length > 1 %} - {{ trip.total_duration }} ⚡ - {% elif trip.total_minutes == worst_mins and trips | length > 1 %} - {{ trip.total_duration }} 🐢 - {% else %} - {{ trip.total_duration }} - {% endif %} - {{ trip.depart_st_pancras }}{{ trip.arrive_destination }}{{ trip.total_duration }}