Various improvements

This commit is contained in:
Edward Betts 2026-03-31 10:42:30 +01:00
parent 2090268754
commit 876eb6a759
5 changed files with 98 additions and 72 deletions

67
app.py
View file

@ -1,4 +1,6 @@
import asyncio """
Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.
"""
from flask import Flask, render_template, redirect, url_for, request from flask import Flask, render_template, redirect, url_for, request
from datetime import date, timedelta from datetime import date, timedelta
@ -7,6 +9,12 @@ import scraper.eurostar as eurostar_scraper
import scraper.realtime_trains as rtt_scraper import scraper.realtime_trains as rtt_scraper
from trip_planner import combine_trips from trip_planner import combine_trips
RTT_PADDINGTON_URL = (
"https://www.realtimetrains.co.uk/search/detailed/"
"gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359"
"?stp=WVS&show=pax-calls&order=wtt"
)
app = Flask(__name__) app = Flask(__name__)
DESTINATIONS = { DESTINATIONS = {
@ -17,16 +25,6 @@ DESTINATIONS = {
} }
async def _fetch_both(destination: str, travel_date: str, user_agent: str):
"""Fetch GWR trains and Eurostar times simultaneously."""
gwr, es = await asyncio.gather(
rtt_scraper.fetch(travel_date, user_agent),
eurostar_scraper.fetch(destination, travel_date, user_agent),
return_exceptions=True,
)
return gwr, es
@app.route('/') @app.route('/')
def index(): def index():
today = date.today().isoformat() today = date.today().isoformat()
@ -50,33 +48,35 @@ def results(slug, travel_date):
user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA) user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA)
cache_key = f"{travel_date}_{destination}" rtt_cache_key = f"rtt_{travel_date}"
cached = get_cached(cache_key) es_cache_key = f"eurostar_{travel_date}_{destination}"
cached_rtt = get_cached(rtt_cache_key)
cached_es = get_cached(es_cache_key)
from_cache = bool(cached_rtt and cached_es)
error = None error = None
if cached:
gwr_trains = cached['gwr']
eurostar_trains = cached['eurostar']
from_cache = True
else:
from_cache = False
gwr_result, es_result = asyncio.run(_fetch_both(destination, travel_date, user_agent))
if isinstance(gwr_result, Exception): if cached_rtt:
gwr_trains = cached_rtt
else:
try:
gwr_trains = rtt_scraper.fetch(travel_date, user_agent)
set_cached(rtt_cache_key, gwr_trains)
except Exception as e:
gwr_trains = [] gwr_trains = []
error = f"Could not fetch GWR trains: {gwr_result}" error = f"Could not fetch GWR trains: {e}"
else:
gwr_trains = gwr_result
if isinstance(es_result, Exception): if cached_es:
eurostar_trains = cached_es
else:
try:
eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent)
set_cached(es_cache_key, eurostar_trains)
except Exception as e:
eurostar_trains = [] eurostar_trains = []
msg = f"Could not fetch Eurostar times: {es_result}" msg = f"Could not fetch Eurostar times: {e}"
error = f"{error}; {msg}" if error else msg error = f"{error}; {msg}" if error else msg
else:
eurostar_trains = es_result
if gwr_trains or eurostar_trains:
set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains})
trips = combine_trips(gwr_trains, eurostar_trains, travel_date) trips = combine_trips(gwr_trains, eurostar_trains, travel_date)
@ -85,6 +85,9 @@ def results(slug, travel_date):
next_date = (dt + timedelta(days=1)).isoformat() next_date = (dt + timedelta(days=1)).isoformat()
travel_date_display = dt.strftime('%A %-d %B %Y') travel_date_display = dt.strftime('%A %-d %B %Y')
eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}"
rtt_url = RTT_PADDINGTON_URL.format(date=travel_date)
return render_template( return render_template(
'results.html', 'results.html',
trips=trips, trips=trips,
@ -98,6 +101,8 @@ def results(slug, travel_date):
eurostar_count=len(eurostar_trains), eurostar_count=len(eurostar_trains),
from_cache=from_cache, from_cache=from_cache,
error=error, error=error,
eurostar_url=eurostar_url,
rtt_url=rtt_url,
) )

View file

@ -13,7 +13,6 @@ Data path: props.pageProps.pageData.liveDepartures[]
.destination.model.scheduledArrivalDateTime destination arrival .destination.model.scheduledArrivalDateTime destination arrival
(already filtered to the requested stop, not the final stop) (already filtered to the requested stop, not the final stop)
""" """
import asyncio
import json import json
import re import re
import httpx import httpx
@ -62,15 +61,18 @@ def _parse(html: str, destination: str) -> list[dict]:
dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime']) dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime']) arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
if dep_time and arr_time: if dep_time and arr_time:
carrier = dep.get('model', {}).get('carrier', 'ES')
number = dep.get('model', {}).get('trainNumber', '')
services.append({ services.append({
'depart_st_pancras': dep_time, 'depart_st_pancras': dep_time,
'arrive_destination': arr_time, 'arrive_destination': arr_time,
'destination': destination, 'destination': destination,
'train_number': f"{carrier} {number}" if number else '',
}) })
return sorted(services, key=lambda s: s['depart_st_pancras']) return sorted(services, key=lambda s: s['depart_st_pancras'])
async def fetch(destination: str, travel_date: str, def fetch(destination: str, travel_date: str,
user_agent: str = DEFAULT_UA) -> list[dict]: user_agent: str = DEFAULT_UA) -> list[dict]:
url = ROUTE_URLS[destination] url = ROUTE_URLS[destination]
headers = { headers = {
@ -78,13 +80,7 @@ async def fetch(destination: str, travel_date: str,
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-GB,en;q=0.9', 'Accept-Language': 'en-GB,en;q=0.9',
} }
async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client: with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client:
r = await client.get(url, params={'date': travel_date}) r = client.get(url, params={'date': travel_date})
r.raise_for_status() r.raise_for_status()
return _parse(r.text, destination) return _parse(r.text, destination)
def get_eurostar_times(destination: str, travel_date: str,
user_agent: str = DEFAULT_UA) -> list[dict]:
"""Synchronous wrapper for CLI/testing."""
return asyncio.run(fetch(destination, travel_date, user_agent))

View file

@ -1,14 +1,11 @@
""" """
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
Uses httpx (not Playwright) with browser-like headers. Two fetches:
Two fetches run concurrently:
BRI/to/PAD departure times from Bristol (div.time.plan.d) BRI/to/PAD departure times from Bristol (div.time.plan.d)
PAD/from/BRI arrival times at Paddington (div.time.plan.a) PAD/from/BRI arrival times at Paddington (div.time.plan.a)
Matched by train ID (div.tid). Matched by train ID (div.tid).
""" """
import asyncio
import re import re
import httpx import httpx
import lxml.html import lxml.html
@ -71,26 +68,19 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
return result return result
async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]: def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
"""Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}].""" """Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
headers = _browser_headers(user_agent) headers = _browser_headers(user_agent)
async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client: with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
r_bri, r_pad = await asyncio.gather( r_bri = client.get(BRI_TO_PAD.format(date=date))
client.get(BRI_TO_PAD.format(date=date)), r_pad = client.get(PAD_FROM_BRI.format(date=date))
client.get(PAD_FROM_BRI.format(date=date)),
)
departures = _parse_services(r_bri.text, 'div.time.plan.d') departures = _parse_services(r_bri.text, 'div.time.plan.d')
arrivals = _parse_services(r_pad.text, 'div.time.plan.a') arrivals = _parse_services(r_pad.text, 'div.time.plan.a')
trains = [ trains = [
{'depart_bristol': dep, 'arrive_paddington': arr} {'depart_bristol': dep, 'arrive_paddington': arr, 'headcode': tid}
for tid, dep in departures.items() for tid, dep in departures.items()
if (arr := arrivals.get(tid)) if (arr := arrivals.get(tid))
] ]
return sorted(trains, key=lambda t: t['depart_bristol']) return sorted(trains, key=lambda t: t['depart_bristol'])
def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
"""Synchronous wrapper around fetch() for CLI/testing use."""
return asyncio.run(fetch(date, user_agent))

View file

@ -38,18 +38,33 @@
<table style="width:100%;border-collapse:collapse;font-size:0.95rem"> <table style="width:100%;border-collapse:collapse;font-size:0.95rem">
<thead> <thead>
<tr style="border-bottom:2px solid #e2e8f0;text-align:left"> <tr style="border-bottom:2px solid #e2e8f0;text-align:left">
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart Bristol</th> <th style="padding:0.6rem 0.8rem;white-space:nowrap">Bristol</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive Paddington</th> <th style="padding:0.6rem 0.8rem;white-space:nowrap">Paddington</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Transfer</th> <th style="padding:0.6rem 0.8rem;white-space:nowrap">Transfer</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart St&nbsp;Pancras</th> <th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart St&nbsp;Pancras</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive {{ destination }}</th> <th style="padding:0.6rem 0.8rem">{{ destination }}
</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Total</th> <th style="padding:0.6rem 0.8rem;white-space:nowrap">Total</th>
</tr> </tr>
</thead> </thead>
<tbody> <tbody>
{% set best_mins = trips | map(attribute='total_minutes') | min %}
{% set worst_mins = trips | map(attribute='total_minutes') | max %}
{% for trip in trips %} {% for trip in trips %}
<tr style="border-bottom:1px solid #e2e8f0{% if loop.index is odd %};background:#f7fafc{% endif %}"> {% if trip.total_minutes == best_mins and trips | length > 1 %}
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_bristol }}</td> {% set row_bg = 'background:#f0fff4' %}
{% elif trip.total_minutes == worst_mins and trips | length > 1 %}
{% set row_bg = 'background:#fff5f5' %}
{% elif loop.index is odd %}
{% set row_bg = 'background:#f7fafc' %}
{% else %}
{% set row_bg = '' %}
{% endif %}
<tr style="border-bottom:1px solid #e2e8f0;{{ row_bg }}">
<td style="padding:0.6rem 0.8rem;font-weight:600">
{{ trip.depart_bristol }}
{% if trip.headcode %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{{ trip.headcode }}</span>{% endif %}
</td>
<td style="padding:0.6rem 0.8rem"> <td style="padding:0.6rem 0.8rem">
{{ trip.arrive_paddington }} {{ trip.arrive_paddington }}
<span style="font-size:0.8rem;color:#718096">({{ trip.gwr_duration }})</span> <span style="font-size:0.8rem;color:#718096">({{ trip.gwr_duration }})</span>
@ -57,9 +72,23 @@
<td style="padding:0.6rem 0.8rem;color:#4a5568"> <td style="padding:0.6rem 0.8rem;color:#4a5568">
{{ trip.connection_duration }} {{ trip.connection_duration }}
</td> </td>
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_st_pancras }}</td> <td style="padding:0.6rem 0.8rem;font-weight:600">
<td style="padding:0.6rem 0.8rem">{{ trip.arrive_destination }}</td> {{ trip.depart_st_pancras }}
<td style="padding:0.6rem 0.8rem;font-weight:600;color:#00539f">{{ trip.total_duration }}</td> {% if trip.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{{ trip.train_number }}</span>{% endif %}
</td>
<td style="padding:0.6rem 0.8rem">
{{ trip.arrive_destination }}
<span style="font-weight:400;color:#718096;font-size:0.85em">(CET)</span>
</td>
<td style="padding:0.6rem 0.8rem;font-weight:600">
{% if trip.total_minutes == best_mins and trips | length > 1 %}
<span style="color:#276749" title="Fastest option">{{ trip.total_duration }} ⚡</span>
{% elif trip.total_minutes == worst_mins and trips | length > 1 %}
<span style="color:#c53030" title="Slowest option">{{ trip.total_duration }} 🐢</span>
{% else %}
<span style="color:#00539f">{{ trip.total_duration }}</span>
{% endif %}
</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@ -67,9 +96,11 @@
</div> </div>
<p style="margin-top:1rem;font-size:0.82rem;color:#718096"> <p style="margin-top:1rem;font-size:0.82rem;color:#718096">
Paddington &rarr; St&nbsp;Pancras connection: 75&nbsp;min minimum, 2h&nbsp;20m maximum. Paddington &rarr; St&nbsp;Pancras connection: 60&nbsp;min minimum, 2h maximum.
Eurostar times are from the general timetable and may vary; always check Eurostar times are from the general timetable and may vary; always check
<a href="https://www.eurostar.com" target="_blank" rel="noopener">eurostar.com</a> to book. <a href="{{ eurostar_url }}" target="_blank" rel="noopener">eurostar.com</a> to book.
&nbsp;&middot;&nbsp;
<a href="{{ rtt_url }}" target="_blank" rel="noopener">Paddington arrivals on RTT</a>
</p> </p>
{% else %} {% else %}

View file

@ -3,8 +3,8 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination t
""" """
from datetime import datetime, timedelta from datetime import datetime, timedelta
MIN_CONNECTION_MINUTES = 75 MIN_CONNECTION_MINUTES = 50
MAX_CONNECTION_MINUTES = 140 MAX_CONNECTION_MINUTES = 110
MAX_GWR_MINUTES = 110 MAX_GWR_MINUTES = 110
DATE_FMT = '%Y-%m-%d' DATE_FMT = '%Y-%m-%d'
TIME_FMT = '%H:%M' TIME_FMT = '%H:%M'
@ -72,14 +72,18 @@ def combine_trips(
if (dep_stp - arr_pad).total_seconds() / 60 > MAX_CONNECTION_MINUTES: if (dep_stp - arr_pad).total_seconds() / 60 > MAX_CONNECTION_MINUTES:
continue continue
total_mins = int((arr_dest - dep_bri).total_seconds() / 60)
trips.append({ trips.append({
'depart_bristol': gwr['depart_bristol'], 'depart_bristol': gwr['depart_bristol'],
'arrive_paddington': gwr['arrive_paddington'], 'arrive_paddington': gwr['arrive_paddington'],
'headcode': gwr.get('headcode', ''),
'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)), 'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)),
'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)), 'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)),
'depart_st_pancras': es['depart_st_pancras'], 'depart_st_pancras': es['depart_st_pancras'],
'arrive_destination': es['arrive_destination'], 'arrive_destination': es['arrive_destination'],
'total_duration': _fmt_duration(int((arr_dest - dep_bri).total_seconds() / 60)), 'train_number': es.get('train_number', ''),
'total_duration': _fmt_duration(total_mins),
'total_minutes': total_mins,
'destination': es['destination'], 'destination': es['destination'],
}) })
break # Only the earliest valid Eurostar per GWR departure break # Only the earliest valid Eurostar per GWR departure