Various improvements
This commit is contained in:
parent
2090268754
commit
876eb6a759
5 changed files with 98 additions and 72 deletions
67
app.py
67
app.py
|
|
@ -1,4 +1,6 @@
|
||||||
import asyncio
|
"""
|
||||||
|
Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.
|
||||||
|
"""
|
||||||
from flask import Flask, render_template, redirect, url_for, request
|
from flask import Flask, render_template, redirect, url_for, request
|
||||||
from datetime import date, timedelta
|
from datetime import date, timedelta
|
||||||
|
|
||||||
|
|
@ -7,6 +9,12 @@ import scraper.eurostar as eurostar_scraper
|
||||||
import scraper.realtime_trains as rtt_scraper
|
import scraper.realtime_trains as rtt_scraper
|
||||||
from trip_planner import combine_trips
|
from trip_planner import combine_trips
|
||||||
|
|
||||||
|
RTT_PADDINGTON_URL = (
|
||||||
|
"https://www.realtimetrains.co.uk/search/detailed/"
|
||||||
|
"gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359"
|
||||||
|
"?stp=WVS&show=pax-calls&order=wtt"
|
||||||
|
)
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
DESTINATIONS = {
|
DESTINATIONS = {
|
||||||
|
|
@ -17,16 +25,6 @@ DESTINATIONS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def _fetch_both(destination: str, travel_date: str, user_agent: str):
|
|
||||||
"""Fetch GWR trains and Eurostar times simultaneously."""
|
|
||||||
gwr, es = await asyncio.gather(
|
|
||||||
rtt_scraper.fetch(travel_date, user_agent),
|
|
||||||
eurostar_scraper.fetch(destination, travel_date, user_agent),
|
|
||||||
return_exceptions=True,
|
|
||||||
)
|
|
||||||
return gwr, es
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/')
|
@app.route('/')
|
||||||
def index():
|
def index():
|
||||||
today = date.today().isoformat()
|
today = date.today().isoformat()
|
||||||
|
|
@ -50,33 +48,35 @@ def results(slug, travel_date):
|
||||||
|
|
||||||
user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA)
|
user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA)
|
||||||
|
|
||||||
cache_key = f"{travel_date}_{destination}"
|
rtt_cache_key = f"rtt_{travel_date}"
|
||||||
cached = get_cached(cache_key)
|
es_cache_key = f"eurostar_{travel_date}_{destination}"
|
||||||
|
|
||||||
|
cached_rtt = get_cached(rtt_cache_key)
|
||||||
|
cached_es = get_cached(es_cache_key)
|
||||||
|
from_cache = bool(cached_rtt and cached_es)
|
||||||
|
|
||||||
error = None
|
error = None
|
||||||
if cached:
|
|
||||||
gwr_trains = cached['gwr']
|
|
||||||
eurostar_trains = cached['eurostar']
|
|
||||||
from_cache = True
|
|
||||||
else:
|
|
||||||
from_cache = False
|
|
||||||
gwr_result, es_result = asyncio.run(_fetch_both(destination, travel_date, user_agent))
|
|
||||||
|
|
||||||
if isinstance(gwr_result, Exception):
|
if cached_rtt:
|
||||||
|
gwr_trains = cached_rtt
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
gwr_trains = rtt_scraper.fetch(travel_date, user_agent)
|
||||||
|
set_cached(rtt_cache_key, gwr_trains)
|
||||||
|
except Exception as e:
|
||||||
gwr_trains = []
|
gwr_trains = []
|
||||||
error = f"Could not fetch GWR trains: {gwr_result}"
|
error = f"Could not fetch GWR trains: {e}"
|
||||||
else:
|
|
||||||
gwr_trains = gwr_result
|
|
||||||
|
|
||||||
if isinstance(es_result, Exception):
|
if cached_es:
|
||||||
|
eurostar_trains = cached_es
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
eurostar_trains = eurostar_scraper.fetch(destination, travel_date, user_agent)
|
||||||
|
set_cached(es_cache_key, eurostar_trains)
|
||||||
|
except Exception as e:
|
||||||
eurostar_trains = []
|
eurostar_trains = []
|
||||||
msg = f"Could not fetch Eurostar times: {es_result}"
|
msg = f"Could not fetch Eurostar times: {e}"
|
||||||
error = f"{error}; {msg}" if error else msg
|
error = f"{error}; {msg}" if error else msg
|
||||||
else:
|
|
||||||
eurostar_trains = es_result
|
|
||||||
|
|
||||||
if gwr_trains or eurostar_trains:
|
|
||||||
set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains})
|
|
||||||
|
|
||||||
trips = combine_trips(gwr_trains, eurostar_trains, travel_date)
|
trips = combine_trips(gwr_trains, eurostar_trains, travel_date)
|
||||||
|
|
||||||
|
|
@ -85,6 +85,9 @@ def results(slug, travel_date):
|
||||||
next_date = (dt + timedelta(days=1)).isoformat()
|
next_date = (dt + timedelta(days=1)).isoformat()
|
||||||
travel_date_display = dt.strftime('%A %-d %B %Y')
|
travel_date_display = dt.strftime('%A %-d %B %Y')
|
||||||
|
|
||||||
|
eurostar_url = eurostar_scraper.ROUTE_URLS[destination] + f"?date={travel_date}"
|
||||||
|
rtt_url = RTT_PADDINGTON_URL.format(date=travel_date)
|
||||||
|
|
||||||
return render_template(
|
return render_template(
|
||||||
'results.html',
|
'results.html',
|
||||||
trips=trips,
|
trips=trips,
|
||||||
|
|
@ -98,6 +101,8 @@ def results(slug, travel_date):
|
||||||
eurostar_count=len(eurostar_trains),
|
eurostar_count=len(eurostar_trains),
|
||||||
from_cache=from_cache,
|
from_cache=from_cache,
|
||||||
error=error,
|
error=error,
|
||||||
|
eurostar_url=eurostar_url,
|
||||||
|
rtt_url=rtt_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,6 @@ Data path: props.pageProps.pageData.liveDepartures[]
|
||||||
.destination.model.scheduledArrivalDateTime → destination arrival
|
.destination.model.scheduledArrivalDateTime → destination arrival
|
||||||
(already filtered to the requested stop, not the final stop)
|
(already filtered to the requested stop, not the final stop)
|
||||||
"""
|
"""
|
||||||
import asyncio
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
import httpx
|
import httpx
|
||||||
|
|
@ -62,15 +61,18 @@ def _parse(html: str, destination: str) -> list[dict]:
|
||||||
dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
|
dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
|
||||||
arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
|
arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
|
||||||
if dep_time and arr_time:
|
if dep_time and arr_time:
|
||||||
|
carrier = dep.get('model', {}).get('carrier', 'ES')
|
||||||
|
number = dep.get('model', {}).get('trainNumber', '')
|
||||||
services.append({
|
services.append({
|
||||||
'depart_st_pancras': dep_time,
|
'depart_st_pancras': dep_time,
|
||||||
'arrive_destination': arr_time,
|
'arrive_destination': arr_time,
|
||||||
'destination': destination,
|
'destination': destination,
|
||||||
|
'train_number': f"{carrier} {number}" if number else '',
|
||||||
})
|
})
|
||||||
return sorted(services, key=lambda s: s['depart_st_pancras'])
|
return sorted(services, key=lambda s: s['depart_st_pancras'])
|
||||||
|
|
||||||
|
|
||||||
async def fetch(destination: str, travel_date: str,
|
def fetch(destination: str, travel_date: str,
|
||||||
user_agent: str = DEFAULT_UA) -> list[dict]:
|
user_agent: str = DEFAULT_UA) -> list[dict]:
|
||||||
url = ROUTE_URLS[destination]
|
url = ROUTE_URLS[destination]
|
||||||
headers = {
|
headers = {
|
||||||
|
|
@ -78,13 +80,7 @@ async def fetch(destination: str, travel_date: str,
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||||
'Accept-Language': 'en-GB,en;q=0.9',
|
'Accept-Language': 'en-GB,en;q=0.9',
|
||||||
}
|
}
|
||||||
async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=20) as client:
|
with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client:
|
||||||
r = await client.get(url, params={'date': travel_date})
|
r = client.get(url, params={'date': travel_date})
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return _parse(r.text, destination)
|
return _parse(r.text, destination)
|
||||||
|
|
||||||
|
|
||||||
def get_eurostar_times(destination: str, travel_date: str,
|
|
||||||
user_agent: str = DEFAULT_UA) -> list[dict]:
|
|
||||||
"""Synchronous wrapper for CLI/testing."""
|
|
||||||
return asyncio.run(fetch(destination, travel_date, user_agent))
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,11 @@
|
||||||
"""
|
"""
|
||||||
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
|
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
|
||||||
|
|
||||||
Uses httpx (not Playwright) with browser-like headers.
|
Two fetches:
|
||||||
|
|
||||||
Two fetches run concurrently:
|
|
||||||
BRI/to/PAD → departure times from Bristol (div.time.plan.d)
|
BRI/to/PAD → departure times from Bristol (div.time.plan.d)
|
||||||
PAD/from/BRI → arrival times at Paddington (div.time.plan.a)
|
PAD/from/BRI → arrival times at Paddington (div.time.plan.a)
|
||||||
Matched by train ID (div.tid).
|
Matched by train ID (div.tid).
|
||||||
"""
|
"""
|
||||||
import asyncio
|
|
||||||
import re
|
import re
|
||||||
import httpx
|
import httpx
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
|
@ -71,26 +68,19 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
|
def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
|
||||||
"""Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}]."""
|
"""Fetch GWR trains; returns [{'depart_bristol', 'arrive_paddington', 'headcode'}]."""
|
||||||
headers = _browser_headers(user_agent)
|
headers = _browser_headers(user_agent)
|
||||||
async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client:
|
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
|
||||||
r_bri, r_pad = await asyncio.gather(
|
r_bri = client.get(BRI_TO_PAD.format(date=date))
|
||||||
client.get(BRI_TO_PAD.format(date=date)),
|
r_pad = client.get(PAD_FROM_BRI.format(date=date))
|
||||||
client.get(PAD_FROM_BRI.format(date=date)),
|
|
||||||
)
|
|
||||||
|
|
||||||
departures = _parse_services(r_bri.text, 'div.time.plan.d')
|
departures = _parse_services(r_bri.text, 'div.time.plan.d')
|
||||||
arrivals = _parse_services(r_pad.text, 'div.time.plan.a')
|
arrivals = _parse_services(r_pad.text, 'div.time.plan.a')
|
||||||
|
|
||||||
trains = [
|
trains = [
|
||||||
{'depart_bristol': dep, 'arrive_paddington': arr}
|
{'depart_bristol': dep, 'arrive_paddington': arr, 'headcode': tid}
|
||||||
for tid, dep in departures.items()
|
for tid, dep in departures.items()
|
||||||
if (arr := arrivals.get(tid))
|
if (arr := arrivals.get(tid))
|
||||||
]
|
]
|
||||||
return sorted(trains, key=lambda t: t['depart_bristol'])
|
return sorted(trains, key=lambda t: t['depart_bristol'])
|
||||||
|
|
||||||
|
|
||||||
def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
|
|
||||||
"""Synchronous wrapper around fetch() for CLI/testing use."""
|
|
||||||
return asyncio.run(fetch(date, user_agent))
|
|
||||||
|
|
|
||||||
|
|
@ -38,18 +38,33 @@
|
||||||
<table style="width:100%;border-collapse:collapse;font-size:0.95rem">
|
<table style="width:100%;border-collapse:collapse;font-size:0.95rem">
|
||||||
<thead>
|
<thead>
|
||||||
<tr style="border-bottom:2px solid #e2e8f0;text-align:left">
|
<tr style="border-bottom:2px solid #e2e8f0;text-align:left">
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart Bristol</th>
|
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Bristol</th>
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive Paddington</th>
|
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Paddington</th>
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Transfer</th>
|
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Transfer</th>
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart St Pancras</th>
|
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart St Pancras</th>
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive {{ destination }}</th>
|
<th style="padding:0.6rem 0.8rem">{{ destination }}
|
||||||
|
</th>
|
||||||
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Total</th>
|
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Total</th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
|
{% set best_mins = trips | map(attribute='total_minutes') | min %}
|
||||||
|
{% set worst_mins = trips | map(attribute='total_minutes') | max %}
|
||||||
{% for trip in trips %}
|
{% for trip in trips %}
|
||||||
<tr style="border-bottom:1px solid #e2e8f0{% if loop.index is odd %};background:#f7fafc{% endif %}">
|
{% if trip.total_minutes == best_mins and trips | length > 1 %}
|
||||||
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_bristol }}</td>
|
{% set row_bg = 'background:#f0fff4' %}
|
||||||
|
{% elif trip.total_minutes == worst_mins and trips | length > 1 %}
|
||||||
|
{% set row_bg = 'background:#fff5f5' %}
|
||||||
|
{% elif loop.index is odd %}
|
||||||
|
{% set row_bg = 'background:#f7fafc' %}
|
||||||
|
{% else %}
|
||||||
|
{% set row_bg = '' %}
|
||||||
|
{% endif %}
|
||||||
|
<tr style="border-bottom:1px solid #e2e8f0;{{ row_bg }}">
|
||||||
|
<td style="padding:0.6rem 0.8rem;font-weight:600">
|
||||||
|
{{ trip.depart_bristol }}
|
||||||
|
{% if trip.headcode %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{{ trip.headcode }}</span>{% endif %}
|
||||||
|
</td>
|
||||||
<td style="padding:0.6rem 0.8rem">
|
<td style="padding:0.6rem 0.8rem">
|
||||||
{{ trip.arrive_paddington }}
|
{{ trip.arrive_paddington }}
|
||||||
<span style="font-size:0.8rem;color:#718096">({{ trip.gwr_duration }})</span>
|
<span style="font-size:0.8rem;color:#718096">({{ trip.gwr_duration }})</span>
|
||||||
|
|
@ -57,9 +72,23 @@
|
||||||
<td style="padding:0.6rem 0.8rem;color:#4a5568">
|
<td style="padding:0.6rem 0.8rem;color:#4a5568">
|
||||||
{{ trip.connection_duration }}
|
{{ trip.connection_duration }}
|
||||||
</td>
|
</td>
|
||||||
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_st_pancras }}</td>
|
<td style="padding:0.6rem 0.8rem;font-weight:600">
|
||||||
<td style="padding:0.6rem 0.8rem">{{ trip.arrive_destination }}</td>
|
{{ trip.depart_st_pancras }}
|
||||||
<td style="padding:0.6rem 0.8rem;font-weight:600;color:#00539f">{{ trip.total_duration }}</td>
|
{% if trip.train_number %}<br><span style="font-size:0.75rem;font-weight:400;color:#718096">{{ trip.train_number }}</span>{% endif %}
|
||||||
|
</td>
|
||||||
|
<td style="padding:0.6rem 0.8rem">
|
||||||
|
{{ trip.arrive_destination }}
|
||||||
|
<span style="font-weight:400;color:#718096;font-size:0.85em">(CET)</span>
|
||||||
|
</td>
|
||||||
|
<td style="padding:0.6rem 0.8rem;font-weight:600">
|
||||||
|
{% if trip.total_minutes == best_mins and trips | length > 1 %}
|
||||||
|
<span style="color:#276749" title="Fastest option">{{ trip.total_duration }} ⚡</span>
|
||||||
|
{% elif trip.total_minutes == worst_mins and trips | length > 1 %}
|
||||||
|
<span style="color:#c53030" title="Slowest option">{{ trip.total_duration }} 🐢</span>
|
||||||
|
{% else %}
|
||||||
|
<span style="color:#00539f">{{ trip.total_duration }}</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
|
|
@ -67,9 +96,11 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<p style="margin-top:1rem;font-size:0.82rem;color:#718096">
|
<p style="margin-top:1rem;font-size:0.82rem;color:#718096">
|
||||||
Paddington → St Pancras connection: 75 min minimum, 2h 20m maximum.
|
Paddington → St Pancras connection: 50 min minimum, 1h 50m maximum.
|
||||||
Eurostar times are from the general timetable and may vary; always check
|
Eurostar times are from the general timetable and may vary; always check
|
||||||
<a href="https://www.eurostar.com" target="_blank" rel="noopener">eurostar.com</a> to book.
|
<a href="{{ eurostar_url }}" target="_blank" rel="noopener">eurostar.com</a> to book.
|
||||||
|
·
|
||||||
|
<a href="{{ rtt_url }}" target="_blank" rel="noopener">Paddington arrivals on RTT</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
{% else %}
|
{% else %}
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination t
|
||||||
"""
|
"""
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
MIN_CONNECTION_MINUTES = 75
|
MIN_CONNECTION_MINUTES = 50
|
||||||
MAX_CONNECTION_MINUTES = 140
|
MAX_CONNECTION_MINUTES = 110
|
||||||
MAX_GWR_MINUTES = 110
|
MAX_GWR_MINUTES = 110
|
||||||
DATE_FMT = '%Y-%m-%d'
|
DATE_FMT = '%Y-%m-%d'
|
||||||
TIME_FMT = '%H:%M'
|
TIME_FMT = '%H:%M'
|
||||||
|
|
@ -72,14 +72,18 @@ def combine_trips(
|
||||||
if (dep_stp - arr_pad).total_seconds() / 60 > MAX_CONNECTION_MINUTES:
|
if (dep_stp - arr_pad).total_seconds() / 60 > MAX_CONNECTION_MINUTES:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
total_mins = int((arr_dest - dep_bri).total_seconds() / 60)
|
||||||
trips.append({
|
trips.append({
|
||||||
'depart_bristol': gwr['depart_bristol'],
|
'depart_bristol': gwr['depart_bristol'],
|
||||||
'arrive_paddington': gwr['arrive_paddington'],
|
'arrive_paddington': gwr['arrive_paddington'],
|
||||||
|
'headcode': gwr.get('headcode', ''),
|
||||||
'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)),
|
'gwr_duration': _fmt_duration(int((arr_pad - dep_bri).total_seconds() / 60)),
|
||||||
'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)),
|
'connection_duration': _fmt_duration(int((dep_stp - arr_pad).total_seconds() / 60)),
|
||||||
'depart_st_pancras': es['depart_st_pancras'],
|
'depart_st_pancras': es['depart_st_pancras'],
|
||||||
'arrive_destination': es['arrive_destination'],
|
'arrive_destination': es['arrive_destination'],
|
||||||
'total_duration': _fmt_duration(int((arr_dest - dep_bri).total_seconds() / 60)),
|
'train_number': es.get('train_number', ''),
|
||||||
|
'total_duration': _fmt_duration(total_mins),
|
||||||
|
'total_minutes': total_mins,
|
||||||
'destination': es['destination'],
|
'destination': es['destination'],
|
||||||
})
|
})
|
||||||
break # Only the earliest valid Eurostar per GWR departure
|
break # Only the earliest valid Eurostar per GWR departure
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue