Cache provisional weekday timetables

This commit is contained in:
Edward Betts 2026-05-21 11:31:17 +01:00
parent 378d2484d0
commit bc7cb9cffa
6 changed files with 686 additions and 58 deletions

410
app.py
View file

@ -96,6 +96,117 @@ VALID_NR_CLASSES = {'walkon', 'advance_std', 'advance_1st'}
VALID_ES_CLASSES = {'standard', 'plus'}
DEFAULT_NR_CLASS = 'walkon'
DEFAULT_ES_CLASS = 'standard'
# NR timetable periods as (first day, last day, cache-key label) tuples.
# _nr_timetable_period_key compares both dates inclusively and falls back to
# a "YYYY-MM" key for any date that no period listed here covers.
NR_TIMETABLE_PERIODS = [
(date(2026, 5, 17), date(2026, 12, 12), "2026-05-17_2026-12-12"),
]
def _weekday_for(section_date: str) -> str:
return date.fromisoformat(section_date).strftime("%a").lower()
def _month_for(section_date: str) -> str:
return date.fromisoformat(section_date).strftime("%Y-%m")
def _nr_timetable_period_key(section_date: str) -> str:
    """Return the cache-key label of the timetable period covering a date.

    The first entry of ``NR_TIMETABLE_PERIODS`` whose inclusive date range
    contains ``section_date`` supplies the label. When no entry matches, the
    date's ``YYYY-MM`` month is returned instead.
    """
    day = date.fromisoformat(section_date)
    month_fallback = day.strftime("%Y-%m")
    matching_labels = (
        label
        for first_day, last_day, label in NR_TIMETABLE_PERIODS
        if first_day <= day <= last_day
    )
    return next(matching_labels, month_fallback)
def _nr_exact_cache_key(direction: str, station_crs: str, section_date: str) -> str:
return f"rtt_{direction}_{station_crs}_{section_date}"
def _nr_weekday_cache_key(direction: str, station_crs: str, section_date: str) -> str:
    """Build the ``weekday_rtt_`` cache key shared by same-weekday dates.

    The key combines the direction and station with the timetable period and
    the weekday of ``section_date``, so every date with the same weekday
    inside one period maps to the same key.
    """
    period = _nr_timetable_period_key(section_date)
    weekday = _weekday_for(section_date)
    return f"weekday_rtt_{direction}_{station_crs}_{period}_{weekday}"
def _eurostar_exact_cache_key(direction: str, section_date: str, destination: str) -> str:
return f"eurostar_{direction}_{section_date}_{destination}"
def _eurostar_weekday_cache_key(direction: str, section_date: str, destination: str) -> str:
    """Build the ``weekday_eurostar_`` cache key shared by same-weekday dates.

    The key combines the direction and destination with the calendar month
    and weekday of ``section_date``.
    """
    month = _month_for(section_date)
    weekday = _weekday_for(section_date)
    return f"weekday_eurostar_{direction}_{destination}_{month}_{weekday}"
def _eurostar_return_exact_cache_key(travel_date: str, return_date: str, destination: str) -> str:
return f"eurostar_return_{travel_date}_{return_date}_{destination}"
def _eurostar_return_weekday_cache_key(travel_date: str, return_date: str, destination: str) -> str:
    """Build the ``weekday_eurostar_return_`` cache key for a return trip.

    The key holds the destination followed by the month and weekday of the
    outward date, then the month and weekday of the return date.
    """
    out_month = _month_for(travel_date)
    out_weekday = _weekday_for(travel_date)
    back_month = _month_for(return_date)
    back_weekday = _weekday_for(return_date)
    return (
        f"weekday_eurostar_return_{destination}_"
        f"{out_month}_{out_weekday}_{back_month}_{back_weekday}"
    )
def _strip_nr_timetable(trains):
keys = {
"depart_bristol",
"arrive_paddington",
"depart_paddington",
"arrive_destination",
"arrive_platform",
"headcode",
}
return [{k: train[k] for k in keys if k in train} for train in trains]
def _strip_eurostar_timetable(services):
keys = {
"depart_st_pancras",
"arrive_destination",
"depart_destination",
"arrive_st_pancras",
"destination",
"train_number",
}
return [{k: service[k] for k in keys if k in service} for service in services]
def _strip_eurostar_return_timetable(es_return):
if not isinstance(es_return, dict):
return {"outbound": [], "inbound": []}
return {
"outbound": _strip_eurostar_timetable(es_return.get("outbound", [])),
"inbound": _strip_eurostar_timetable(es_return.get("inbound", [])),
}
def _timetable_signature(data) -> str:
return json.dumps(data, sort_keys=True, separators=(",", ":"))
def _eurostar_prices_by_row(section_id: str, direction: str, services):
prices = {}
for service in services:
key = service.get("depart_st_pancras") if direction == "outbound" else service.get("depart_destination")
if not key:
continue
prices[f"{section_id}:{key}"] = {
"es_standard": (
{"price": service.get("price"), "seats": service.get("seats")}
if service.get("price") is not None
else None
),
"es_plus": (
{"price": service.get("plus_price"), "seats": service.get("plus_seats")}
if service.get("plus_price") is not None
else None
),
}
return prices
def _get_defaults():
@ -255,6 +366,11 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
):
dt = date.fromisoformat(travel_date)
travel_date_display = dt.strftime("%A %-d %B %Y")
return_date_display = (
date.fromisoformat(return_date).strftime("%A %-d %B %Y")
if return_date
else None
)
full_args = dict(request.args)
full_args.pop("progressive", None)
full_args.pop("journey_type", None)
@ -267,6 +383,7 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
journey_type=journey_type,
travel_date_display=travel_date_display,
return_date=return_date,
return_date_display=return_date_display,
full_results_url=_results_url(
station_crs=station_crs,
slug=slug,
@ -281,6 +398,7 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA)
error_messages = []
from_cache_parts = []
provisional_timetable = False
def cached_fetch(key, ttl, fetcher, label):
cached = get_cached(key, ttl=ttl)
@ -295,14 +413,38 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
error_messages.append(f"Could not fetch {label}: {e}")
return [] if label != "GWR fares" else {}
# Look up a timetable in three steps: the exact-date cache entry, then the
# weekday cache entry, then a live fetch. Returns a (data, is_provisional)
# pair; is_provisional is True only when the weekday entry was used.
# Closure state touched: from_cache_parts, error_messages and the
# provisional_timetable flag of the enclosing function.
def cached_timetable_fetch(exact_key, weekday_key, fetcher, label, stripper, ttl=None):
nonlocal provisional_timetable
# Exact-date entry: honours the caller's ttl and is never provisional.
cached = get_cached(exact_key, ttl=ttl)
if cached is not None:
from_cache_parts.append(exact_key)
return cached, False
# Weekday entry: read without passing a ttl (get_cached's default applies;
# its value is not visible here).
weekday_cached = get_cached(weekday_key)
if weekday_cached is not None:
from_cache_parts.append(weekday_key)
provisional_timetable = True
return weekday_cached, True
try:
data = fetcher()
# Store the full result under the exact key and a stripped copy under the
# weekday key.
set_cached(exact_key, data)
set_cached(weekday_key, stripper(data))
return data, False
except Exception as e:
# Best effort: record the failure and hand back an empty result of the
# shape the caller expects for this label.
error_messages.append(f"Could not fetch {label}: {e}")
if label == "Eurostar return times":
return {"outbound": [], "inbound": []}, False
return [], False
es_return = None
es_return_provisional = False
if journey_type == "return":
es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}"
es_return = cached_fetch(
es_return_key,
24 * 3600,
es_return, es_return_provisional = cached_timetable_fetch(
_eurostar_return_exact_cache_key(travel_date, return_date, destination),
_eurostar_return_weekday_cache_key(travel_date, return_date, destination),
lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date),
"Eurostar times",
"Eurostar return times",
_strip_eurostar_return_timetable,
24 * 3600,
)
if not isinstance(es_return, dict):
es_return = {"outbound": [], "inbound": []}
@ -314,50 +456,63 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
section_min_connection = INBOUND_MIN_CONNECTION_MINUTES
section_max_connection = INBOUND_MAX_CONNECTION_MINUTES
rtt_direction = "to_paddington" if direction == "outbound" else "from_paddington"
rtt_cache_key = f"rtt_{rtt_direction}_{station_crs}_{section_date}"
rtt_cache_key = _nr_exact_cache_key(rtt_direction, station_crs, section_date)
rtt_weekday_cache_key = _nr_weekday_cache_key(rtt_direction, station_crs, section_date)
gwr_cache_key = f"gwr_fares_{rtt_direction}_{station_crs}_{section_date}"
advance_cache_key = f"gwr_advance_{rtt_direction}_{station_crs}_{section_date}"
if direction == "outbound":
trains = cached_fetch(
trains, nr_provisional = cached_timetable_fetch(
rtt_cache_key,
None,
rtt_weekday_cache_key,
lambda: rtt_scraper.fetch(section_date, user_agent, station_crs),
"GWR trains",
_strip_nr_timetable,
)
else:
trains = cached_fetch(
trains, nr_provisional = cached_timetable_fetch(
rtt_cache_key,
None,
rtt_weekday_cache_key,
lambda: rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs),
"GWR trains",
_strip_nr_timetable,
)
es_provisional = es_return_provisional if journey_type == "return" else False
if eurostar_services is None:
es_cache_key = f"eurostar_{direction}_{section_date}_{destination}"
es_cache_key = _eurostar_exact_cache_key(direction, section_date, destination)
es_weekday_cache_key = _eurostar_weekday_cache_key(direction, section_date, destination)
es_fetcher = (
(lambda: eurostar_scraper.fetch(destination, section_date))
if direction == "outbound"
else (lambda: eurostar_scraper.fetch(destination, section_date, direction=direction))
)
eurostar_services = cached_fetch(
eurostar_services, es_provisional = cached_timetable_fetch(
es_cache_key,
24 * 3600,
es_weekday_cache_key,
es_fetcher,
"Eurostar times",
_strip_eurostar_timetable,
24 * 3600,
)
fare_direction = "to_paddington" if direction == "outbound" else "from_paddington"
gwr_fares = cached_fetch(
gwr_cache_key,
30 * 24 * 3600,
(
(lambda: gwr_fares_scraper.fetch(station_crs, section_date))
if fare_direction == "to_paddington"
else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction))
),
"GWR fares",
)
gwr_fares = get_cached(gwr_cache_key, ttl=30 * 24 * 3600)
if gwr_fares is not None:
from_cache_parts.append(gwr_cache_key)
elif nr_provisional or es_provisional:
gwr_fares = {}
else:
gwr_fares = cached_fetch(
gwr_cache_key,
30 * 24 * 3600,
(
(lambda: gwr_fares_scraper.fetch(station_crs, section_date))
if fare_direction == "to_paddington"
else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction))
),
"GWR fares",
)
cached_advance = get_cached(advance_cache_key, ttl=24 * 3600)
if direction == "outbound":
@ -424,7 +579,12 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
row["eurostar_seats"] = es.get("seats")
row["eurostar_plus_price"] = es.get("plus_price")
row["eurostar_plus_seats"] = es.get("plus_seats")
row["row_key"] = f"{section_id}:{key}"
row["eurostar_key"] = f"{section_id}:{key}"
if row.get("row_type") == "trip":
nr_key = row.get("depart_bristol") or row.get("depart_paddington")
row["row_key"] = f"{section_id}:{nr_key}:{key}"
else:
row["row_key"] = f"{section_id}:unreachable:{key}"
dt = date.fromisoformat(section_date)
return {
@ -438,7 +598,14 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
"eurostar_count": len(eurostar_services),
"min_connection": section_min_connection,
"max_connection": section_max_connection,
"provisional_timetable": nr_provisional or es_provisional,
"advance_fares": cached_advance,
"walkon_api_url": url_for(
"api_walkon_fares",
station_crs=station_crs,
travel_date=section_date,
direction=fare_direction,
),
"advance_api_url": url_for(
"api_advance_fares",
station_crs=station_crs,
@ -475,6 +642,14 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
prev_date = (dt - timedelta(days=1)).isoformat()
next_date = (dt + timedelta(days=1)).isoformat()
travel_date_display = dt.strftime("%A %-d %B %Y")
return_date_display = None
prev_return_date = return_date
next_return_date = return_date
if return_date:
return_dt = date.fromisoformat(return_date)
return_date_display = return_dt.strftime("%A %-d %B %Y")
prev_return_date = (return_dt - timedelta(days=1)).isoformat()
next_return_date = (return_dt + timedelta(days=1)).isoformat()
eurostar_url = eurostar_scraper.search_url(
destination, travel_date, direction=journey_type, return_date=return_date
@ -498,13 +673,13 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
station_crs,
slug,
prev_date,
**common_url_args,
**{**common_url_args, "return_date": prev_return_date},
)
next_results_url = _results_url(
station_crs,
slug,
next_date,
**common_url_args,
**{**common_url_args, "return_date": next_return_date},
)
destination_links = [
(
@ -529,10 +704,12 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
trip_fares = {}
advance_fares = {}
walkon_api_urls = {}
advance_api_urls = {}
advance_stream_urls = {}
for section in sections:
advance_fares[section["id"]] = section["advance_fares"]
walkon_api_urls[section["id"]] = section["walkon_api_url"]
advance_api_urls[section["id"]] = section["advance_api_url"]
advance_stream_urls[section["id"]] = section["advance_stream_url"]
for row in section["rows"]:
@ -555,6 +732,7 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
)
trip_fares[row["row_key"]] = {
"section": section["id"],
"eurostar_key": row.get("eurostar_key"),
"advance_key": row.get("depart_bristol") or row.get("depart_paddington"),
"walkon": walkon,
"es_standard": es_std,
@ -562,6 +740,23 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
"circle_fare": circle_fare,
}
if journey_type == "return":
timetable_refresh_url = url_for(
"api_return_results_refresh",
station_crs=station_crs,
slug=slug,
travel_date=travel_date,
return_date=return_date,
)
else:
timetable_refresh_url = url_for(
"api_results_refresh",
station_crs=station_crs,
slug=slug,
travel_date=travel_date,
journey_type=journey_type if journey_type == "inbound" else None,
)
return render_template(
"results.html",
sections=sections,
@ -583,9 +778,11 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
destination_links=destination_links,
results_base_url=results_base_url,
travel_date_display=travel_date_display,
return_date_display=return_date_display,
gwr_count=sum(section["gwr_count"] for section in sections),
eurostar_count=sum(section["eurostar_count"] for section in sections),
from_cache=bool(from_cache_parts),
provisional_timetable=provisional_timetable,
error="; ".join(error_messages) if error_messages else None,
no_prices_note=no_prices_note,
eurostar_url=eurostar_url,
@ -603,8 +800,10 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
url_es_class=url_es,
trip_fares_json=json.dumps(trip_fares),
advance_fares_json=json.dumps(advance_fares),
walkon_api_urls_json=json.dumps(walkon_api_urls),
advance_api_urls_json=json.dumps(advance_api_urls),
advance_stream_urls_json=json.dumps(advance_stream_urls),
timetable_refresh_url=timetable_refresh_url,
advance_fares_api_url=url_for("api_advance_fares", station_crs=station_crs, travel_date=travel_date),
advance_fares_stream_url=url_for("api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date),
valid_min_connections=sorted(valid_min),
@ -612,6 +811,165 @@ def _results(station_crs, slug, travel_date, journey_type, return_date):
)
def _fetch_exact_nr_timetable(station_crs, section_date, direction, user_agent):
    """Fetch the trains for one exact date and refresh both cache entries.

    Uses ``rtt_scraper.fetch`` for the outbound direction and
    ``rtt_scraper.fetch_from_paddington`` otherwise. The full result is
    stored under the exact-date key and a stripped copy under the weekday
    key. Returns the full result.
    """
    is_outbound = direction == "outbound"
    rtt_direction = "to_paddington" if is_outbound else "from_paddington"
    exact_key = _nr_exact_cache_key(rtt_direction, station_crs, section_date)
    weekday_key = _nr_weekday_cache_key(rtt_direction, station_crs, section_date)
    fetch_trains = rtt_scraper.fetch if is_outbound else rtt_scraper.fetch_from_paddington
    trains = fetch_trains(section_date, user_agent, station_crs)
    set_cached(exact_key, trains)
    set_cached(weekday_key, _strip_nr_timetable(trains))
    return trains
def _fetch_exact_eurostar_single(destination, section_date, direction):
    """Fetch single-journey Eurostar services and refresh both cache entries.

    ``direction`` is passed to ``eurostar_scraper.fetch`` only when it is not
    ``"outbound"``. The full result is stored under the exact-date key and a
    stripped copy under the weekday key. Returns the full result.
    """
    exact_key = _eurostar_exact_cache_key(direction, section_date, destination)
    weekday_key = _eurostar_weekday_cache_key(direction, section_date, destination)
    direction_kwargs = {} if direction == "outbound" else {"direction": direction}
    services = eurostar_scraper.fetch(destination, section_date, **direction_kwargs)
    set_cached(exact_key, services)
    set_cached(weekday_key, _strip_eurostar_timetable(services))
    return services
def _fetch_exact_eurostar_return(destination, travel_date, return_date):
    """Fetch return-journey Eurostar services and refresh both cache entries.

    The full ``eurostar_scraper.fetch_return`` result is stored under the
    exact-date key and a stripped copy under the weekday key. Returns the
    full result.
    """
    exact_key = _eurostar_return_exact_cache_key(travel_date, return_date, destination)
    weekday_key = _eurostar_return_weekday_cache_key(travel_date, return_date, destination)
    es_return = eurostar_scraper.fetch_return(destination, travel_date, return_date)
    set_cached(exact_key, es_return)
    set_cached(weekday_key, _strip_eurostar_return_timetable(es_return))
    return es_return
@app.route("/api/walkon_fares/<station_crs>/<travel_date>")
def api_walkon_fares(station_crs, travel_date):
    """Return walk-on fares as JSON, served from cache when available.

    Responds 404 for an unknown station. The ``direction`` query parameter
    defaults to ``to_paddington``, and any unrecognised value is treated as
    that default. A failed fetch yields a JSON ``error`` body with status 500.
    """
    if station_crs not in STATION_BY_CRS:
        abort(404)

    requested_direction = request.args.get("direction", "to_paddington")
    direction = (
        requested_direction
        if requested_direction in {"to_paddington", "from_paddington"}
        else "to_paddington"
    )

    cache_key = f"gwr_fares_{direction}_{station_crs}_{travel_date}"
    cached_fares = get_cached(cache_key, ttl=30 * 24 * 3600)
    if cached_fares is not None:
        return jsonify(cached_fares)

    try:
        if direction == "to_paddington":
            fares = gwr_fares_scraper.fetch(station_crs, travel_date)
        else:
            fares = gwr_fares_scraper.fetch(station_crs, travel_date, direction=direction)
        set_cached(cache_key, fares)
        return jsonify(fares)
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
@app.route("/api/results_refresh/<station_crs>/<slug>/<travel_date>")
def api_results_refresh(station_crs, slug, travel_date):
    """Refresh route that takes any return date from the query string."""
    query_return_date = request.args.get("return_date")
    return _api_results_refresh(
        station_crs,
        slug,
        travel_date,
        return_date=query_return_date,
    )
@app.route("/api/results_refresh/<station_crs>/<slug>/<travel_date>/return/<return_date>")
def api_return_results_refresh(station_crs, slug, travel_date, return_date):
    """Refresh route for return journeys; the journey type is fixed to "return"."""
    return _api_results_refresh(
        station_crs,
        slug,
        travel_date,
        return_date=return_date,
        path_journey_type="return",
    )
# Shared implementation of the two results-refresh routes. Validates the
# station, destination slug and journey type, then returns a
# text/event-stream response produced by generate().
#
# Parameters:
#   station_crs, slug, travel_date: identify the station, destination and
#       outward date.
#   return_date: return-leg date; when None it is read from the query string.
#   path_journey_type: journey type fixed by the calling route; when falsy the
#       "journey_type" query parameter is used (default "outbound").
#
# Aborts with 404 for an unknown station or destination slug, and with 400
# when a return journey has no return date.
def _api_results_refresh(station_crs, slug, travel_date, return_date=None, path_journey_type=None):
if station_crs not in STATION_BY_CRS:
abort(404)
destination = DESTINATIONS.get(slug)
if not destination:
abort(404)
journey_type = path_journey_type or request.args.get("journey_type", "outbound")
# An unrecognised journey type silently falls back to "outbound".
if journey_type not in VALID_JOURNEY_TYPES:
journey_type = "outbound"
if return_date is None:
return_date = request.args.get("return_date")
if journey_type == "return" and not return_date:
abort(400)
user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA)
# Generator yielding 'data: <json>\n\n' events. Event types emitted:
# "reload", "eurostar_prices", "walkon_fares", "error" and "done". "done" ends
# every stream except one that ended with an "error" event.
def generate():
try:
# Read the old weekday cache entries BEFORE calling the _fetch_exact_*
# helpers, because those helpers overwrite the weekday entries.
old_es_weekdays = {}
if journey_type == "return":
old_es_weekdays["return"] = get_cached(
_eurostar_return_weekday_cache_key(travel_date, return_date, destination)
)
es_return = _fetch_exact_eurostar_return(destination, travel_date, return_date)
# Each section is (section_id, direction, section_date, eurostar services).
sections = [
("outbound", "outbound", travel_date, es_return.get("outbound", [])),
("inbound", "inbound", return_date, es_return.get("inbound", [])),
]
else:
direction = journey_type
section_date = travel_date
old_es_weekdays["main"] = get_cached(
_eurostar_weekday_cache_key(direction, section_date, destination)
)
es_services = _fetch_exact_eurostar_single(destination, section_date, direction)
sections = [("main", direction, section_date, es_services)]
reload_needed = False
eurostar_prices = {}
for section_id, direction, section_date, es_services in sections:
nr_weekday_key = _nr_weekday_cache_key(
"to_paddington" if direction == "outbound" else "from_paddington",
station_crs,
section_date,
)
# Same ordering rule as above: read the old entry, then fetch.
old_nr_weekday = get_cached(nr_weekday_key)
exact_nr = _fetch_exact_nr_timetable(station_crs, section_date, direction, user_agent)
# A missing old entry (None) never triggers a reload; only a differing one does.
if old_nr_weekday is not None and _timetable_signature(old_nr_weekday) != _timetable_signature(_strip_nr_timetable(exact_nr)):
reload_needed = True
# For return journeys the Eurostar comparison covers both legs at once, so
# the same comparison is repeated unchanged for each section.
old_es_weekday = old_es_weekdays["return"] if journey_type == "return" else old_es_weekdays[section_id]
exact_es_timetable = (
_strip_eurostar_return_timetable(es_return)
if journey_type == "return"
else _strip_eurostar_timetable(es_services)
)
if old_es_weekday is not None and _timetable_signature(old_es_weekday) != _timetable_signature(exact_es_timetable):
reload_needed = True
eurostar_prices.update(_eurostar_prices_by_row(section_id, direction, es_services))
# When a timetable differs from the old weekday entry, emit "reload" then
# "done" and stop without sending any prices or fares. (Presumably the page
# reloads itself on this event; the client code is not visible here.)
if reload_needed:
yield f"data: {json.dumps({'type': 'reload'})}\n\n"
yield f"data: {json.dumps({'type': 'done'})}\n\n"
return
if eurostar_prices:
yield f"data: {json.dumps({'type': 'eurostar_prices', 'prices': eurostar_prices})}\n\n"
# Walk-on fares per section: use the cached entry when present, otherwise
# fetch and cache it, then emit one "walkon_fares" event for the section.
for section_id, direction, section_date, _es_services in sections:
fare_direction = "to_paddington" if direction == "outbound" else "from_paddington"
cache_key = f"gwr_fares_{fare_direction}_{station_crs}_{section_date}"
cached = get_cached(cache_key, ttl=30 * 24 * 3600)
if cached is None:
cached = (
gwr_fares_scraper.fetch(station_crs, section_date)
if fare_direction == "to_paddington"
else gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction)
)
set_cached(cache_key, cached)
yield f"data: {json.dumps({'type': 'walkon_fares', 'section': section_id, 'fares': cached})}\n\n"
except Exception as e:
# Any failure is reported to the client as an "error" event carrying the
# exception text, and the stream ends without a "done" event.
yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
return
yield f"data: {json.dumps({'type': 'done'})}\n\n"
return Response(stream_with_context(generate()), mimetype="text/event-stream")
@app.route("/api/advance_fares/<station_crs>/<travel_date>")
def api_advance_fares(station_crs, travel_date):
if station_crs not in STATION_BY_CRS: