paddington-eurostar/app.py

571 lines
22 KiB
Python

"""
Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.
"""
from flask import Flask, render_template, redirect, url_for, request, abort, jsonify, Response, stream_with_context
from datetime import date, timedelta
from pathlib import Path
import json
import os
from cache import get_cached, set_cached
import scraper.eurostar as eurostar_scraper
import scraper.gwr_fares as gwr_fares_scraper
import scraper.realtime_trains as rtt_scraper
from trip_planner import (
INBOUND_MAX_CONNECTION_MINUTES,
INBOUND_MIN_CONNECTION_MINUTES,
combine_inbound_trips,
combine_trips,
find_unreachable_inbound_eurostars,
find_unreachable_morning_eurostars,
)
# Realtime Trains "detailed search" URL templates. Both cover the whole day
# (0000-2359) and take ``crs`` (station code) and ``date`` format fields.
_RTT_SEARCH_PREFIX = "https://www.realtimetrains.co.uk/search/detailed/"
_RTT_SEARCH_SUFFIX = "/{date}/0000-2359?stp=WVS&show=pax-calls&order=wtt"

# Search anchored at Paddington (PAD), filtered "from" the chosen station.
RTT_PADDINGTON_URL = (
    _RTT_SEARCH_PREFIX + "gb-nr:PAD/from/gb-nr:{crs}" + _RTT_SEARCH_SUFFIX
)
# Search anchored at the chosen station, filtered "to" Paddington (PAD).
RTT_STATION_URL = (
    _RTT_SEARCH_PREFIX + "gb-nr:{crs}/to/gb-nr:PAD" + _RTT_SEARCH_SUFFIX
)
# Application bootstrap. Statement order matters here: configuration is loaded
# first so that the settings pushed into the helper modules below can be read
# from app.config.
app = Flask(__name__, instance_relative_config=False)
app.config.from_object("config.default")
# Optional overrides: config/local.py next to this file, applied only when the
# file exists.
_local = os.path.join(os.path.dirname(__file__), "config", "local.py")
if os.path.exists(_local):
    app.config.from_pyfile(_local)
# These imports sit after the app/config setup rather than at the top of the
# file. NOTE(review): presumably deliberate (import-order dependency) - confirm
# before moving them.
import cache
import circle_line
# Inject the configured locations into the helper modules' module-level
# settings.
cache.CACHE_DIR = app.config["CACHE_DIR"]
circle_line._TXC_XML = app.config["CIRCLE_LINE_XML"]
def _load_stations(tsv_path=None):
    """Load the list of departure stations from a tab-separated file.

    Each useful line has the form ``<station name><TAB><CRS code>``. Lines are
    stripped of surrounding whitespace first; lines that then contain no tab
    (blank lines, stray text) are ignored. Only the first tab splits the line.

    Args:
        tsv_path: Optional path to the TSV file. Defaults to
            ``data/direct_to_paddington.tsv`` next to this module.

    Returns:
        A list of ``(name, crs)`` tuples sorted by station name.
    """
    if tsv_path is None:
        tsv_path = Path(__file__).parent / "data" / "direct_to_paddington.tsv"
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the machine the app happens to run on.
    text = Path(tsv_path).read_text(encoding="utf-8")
    stations = []
    for line in text.splitlines():
        name, tab, crs = line.strip().partition("\t")
        if tab:
            stations.append((name, crs))
    return sorted(stations, key=lambda station: station[0])
# (name, CRS) pairs loaded once at import time, sorted by station name.
STATIONS = _load_stations()
# Reverse lookup used to validate CRS codes and resolve the display name.
STATION_BY_CRS = {code: label for label, code in STATIONS}
# URL slug -> Eurostar destination station name. The slug appears in the
# /results path; the station name is what is handed to the Eurostar scraper.
DESTINATIONS = dict(
    paris="Paris Gare du Nord",
    brussels="Brussels Midi",
    lille="Lille Europe",
    amsterdam="Amsterdam Centraal",
    rotterdam="Rotterdam Centraal",
    cologne="Cologne Hbf",
)
@app.route("/")
def index():
    """Render the search form.

    The template receives the destination and station lists, today's date,
    a default return date seven days ahead, the configured default connection
    window and the sorted lists of selectable connection times.
    """
    today = date.today().isoformat()
    min_default, max_default = _get_defaults()
    context = {
        "destinations": DESTINATIONS,
        "today": today,
        "stations": STATIONS,
        "default_min_connection": min_default,
        "default_max_connection": max_default,
        "valid_min_connections": sorted(VALID_MIN_CONNECTIONS),
        "valid_max_connections": sorted(VALID_MAX_CONNECTIONS),
        "default_return_date": (date.today() + timedelta(days=7)).isoformat(),
    }
    return render_template("index.html", **context)
# Connection times (in minutes) accepted for the min_connection /
# max_connection query parameters. Anything else falls back to the default
# (see _parse_connection). From 60 minutes upwards the choices are in
# 10-minute steps.
VALID_MIN_CONNECTIONS = {45, 50} | set(range(60, 121, 10))
VALID_MAX_CONNECTIONS = set(range(60, 181, 10))
# Inbound-only searches accept shorter minimum connections as well.
VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50} | set(range(60, 121, 10))
VALID_INBOUND_MAX_CONNECTIONS = set(range(60, 181, 10))

# Accepted values for the journey_type, nr_class and es_class query parameters.
VALID_JOURNEY_TYPES = {"outbound", "inbound", "return"}
VALID_NR_CLASSES = {"walkon", "advance_std", "advance_1st"}
VALID_ES_CLASSES = {"standard", "plus"}

# Fallbacks used when nr_class / es_class are missing or not recognised.
DEFAULT_NR_CLASS = "walkon"
DEFAULT_ES_CLASS = "standard"
def _get_defaults():
    """Return the configured ``(min, max)`` default connection times.

    Both values are read from ``app.config`` on every call
    (``DEFAULT_MIN_CONNECTION`` and ``DEFAULT_MAX_CONNECTION``).
    """
    settings = app.config
    return settings["DEFAULT_MIN_CONNECTION"], settings["DEFAULT_MAX_CONNECTION"]
def _parse_connection(raw, default, valid_set):
    """Turn a raw query-string value into an allowed connection time.

    Args:
        raw: The value to interpret; anything ``int()`` accepts, or ``None``.
        default: Value returned when ``raw`` is unusable.
        valid_set: Container of the integer values that are allowed.

    Returns:
        ``int(raw)`` when the conversion succeeds and the result is in
        ``valid_set``; otherwise ``default``.
    """
    try:
        minutes = int(raw)
    except (TypeError, ValueError):
        # Missing parameter (None) or something that is not an integer.
        return default
    if minutes in valid_set:
        return minutes
    return default
@app.route("/search")
def search():
    """Validate the search form's query string and redirect to the results page.

    Query parameters read: ``destination``, ``travel_date``, ``return_date``,
    ``journey_type``, ``station_crs``, ``min_connection``, ``max_connection``,
    ``nr_class`` and ``es_class``.

    Unrecognised journey types, stations, connection times and fare classes
    fall back to their defaults. The request is redirected to the index page
    when the destination is unknown, when ``travel_date`` is missing or is not
    an ISO date, or when a return journey has a missing, malformed or
    too-early ``return_date``. Otherwise the redirect goes to ``results`` with
    the dates in canonical ``YYYY-MM-DD`` form; parameters that equal their
    default are left out of the URL.
    """
    args = request.args

    slug = args.get("destination", "")
    if slug not in DESTINATIONS:
        return redirect(url_for("index"))

    journey_type = args.get("journey_type", "outbound")
    if journey_type not in VALID_JOURNEY_TYPES:
        journey_type = "outbound"

    station_crs = args.get("station_crs", "BRI")
    if station_crs not in STATION_BY_CRS:
        station_crs = "BRI"

    # Parse the dates for every journey type, not only for returns, so that a
    # malformed travel date never reaches the results page (where it would
    # end up in cache keys and scraper calls before failing).
    try:
        outward = date.fromisoformat(args.get("travel_date", ""))
        homeward = (
            date.fromisoformat(args.get("return_date", ""))
            if journey_type == "return"
            else None
        )
    except ValueError:
        return redirect(url_for("index"))
    if homeward is not None and homeward < outward:
        return redirect(url_for("index"))

    # Inbound-only searches have their own defaults and allowed values.
    if journey_type == "inbound":
        default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES
        valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS
    else:
        default_min, default_max = _get_defaults()
        valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS
    min_conn = _parse_connection(args.get("min_connection"), default_min, valid_min)
    max_conn = _parse_connection(args.get("max_connection"), default_max, valid_max)

    nr_class = args.get("nr_class", DEFAULT_NR_CLASS)
    if nr_class not in VALID_NR_CLASSES:
        nr_class = DEFAULT_NR_CLASS
    es_class = args.get("es_class", DEFAULT_ES_CLASS)
    if es_class not in VALID_ES_CLASSES:
        es_class = DEFAULT_ES_CLASS

    # Passing None for a keyword keeps it out of the generated URL, so only
    # non-default choices show up in the query string.
    return redirect(
        url_for(
            "results",
            station_crs=station_crs,
            slug=slug,
            travel_date=outward.isoformat(),
            journey_type=None if journey_type == "outbound" else journey_type,
            return_date=homeward.isoformat() if homeward is not None else None,
            min_connection=None if min_conn == default_min else min_conn,
            max_connection=None if max_conn == default_max else max_conn,
            nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class,
            es_class=None if es_class == DEFAULT_ES_CLASS else es_class,
        )
    )
@app.route("/results/<station_crs>/<slug>/<travel_date>")
def results(station_crs, slug, travel_date):
    """Render the combined GWR + Eurostar journeys for one search.

    Path parameters:
        station_crs: CRS code of the chosen station; 404 if it is not in
            ``STATION_BY_CRS``.
        slug: Key into ``DESTINATIONS``; unknown slugs redirect to the index.
        travel_date: ISO date of travel; a malformed date redirects to the
            index page before any cache or scraper is touched.

    Query parameters (all optional; unrecognised values fall back to the
    defaults): ``journey_type``, ``return_date`` (required and validated for
    return journeys), ``min_connection``, ``max_connection``, ``nr_class``
    and ``es_class``.

    One "section" is built per direction shown: a single ``main`` section for
    outbound-only or inbound-only searches, or ``outbound`` + ``inbound``
    sections for a return journey. Fetch failures are not fatal: they are
    collected and passed to the template as ``error``.
    """
    departure_station_name = STATION_BY_CRS.get(station_crs)
    if departure_station_name is None:
        abort(404)

    destination = DESTINATIONS.get(slug)
    if not destination or not travel_date:
        return redirect(url_for("index"))

    # Validate the path date for every journey type. It is interpolated into
    # cache keys and handed to the scrapers below, and an invalid value would
    # otherwise only surface later as an unhandled ValueError.
    try:
        travel_day = date.fromisoformat(travel_date)
    except ValueError:
        return redirect(url_for("index"))
    travel_date = travel_day.isoformat()

    journey_type = request.args.get("journey_type", "outbound")
    if journey_type not in VALID_JOURNEY_TYPES:
        journey_type = "outbound"

    return_date = request.args.get("return_date")
    if journey_type == "return":
        # A return journey needs a well-formed return date that is not
        # earlier than the outward date.
        try:
            return_day = date.fromisoformat(return_date or "")
        except ValueError:
            return redirect(url_for("index"))
        if return_day < travel_day:
            return redirect(url_for("index"))
        return_date = return_day.isoformat()

    # Inbound-only searches have their own defaults and allowed values.
    if journey_type == "inbound":
        default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES
        valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS
    else:
        default_min, default_max = _get_defaults()
        valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS
    min_connection = _parse_connection(
        request.args.get("min_connection"), default_min, valid_min
    )
    max_connection = _parse_connection(
        request.args.get("max_connection"), default_max, valid_max
    )

    nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS)
    if nr_class not in VALID_NR_CLASSES:
        nr_class = DEFAULT_NR_CLASS
    es_class = request.args.get("es_class", DEFAULT_ES_CLASS)
    if es_class not in VALID_ES_CLASSES:
        es_class = DEFAULT_ES_CLASS

    # The visitor's User-Agent is forwarded to the Realtime Trains scraper.
    user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA)

    error_messages = []      # human-readable fetch failures, shown on the page
    from_cache_parts = []    # cache keys that were served from the cache

    def display_date(day):
        """Format a date like 'Thursday 1 May 2025'.

        Built from ``day.day`` instead of the ``%-d`` strftime directive,
        which is a platform extension and not available everywhere.
        """
        return f"{day:%A} {day.day} {day:%B %Y}"

    def cached_fetch(key, ttl, fetcher, label):
        """Return the cached value for ``key`` or fetch and cache it.

        On any fetch failure the error is recorded in ``error_messages`` and
        an empty placeholder is returned: ``{}`` for the "GWR fares" label,
        ``[]`` for everything else.
        """
        cached = get_cached(key, ttl=ttl)
        if cached is not None:
            from_cache_parts.append(key)
            return cached
        try:
            data = fetcher()
            set_cached(key, data)
            return data
        except Exception as e:
            # Deliberately broad: a failing scraper must not take the whole
            # page down; the message is surfaced to the user instead.
            error_messages.append(f"Could not fetch {label}: {e}")
            return [] if label != "GWR fares" else {}

    es_return = None
    if journey_type == "return":
        # Return journeys use a single Eurostar lookup covering both legs,
        # cached for 24 hours.
        es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}"
        es_return = cached_fetch(
            es_return_key,
            24 * 3600,
            lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date),
            "Eurostar times",
        )
        if not isinstance(es_return, dict):
            # cached_fetch hands back [] on failure; normalise to the dict
            # shape that is indexed below.
            es_return = {"outbound": [], "inbound": []}

    def build_section(section_id, direction, section_date, eurostar_services=None):
        """Build the template data for one direction of travel on one date.

        Args:
            section_id: Identifier used as the prefix of each row key and as
                the key of the per-section advance-fare dictionaries.
            direction: ``"outbound"`` or ``"inbound"``.
            section_date: ISO date string for this leg.
            eurostar_services: Pre-fetched Eurostar services for this leg;
                when ``None`` they are fetched (or read from the cache) here.

        Returns:
            A dict with the section's rows, trips, counts, connection window,
            cached advance fares and the advance-fare API URLs.
        """
        section_min_connection = min_connection
        section_max_connection = max_connection
        if journey_type == "return" and direction == "inbound":
            # The request's connection parameters were parsed against the
            # outbound limits, so the inbound leg of a return journey always
            # uses the inbound defaults.
            section_min_connection = INBOUND_MIN_CONNECTION_MINUTES
            section_max_connection = INBOUND_MAX_CONNECTION_MINUTES

        is_outbound = direction == "outbound"
        rail_direction = "to_paddington" if is_outbound else "from_paddington"
        # Field holding the Eurostar departure time that rows are keyed and
        # sorted by in this direction.
        depart_field = "depart_st_pancras" if is_outbound else "depart_destination"

        rtt_cache_key = f"rtt_{rail_direction}_{station_crs}_{section_date}"
        gwr_cache_key = f"gwr_fares_{rail_direction}_{station_crs}_{section_date}"
        advance_cache_key = f"gwr_advance_{rail_direction}_{station_crs}_{section_date}"

        # ttl=None is passed straight through to get_cached.
        # NOTE(review): presumably "never expires" - confirm in the cache module.
        if is_outbound:
            trains = cached_fetch(
                rtt_cache_key,
                None,
                lambda: rtt_scraper.fetch(section_date, user_agent, station_crs),
                "GWR trains",
            )
        else:
            trains = cached_fetch(
                rtt_cache_key,
                None,
                lambda: rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs),
                "GWR trains",
            )

        if eurostar_services is None:
            es_cache_key = f"eurostar_{direction}_{section_date}_{destination}"
            es_fetcher = (
                (lambda: eurostar_scraper.fetch(destination, section_date))
                if is_outbound
                else (lambda: eurostar_scraper.fetch(destination, section_date, direction=direction))
            )
            eurostar_services = cached_fetch(
                es_cache_key,
                24 * 3600,
                es_fetcher,
                "Eurostar times",
            )

        # Walk-on fares are cached for 30 days.
        gwr_fares = cached_fetch(
            gwr_cache_key,
            30 * 24 * 3600,
            (
                (lambda: gwr_fares_scraper.fetch(station_crs, section_date))
                if is_outbound
                else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=rail_direction))
            ),
            "GWR fares",
        )
        # Advance fares are only read from the cache here; the page obtains
        # fresh ones through the advance-fare API URLs returned below.
        cached_advance = get_cached(advance_cache_key, ttl=24 * 3600)

        # The two planners take the train lists in opposite order.
        if is_outbound:
            trips = combine_trips(
                trains,
                eurostar_services,
                section_date,
                section_min_connection,
                section_max_connection,
                gwr_fares,
            )
            unreachable = find_unreachable_morning_eurostars(
                trains,
                eurostar_services,
                section_date,
                section_min_connection,
                section_max_connection,
            )
        else:
            trips = combine_inbound_trips(
                eurostar_services,
                trains,
                section_date,
                section_min_connection,
                section_max_connection,
                gwr_fares,
            )
            unreachable = find_unreachable_inbound_eurostars(
                eurostar_services,
                trains,
                section_date,
                section_min_connection,
                section_max_connection,
            )

        if trips:
            # Only keep unreachable services that depart before the first
            # Eurostar that does appear in a trip.
            first_es_depart = min(t[depart_field] for t in trips)
            unreachable = [
                s for s in unreachable if s[depart_field] < first_es_depart
            ]
        rows = sorted(
            [{"row_type": "trip", "direction": direction, **trip} for trip in trips]
            + [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable],
            key=lambda row: row[depart_field],
        )

        # Attach Eurostar prices/seats to each row by matching on the
        # Eurostar departure time.
        es_by_key = {svc.get(depart_field): svc for svc in eurostar_services}
        for row in rows:
            key = row.get(depart_field)
            es = es_by_key.get(key, {})
            row["eurostar_price"] = es.get("price")
            row["eurostar_seats"] = es.get("seats")
            row["eurostar_plus_price"] = es.get("plus_price")
            row["eurostar_plus_seats"] = es.get("plus_seats")
            row["row_key"] = f"{section_id}:{key}"

        return {
            "id": section_id,
            "direction": direction,
            "date": section_date,
            "date_display": display_date(date.fromisoformat(section_date)),
            "rows": rows,
            "trips": trips,
            "gwr_count": len(trains),
            "eurostar_count": len(eurostar_services),
            "min_connection": section_min_connection,
            "max_connection": section_max_connection,
            "advance_fares": cached_advance,
            "advance_api_url": url_for(
                "api_advance_fares",
                station_crs=station_crs,
                travel_date=section_date,
                direction=rail_direction,
            ),
            "advance_stream_url": url_for(
                "api_advance_fares_stream",
                station_crs=station_crs,
                travel_date=section_date,
                direction=rail_direction,
            ),
        }

    if journey_type == "return":
        sections = [
            build_section("outbound", "outbound", travel_date, es_return.get("outbound", [])),
            build_section("inbound", "inbound", return_date, es_return.get("inbound", [])),
        ]
    else:
        # For single journeys the journey type is the direction itself.
        sections = [build_section("main", journey_type, travel_date)]

    # Show a note when there are trips but none of them has a Eurostar price.
    no_prices_note = None
    all_es_prices = [
        row.get("eurostar_price")
        for section in sections
        for row in section["rows"]
        if row.get("row_type") == "trip"
    ]
    if all_es_prices and all(price is None for price in all_es_prices):
        no_prices_note = "Eurostar prices not yet available — tickets may not be on sale yet."

    prev_date = (travel_day - timedelta(days=1)).isoformat()
    next_date = (travel_day + timedelta(days=1)).isoformat()
    travel_date_display = display_date(travel_day)

    eurostar_url = eurostar_scraper.search_url(
        destination, travel_date, direction=journey_type, return_date=return_date
    )
    rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date)
    rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date)

    # Values for building links: None means "same as the default, leave it
    # out of the URL".
    url_min = None if min_connection == default_min else min_connection
    url_max = None if max_connection == default_max else max_connection
    url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class
    url_es = None if es_class == DEFAULT_ES_CLASS else es_class

    # Per-row fare data and per-section advance-fare data, serialised to JSON
    # for the page's scripts.
    trip_fares = {}
    advance_fares = {}
    advance_api_urls = {}
    advance_stream_urls = {}
    for section in sections:
        advance_fares[section["id"]] = section["advance_fares"]
        advance_api_urls[section["id"]] = section["advance_api_url"]
        advance_stream_urls[section["id"]] = section["advance_stream_url"]
        for row in section["rows"]:
            circle_svcs = row.get("circle_services") or []
            circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0
            walkon = (
                {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")}
                if row.get("ticket_price") is not None
                else None
            )
            es_std = (
                {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")}
                if row.get("eurostar_price") is not None
                else None
            )
            es_plus = (
                {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")}
                if row.get("eurostar_plus_price") is not None
                else None
            )
            trip_fares[row["row_key"]] = {
                "section": section["id"],
                "advance_key": row.get("depart_bristol") or row.get("depart_paddington"),
                "walkon": walkon,
                "es_standard": es_std,
                "es_plus": es_plus,
                "circle_fare": circle_fare,
            }

    return render_template(
        "results.html",
        sections=sections,
        trips=sections[0]["trips"] if sections else [],
        result_rows=sections[0]["rows"] if sections else [],
        unreachable_morning_services=[],
        destinations=DESTINATIONS,
        destination=destination,
        travel_date=travel_date,
        return_date=return_date,
        journey_type=journey_type,
        slug=slug,
        station_crs=station_crs,
        departure_station_name=departure_station_name,
        prev_date=prev_date,
        next_date=next_date,
        travel_date_display=travel_date_display,
        gwr_count=sum(section["gwr_count"] for section in sections),
        eurostar_count=sum(section["eurostar_count"] for section in sections),
        from_cache=bool(from_cache_parts),
        error="; ".join(error_messages) if error_messages else None,
        no_prices_note=no_prices_note,
        eurostar_url=eurostar_url,
        rtt_url=rtt_url,
        rtt_station_url=rtt_station_url,
        min_connection=min_connection,
        max_connection=max_connection,
        default_min_connection=default_min,
        default_max_connection=default_max,
        url_min_connection=url_min,
        url_max_connection=url_max,
        nr_class=nr_class,
        es_class=es_class,
        url_nr_class=url_nr,
        url_es_class=url_es,
        url_journey_type=None if journey_type == "outbound" else journey_type,
        trip_fares_json=json.dumps(trip_fares),
        advance_fares_json=json.dumps(advance_fares),
        advance_api_urls_json=json.dumps(advance_api_urls),
        advance_stream_urls_json=json.dumps(advance_stream_urls),
        advance_fares_api_url=url_for("api_advance_fares", station_crs=station_crs, travel_date=travel_date),
        advance_fares_stream_url=url_for("api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date),
        valid_min_connections=sorted(valid_min),
        valid_max_connections=sorted(valid_max),
    )
@app.route("/api/advance_fares/<station_crs>/<travel_date>")
def api_advance_fares(station_crs, travel_date):
    """Return GWR advance fares for a station and date as JSON.

    Path parameters:
        station_crs: CRS code; 404 if it is not in ``STATION_BY_CRS``.
        travel_date: ISO date; 404 if it cannot be parsed.

    Query parameters:
        direction: ``to_paddington`` (default) or ``from_paddington``; any
            other value is treated as ``to_paddington``.

    A cached result younger than 24 hours is returned as-is. Otherwise the
    fares are fetched and cached. If fetching fails, the response is
    ``{"error": <message>}`` with status 500.
    """
    if station_crs not in STATION_BY_CRS:
        abort(404)
    # The date is interpolated into the cache key and passed to the scraper,
    # so reject anything that is not a real date and use its canonical form.
    try:
        travel_date = date.fromisoformat(travel_date).isoformat()
    except ValueError:
        abort(404)

    direction = request.args.get("direction", "to_paddington")
    if direction not in {"to_paddington", "from_paddington"}:
        direction = "to_paddington"

    cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"
    cached = get_cached(cache_key, ttl=24 * 3600)
    if cached is not None:
        return jsonify(cached)

    try:
        if direction == "to_paddington":
            fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date)
        else:
            fares = gwr_fares_scraper.fetch_advance(
                station_crs, travel_date, direction=direction
            )
        set_cached(cache_key, fares)
        return jsonify(fares)
    except Exception as e:
        # Boundary handler: report the failure to the caller as JSON rather
        # than letting the request fail with an HTML error page.
        return jsonify({"error": str(e)}), 500
@app.route("/api/advance_fares_stream/<station_crs>/<travel_date>")
def api_advance_fares_stream(station_crs, travel_date):
    """Stream GWR advance fares as server-sent events.

    Path parameters:
        station_crs: CRS code; 404 if it is not in ``STATION_BY_CRS``.
        travel_date: ISO date; 404 if it cannot be parsed.

    Query parameters:
        direction: ``to_paddington`` (default) or ``from_paddington``; any
            other value is treated as ``to_paddington``.

    Each event is a ``data: <json>`` line followed by a blank line. The JSON
    object has a ``type`` of ``fares`` (with a ``fares`` mapping), ``error``
    (with a ``message``) or ``done``. A cached result younger than 24 hours
    is sent as a single ``fares`` event. Otherwise one ``fares`` event is
    sent per page produced by the scraper and the merged result is cached
    once the stream completes without error.
    """
    if station_crs not in STATION_BY_CRS:
        abort(404)
    # The date is interpolated into the cache key and passed to the scraper,
    # so reject anything that is not a real date and use its canonical form.
    try:
        travel_date = date.fromisoformat(travel_date).isoformat()
    except ValueError:
        abort(404)

    direction = request.args.get("direction", "to_paddington")
    if direction not in {"to_paddington", "from_paddington"}:
        direction = "to_paddington"
    cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"

    def sse(payload):
        """Serialise one event in the text/event-stream wire format."""
        return f"data: {json.dumps(payload)}\n\n"

    def generate():
        cached = get_cached(cache_key, ttl=24 * 3600)
        if cached is not None:
            yield sse({"type": "fares", "fares": cached})
            yield sse({"type": "done"})
            return

        # Merged view of every page seen so far; this is what gets cached.
        accumulated: dict = {}
        try:
            if direction == "to_paddington":
                stream = gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date)
            else:
                stream = gwr_fares_scraper.fetch_advance_streaming(
                    station_crs, travel_date, direction=direction
                )
            for page_fares in stream:
                for dep_time, fare_data in page_fares.items():
                    merged = accumulated.setdefault(
                        dep_time, {"advance_std": None, "advance_1st": None}
                    )
                    # Only truthy fares overwrite what an earlier page gave.
                    if fare_data.get("advance_std"):
                        merged["advance_std"] = fare_data["advance_std"]
                    if fare_data.get("advance_1st"):
                        merged["advance_1st"] = fare_data["advance_1st"]
                yield sse({"type": "fares", "fares": page_fares})
        except Exception as e:
            # Report the failure to the client and skip caching, so a partial
            # result is never stored.
            yield sse({"type": "error", "message": str(e)})
            return

        set_cached(cache_key, accumulated)
        yield sse({"type": "done"})

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        # Ask intermediaries not to cache or buffer the event stream.
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
if __name__ == "__main__":
    # Development entry point. Flask's debug mode enables the interactive
    # Werkzeug debugger, which lets anyone who can reach the server run
    # arbitrary Python. It is therefore opt-in (set FLASK_DEBUG=1) and, when
    # enabled, the server listens on loopback only. Without debug the server
    # stays reachable on all interfaces, as before.
    _debug = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=_debug, host="127.0.0.1" if _debug else "0.0.0.0")