Add return and inbound journey support

This commit is contained in:
Edward Betts 2026-05-21 08:46:35 +01:00
parent 6ba71447ef
commit 9691632f65
12 changed files with 1687 additions and 486 deletions

482
app.py
View file

@ -12,7 +12,14 @@ from cache import get_cached, set_cached
import scraper.eurostar as eurostar_scraper
import scraper.gwr_fares as gwr_fares_scraper
import scraper.realtime_trains as rtt_scraper
from trip_planner import combine_trips, find_unreachable_morning_eurostars
from trip_planner import (
INBOUND_MAX_CONNECTION_MINUTES,
INBOUND_MIN_CONNECTION_MINUTES,
combine_inbound_trips,
combine_trips,
find_unreachable_inbound_eurostars,
find_unreachable_morning_eurostars,
)
RTT_PADDINGTON_URL = (
"https://www.realtimetrains.co.uk/search/detailed/"
@ -76,11 +83,15 @@ def index():
default_max_connection=default_max,
valid_min_connections=sorted(VALID_MIN_CONNECTIONS),
valid_max_connections=sorted(VALID_MAX_CONNECTIONS),
default_return_date=(date.today() + timedelta(days=7)).isoformat(),
)
# Connection-time choices accepted for the min_connection / max_connection
# query parameters on outbound and return searches (values are presumably
# minutes, matching the *_CONNECTION_MINUTES defaults from trip_planner).
VALID_MIN_CONNECTIONS = {45, 50, 60, 70, 80, 90, 100, 110, 120}
VALID_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180}
# Inbound-only searches validate against their own sets; the minimum set
# additionally allows shorter connections (20, 30, 40).
VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50, 60, 70, 80, 90, 100, 110, 120}
VALID_INBOUND_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180}
# Accepted values of the journey_type query parameter; any other value is
# replaced with "outbound" by the request handlers.
VALID_JOURNEY_TYPES = {"outbound", "inbound", "return"}
# Accepted values of the nr_class and es_class query parameters.
VALID_NR_CLASSES = {'walkon', 'advance_std', 'advance_1st'}
VALID_ES_CLASSES = {'standard', 'plus'}
# Value used for nr_class when the query parameter is absent.
DEFAULT_NR_CLASS = 'walkon'
@ -106,15 +117,24 @@ def _parse_connection(raw, default, valid_set):
def search():
slug = request.args.get("destination", "")
travel_date = request.args.get("travel_date", "")
return_date = request.args.get("return_date", "")
journey_type = request.args.get("journey_type", "outbound")
if journey_type not in VALID_JOURNEY_TYPES:
journey_type = "outbound"
station_crs = request.args.get("station_crs", "BRI")
if station_crs not in STATION_BY_CRS:
station_crs = "BRI"
default_min, default_max = _get_defaults()
if journey_type == "inbound":
default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES
valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS
else:
default_min, default_max = _get_defaults()
valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS
min_conn = _parse_connection(
request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS
request.args.get("min_connection"), default_min, valid_min
)
max_conn = _parse_connection(
request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS
request.args.get("max_connection"), default_max, valid_max
)
nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS)
if nr_class not in VALID_NR_CLASSES:
@ -122,13 +142,21 @@ def search():
es_class = request.args.get("es_class", DEFAULT_ES_CLASS)
if es_class not in VALID_ES_CLASSES:
es_class = DEFAULT_ES_CLASS
if slug in DESTINATIONS and travel_date:
if journey_type == "return":
try:
if return_date and date.fromisoformat(return_date) < date.fromisoformat(travel_date):
return_date = ""
except ValueError:
return_date = ""
if slug in DESTINATIONS and travel_date and (journey_type != "return" or return_date):
return redirect(
url_for(
"results",
station_crs=station_crs,
slug=slug,
travel_date=travel_date,
journey_type=None if journey_type == "outbound" else journey_type,
return_date=return_date if journey_type == "return" else None,
min_connection=None if min_conn == default_min else min_conn,
max_connection=None if max_conn == default_max else max_conn,
nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class,
@ -147,12 +175,28 @@ def results(station_crs, slug, travel_date):
if not destination or not travel_date:
return redirect(url_for("index"))
default_min, default_max = _get_defaults()
journey_type = request.args.get("journey_type", "outbound")
if journey_type not in VALID_JOURNEY_TYPES:
journey_type = "outbound"
return_date = request.args.get("return_date")
if journey_type == "return":
try:
if not return_date or date.fromisoformat(return_date) < date.fromisoformat(travel_date):
return redirect(url_for("index"))
except ValueError:
return redirect(url_for("index"))
if journey_type == "inbound":
default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES
valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS
else:
default_min, default_max = _get_defaults()
valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS
min_connection = _parse_connection(
request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS
request.args.get("min_connection"), default_min, valid_min
)
max_connection = _parse_connection(
request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS
request.args.get("max_connection"), default_max, valid_max
)
nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS)
if nr_class not in VALID_NR_CLASSES:
@ -161,150 +205,207 @@ def results(station_crs, slug, travel_date):
if es_class not in VALID_ES_CLASSES:
es_class = DEFAULT_ES_CLASS
# Redirect to clean URL when all params are at their defaults
_clean_url_params = ["min_connection", "max_connection", "nr_class", "es_class"]
if any(k in request.args for k in _clean_url_params) and (
min_connection == default_min
and max_connection == default_max
and nr_class == DEFAULT_NR_CLASS
and es_class == DEFAULT_ES_CLASS
):
return redirect(
url_for("results", station_crs=station_crs, slug=slug, travel_date=travel_date)
)
user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA)
error_messages = []
from_cache_parts = []
rtt_cache_key = f"rtt_{station_crs}_{travel_date}"
es_cache_key = f"eurostar_{travel_date}_{destination}"
gwr_fares_cache_key = f"gwr_fares_{station_crs}_{travel_date}"
gwr_advance_cache_key = f"gwr_advance_{station_crs}_{travel_date}"
cached_rtt = get_cached(rtt_cache_key)
cached_es = get_cached(es_cache_key, ttl=24 * 3600)
cached_gwr_fares = get_cached(gwr_fares_cache_key, ttl=30 * 24 * 3600)
cached_advance_fares = get_cached(gwr_advance_cache_key, ttl=24 * 3600)
from_cache = bool(cached_rtt and cached_es)
error = None
if cached_rtt:
gwr_trains = cached_rtt
else:
def cached_fetch(key, ttl, fetcher, label):
cached = get_cached(key, ttl=ttl)
if cached is not None:
from_cache_parts.append(key)
return cached
try:
gwr_trains = rtt_scraper.fetch(travel_date, user_agent, station_crs)
set_cached(rtt_cache_key, gwr_trains)
data = fetcher()
set_cached(key, data)
return data
except Exception as e:
gwr_trains = []
error = f"Could not fetch GWR trains: {e}"
error_messages.append(f"Could not fetch {label}: {e}")
return [] if label != "GWR fares" else {}
if cached_es:
eurostar_services = cached_es
else:
try:
eurostar_services = eurostar_scraper.fetch(destination, travel_date)
set_cached(es_cache_key, eurostar_services)
except Exception as e:
eurostar_services = []
msg = f"Could not fetch Eurostar times: {e}"
error = f"{error}; {msg}" if error else msg
es_return = None
if journey_type == "return":
es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}"
es_return = cached_fetch(
es_return_key,
24 * 3600,
lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date),
"Eurostar times",
)
if not isinstance(es_return, dict):
es_return = {"outbound": [], "inbound": []}
if cached_gwr_fares:
gwr_fares = cached_gwr_fares
else:
try:
gwr_fares = gwr_fares_scraper.fetch(station_crs, travel_date)
set_cached(gwr_fares_cache_key, gwr_fares)
except Exception as e:
gwr_fares = {}
msg = f"Could not fetch GWR fares: {e}"
error = f"{error}; {msg}" if error else msg
def build_section(section_id, direction, section_date, eurostar_services=None):
section_min_connection = min_connection
section_max_connection = max_connection
if journey_type == "return" and direction == "inbound":
section_min_connection = INBOUND_MIN_CONNECTION_MINUTES
section_max_connection = INBOUND_MAX_CONNECTION_MINUTES
rtt_direction = "to_paddington" if direction == "outbound" else "from_paddington"
rtt_cache_key = f"rtt_{rtt_direction}_{station_crs}_{section_date}"
gwr_cache_key = f"gwr_fares_{rtt_direction}_{station_crs}_{section_date}"
advance_cache_key = f"gwr_advance_{rtt_direction}_{station_crs}_{section_date}"
eurostar_trains = eurostar_services
eurostar_prices = {
s["depart_st_pancras"]: {
"price": s.get("price"),
"seats": s.get("seats"),
"plus_price": s.get("plus_price"),
"plus_seats": s.get("plus_seats"),
if direction == "outbound":
trains = cached_fetch(
rtt_cache_key,
None,
lambda: rtt_scraper.fetch(section_date, user_agent, station_crs),
"GWR trains",
)
else:
trains = cached_fetch(
rtt_cache_key,
None,
lambda: rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs),
"GWR trains",
)
if eurostar_services is None:
es_cache_key = f"eurostar_{direction}_{section_date}_{destination}"
es_fetcher = (
(lambda: eurostar_scraper.fetch(destination, section_date))
if direction == "outbound"
else (lambda: eurostar_scraper.fetch(destination, section_date, direction=direction))
)
eurostar_services = cached_fetch(
es_cache_key,
24 * 3600,
es_fetcher,
"Eurostar times",
)
fare_direction = "to_paddington" if direction == "outbound" else "from_paddington"
gwr_fares = cached_fetch(
gwr_cache_key,
30 * 24 * 3600,
(
(lambda: gwr_fares_scraper.fetch(station_crs, section_date))
if fare_direction == "to_paddington"
else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction))
),
"GWR fares",
)
cached_advance = get_cached(advance_cache_key, ttl=24 * 3600)
if direction == "outbound":
trips = combine_trips(
trains,
eurostar_services,
section_date,
section_min_connection,
section_max_connection,
gwr_fares,
)
unreachable = find_unreachable_morning_eurostars(
trains,
eurostar_services,
section_date,
section_min_connection,
section_max_connection,
)
if trips:
first_es_depart = min(t["depart_st_pancras"] for t in trips)
unreachable = [
s for s in unreachable if s["depart_st_pancras"] < first_es_depart
]
rows = sorted(
[{"row_type": "trip", "direction": direction, **trip} for trip in trips]
+ [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable],
key=lambda row: row["depart_st_pancras"],
)
else:
trips = combine_inbound_trips(
eurostar_services,
trains,
section_date,
section_min_connection,
section_max_connection,
gwr_fares,
)
unreachable = find_unreachable_inbound_eurostars(
eurostar_services,
trains,
section_date,
section_min_connection,
section_max_connection,
)
if trips:
first_es_depart = min(t["depart_destination"] for t in trips)
unreachable = [
s for s in unreachable if s["depart_destination"] < first_es_depart
]
rows = sorted(
[{"row_type": "trip", "direction": direction, **trip} for trip in trips]
+ [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable],
key=lambda row: row["depart_destination"],
)
es_by_key = {
(svc.get("depart_st_pancras") if direction == "outbound" else svc.get("depart_destination")): svc
for svc in eurostar_services
}
for s in eurostar_services
}
for row in rows:
key = row.get("depart_st_pancras") if direction == "outbound" else row.get("depart_destination")
es = es_by_key.get(key, {})
row["eurostar_price"] = es.get("price")
row["eurostar_seats"] = es.get("seats")
row["eurostar_plus_price"] = es.get("plus_price")
row["eurostar_plus_seats"] = es.get("plus_seats")
row["row_key"] = f"{section_id}:{key}"
trips = combine_trips(
gwr_trains,
eurostar_trains,
travel_date,
min_connection,
max_connection,
gwr_fares,
)
dt = date.fromisoformat(section_date)
return {
"id": section_id,
"direction": direction,
"date": section_date,
"date_display": dt.strftime("%A %-d %B %Y"),
"rows": rows,
"trips": trips,
"gwr_count": len(trains),
"eurostar_count": len(eurostar_services),
"min_connection": section_min_connection,
"max_connection": section_max_connection,
"advance_fares": cached_advance,
"advance_api_url": url_for(
"api_advance_fares",
station_crs=station_crs,
travel_date=section_date,
direction=fare_direction,
),
"advance_stream_url": url_for(
"api_advance_fares_stream",
station_crs=station_crs,
travel_date=section_date,
direction=fare_direction,
),
}
# Annotate each trip with Eurostar prices and total cost (walk-on + standard)
for trip in trips:
es = eurostar_prices.get(trip["depart_st_pancras"], {})
es_price = es.get("price")
trip["eurostar_price"] = es_price
trip["eurostar_seats"] = es.get("seats")
trip["eurostar_plus_price"] = es.get("plus_price")
trip["eurostar_plus_seats"] = es.get("plus_seats")
gwr_p = trip.get("ticket_price")
circle_svcs = trip.get("circle_services")
circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0
trip["total_price"] = (
gwr_p + es_price + circle_fare
if (gwr_p is not None and es_price is not None)
else None
)
# If the API returned journeys but every price is None, tickets aren't on sale yet
no_prices_note = None
if eurostar_prices and all(
v.get("price") is None for v in eurostar_prices.values()
):
no_prices_note = (
"Eurostar prices not yet available — tickets may not be on sale yet."
)
unreachable_morning_services = find_unreachable_morning_eurostars(
gwr_trains,
eurostar_trains,
travel_date,
min_connection,
max_connection,
)
for svc in unreachable_morning_services:
es = eurostar_prices.get(svc["depart_st_pancras"], {})
svc["eurostar_price"] = es.get("price")
svc["eurostar_seats"] = es.get("seats")
svc["eurostar_plus_price"] = es.get("plus_price")
svc["eurostar_plus_seats"] = es.get("plus_seats")
# Only keep unreachable services that depart before the first reachable Eurostar.
# Services after the first reachable one are omitted (they aren't "Too early").
if trips:
first_es_depart = min(t["depart_st_pancras"] for t in trips)
unreachable_morning_services = [
s
for s in unreachable_morning_services
if s["depart_st_pancras"] < first_es_depart
if journey_type == "return":
sections = [
build_section("outbound", "outbound", travel_date, es_return.get("outbound", [])),
build_section("inbound", "inbound", return_date, es_return.get("inbound", [])),
]
else:
sections = [build_section("main", journey_type, travel_date)]
result_rows = sorted(
[{"row_type": "trip", **trip} for trip in trips]
+ [
{"row_type": "unreachable", **service}
for service in unreachable_morning_services
],
key=lambda row: row["depart_st_pancras"],
)
no_prices_note = None
all_es_prices = [
row.get("eurostar_price")
for section in sections
for row in section["rows"]
if row.get("row_type") == "trip"
]
if all_es_prices and all(price is None for price in all_es_prices):
no_prices_note = "Eurostar prices not yet available — tickets may not be on sale yet."
dt = date.fromisoformat(travel_date)
prev_date = (dt - timedelta(days=1)).isoformat()
next_date = (dt + timedelta(days=1)).isoformat()
travel_date_display = dt.strftime("%A %-d %B %Y")
eurostar_url = eurostar_scraper.search_url(destination, travel_date)
eurostar_url = eurostar_scraper.search_url(
destination, travel_date, direction=journey_type, return_date=return_date
)
rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date)
rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date)
@ -313,55 +414,62 @@ def results(station_crs, slug, travel_date):
url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class
url_es = None if es_class == DEFAULT_ES_CLASS else es_class
# Build per-row fare data for JS consumption
trip_fares = {}
for row in result_rows:
stp = row.get("depart_st_pancras")
if not stp:
continue
circle_svcs = row.get("circle_services") or []
circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0
walkon = (
{"price": row["ticket_price"], "ticket": row.get("ticket_name", "")}
if row.get("ticket_price") is not None
else None
)
es_std = (
{"price": row["eurostar_price"], "seats": row.get("eurostar_seats")}
if row.get("eurostar_price") is not None
else None
)
es_plus = (
{"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")}
if row.get("eurostar_plus_price") is not None
else None
)
trip_fares[stp] = {
"depart_bristol": row.get("depart_bristol"),
"walkon": walkon,
"es_standard": es_std,
"es_plus": es_plus,
"circle_fare": circle_fare,
}
advance_fares = {}
advance_api_urls = {}
advance_stream_urls = {}
for section in sections:
advance_fares[section["id"]] = section["advance_fares"]
advance_api_urls[section["id"]] = section["advance_api_url"]
advance_stream_urls[section["id"]] = section["advance_stream_url"]
for row in section["rows"]:
circle_svcs = row.get("circle_services") or []
circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0
walkon = (
{"price": row["ticket_price"], "ticket": row.get("ticket_name", "")}
if row.get("ticket_price") is not None
else None
)
es_std = (
{"price": row["eurostar_price"], "seats": row.get("eurostar_seats")}
if row.get("eurostar_price") is not None
else None
)
es_plus = (
{"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")}
if row.get("eurostar_plus_price") is not None
else None
)
trip_fares[row["row_key"]] = {
"section": section["id"],
"advance_key": row.get("depart_bristol") or row.get("depart_paddington"),
"walkon": walkon,
"es_standard": es_std,
"es_plus": es_plus,
"circle_fare": circle_fare,
}
return render_template(
"results.html",
trips=trips,
result_rows=result_rows,
unreachable_morning_services=unreachable_morning_services,
sections=sections,
trips=sections[0]["trips"] if sections else [],
result_rows=sections[0]["rows"] if sections else [],
unreachable_morning_services=[],
destinations=DESTINATIONS,
destination=destination,
travel_date=travel_date,
return_date=return_date,
journey_type=journey_type,
slug=slug,
station_crs=station_crs,
departure_station_name=departure_station_name,
prev_date=prev_date,
next_date=next_date,
travel_date_display=travel_date_display,
gwr_count=len(gwr_trains),
eurostar_count=len(eurostar_trains),
from_cache=from_cache,
error=error,
gwr_count=sum(section["gwr_count"] for section in sections),
eurostar_count=sum(section["eurostar_count"] for section in sections),
from_cache=bool(from_cache_parts),
error="; ".join(error_messages) if error_messages else None,
no_prices_note=no_prices_note,
eurostar_url=eurostar_url,
rtt_url=rtt_url,
@ -376,12 +484,15 @@ def results(station_crs, slug, travel_date):
es_class=es_class,
url_nr_class=url_nr,
url_es_class=url_es,
url_journey_type=None if journey_type == "outbound" else journey_type,
trip_fares_json=json.dumps(trip_fares),
advance_fares_json=json.dumps(cached_advance_fares),
advance_fares_json=json.dumps(advance_fares),
advance_api_urls_json=json.dumps(advance_api_urls),
advance_stream_urls_json=json.dumps(advance_stream_urls),
advance_fares_api_url=url_for("api_advance_fares", station_crs=station_crs, travel_date=travel_date),
advance_fares_stream_url=url_for("api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date),
valid_min_connections=sorted(VALID_MIN_CONNECTIONS),
valid_max_connections=sorted(VALID_MAX_CONNECTIONS),
valid_min_connections=sorted(valid_min),
valid_max_connections=sorted(valid_max),
)
@ -389,12 +500,19 @@ def results(station_crs, slug, travel_date):
def api_advance_fares(station_crs, travel_date):
if station_crs not in STATION_BY_CRS:
abort(404)
cache_key = f"gwr_advance_{station_crs}_{travel_date}"
direction = request.args.get("direction", "to_paddington")
if direction not in {"to_paddington", "from_paddington"}:
direction = "to_paddington"
cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"
cached = get_cached(cache_key, ttl=24 * 3600)
if cached is not None:
return jsonify(cached)
try:
fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date)
fares = (
gwr_fares_scraper.fetch_advance(station_crs, travel_date)
if direction == "to_paddington"
else gwr_fares_scraper.fetch_advance(station_crs, travel_date, direction=direction)
)
set_cached(cache_key, fares)
return jsonify(fares)
except Exception as e:
@ -405,7 +523,10 @@ def api_advance_fares(station_crs, travel_date):
def api_advance_fares_stream(station_crs, travel_date):
if station_crs not in STATION_BY_CRS:
abort(404)
cache_key = f"gwr_advance_{station_crs}_{travel_date}"
direction = request.args.get("direction", "to_paddington")
if direction not in {"to_paddington", "from_paddington"}:
direction = "to_paddington"
cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"
def generate():
cached = get_cached(cache_key, ttl=24 * 3600)
@ -416,7 +537,14 @@ def api_advance_fares_stream(station_crs, travel_date):
accumulated: dict = {}
try:
for page_fares in gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date):
stream = (
gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date)
if direction == "to_paddington"
else gwr_fares_scraper.fetch_advance_streaming(
station_crs, travel_date, direction=direction
)
)
for page_fares in stream:
for dep_time, fare_data in page_fares.items():
if dep_time not in accumulated:
accumulated[dep_time] = {"advance_std": None, "advance_1st": None}