diff --git a/.codex b/.codex new file mode 100644 index 0000000..e69de29 diff --git a/app.py b/app.py index debc5a1..1527c00 100644 --- a/app.py +++ b/app.py @@ -2,16 +2,38 @@ Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains. """ -from flask import Flask, render_template, redirect, url_for, request, abort, jsonify +from flask import ( + Flask, + render_template, + redirect, + url_for, + request, + abort, + jsonify, + Response, + stream_with_context, +) +from flask.typing import ResponseReturnValue from datetime import date, timedelta from pathlib import Path +from typing import Any, Callable, Generator +import json import os from cache import get_cached, set_cached import scraper.eurostar as eurostar_scraper import scraper.gwr_fares as gwr_fares_scraper import scraper.realtime_trains as rtt_scraper -from trip_planner import combine_trips, find_unreachable_morning_eurostars +from trip_planner import ( + INBOUND_MAX_CONNECTION_MINUTES, + INBOUND_MIN_CONNECTION_MINUTES, + combine_inbound_trips, + combine_trips, + find_unreachable_inbound_eurostars, + find_unreachable_morning_eurostars, +) +import cache +import circle_line RTT_PADDINGTON_URL = ( "https://www.realtimetrains.co.uk/search/detailed/" @@ -31,14 +53,11 @@ _local = os.path.join(os.path.dirname(__file__), "config", "local.py") if os.path.exists(_local): app.config.from_pyfile(_local) -import cache -import circle_line - -cache.CACHE_DIR = app.config["CACHE_DIR"] -circle_line._TXC_XML = app.config["CIRCLE_LINE_XML"] +cache.CACHE_DIR = app.config["CACHE_DIR"] # type: ignore[attr-defined] +circle_line._TXC_XML = app.config["CIRCLE_LINE_XML"] # type: ignore[attr-defined] -def _load_stations(): +def _load_stations() -> list[tuple[str, str]]: tsv = Path(__file__).parent / "data" / "direct_to_paddington.tsv" stations = [] for line in tsv.read_text().splitlines(): @@ -52,81 +71,423 @@ def _load_stations(): STATIONS = _load_stations() STATION_BY_CRS = {crs: name for name, crs in 
STATIONS} +DESTINATION_OPTIONS = [ + {"slug": "paris", "city": "Paris", "destination": "Paris Gare du Nord"}, + {"slug": "brussels", "city": "Brussels", "destination": "Brussels Midi"}, + {"slug": "lille", "city": "Lille", "destination": "Lille Europe"}, + { + "slug": "amsterdam", + "city": "Amsterdam", + "destination": "Amsterdam Centraal", + }, + { + "slug": "rotterdam", + "city": "Rotterdam", + "destination": "Rotterdam Centraal", + }, + {"slug": "cologne", "city": "Cologne", "destination": "Cologne Hbf"}, +] + DESTINATIONS = { - "paris": "Paris Gare du Nord", - "brussels": "Brussels Midi", - "lille": "Lille Europe", - "amsterdam": "Amsterdam Centraal", - "rotterdam": "Rotterdam Centraal", - "cologne": "Cologne Hbf", + option["slug"]: option["destination"] for option in DESTINATION_OPTIONS } @app.route("/") -def index(): +def index() -> ResponseReturnValue: today = date.today().isoformat() default_min, default_max = _get_defaults() return render_template( "index.html", - destinations=DESTINATIONS, + destination_options=DESTINATION_OPTIONS, today=today, stations=STATIONS, default_min_connection=default_min, default_max_connection=default_max, valid_min_connections=sorted(VALID_MIN_CONNECTIONS), valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + default_return_date=(date.today() + timedelta(days=7)).isoformat(), ) VALID_MIN_CONNECTIONS = {45, 50, 60, 70, 80, 90, 100, 110, 120} VALID_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180} +VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50, 60, 70, 80, 90, 100, 110, 120} +VALID_INBOUND_RETURN_MIN_CONNECTIONS = {30, 40, 50, 60} +VALID_INBOUND_MAX_CONNECTIONS = { + 60, + 70, + 80, + 90, + 100, + 110, + 120, + 130, + 140, + 150, + 160, + 170, + 180, +} +VALID_JOURNEY_TYPES = {"outbound", "inbound", "return"} +VALID_NR_CLASSES = {"walkon", "advance_std", "advance_1st"} +VALID_ES_CLASSES = {"standard", "plus"} +DEFAULT_NR_CLASS = "walkon" +DEFAULT_ES_CLASS = "standard" +NR_TIMETABLE_PERIODS 
= [ + (date(2026, 5, 17), date(2026, 12, 12), "2026-05-17_2026-12-12"), +] -def _get_defaults(): +def _weekday_for(section_date: str) -> str: + return date.fromisoformat(section_date).strftime("%a").lower() + + +def _month_for(section_date: str) -> str: + return date.fromisoformat(section_date).strftime("%Y-%m") + + +def _nr_timetable_period_key(section_date: str) -> str: + dt = date.fromisoformat(section_date) + for start, end, key in NR_TIMETABLE_PERIODS: + if start <= dt <= end: + return key + return dt.strftime("%Y-%m") + + +def _nr_exact_cache_key(direction: str, station_crs: str, section_date: str) -> str: + return f"rtt_{direction}_{station_crs}_{section_date}" + + +def _nr_weekday_cache_key(direction: str, station_crs: str, section_date: str) -> str: + return ( + f"weekday_rtt_{direction}_{station_crs}_" + f"{_nr_timetable_period_key(section_date)}_{_weekday_for(section_date)}" + ) + + +def _walkon_weekday_cache_key( + direction: str, station_crs: str, section_date: str +) -> str: + return ( + f"weekday_gwr_fares_{direction}_{station_crs}_" + f"{_nr_timetable_period_key(section_date)}_{_weekday_for(section_date)}" + ) + + +def _eurostar_exact_cache_key( + direction: str, section_date: str, destination: str +) -> str: + return f"eurostar_{direction}_{section_date}_{destination}" + + +def _eurostar_weekday_cache_key( + direction: str, section_date: str, destination: str +) -> str: + return ( + f"weekday_eurostar_{direction}_{destination}_" + f"{_month_for(section_date)}_{_weekday_for(section_date)}" + ) + + +def _eurostar_return_exact_cache_key( + travel_date: str, return_date: str, destination: str +) -> str: + return f"eurostar_return_{travel_date}_{return_date}_{destination}" + + +def _eurostar_return_weekday_cache_key( + travel_date: str, return_date: str, destination: str +) -> str: + return ( + f"weekday_eurostar_return_{destination}_" + f"{_month_for(travel_date)}_{_weekday_for(travel_date)}_" + f"{_month_for(return_date)}_{_weekday_for(return_date)}" 
+ ) + + +def _strip_nr_timetable(trains: list[dict[str, Any]]) -> list[dict[str, Any]]: + keys = { + "depart_bristol", + "arrive_paddington", + "depart_paddington", + "arrive_destination", + "arrive_platform", + "headcode", + } + return [{k: train[k] for k in keys if k in train} for train in trains] + + +def _strip_eurostar_timetable(services: list[dict[str, Any]]) -> list[dict[str, Any]]: + keys = { + "depart_st_pancras", + "arrive_destination", + "depart_destination", + "arrive_st_pancras", + "destination", + "train_number", + } + return [{k: service[k] for k in keys if k in service} for service in services] + + +def _strip_eurostar_return_timetable(es_return: Any) -> dict[str, list[dict[str, Any]]]: + if not isinstance(es_return, dict): + return {"outbound": [], "inbound": []} + return { + "outbound": _strip_eurostar_timetable(es_return.get("outbound", [])), + "inbound": _strip_eurostar_timetable(es_return.get("inbound", [])), + } + + +def _timetable_signature(data: Any) -> str: + return json.dumps(data, sort_keys=True, separators=(",", ":")) + + +def _eurostar_prices_by_row( + section_id: str, direction: str, services: list[dict[str, Any]] +) -> dict[str, dict[str, Any]]: + prices = {} + for service in services: + key = ( + service.get("depart_st_pancras") + if direction == "outbound" + else service.get("depart_destination") + ) + if not key: + continue + prices[f"{section_id}:{key}"] = { + "es_standard": ( + {"price": service.get("price"), "seats": service.get("seats")} + if service.get("price") is not None + else None + ), + "es_standard_status": _eurostar_price_status( + service.get("price"), service.get("seats") + ), + "es_plus": ( + {"price": service.get("plus_price"), "seats": service.get("plus_seats")} + if service.get("plus_price") is not None + else None + ), + "es_plus_status": _eurostar_price_status( + service.get("plus_price"), service.get("plus_seats") + ), + } + return prices + + +def _eurostar_price_status(price: Any, seats: Any) -> str | None: + 
if price is not None: + return None + if seats == 0: + return "sold_out" + return "price_not_returned" + + +def _get_defaults() -> tuple[int, int]: return ( app.config["DEFAULT_MIN_CONNECTION"], app.config["DEFAULT_MAX_CONNECTION"], ) -def _parse_connection(raw, default, valid_set): +def _parse_connection(raw: str | None, default: int, valid_set: set[int]) -> int: try: - val = int(raw) + val = int(raw or "") except (TypeError, ValueError): return default return val if val in valid_set else default +def _section_trip_fares(section: dict[str, Any]) -> dict[str, Any]: + trip_fares = {} + for row in section["rows"]: + circle_svcs = row.get("circle_services") or [] + circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 + walkon = ( + {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} + if row.get("ticket_price") is not None + else None + ) + es_std = ( + {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} + if row.get("eurostar_price") is not None + else None + ) + es_std_status = _eurostar_price_status( + row.get("eurostar_price"), row.get("eurostar_seats") + ) + es_plus = ( + { + "price": row["eurostar_plus_price"], + "seats": row.get("eurostar_plus_seats"), + } + if row.get("eurostar_plus_price") is not None + else None + ) + es_plus_status = _eurostar_price_status( + row.get("eurostar_plus_price"), row.get("eurostar_plus_seats") + ) + trip_fares[row["row_key"]] = { + "section": section["id"], + "eurostar_key": row.get("eurostar_key"), + "advance_key": row.get("depart_bristol") or row.get("depart_paddington"), + "walkon": walkon, + "es_standard": es_std, + "es_standard_status": es_std_status, + "es_plus": es_plus, + "es_plus_status": es_plus_status, + "circle_fare": circle_fare, + } + return trip_fares + + +def _build_summary_html( + sections: list[dict[str, Any]], + journey_type: str, + from_cache_parts: list[str], + provisional_timetable: bool, +) -> str: + def pl(n: int, word: str) -> str: + return f"{n} {word}{'s' if n != 
1 else ''}" + + if journey_type == "return": + parts = [] + for s in sections: + label = "Outbound" if s["direction"] == "outbound" else "Return" + parts.append( + f"{label}: {pl(s['gwr_count'], 'National Rail service')}, {pl(s['eurostar_count'], 'Eurostar service')}" + ) + html = "  ·  ".join(parts) + else: + s = sections[0] + html = f"{pl(s['gwr_count'], 'National Rail service')}  ·  {pl(s['eurostar_count'], 'Eurostar service')}" + + if from_cache_parts: + html += '  ·  (cached)' + if provisional_timetable: + html += '  ·  checking exact timetable' + return html + + +def _results_url( + station_crs: str, + slug: str, + travel_date: str, + journey_type: str = "outbound", + return_date: str | None = None, + **params: Any, +) -> str: + params = {k: v for k, v in params.items() if v is not None} + if journey_type == "return": + return url_for( + "return_results", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + return_date=return_date, + **params, + ) + if journey_type == "inbound": + params["journey_type"] = "inbound" + return url_for( + "results", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + **params, + ) + + @app.route("/search") -def search(): +def search() -> ResponseReturnValue: slug = request.args.get("destination", "") travel_date = request.args.get("travel_date", "") + return_date = request.args.get("return_date", "") + journey_type = request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" station_crs = request.args.get("station_crs", "BRI") if station_crs not in STATION_BY_CRS: station_crs = "BRI" - default_min, default_max = _get_defaults() + if journey_type == "inbound": + default_min, default_max = ( + INBOUND_MIN_CONNECTION_MINUTES, + INBOUND_MAX_CONNECTION_MINUTES, + ) + valid_min, valid_max = ( + VALID_INBOUND_MIN_CONNECTIONS, + VALID_INBOUND_MAX_CONNECTIONS, + ) + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = 
VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_conn = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_conn = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) - if slug in DESTINATIONS and travel_date: + nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) + if nr_class not in VALID_NR_CLASSES: + nr_class = DEFAULT_NR_CLASS + es_class = request.args.get("es_class", DEFAULT_ES_CLASS) + if es_class not in VALID_ES_CLASSES: + es_class = DEFAULT_ES_CLASS + if journey_type == "return": + try: + if return_date and date.fromisoformat(return_date) < date.fromisoformat( + travel_date + ): + return_date = "" + except ValueError: + return_date = "" + if ( + slug in DESTINATIONS + and travel_date + and (journey_type != "return" or return_date) + ): return redirect( - url_for( - "results", + _results_url( station_crs=station_crs, slug=slug, travel_date=travel_date, + journey_type=journey_type, + return_date=return_date if journey_type == "return" else None, min_connection=None if min_conn == default_min else min_conn, max_connection=None if max_conn == default_max else max_conn, + nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class, + es_class=None if es_class == DEFAULT_ES_CLASS else es_class, ) ) return redirect(url_for("index")) @app.route("/results///") -def results(station_crs, slug, travel_date): +def results(station_crs: str, slug: str, travel_date: str) -> ResponseReturnValue: + return _results( + station_crs, + slug, + travel_date, + request.args.get("journey_type", "outbound"), + request.args.get("return_date"), + ) + + +@app.route("/results////return/") +def return_results( + station_crs: str, slug: str, travel_date: str, return_date: str +) -> ResponseReturnValue: + return _results(station_crs, slug, travel_date, "return", return_date) + + 
+def _results( + station_crs: str, + slug: str, + travel_date: str, + journey_type: str, + return_date: str | None, +) -> ResponseReturnValue: departure_station_name = STATION_BY_CRS.get(station_crs) if departure_station_name is None: abort(404) @@ -134,178 +495,876 @@ def results(station_crs, slug, travel_date): if not destination or not travel_date: return redirect(url_for("index")) - default_min, default_max = _get_defaults() + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" + if journey_type == "return": + try: + if not return_date or date.fromisoformat(return_date) < date.fromisoformat( + travel_date + ): + return redirect(url_for("index")) + except ValueError: + return redirect(url_for("index")) + + if journey_type == "inbound": + default_min, default_max = ( + INBOUND_MIN_CONNECTION_MINUTES, + INBOUND_MAX_CONNECTION_MINUTES, + ) + valid_min, valid_max = ( + VALID_INBOUND_MIN_CONNECTIONS, + VALID_INBOUND_MAX_CONNECTIONS, + ) + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_connection = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_connection = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) + nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) + if nr_class not in VALID_NR_CLASSES: + nr_class = DEFAULT_NR_CLASS + es_class = request.args.get("es_class", DEFAULT_ES_CLASS) + if es_class not in VALID_ES_CLASSES: + es_class = DEFAULT_ES_CLASS - # Redirect to clean URL when both params are at their defaults - if ( - "min_connection" in request.args or "max_connection" in request.args - ) and min_connection == default_min and max_connection == default_max: - return redirect( - url_for("results", station_crs=station_crs, slug=slug, 
travel_date=travel_date) + inbound_min_connection = INBOUND_MIN_CONNECTION_MINUTES + if journey_type == "return": + + def _p(raw: str | None, default: str, valid: set[str]) -> str: + return raw if raw in valid else default + + nr_class_out = _p( + request.args.get("nr_class_out"), DEFAULT_NR_CLASS, VALID_NR_CLASSES + ) + nr_class_in = _p( + request.args.get("nr_class_in"), DEFAULT_NR_CLASS, VALID_NR_CLASSES + ) + es_class_out = _p( + request.args.get("es_class_out"), DEFAULT_ES_CLASS, VALID_ES_CLASSES + ) + es_class_in = _p( + request.args.get("es_class_in"), DEFAULT_ES_CLASS, VALID_ES_CLASSES + ) + inbound_min_connection = _parse_connection( + request.args.get("min_connection_in"), + INBOUND_MIN_CONNECTION_MINUTES, + VALID_INBOUND_RETURN_MIN_CONNECTIONS, + ) + else: + nr_class_out = nr_class_in = nr_class + es_class_out = es_class_in = es_class + + render = request.args.get("render") + + if render not in ("full", "stream") and not ( + app.config.get("TESTING") and request.args.get("progressive") != "1" + ): + dt = date.fromisoformat(travel_date) + travel_date_display = dt.strftime("%A %-d %B %Y") + return_date_display = ( + date.fromisoformat(return_date).strftime("%A %-d %B %Y") + if return_date + else None + ) + base_args = dict(request.args) + base_args.pop("progressive", None) + base_args.pop("journey_type", None) + base_args.pop("return_date", None) + stream_args = {**base_args, "render": "stream"} + full_args = {**base_args, "render": "full"} + return render_template( + "results_loading.html", + destination=destination, + departure_station_name=departure_station_name, + journey_type=journey_type, + travel_date_display=travel_date_display, + return_date=return_date, + return_date_display=return_date_display, + stream_url=_results_url( + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + journey_type=journey_type, + return_date=return_date, + **stream_args, + ), + full_results_url=_results_url( + station_crs=station_crs, + slug=slug, + 
travel_date=travel_date, + journey_type=journey_type, + return_date=return_date, + **full_args, + ), + index_url=url_for("index"), ) user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA) + error_messages = [] + from_cache_parts = [] + provisional_timetable = False - rtt_cache_key = f"rtt_{station_crs}_{travel_date}" - es_cache_key = f"eurostar_{travel_date}_{destination}" - gwr_fares_cache_key = f"gwr_fares_{station_crs}_{travel_date}" - gwr_advance_cache_key = f"gwr_advance_{station_crs}_{travel_date}" - - cached_rtt = get_cached(rtt_cache_key) - cached_es = get_cached(es_cache_key, ttl=24 * 3600) - cached_gwr_fares = get_cached(gwr_fares_cache_key, ttl=30 * 24 * 3600) - cached_advance_fares = get_cached(gwr_advance_cache_key, ttl=24 * 3600) - from_cache = bool(cached_rtt and cached_es) - - error = None - - if cached_rtt: - gwr_trains = cached_rtt - else: + def cached_fetch(key: str, ttl: int, fetcher: Callable[[], Any], label: str) -> Any: + cached = get_cached(key, ttl=ttl) + if cached is not None: + from_cache_parts.append(key) + return cached try: - gwr_trains = rtt_scraper.fetch(travel_date, user_agent, station_crs) - set_cached(rtt_cache_key, gwr_trains) + data = fetcher() + set_cached(key, data) + return data except Exception as e: - gwr_trains = [] - error = f"Could not fetch GWR trains: {e}" + error_messages.append(f"Could not fetch {label}: {e}") + return [] if label != "GWR fares" else {} - if cached_es: - eurostar_services = cached_es - else: + def cached_timetable_fetch( + exact_key: str, + weekday_key: str, + fetcher: Callable[[], Any], + label: str, + stripper: Callable[[Any], Any], + ttl: int | None = None, + ) -> tuple[Any, bool]: + nonlocal provisional_timetable + cached = get_cached(exact_key, ttl=ttl) + if cached is not None: + from_cache_parts.append(exact_key) + return cached, False + weekday_cached = get_cached(weekday_key) + if weekday_cached is not None: + from_cache_parts.append(weekday_key) + provisional_timetable = True 
+ return weekday_cached, True try: - eurostar_services = eurostar_scraper.fetch(destination, travel_date) - set_cached(es_cache_key, eurostar_services) + data = fetcher() + set_cached(exact_key, data) + set_cached(weekday_key, stripper(data)) + return data, False except Exception as e: - eurostar_services = [] - msg = f"Could not fetch Eurostar times: {e}" - error = f"{error}; {msg}" if error else msg + error_messages.append(f"Could not fetch {label}: {e}") + if label == "Eurostar return times": + return {"outbound": [], "inbound": []}, False + return [], False - if cached_gwr_fares: - gwr_fares = cached_gwr_fares - else: - try: - gwr_fares = gwr_fares_scraper.fetch(station_crs, travel_date) - set_cached(gwr_fares_cache_key, gwr_fares) - except Exception as e: - gwr_fares = {} - msg = f"Could not fetch GWR fares: {e}" - error = f"{error}; {msg}" if error else msg + es_return: dict[str, Any] = {"outbound": [], "inbound": []} + es_return_provisional = False - eurostar_trains = eurostar_services - eurostar_prices = { - s["depart_st_pancras"]: { - "price": s.get("price"), - "seats": s.get("seats"), - "plus_price": s.get("plus_price"), - "plus_seats": s.get("plus_seats"), - } - for s in eurostar_services - } + def _fetch_es_return() -> None: + nonlocal es_return, es_return_provisional + assert return_date is not None + es_return, es_return_provisional = cached_timetable_fetch( + _eurostar_return_exact_cache_key(travel_date, return_date, destination), + _eurostar_return_weekday_cache_key(travel_date, return_date, destination), + lambda: eurostar_scraper.fetch_return( + destination, travel_date, return_date + ), + "Eurostar return times", + _strip_eurostar_return_timetable, + 24 * 3600, + ) + if not isinstance(es_return, dict): + es_return = {"outbound": [], "inbound": []} - trips = combine_trips( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - gwr_fares, - ) + def build_section( + section_id: str, + direction: str, + section_date: 
str, + eurostar_services: list[dict[str, Any]] | None = None, + ) -> dict[str, Any]: + section_min_connection = min_connection + section_max_connection = max_connection + if journey_type == "return" and direction == "inbound": + section_min_connection = inbound_min_connection + section_max_connection = INBOUND_MAX_CONNECTION_MINUTES + rtt_direction = ( + "to_paddington" if direction == "outbound" else "from_paddington" + ) + rtt_cache_key = _nr_exact_cache_key(rtt_direction, station_crs, section_date) + rtt_weekday_cache_key = _nr_weekday_cache_key( + rtt_direction, station_crs, section_date + ) + gwr_cache_key = f"gwr_fares_{rtt_direction}_{station_crs}_{section_date}" + advance_cache_key = f"gwr_advance_{rtt_direction}_{station_crs}_{section_date}" - # Annotate each trip with Eurostar prices and total cost (walk-on + standard) - for trip in trips: - es = eurostar_prices.get(trip["depart_st_pancras"], {}) - es_price = es.get("price") - trip["eurostar_price"] = es_price - trip["eurostar_seats"] = es.get("seats") - trip["eurostar_plus_price"] = es.get("plus_price") - trip["eurostar_plus_seats"] = es.get("plus_seats") - gwr_p = trip.get("ticket_price") - circle_svcs = trip.get("circle_services") - circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 - trip["total_price"] = ( - gwr_p + es_price + circle_fare - if (gwr_p is not None and es_price is not None) - else None + if direction == "outbound": + trains, nr_provisional = cached_timetable_fetch( + rtt_cache_key, + rtt_weekday_cache_key, + lambda: rtt_scraper.fetch(section_date, user_agent, station_crs), + "GWR trains", + _strip_nr_timetable, + ) + else: + trains, nr_provisional = cached_timetable_fetch( + rtt_cache_key, + rtt_weekday_cache_key, + lambda: rtt_scraper.fetch_from_paddington( + section_date, user_agent, station_crs + ), + "GWR trains", + _strip_nr_timetable, + ) + + es_provisional = es_return_provisional if journey_type == "return" else False + if eurostar_services is None: + es_cache_key = 
_eurostar_exact_cache_key( + direction, section_date, destination + ) + es_weekday_cache_key = _eurostar_weekday_cache_key( + direction, section_date, destination + ) + es_fetcher = ( + (lambda: eurostar_scraper.fetch(destination, section_date)) + if direction == "outbound" + else ( + lambda: eurostar_scraper.fetch( + destination, section_date, direction=direction + ) + ) + ) + eurostar_services, es_provisional = cached_timetable_fetch( + es_cache_key, + es_weekday_cache_key, + es_fetcher, + "Eurostar times", + _strip_eurostar_timetable, + 24 * 3600, + ) + + fare_direction = ( + "to_paddington" if direction == "outbound" else "from_paddington" + ) + gwr_fares: dict[str, Any] = {} + cached_advance = get_cached(advance_cache_key, ttl=24 * 3600) + walkon_weekday_key = _walkon_weekday_cache_key( + rtt_direction, station_crs, section_date + ) + exact_walkon = get_cached(gwr_cache_key, ttl=30 * 24 * 3600) + cached_walkon = ( + exact_walkon if exact_walkon is not None else get_cached(walkon_weekday_key) + ) + + if direction == "outbound": + trips = combine_trips( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_morning_eurostars( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_st_pancras"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_st_pancras"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [ + {"row_type": "unreachable", "direction": direction, **svc} + for svc in unreachable + ], + key=lambda row: row["depart_st_pancras"], + ) + else: + trips = combine_inbound_trips( + eurostar_services, + trains, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_inbound_eurostars( + eurostar_services, + trains, + 
section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_destination"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_destination"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [ + {"row_type": "unreachable", "direction": direction, **svc} + for svc in unreachable + ], + key=lambda row: row["depart_destination"], + ) + + es_by_key = { + ( + svc.get("depart_st_pancras") + if direction == "outbound" + else svc.get("depart_destination") + ): svc + for svc in eurostar_services + } + for row in rows: + key = ( + row.get("depart_st_pancras") + if direction == "outbound" + else row.get("depart_destination") + ) + es = es_by_key.get(key, {}) + row["eurostar_price"] = es.get("price") + row["eurostar_seats"] = es.get("seats") + row["eurostar_plus_price"] = es.get("plus_price") + row["eurostar_plus_seats"] = es.get("plus_seats") + row["eurostar_key"] = f"{section_id}:{key}" + if row.get("row_type") == "trip": + nr_key = row.get("depart_bristol") or row.get("depart_paddington") + row["row_key"] = f"{section_id}:{nr_key}:{key}" + else: + row["row_key"] = f"{section_id}:unreachable:{key}" + + dt = date.fromisoformat(section_date) + return { + "id": section_id, + "direction": direction, + "date": section_date, + "date_display": dt.strftime("%A %-d %B %Y"), + "rows": rows, + "trips": trips, + "gwr_count": len(trains), + "eurostar_count": len(eurostar_services), + "min_connection": section_min_connection, + "max_connection": section_max_connection, + "provisional_timetable": nr_provisional or es_provisional, + "advance_fares": cached_advance, + "cached_walkon_fares": cached_walkon, + "walkon_api_url": url_for( + "api_walkon_fares", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + "advance_api_url": url_for( + "api_advance_fares", + station_crs=station_crs, + travel_date=section_date, + 
direction=fare_direction, + ), + "advance_stream_url": url_for( + "api_advance_fares_stream", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + } + + if render == "stream": + + def generate() -> Generator[str, None, None]: + dt = date.fromisoformat(travel_date) + prev_date = (dt - timedelta(days=1)).isoformat() + next_date = (dt + timedelta(days=1)).isoformat() + travel_date_display = dt.strftime("%A %-d %B %Y") + return_date_display = None + prev_return_date = return_date + next_return_date = return_date + if return_date: + return_dt = date.fromisoformat(return_date) + return_date_display = return_dt.strftime("%A %-d %B %Y") + prev_return_date = (return_dt - timedelta(days=1)).isoformat() + next_return_date = (return_dt + timedelta(days=1)).isoformat() + + eurostar_url = eurostar_scraper.search_url( + destination, + travel_date, + direction=journey_type, + return_date=return_date, + ) + rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date) + rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date) + + url_min = None if min_connection == default_min else min_connection + url_max = None if max_connection == default_max else max_connection + url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class + url_es = None if es_class == DEFAULT_ES_CLASS else es_class + + if journey_type == "return": + common_url_args: dict[str, Any] = { + "journey_type": journey_type, + "return_date": return_date, + "min_connection": url_min, + "max_connection": url_max, + "min_connection_in": ( + None + if inbound_min_connection == INBOUND_MIN_CONNECTION_MINUTES + else inbound_min_connection + ), + "nr_class_out": ( + None if nr_class_out == DEFAULT_NR_CLASS else nr_class_out + ), + "nr_class_in": ( + None if nr_class_in == DEFAULT_NR_CLASS else nr_class_in + ), + "es_class_out": ( + None if es_class_out == DEFAULT_ES_CLASS else es_class_out + ), + "es_class_in": ( + None if es_class_in == DEFAULT_ES_CLASS else 
es_class_in + ), + } + else: + common_url_args = { + "journey_type": journey_type, + "return_date": return_date, + "min_connection": url_min, + "max_connection": url_max, + "nr_class": url_nr, + "es_class": url_es, + } + + prev_results_url = _results_url( + station_crs, + slug, + prev_date, + **{**common_url_args, "return_date": prev_return_date}, + ) + next_results_url = _results_url( + station_crs, + slug, + next_date, + **{**common_url_args, "return_date": next_return_date}, + ) + prev_outbound_url = _results_url( + station_crs, slug, prev_date, **common_url_args + ) + next_outbound_url = _results_url( + station_crs, slug, next_date, **common_url_args + ) + prev_return_url = ( + _results_url( + station_crs, + slug, + travel_date, + **{**common_url_args, "return_date": prev_return_date}, + ) + if return_date + else None + ) + next_return_url = ( + _results_url( + station_crs, + slug, + travel_date, + **{**common_url_args, "return_date": next_return_date}, + ) + if return_date + else None + ) + destination_links = [ + ( + destination_slug, + destination_name, + _results_url( + station_crs, destination_slug, travel_date, **common_url_args + ), + ) + for destination_slug, destination_name in DESTINATIONS.items() + ] + results_base_url = _results_url( + station_crs, + slug, + travel_date, + journey_type=journey_type, + return_date=return_date, + ) + + if journey_type == "return": + shell_sections = [ + { + "id": "outbound", + "direction": "outbound", + "min_connection": min_connection, + "max_connection": max_connection, + }, + { + "id": "inbound", + "direction": "inbound", + "min_connection": inbound_min_connection, + "max_connection": INBOUND_MAX_CONNECTION_MINUTES, + }, + ] + shell_nr_classes = {"outbound": nr_class_out, "inbound": nr_class_in} + shell_es_classes = {"outbound": es_class_out, "inbound": es_class_in} + shell_section_directions = { + "outbound": "outbound", + "inbound": "inbound", + } + else: + shell_sections = [ + { + "id": "main", + "direction": 
journey_type, + "min_connection": min_connection, + "max_connection": max_connection, + }, + ] + shell_nr_classes = {"main": nr_class} + shell_es_classes = {"main": es_class} + shell_section_directions = {"main": journey_type} + + shell_html = render_template( + "results_shell.html", + journey_type=journey_type, + destination=destination, + departure_station_name=departure_station_name, + travel_date=travel_date, + return_date=return_date, + travel_date_display=travel_date_display, + return_date_display=return_date_display, + slug=slug, + sections=shell_sections, + nr_classes=shell_nr_classes, + es_classes=shell_es_classes, + nr_classes_json=json.dumps(shell_nr_classes), + es_classes_json=json.dumps(shell_es_classes), + section_directions_json=json.dumps(shell_section_directions), + results_base_url=results_base_url, + prev_results_url=prev_results_url, + next_results_url=next_results_url, + prev_outbound_url=prev_outbound_url, + next_outbound_url=next_outbound_url, + prev_return_url=prev_return_url, + next_return_url=next_return_url, + destination_links=destination_links, + eurostar_url=eurostar_url, + rtt_url=rtt_url, + rtt_station_url=rtt_station_url, + min_connection=min_connection, + max_connection=max_connection, + default_min_connection=default_min, + default_max_connection=default_max, + default_inbound_min_connection=INBOUND_MIN_CONNECTION_MINUTES, + valid_min_connections=sorted(valid_min), + valid_max_connections=sorted(valid_max), + inbound_min_connection=inbound_min_connection, + valid_inbound_return_min_connections=sorted( + VALID_INBOUND_RETURN_MIN_CONNECTIONS + ), + ) + yield f"data: {json.dumps({'type': 'shell', 'html': shell_html})}\n\n" + + if journey_type == "return": + assert return_date is not None + _fetch_es_return() + sections_spec: list[ + tuple[str, str, str, list[dict[str, Any]] | None] + ] = [ + ( + "outbound", + "outbound", + travel_date, + es_return.get("outbound", []), + ), + ("inbound", "inbound", return_date, 
es_return.get("inbound", [])), + ] + else: + sections_spec = [("main", journey_type, travel_date, None)] + + built_sections: list[dict[str, Any]] = [] + for section_id, direction, section_date, eurostar_services in sections_spec: + section = build_section( + section_id, direction, section_date, eurostar_services + ) + built_sections.append(section) + section_html = render_template( + "results_section.html", + section=section, + destination=destination, + departure_station_name=departure_station_name, + ) + yield f"data: {json.dumps({'type': 'section', 'id': section_id, 'html': section_html, 'trip_fares': _section_trip_fares(section), 'advance_fares': section['advance_fares'], 'walkon_cached_fares': section.get('cached_walkon_fares'), 'walkon_api_url': section['walkon_api_url'], 'advance_api_url': section['advance_api_url'], 'advance_stream_url': section['advance_stream_url']})}\n\n" + + if journey_type == "return": + timetable_refresh_url = url_for( + "api_return_results_refresh", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + return_date=return_date, + ) + else: + timetable_refresh_url = url_for( + "api_results_refresh", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + journey_type=journey_type if journey_type == "inbound" else None, + ) + + summary_html = _build_summary_html( + built_sections, journey_type, from_cache_parts, provisional_timetable + ) + yield f"data: {json.dumps({'type': 'done', 'timetable_refresh_url': timetable_refresh_url, 'provisional_timetable': provisional_timetable, 'summary_html': summary_html})}\n\n" + + return Response(stream_with_context(generate()), mimetype="text/event-stream") + + if journey_type == "return": + assert return_date is not None + _fetch_es_return() + sections = [ + build_section( + "outbound", "outbound", travel_date, es_return.get("outbound", []) + ), + build_section( + "inbound", "inbound", return_date, es_return.get("inbound", []) + ), + ] + else: + sections = 
[build_section("main", journey_type, travel_date)] + + nr_classes = {} + es_classes = {} + section_directions = {} + for section in sections: + direction = section["direction"] + section_directions[section["id"]] = direction + nr_classes[section["id"]] = ( + nr_class_out if direction == "outbound" else nr_class_in + ) + es_classes[section["id"]] = ( + es_class_out if direction == "outbound" else es_class_in ) - # If the API returned journeys but every price is None, tickets aren't on sale yet no_prices_note = None - if eurostar_prices and all( - v.get("price") is None for v in eurostar_prices.values() + all_es_prices = [ + row.get("eurostar_price") + for section in sections + for row in section["rows"] + if row.get("row_type") == "trip" + ] + if ( + not provisional_timetable + and all_es_prices + and all(price is None for price in all_es_prices) ): no_prices_note = ( "Eurostar prices not yet available — tickets may not be on sale yet." ) - unreachable_morning_services = find_unreachable_morning_eurostars( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - ) - for svc in unreachable_morning_services: - es = eurostar_prices.get(svc["depart_st_pancras"], {}) - svc["eurostar_price"] = es.get("price") - svc["eurostar_seats"] = es.get("seats") - svc["eurostar_plus_price"] = es.get("plus_price") - svc["eurostar_plus_seats"] = es.get("plus_seats") - - # Only keep unreachable services that depart before the first reachable Eurostar. - # Services after the first reachable one are omitted (they aren't "Too early"). 
- if trips: - first_es_depart = min(t["depart_st_pancras"] for t in trips) - unreachable_morning_services = [ - s - for s in unreachable_morning_services - if s["depart_st_pancras"] < first_es_depart - ] - - result_rows = sorted( - [{"row_type": "trip", **trip} for trip in trips] - + [ - {"row_type": "unreachable", **service} - for service in unreachable_morning_services - ], - key=lambda row: row["depart_st_pancras"], - ) - dt = date.fromisoformat(travel_date) prev_date = (dt - timedelta(days=1)).isoformat() next_date = (dt + timedelta(days=1)).isoformat() travel_date_display = dt.strftime("%A %-d %B %Y") + return_date_display = None + prev_return_date = return_date + next_return_date = return_date + if return_date: + return_dt = date.fromisoformat(return_date) + return_date_display = return_dt.strftime("%A %-d %B %Y") + prev_return_date = (return_dt - timedelta(days=1)).isoformat() + next_return_date = (return_dt + timedelta(days=1)).isoformat() - eurostar_url = eurostar_scraper.search_url(destination, travel_date) + eurostar_url = eurostar_scraper.search_url( + destination, travel_date, direction=journey_type, return_date=return_date + ) rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date) rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date) url_min = None if min_connection == default_min else min_connection url_max = None if max_connection == default_max else max_connection + url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class + url_es = None if es_class == DEFAULT_ES_CLASS else es_class + common_url_args: dict[str, Any] + if journey_type == "return": + common_url_args = { + "journey_type": journey_type, + "return_date": return_date, + "min_connection": url_min, + "max_connection": url_max, + "min_connection_in": ( + None + if inbound_min_connection == INBOUND_MIN_CONNECTION_MINUTES + else inbound_min_connection + ), + "nr_class_out": None if nr_class_out == DEFAULT_NR_CLASS else nr_class_out, + "nr_class_in": 
None if nr_class_in == DEFAULT_NR_CLASS else nr_class_in, + "es_class_out": None if es_class_out == DEFAULT_ES_CLASS else es_class_out, + "es_class_in": None if es_class_in == DEFAULT_ES_CLASS else es_class_in, + } + else: + common_url_args = { + "journey_type": journey_type, + "return_date": return_date, + "min_connection": url_min, + "max_connection": url_max, + "nr_class": url_nr, + "es_class": url_es, + } + prev_results_url = _results_url( + station_crs, + slug, + prev_date, + **{**common_url_args, "return_date": prev_return_date}, + ) + next_results_url = _results_url( + station_crs, + slug, + next_date, + **{**common_url_args, "return_date": next_return_date}, + ) + prev_outbound_url = _results_url(station_crs, slug, prev_date, **common_url_args) + next_outbound_url = _results_url(station_crs, slug, next_date, **common_url_args) + prev_return_url = ( + _results_url( + station_crs, + slug, + travel_date, + **{**common_url_args, "return_date": prev_return_date}, + ) + if return_date + else None + ) + next_return_url = ( + _results_url( + station_crs, + slug, + travel_date, + **{**common_url_args, "return_date": next_return_date}, + ) + if return_date + else None + ) + destination_links = [ + ( + destination_slug, + destination_name, + _results_url( + station_crs, + destination_slug, + travel_date, + **common_url_args, + ), + ) + for destination_slug, destination_name in DESTINATIONS.items() + ] + results_base_url = _results_url( + station_crs, + slug, + travel_date, + journey_type=journey_type, + return_date=return_date, + ) + + trip_fares = {} + advance_fares = {} + walkon_cached_fares = {} + walkon_api_urls = {} + advance_api_urls = {} + advance_stream_urls = {} + for section in sections: + advance_fares[section["id"]] = section["advance_fares"] + walkon_cached_fares[section["id"]] = section.get("cached_walkon_fares") + walkon_api_urls[section["id"]] = section["walkon_api_url"] + advance_api_urls[section["id"]] = section["advance_api_url"] + 
advance_stream_urls[section["id"]] = section["advance_stream_url"] + for row in section["rows"]: + circle_svcs = row.get("circle_services") or [] + circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 + walkon = ( + {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} + if row.get("ticket_price") is not None + else None + ) + es_std = ( + {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} + if row.get("eurostar_price") is not None + else None + ) + es_std_status = _eurostar_price_status( + row.get("eurostar_price"), row.get("eurostar_seats") + ) + es_plus = ( + { + "price": row["eurostar_plus_price"], + "seats": row.get("eurostar_plus_seats"), + } + if row.get("eurostar_plus_price") is not None + else None + ) + es_plus_status = _eurostar_price_status( + row.get("eurostar_plus_price"), row.get("eurostar_plus_seats") + ) + trip_fares[row["row_key"]] = { + "section": section["id"], + "eurostar_key": row.get("eurostar_key"), + "advance_key": row.get("depart_bristol") + or row.get("depart_paddington"), + "walkon": walkon, + "es_standard": es_std, + "es_standard_status": es_std_status, + "es_plus": es_plus, + "es_plus_status": es_plus_status, + "circle_fare": circle_fare, + } + + if journey_type == "return": + timetable_refresh_url = url_for( + "api_return_results_refresh", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + return_date=return_date, + ) + else: + timetable_refresh_url = url_for( + "api_results_refresh", + station_crs=station_crs, + slug=slug, + travel_date=travel_date, + journey_type=journey_type if journey_type == "inbound" else None, + ) return render_template( "results.html", - trips=trips, - result_rows=result_rows, - unreachable_morning_services=unreachable_morning_services, + sections=sections, + trips=sections[0]["trips"] if sections else [], + result_rows=sections[0]["rows"] if sections else [], + unreachable_morning_services=[], destinations=DESTINATIONS, destination=destination, 
travel_date=travel_date, + return_date=return_date, + journey_type=journey_type, slug=slug, station_crs=station_crs, departure_station_name=departure_station_name, prev_date=prev_date, next_date=next_date, + prev_results_url=prev_results_url, + next_results_url=next_results_url, + prev_outbound_url=prev_outbound_url, + next_outbound_url=next_outbound_url, + prev_return_url=prev_return_url, + next_return_url=next_return_url, + destination_links=destination_links, + results_base_url=results_base_url, travel_date_display=travel_date_display, - gwr_count=len(gwr_trains), - eurostar_count=len(eurostar_trains), - from_cache=from_cache, - error=error, + return_date_display=return_date_display, + gwr_count=sum(section["gwr_count"] for section in sections), + eurostar_count=sum(section["eurostar_count"] for section in sections), + from_cache=bool(from_cache_parts), + provisional_timetable=provisional_timetable, + error="; ".join(error_messages) if error_messages else None, no_prices_note=no_prices_note, eurostar_url=eurostar_url, rtt_url=rtt_url, @@ -316,27 +1375,328 @@ def results(station_crs, slug, travel_date): default_max_connection=default_max, url_min_connection=url_min, url_max_connection=url_max, - cached_advance_fares=cached_advance_fares, - valid_min_connections=sorted(VALID_MIN_CONNECTIONS), - valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + nr_class=nr_class, + es_class=es_class, + url_nr_class=url_nr, + url_es_class=url_es, + nr_classes=nr_classes, + es_classes=es_classes, + nr_classes_json=json.dumps(nr_classes), + es_classes_json=json.dumps(es_classes), + section_directions_json=json.dumps(section_directions), + trip_fares_json=json.dumps(trip_fares), + advance_fares_json=json.dumps(advance_fares), + walkon_cached_fares_json=json.dumps(walkon_cached_fares), + walkon_api_urls_json=json.dumps(walkon_api_urls), + advance_api_urls_json=json.dumps(advance_api_urls), + advance_stream_urls_json=json.dumps(advance_stream_urls), + 
timetable_refresh_url=timetable_refresh_url, + advance_fares_api_url=url_for( + "api_advance_fares", station_crs=station_crs, travel_date=travel_date + ), + advance_fares_stream_url=url_for( + "api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date + ), + valid_min_connections=sorted(valid_min), + valid_max_connections=sorted(valid_max), + inbound_min_connection=inbound_min_connection, + default_inbound_min_connection=INBOUND_MIN_CONNECTION_MINUTES, + valid_inbound_return_min_connections=sorted( + VALID_INBOUND_RETURN_MIN_CONNECTIONS + ), ) -@app.route("/api/advance_fares//") -def api_advance_fares(station_crs, travel_date): +def _fetch_exact_nr_timetable( + station_crs: str, section_date: str, direction: str, user_agent: str +) -> list[dict[str, Any]]: + rtt_direction = "to_paddington" if direction == "outbound" else "from_paddington" + exact_key = _nr_exact_cache_key(rtt_direction, station_crs, section_date) + weekday_key = _nr_weekday_cache_key(rtt_direction, station_crs, section_date) + trains = ( + rtt_scraper.fetch(section_date, user_agent, station_crs) + if direction == "outbound" + else rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs) + ) + set_cached(exact_key, trains) + set_cached(weekday_key, _strip_nr_timetable(trains)) + return trains + + +def _fetch_exact_eurostar_single( + destination: str, section_date: str, direction: str +) -> list[dict[str, Any]]: + exact_key = _eurostar_exact_cache_key(direction, section_date, destination) + weekday_key = _eurostar_weekday_cache_key(direction, section_date, destination) + services = ( + eurostar_scraper.fetch(destination, section_date) + if direction == "outbound" + else eurostar_scraper.fetch(destination, section_date, direction=direction) + ) + set_cached(exact_key, services) + set_cached(weekday_key, _strip_eurostar_timetable(services)) + return services + + +def _fetch_exact_eurostar_return( + destination: str, travel_date: str, return_date: str +) -> dict[str, 
list[dict[str, Any]]]: + exact_key = _eurostar_return_exact_cache_key(travel_date, return_date, destination) + weekday_key = _eurostar_return_weekday_cache_key( + travel_date, return_date, destination + ) + services = eurostar_scraper.fetch_return(destination, travel_date, return_date) + set_cached(exact_key, services) + set_cached(weekday_key, _strip_eurostar_return_timetable(services)) + return services + + +@app.route("/api/walkon_fares//") +def api_walkon_fares( + station_crs: str, travel_date: str +) -> Response | tuple[Response, int]: if station_crs not in STATION_BY_CRS: abort(404) - cache_key = f"gwr_advance_{station_crs}_{travel_date}" + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_fares_{direction}_{station_crs}_{travel_date}" + weekday_key = _walkon_weekday_cache_key(direction, station_crs, travel_date) + cached = get_cached(cache_key, ttl=30 * 24 * 3600) + if cached is not None: + if get_cached(weekday_key) is None: + set_cached(weekday_key, cached) + return jsonify(cached) + try: + fares = ( + gwr_fares_scraper.fetch(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch(station_crs, travel_date, direction=direction) + ) + set_cached(cache_key, fares) + set_cached(weekday_key, fares) + return jsonify(fares) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +@app.route("/api/results_refresh///") +def api_results_refresh(station_crs: str, slug: str, travel_date: str) -> Response: + return _api_results_refresh( + station_crs, slug, travel_date, request.args.get("return_date") + ) + + +@app.route( + "/api/results_refresh////return/" +) +def api_return_results_refresh( + station_crs: str, slug: str, travel_date: str, return_date: str +) -> Response: + return _api_results_refresh(station_crs, slug, travel_date, return_date, "return") + + +def _api_results_refresh( + station_crs: 
str, + slug: str, + travel_date: str, + return_date: str | None = None, + path_journey_type: str | None = None, +) -> Response: + if station_crs not in STATION_BY_CRS: + abort(404) + destination = DESTINATIONS.get(slug) + if not destination: + abort(404) + journey_type = path_journey_type or request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" + if return_date is None: + return_date = request.args.get("return_date") + if journey_type == "return" and not return_date: + abort(400) + + user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA) + + def generate() -> Generator[str, None, None]: + try: + old_es_weekdays: dict[str, Any] = {} + if journey_type == "return": + assert return_date is not None + old_es_weekdays["return"] = get_cached( + _eurostar_return_weekday_cache_key( + travel_date, return_date, destination + ) + ) + es_return = _fetch_exact_eurostar_return( + destination, travel_date, return_date + ) + sections = [ + ( + "outbound", + "outbound", + travel_date, + es_return.get("outbound", []), + ), + ("inbound", "inbound", return_date, es_return.get("inbound", [])), + ] + else: + direction = journey_type + section_date = travel_date + old_es_weekdays["main"] = get_cached( + _eurostar_weekday_cache_key(direction, section_date, destination) + ) + es_services = _fetch_exact_eurostar_single( + destination, section_date, direction + ) + sections = [("main", direction, section_date, es_services)] + + reload_needed = False + eurostar_prices = {} + for section_id, direction, section_date, es_services in sections: + nr_weekday_key = _nr_weekday_cache_key( + "to_paddington" if direction == "outbound" else "from_paddington", + station_crs, + section_date, + ) + old_nr_weekday = get_cached(nr_weekday_key) + exact_nr = _fetch_exact_nr_timetable( + station_crs, section_date, direction, user_agent + ) + if old_nr_weekday is not None and _timetable_signature( + old_nr_weekday + ) != 
_timetable_signature(_strip_nr_timetable(exact_nr)): + reload_needed = True + + old_es_weekday = ( + old_es_weekdays["return"] + if journey_type == "return" + else old_es_weekdays[section_id] + ) + exact_es_timetable = ( + _strip_eurostar_return_timetable(es_return) + if journey_type == "return" + else _strip_eurostar_timetable(es_services) + ) + if old_es_weekday is not None and _timetable_signature( + old_es_weekday + ) != _timetable_signature(exact_es_timetable): + reload_needed = True + + eurostar_prices.update( + _eurostar_prices_by_row(section_id, direction, es_services) + ) + + if reload_needed: + yield f"data: {json.dumps({'type': 'reload'})}\n\n" + yield f"data: {json.dumps({'type': 'done'})}\n\n" + return + + if eurostar_prices: + yield f"data: {json.dumps({'type': 'eurostar_prices', 'prices': eurostar_prices})}\n\n" + + for section_id, direction, section_date, _es_services in sections: + fare_direction = ( + "to_paddington" if direction == "outbound" else "from_paddington" + ) + cache_key = f"gwr_fares_{fare_direction}_{station_crs}_{section_date}" + cached = get_cached(cache_key, ttl=30 * 24 * 3600) + if cached is None: + cached = ( + gwr_fares_scraper.fetch(station_crs, section_date) + if fare_direction == "to_paddington" + else gwr_fares_scraper.fetch( + station_crs, section_date, direction=fare_direction + ) + ) + set_cached(cache_key, cached) + yield f"data: {json.dumps({'type': 'walkon_fares', 'section': section_id, 'fares': cached})}\n\n" + except Exception as e: + yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" + return + + yield f"data: {json.dumps({'type': 'done'})}\n\n" + + return Response(stream_with_context(generate()), mimetype="text/event-stream") + + +@app.route("/api/advance_fares//") +def api_advance_fares( + station_crs: str, travel_date: str +) -> Response | tuple[Response, int]: + if station_crs not in STATION_BY_CRS: + abort(404) + direction = request.args.get("direction", "to_paddington") + if direction not in 
{"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" cached = get_cached(cache_key, ttl=24 * 3600) if cached is not None: return jsonify(cached) try: - fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date) + fares = ( + gwr_fares_scraper.fetch_advance(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance( + station_crs, travel_date, direction=direction + ) + ) set_cached(cache_key, fares) return jsonify(fares) except Exception as e: return jsonify({"error": str(e)}), 500 +@app.route("/api/advance_fares_stream//") +def api_advance_fares_stream(station_crs: str, travel_date: str) -> Response: + if station_crs not in STATION_BY_CRS: + abort(404) + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" + + def generate() -> Generator[str, None, None]: + cached = get_cached(cache_key, ttl=24 * 3600) + if cached is not None: + yield f"data: {json.dumps({'type': 'fares', 'fares': cached})}\n\n" + yield f"data: {json.dumps({'type': 'done'})}\n\n" + return + + accumulated: dict[str, Any] = {} + try: + stream = ( + gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance_streaming( + station_crs, travel_date, direction=direction + ) + ) + for page_fares in stream: + for dep_time, fare_data in page_fares.items(): + if dep_time not in accumulated: + accumulated[dep_time] = { + "advance_std": None, + "advance_1st": None, + } + if fare_data.get("advance_std"): + accumulated[dep_time]["advance_std"] = fare_data["advance_std"] + if fare_data.get("advance_1st"): + accumulated[dep_time]["advance_1st"] = fare_data["advance_1st"] + yield f"data: {json.dumps({'type': 'fares', 'fares': page_fares})}\n\n" + 
except Exception as e: + yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n" + return + + set_cached(cache_key, accumulated) + yield f"data: {json.dumps({'type': 'done'})}\n\n" + + return Response( + stream_with_context(generate()), + mimetype="text/event-stream", + headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"}, + ) + + if __name__ == "__main__": app.run(debug=True, host="0.0.0.0") diff --git a/cache.py b/cache.py index 31fad05..6b09fdb 100644 --- a/cache.py +++ b/cache.py @@ -1,27 +1,35 @@ import json import os import time +import uuid +from typing import Any from config.default import CACHE_DIR # overridden by app config after import def _cache_path(key: str) -> str: - safe_key = key.replace('/', '_').replace(' ', '_') + safe_key = key.replace("/", "_").replace(" ", "_") return os.path.join(CACHE_DIR, f"{safe_key}.json") -def get_cached(key: str, ttl: int | None = None): +def get_cached(key: str, ttl: int | None = None) -> Any: """Return cached data, or None if missing or older than ttl seconds.""" path = _cache_path(key) - if not os.path.exists(path): + try: + if not os.path.exists(path): + return None + if ttl is not None and time.time() - os.path.getmtime(path) > ttl: + return None + with open(path) as f: + return json.load(f) + except (OSError, json.JSONDecodeError): return None - if ttl is not None and time.time() - os.path.getmtime(path) > ttl: - return None - with open(path) as f: - return json.load(f) -def set_cached(key: str, data) -> None: +def set_cached(key: str, data: Any) -> None: os.makedirs(CACHE_DIR, exist_ok=True) - with open(_cache_path(key), 'w') as f: + path = _cache_path(key) + tmp_path = f"{path}.{os.getpid()}.{uuid.uuid4().hex}.tmp" + with open(tmp_path, "w") as f: json.dump(data, f, indent=2) + os.replace(tmp_path, path) diff --git a/circle_line.md b/circle_line.md new file mode 100644 index 0000000..03e828f --- /dev/null +++ b/circle_line.md @@ -0,0 +1,9 @@ +The route from Paddington to Kings Cross St 
Pancras is via the Circle Line. + +Departs every 10 minutes at these times past the hour: 08, 18, 28, 38, 48, 58 + +Journey time is 11 minutes. + +Results could show what service London Underground service we expect to catch. Assume it takes 10 minutes to get from the GWR platform in Paddington station to the Paddington (H&C Line) Underground platform. Also takes 10 minutes to get from Kings Cross St Pancras platform to St Pancras check-in / security queue. Eurostar wants passengers to arrive at least 30 minutes before departure. + +Can we encorporate this info somehow? diff --git a/circle_line.py b/circle_line.py index 6292f24..eb5a42d 100644 --- a/circle_line.py +++ b/circle_line.py @@ -1,53 +1,69 @@ """ -Circle Line timetable: Paddington (H&C Line) → King's Cross St Pancras. +Circle Line timetable between Paddington (H&C Line) and King's Cross St Pancras. Parses the TransXChange XML file on first use and caches the result in memory. """ + import os import re import xml.etree.ElementTree as ET from datetime import datetime, timedelta +from typing import Any -_PAD_STOP = '9400ZZLUPAH1' # Paddington (H&C Line) -_KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras +_PAD_STOP = "9400ZZLUPAH1" # Paddington (H&C Line) +_KXP_STOP = "9400ZZLUKSX3" # King's Cross St Pancras -from config.default import CIRCLE_LINE_XML as _TXC_XML # overridden by app config after import -_NS = {'t': 'http://www.transxchange.org.uk/'} +from config.default import ( + CIRCLE_LINE_XML as _TXC_XML, +) # overridden by app config after import -# Populated on first call to next_service(); maps day-type -> sorted list of -# (pad_depart_seconds, kxp_arrive_seconds) measured from midnight. -_timetable: dict[str, list[tuple[int, int]]] | None = None +_NS = {"t": "http://www.transxchange.org.uk/"} + +# Populated on first call to next_service(); maps direction -> day-type -> sorted +# list of (origin_depart_seconds, destination_arrive_seconds) measured from midnight. 
+_timetable: dict[str, dict[str, list[tuple[int, int]]]] | None = None def _parse_duration(s: str | None) -> int: if not s: return 0 - m = re.match(r'PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?', s) - return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0) + m = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", s) + assert m is not None + return ( + int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0) + ) -def _load_timetable() -> dict[str, list[tuple[int, int]]]: +def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: tree = ET.parse(_TXC_XML) root = tree.getroot() # Build JPS id -> [(from_stop, to_stop, runtime_secs, wait_secs)] - jps_map: dict[str, list[tuple]] = {} - for jps_el in root.find('t:JourneyPatternSections', _NS): + jps_map: dict[str, list[tuple[str | None, str | None, int, int]]] = {} + jps_sections = root.find("t:JourneyPatternSections", _NS) + assert jps_sections is not None + for jps_el in jps_sections: links = [] - for link in jps_el.findall('t:JourneyPatternTimingLink', _NS): - fr = link.find('t:From/t:StopPointRef', _NS) - to = link.find('t:To/t:StopPointRef', _NS) - rt = link.find('t:RunTime', _NS) - wait = link.find('t:From/t:WaitTime', _NS) - links.append(( - fr.text if fr is not None else None, - to.text if to is not None else None, - _parse_duration(rt.text if rt is not None else None), - _parse_duration(wait.text if wait is not None else None), - )) - jps_map[jps_el.get('id')] = links + for link in jps_el.findall("t:JourneyPatternTimingLink", _NS): + fr = link.find("t:From/t:StopPointRef", _NS) + to = link.find("t:To/t:StopPointRef", _NS) + rt = link.find("t:RunTime", _NS) + wait = link.find("t:From/t:WaitTime", _NS) + links.append( + ( + fr.text if fr is not None else None, + to.text if to is not None else None, + _parse_duration(rt.text if rt is not None else None), + _parse_duration(wait.text if wait is not None else None), + ) + ) + jps_id = jps_el.get("id") + assert jps_id 
is not None + jps_map[jps_id] = links - def _seconds_to_depart(links, stop): + def _seconds_to_depart( + links: list[tuple[str | None, str | None, int, int]], stop: str | None + ) -> int | None: """Seconds from journey start until departure from *stop*.""" elapsed = 0 for fr, to, rt, wait in links: @@ -57,7 +73,9 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: elapsed += rt return None - def _seconds_to_arrive(links, stop): + def _seconds_to_arrive( + links: list[tuple[str | None, str | None, int, int]], stop: str | None + ) -> int | None: """Seconds from journey start until arrival at *stop*.""" elapsed = 0 for fr, to, rt, wait in links: @@ -66,15 +84,18 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: return elapsed return None - # Map JP id -> (pad_offset_secs, kxp_arrive_offset_secs) - jp_offsets: dict[str, tuple[int, int]] = {} - for svc in root.find('t:Services', _NS): - for jp in svc.findall('.//t:JourneyPattern', _NS): - jps_ref = jp.find('t:JourneyPatternSectionRefs', _NS) + # Map JP id -> [(direction, origin_depart_offset_secs, destination_arrive_offset_secs)]. 
+ jp_offsets: dict[str, list[tuple[str, int, int]]] = {} + services_el = root.find("t:Services", _NS) + assert services_el is not None + for svc in services_el: + for jp in svc.findall(".//t:JourneyPattern", _NS): + jps_ref = jp.find("t:JourneyPatternSectionRefs", _NS) if jps_ref is None: continue - links = jps_map.get(jps_ref.text, []) + links = jps_map.get(jps_ref.text or "", []) stops = [l[0] for l in links] + ([links[-1][1]] if links else []) + offsets = [] if ( _PAD_STOP in stops and _KXP_STOP in stops @@ -83,37 +104,67 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: pad_off = _seconds_to_depart(links, _PAD_STOP) kxp_off = _seconds_to_arrive(links, _KXP_STOP) if pad_off is not None and kxp_off is not None: - jp_offsets[jp.get('id')] = (pad_off, kxp_off) + offsets.append(("pad_to_kx", pad_off, kxp_off)) + if ( + _PAD_STOP in stops + and _KXP_STOP in stops + and stops.index(_KXP_STOP) < stops.index(_PAD_STOP) + ): + kxp_off = _seconds_to_depart(links, _KXP_STOP) + pad_off = _seconds_to_arrive(links, _PAD_STOP) + if kxp_off is not None and pad_off is not None: + offsets.append(("kx_to_pad", kxp_off, pad_off)) + if offsets: + jp_id = jp.get("id") + assert jp_id is not None + jp_offsets[jp_id] = offsets - result: dict[str, list[tuple[int, int]]] = { - 'MondayToFriday': [], - 'Saturday': [], - 'Sunday': [], + result: dict[str, dict[str, list[tuple[int, int]]]] = { + "pad_to_kx": { + "MondayToFriday": [], + "Saturday": [], + "Sunday": [], + }, + "kx_to_pad": { + "MondayToFriday": [], + "Saturday": [], + "Sunday": [], + }, } - for vj in root.find('t:VehicleJourneys', _NS): - jp_ref = vj.find('t:JourneyPatternRef', _NS) - dep_time = vj.find('t:DepartureTime', _NS) - op = vj.find('t:OperatingProfile', _NS) + vehicle_journeys = root.find("t:VehicleJourneys", _NS) + assert vehicle_journeys is not None + for vj in vehicle_journeys: + jp_ref = vj.find("t:JourneyPatternRef", _NS) + dep_time = vj.find("t:DepartureTime", _NS) + op = vj.find("t:OperatingProfile", 
_NS) if jp_ref is None or dep_time is None or jp_ref.text not in jp_offsets: continue - pad_off, kxp_off = jp_offsets[jp_ref.text] - h, m, s = map(int, dep_time.text.split(':')) + if dep_time.text is None: + continue + h, m, s = map(int, dep_time.text.split(":")) dep_secs = h * 3600 + m * 60 + s - rdt = op.find('.//t:DaysOfWeek', _NS) if op is not None else None + rdt = op.find(".//t:DaysOfWeek", _NS) if op is not None else None if rdt is None: continue for day_el in rdt: - day_type = day_el.tag.split('}')[-1] - if day_type in result: - result[day_type].append((dep_secs + pad_off, dep_secs + kxp_off)) + day_type = day_el.tag.split("}")[-1] + for direction, origin_off, dest_off in jp_offsets[jp_ref.text]: + if day_type in result[direction]: + result[direction][day_type].append( + ( + dep_secs + origin_off, + dep_secs + dest_off, + ) + ) - for key in result: - result[key].sort() + for direction in result: + for key in result[direction]: + result[direction][key].sort() return result -def _get_timetable() -> dict[str, list[tuple[int, int]]]: +def _get_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: global _timetable if _timetable is None: _timetable = _load_timetable() @@ -122,11 +173,13 @@ def _get_timetable() -> dict[str, list[tuple[int, int]]]: def _day_type(weekday: int) -> str: if weekday < 5: - return 'MondayToFriday' - return 'Saturday' if weekday == 5 else 'Sunday' + return "MondayToFriday" + return "Saturday" if weekday == 5 else "Sunday" -def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: +def next_service( + earliest_board: datetime, direction: str = "pad_to_kx" +) -> tuple[datetime, datetime] | None: """ Given the earliest time a passenger can board at Paddington (H&C Line), return (circle_line_depart, arrive_kings_cross) as datetimes, or None if @@ -135,33 +188,39 @@ def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: The caller is responsible for adding any walk time from the GWR 
platform before passing *earliest_board*. """ - services = upcoming_services(earliest_board, count=1) + services = upcoming_services(earliest_board, count=1, direction=direction) return services[0] if services else None def upcoming_services( - earliest_board: datetime, count: int = 2 + earliest_board: datetime, + count: int = 2, + direction: str = "pad_to_kx", + preceding: int = 0, ) -> list[tuple[datetime, datetime]]: """ - Return up to *count* Circle line services from Paddington (H&C Line) to - King's Cross St Pancras, starting from *earliest_board*. + Return Circle line services for *direction* around *earliest_board*. - Each element is (depart_paddington, arrive_kings_cross) as datetimes. + Returns up to *preceding* services before earliest_board followed by up to + *count* services at or after earliest_board. Each element is + (depart_origin, arrive_destination) as datetimes. """ - timetable = _get_timetable()[_day_type(earliest_board.weekday())] + timetable = _get_timetable().get(direction, {})[_day_type(earliest_board.weekday())] board_secs = ( - earliest_board.hour * 3600 - + earliest_board.minute * 60 - + earliest_board.second + earliest_board.hour * 3600 + earliest_board.minute * 60 + earliest_board.second ) midnight = earliest_board.replace(hour=0, minute=0, second=0, microsecond=0) + pre_results = [] results = [] for pad_secs, kxp_secs in timetable: - if pad_secs >= board_secs: - results.append(( - midnight + timedelta(seconds=pad_secs), - midnight + timedelta(seconds=kxp_secs), - )) + entry = ( + midnight + timedelta(seconds=pad_secs), + midnight + timedelta(seconds=kxp_secs), + ) + if pad_secs < board_secs: + pre_results.append(entry) + else: + results.append(entry) if len(results) == count: break - return results + return pre_results[-preceding:] + results if preceding else results diff --git a/config/default.py b/config/default.py index dfdb4fa..3d9c4e2 100644 --- a/config/default.py +++ b/config/default.py @@ -1,13 +1,13 @@ import os # Directory 
containing TfL reference data (TransXChange XML files etc.) -TFL_DATA_DIR = os.path.expanduser('~/lib/data/tfl') +TFL_DATA_DIR = os.path.expanduser("~/lib/data/tfl") # Directory for caching scraped train times -CACHE_DIR = os.path.expanduser('~/lib/data/tfl/cache') +CACHE_DIR = os.path.expanduser("~/lib/data/tfl/cache") # TransXChange timetable file for the Circle Line -CIRCLE_LINE_XML = os.path.join(TFL_DATA_DIR, 'output_txc_01CIR_.xml') +CIRCLE_LINE_XML = os.path.join(TFL_DATA_DIR, "output_txc_01CIR_.xml") # Default connection window (minutes) between Paddington arrival and St Pancras departure DEFAULT_MIN_CONNECTION = 70 diff --git a/prices.md b/prices.md new file mode 100644 index 0000000..f717a75 --- /dev/null +++ b/prices.md @@ -0,0 +1,145 @@ +Walk on fares for the Bristol Temple Meads to Paddington. + +### Super off-peak single: £45 +ticket code: SSS + +Restricted Days: Mondays to Fridays + +#### Outward Travel + + Outward Travel; Not valid for travel on trains timed to depart after 04:29 and before the times shown from the following stations: + 09:00 from Ashchurch; + 09:10 from Avoncliff (if travel is via Bath Spa, the restriction time shown for Bath Spa also applies for journeys eastbound from this station); + 10:00 from Bath Spa; + 09:35 from Bradford on Avon (if travel is via Bath Spa, the restriction time shown for Bath Spa also applies for journeys eastbound from this station); + 09:21 from Bridgwater; + 10:11 from Bristol Parkway; (also valid between 02:00 and 05:30) + 09:58 from Bristol Temple Meads; (also valid between 02:00 and 05:04) + 09:40 from Castle Cary (if travel is via Bath Spa, the restriction time shown for Bath Spa also applies for journeys eastbound from this station); + 09:15 from Cheltenham Spa (if travel is via Bristol Parkway, the restriction time shown for Bristol Parkway also applies for journeys eastbound from this station); + 10:10 from Chippenham; + 09:00 from Dawlish; + 08:45 from Dawlish Warren; + 09:20 from Exeter St 
Davids; + 09:10 from Freshford (if travel is via Bath Spa, the restriction time shown for Bath Spa also applies for journeys eastbound from this station); + 09:55 from Frome; + 09:30 from Gloucester (if travel is via Bristol Parkway, the restriction time shown for Bristol Parkway also applies for journeys eastbound from this station); + 09:00 from Highbridge; + 09:00 from Ivybridge; + 10:16 from Kemble; + 09:10 from Nailsea & Backwell; + 09:30 from Patchway; + 10:30 from Pewsey; + 08:45 from Starcross; + 09:30 from Stonehouse; + 09:50 from Stroud; + 10:30 from Swindon; (also valid between 02:00 and 05:45) + 09:50 from Taunton; + 09:00 from Teignmouth; + 09:35 from Tiverton Parkway; + 09:23 from Trowbridge; + 09:30 from Westbury (if travel is via Bath Spa, the restriction time shown for Bath Spa also applies for journeys eastbound from this station); + 09:20 from Weston Milton; + 09:00 from Weston-Super-Mare; + 09:30 from Worle; + 09:40 from Yatton. + Not valid on trains timed to arrive at Birmingham New Street before 10:15. + Not valid on trains timed to arrive at London Waterloo before 11:48. + +### Seasonal Variations + +GWR Christmas and New Year Travel Restrictions 2025/26 + +Up to and including Friday 19 December: Usual ticket restrictions apply. + +Monday 22 to Wednesday 24 December: + +Off-Peak tickets valid all day. + +Super Off-Peak tickets subject to normal restriction times. + +Monday 29 December to Friday 02 January \* No ticket restrictions apply. + +Monday 05 January 2026 onwards Usual ticket restrictions apply. + +Journey planners have been updated with the above information. + +\* Please note this only applies to journeys priced by GWR. Flows within Wales, between Cheltenham and Gloucester and long-distance routes to areas not served by GWR are priced by other train companies, who may have different Christmas travel restrictions in place. Usual restrictions will apply to CPAY and may apply to other Operators’ services.
Please check retail systems if unsure. + +Customers using Off-Peak travel cards and arriving at London Paddington before 09:30 on weekdays, will need to wait until 09:30 for onward travel from Paddington. + +### Off-peak single: £63.60 +ticket code: SVS + +Restricted Days: Mondays to Fridays + +#### Outward Travel + +Not valid for travel on trains timed to depart earlier than shown from the following stations: + + 07:46 from Ashchurch for Tewkesbury + 08:20 from Avoncliff (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 08:39 from Bath Spa + 08:20 from Bradford on Avon (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 07:10 from Bridgwater + 08:55 from Bristol Parkway (also valid between 02:00 and 05:30) + 08:26 from Bristol Temple Meads (also valid between 02:00 and 05:10) + 08:30 from Castle Cary (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 08:21 from Cheltenham Spa (if travel is via Bristol Parkway, the restriction time shown for Bristol Parkway also applies for journeys eastbound from this station) + 08:46 from Chippenham + 07:25 from Dawlish + 07:56 from Exeter St Davids (also valid on the 07:20 CrossCountry service, changing at Bristol Temple Meads or Parkway; also valid on the 07:44 service) + 08:17 from Freshford (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 07:46 from Frome + 08:26 from Gloucester (if travel is via Bristol Parkway, the restriction time shown for Bristol Parkway also applies for journeys eastbound from this station) + 07:22 from Highbridge + 09:05 from Kemble + 08:30 from Keynsham + 08:10 from Nailsea & Backwell + 09:00 from Pewsey + 08:31 from Patchway + 09:07 from Slough + 08:45 from Stonehouse + 08:50 from Stroud + 09:05 from Swindon (also valid between 02:00 and 05:45) + 08:16 from Taunton (also valid on the 07:24 service via Bath Spa; also valid on the 07:46 CrossCountry service 
changing at Bristol Temple Meads or Parkway) + 07:20 from Teignmouth + 07:33 from Tiverton Parkway + 08:06 from Trowbridge (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 08:56 from Westbury (if travel via Bath Spa, restriction time shown applies eastbound from this station) + 07:56 from Weston Milton + 07:45 from Weston-super-Mare + 07:56 from Worle + 07:55 from Yatton + +Not valid on trains timed to arrive at London Waterloo after 04:29 and before 09:52. + +Also not valid on trains that arrive at Birmingham New Street before 10:15. + +#### Seasonal Variations + +GWR Christmas and New Year Travel Restrictions 2025/26 + +Up to and including Friday 19 December: Usual ticket restrictions apply. + +Monday 22 to Wednesday 24 December: + +Off-Peak tickets valid all day. + +Super Off-Peak tickets subject to normal restriction times. + +Monday 29 December to Friday 02 January \* No ticket restrictions apply. + +Monday 05 January 2026 onwards Usual ticket restrictions apply. + +Journey planners have been updated with the above information. + +\* Please note this only applies to journeys priced by GWR. Flows within Wales, between Cheltenham and Gloucester and long-distance routes to areas not served by GWR are priced by other train companies, who may have different Christmas travel restrictions in place. Usual restrictions will apply to CPAY and may apply to other Operators’ services. Please check retail systems if unsure. + +Customers using Off-Peak travel cards and arriving at London Paddington before 09:30 on weekdays, will need to wait until 09:30 for onward travel from Paddington. 
+ +--- +### Anytime day single: £138.70 +ticket code: SDS + +No restrictions diff --git a/run.fcgi b/run.fcgi new file mode 100755 index 0000000..8a59638 --- /dev/null +++ b/run.fcgi @@ -0,0 +1,24 @@ +#!/usr/bin/python3 +from flipflop import WSGIServer +import sys +sys.path.append('/home/edward/src/paddington-eurostar') # isort:skip +from app import app # isort:skip + + +class ScriptNameMiddleware: + def __init__(self, wsgi_app, script_name): + self.app = wsgi_app + self.script_name = script_name + + def __call__(self, environ, start_response): + environ['SCRIPT_NAME'] = self.script_name + path_info = environ.get('PATH_INFO', '') + if path_info.startswith(self.script_name): + environ['PATH_INFO'] = path_info[len(self.script_name):] + return self.app(environ, start_response) + + +app.wsgi_app = ScriptNameMiddleware(app.wsgi_app, '/paddington-eurostar') + +if __name__ == '__main__': + WSGIServer(app).run() diff --git a/scraper/eurostar.py b/scraper/eurostar.py index 667decf..27d4f9f 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -6,8 +6,10 @@ NewBookingSearch) returns departure time, arrival time, train number, Eurostar Standard fare price, and seats remaining at that price for every service on the requested date. 
""" + import random import string +from typing import Any import requests @@ -16,18 +18,19 @@ DEFAULT_UA = ( "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" ) -ORIGIN_STATION_ID = '7015400' +ST_PANCRAS_STATION_ID = "7015400" +ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID DESTINATION_STATION_IDS = { - 'Paris Gare du Nord': '8727100', - 'Brussels Midi': '8814001', - 'Lille Europe': '8722326', - 'Amsterdam Centraal': '8400058', - 'Rotterdam Centraal': '8400530', - 'Cologne Hbf': '8015458', + "Paris Gare du Nord": "8727100", + "Brussels Midi": "8814001", + "Lille Europe": "8722326", + "Amsterdam Centraal": "8400058", + "Rotterdam Centraal": "8400530", + "Cologne Hbf": "8015458", } -_GATEWAY_URL = 'https://site-api.eurostar.com/gateway' +_GATEWAY_URL = "https://site-api.eurostar.com/gateway" # Query requesting timing, train identity, and Standard fare price + seats. # Variable names and argument names match the site's own query so the @@ -35,13 +38,13 @@ _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' _GQL_QUERY = ( "query NewBookingSearch(" "$origin:String!,$destination:String!,$outbound:String!," - "$currency:Currency!,$adult:Int," + "$inbound:String,$currency:Currency!,$adult:Int," "$filteredClassesOfService:[ClassOfServiceEnum]" "){" "journeySearch(" - "outboundDate:$outbound origin:$origin destination:$destination" + "outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination" " adults:$adult currency:$currency" - " productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\"" + ' productFamilies:["PUB"] contractCode:"EIL_ALL"' " adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0" " infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0" " wheelchairCompanions:0 nonWheelchairCompanions:0" @@ -64,28 +67,59 @@ _GQL_QUERY = ( "}" "}" "}" + "inbound{" + "journeys(" + "hideIndirectTrainsWhenDisruptedAndCancelled:false" + " hideDepartedTrains:true" + " hideExternalCarrierTrains:true" + " hideDirectExternalCarrierTrains:true" 
+ "){" + "timing{departureTime:departs arrivalTime:arrives}" + "fares(filteredClassesOfService:$filteredClassesOfService){" + "classOfService{code}" + "prices{displayPrice}" + "seats " + "legs{serviceName serviceType{code}}" + "}" + "}" + "}" "}" "}" ) -_STANDARD = 'STANDARD' -_STANDARD_PLUS = 'PLUS' +_STANDARD = "STANDARD" +_STANDARD_PLUS = "PLUS" -def search_url(destination: str, travel_date: str) -> str: +def search_url( + destination: str, + travel_date: str, + direction: str = "outbound", + return_date: str | None = None, +) -> str: dest_id = DESTINATION_STATION_IDS[destination] + origin = ST_PANCRAS_STATION_ID + destination_id = dest_id + outbound = travel_date + inbound = return_date + if direction == "inbound": + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + inbound = None return ( - f'https://www.eurostar.com/search/uk-en' - f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}' + f"https://www.eurostar.com/search/uk-en" + f"?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}" + + (f"&inbound={inbound}" if inbound else "") ) def _generate_cid() -> str: chars = string.ascii_letters + string.digits - return 'SRCH-' + ''.join(random.choices(chars, k=22)) + return "SRCH-" + "".join(random.choices(chars, k=22)) -def _parse_graphql(data: dict, destination: str) -> list[dict]: +def _parse_journeys( + journeys: list[dict[str, Any]], destination: str, direction: str +) -> list[dict[str, Any]]: """ Parse a NewBookingSearch GraphQL response into a list of service dicts. @@ -96,43 +130,108 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: connecting trains); we keep the entry with the earliest arrival. Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329'). 
""" - best: dict[str, dict] = {} - journeys = data['data']['journeySearch']['outbound']['journeys'] + best: dict[str, dict[str, Any]] = {} for journey in journeys: - dep = journey['timing']['departureTime'] - arr = journey['timing']['arrivalTime'] + dep = journey["timing"]["departureTime"] + arr = journey["timing"]["arrivalTime"] std_price = std_seats = plus_price = plus_seats = None - train_number = '' - for fare in (journey.get('fares') or []): - cos = fare['classOfService']['code'] - p = fare.get('prices') - price = float(p['displayPrice']) if p and p.get('displayPrice') else None - seats = fare.get('seats') + train_number = "" + for fare in journey.get("fares") or []: + cos = fare["classOfService"]["code"] + p = fare.get("prices") + price = float(p["displayPrice"]) if p and p.get("displayPrice") else None + seats = fare.get("seats") if not train_number: - legs = fare.get('legs') or [] - train_number = ' + '.join( + legs = fare.get("legs") or [] + train_number = " + ".join( f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}" - for leg in legs if leg.get('serviceName') + for leg in legs + if leg.get("serviceName") ) if cos == _STANDARD: std_price, std_seats = price, seats elif cos == _STANDARD_PLUS: plus_price, plus_seats = price, seats - if dep not in best or arr < best[dep]['arrive_destination']: - best[dep] = { - 'depart_st_pancras': dep, - 'arrive_destination': arr, - 'destination': destination, - 'train_number': train_number, - 'price': std_price, - 'seats': std_seats, - 'plus_price': plus_price, - 'plus_seats': plus_seats, + if direction == "inbound": + service = { + "depart_destination": dep, + "arrive_st_pancras": arr, + "destination": destination, + "train_number": train_number, + "price": std_price, + "seats": std_seats, + "plus_price": plus_price, + "plus_seats": plus_seats, } - return sorted(best.values(), key=lambda s: s['depart_st_pancras']) + key = dep + arrive_key = "arrive_st_pancras" + else: + service = { + 
"depart_st_pancras": dep, + "arrive_destination": arr, + "destination": destination, + "train_number": train_number, + "price": std_price, + "seats": std_seats, + "plus_price": plus_price, + "plus_seats": plus_seats, + } + key = dep + arrive_key = "arrive_destination" + if key not in best or arr < best[key][arrive_key]: + best[key] = service + sort_key = "depart_destination" if direction == "inbound" else "depart_st_pancras" + return sorted(best.values(), key=lambda s: s[sort_key]) -def fetch(destination: str, travel_date: str) -> list[dict]: +def _parse_graphql(data: dict[str, Any], destination: str) -> list[dict[str, Any]]: + journeys = data["data"]["journeySearch"]["outbound"]["journeys"] + return _parse_journeys(journeys, destination, "outbound") + + +def _parse_graphql_leg( + data: dict[str, Any], destination: str, leg: str, direction: str +) -> list[dict[str, Any]]: + journeys = data["data"]["journeySearch"][leg]["journeys"] + return _parse_journeys(journeys, destination, direction) + + +def _payload( + origin: str, destination_id: str, outbound: str, inbound: str | None = None +) -> dict[str, Any]: + variables: dict[str, Any] = { + "origin": origin, + "destination": destination_id, + "outbound": outbound, + "inbound": inbound, + "currency": "GBP", + "adult": 1, + "filteredClassesOfService": [_STANDARD, _STANDARD_PLUS], + } + return { + "operationName": "NewBookingSearch", + "variables": variables, + "query": _GQL_QUERY, + } + + +def _headers() -> dict[str, str]: + return { + "User-Agent": DEFAULT_UA, + "Content-Type": "application/json", + "Accept": "*/*", + "Accept-Language": "en-GB", + "Referer": "https://www.eurostar.com/", + "x-platform": "web", + "x-market-code": "uk", + "x-source-url": "search-app/", + "cid": _generate_cid(), + } + + +def fetch( + destination: str, travel_date: str, direction: str = "outbound" +) -> list[dict[str, Any]]: """ Return all Eurostar services for destination on travel_date. 
@@ -140,29 +239,34 @@ def fetch(destination: str, travel_date: str) -> list[dict]: train_number) plus pricing (price, seats) from a single GraphQL call. """ dest_id = DESTINATION_STATION_IDS[destination] - headers = { - 'User-Agent': DEFAULT_UA, - 'Content-Type': 'application/json', - 'Accept': '*/*', - 'Accept-Language':'en-GB', - 'Referer': 'https://www.eurostar.com/', - 'x-platform': 'web', - 'x-market-code': 'uk', - 'x-source-url': 'search-app/', - 'cid': _generate_cid(), - } - payload = { - 'operationName': 'NewBookingSearch', - 'variables': { - 'origin': ORIGIN_STATION_ID, - 'destination': dest_id, - 'outbound': travel_date, - 'currency': 'GBP', - 'adult': 1, - 'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS], - }, - 'query': _GQL_QUERY, - } - resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) + if direction == "inbound": + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + else: + origin, destination_id = ST_PANCRAS_STATION_ID, dest_id + resp = requests.post( + _GATEWAY_URL, + json=_payload(origin, destination_id, travel_date), + headers=_headers(), + timeout=20, + ) resp.raise_for_status() - return _parse_graphql(resp.json(), destination) + leg_direction = "inbound" if direction == "inbound" else "outbound" + return _parse_graphql_leg(resp.json(), destination, "outbound", leg_direction) + + +def fetch_return( + destination: str, outbound_date: str, return_date: str +) -> dict[str, list[dict[str, Any]]]: + dest_id = DESTINATION_STATION_IDS[destination] + resp = requests.post( + _GATEWAY_URL, + json=_payload(ST_PANCRAS_STATION_ID, dest_id, outbound_date, return_date), + headers=_headers(), + timeout=20, + ) + resp.raise_for_status() + data = resp.json() + return { + "outbound": _parse_graphql_leg(data, destination, "outbound", "outbound"), + "inbound": _parse_graphql_leg(data, destination, "inbound", "inbound"), + } diff --git a/scraper/gwr_fares.py b/scraper/gwr_fares.py index c36b0e9..3aa3f11 100644 --- 
a/scraper/gwr_fares.py +++ b/scraper/gwr_fares.py @@ -6,6 +6,8 @@ Returns per-train cheapest standard-class fare with restrictions already applied Cache for 30 days — fares rarely change. """ +from typing import Any, Generator + import httpx _API_URL = "https://api.gwr.com/api/shopping/journeysearch" @@ -16,7 +18,7 @@ _WALKON_CODES = {"SSS", "SVS", "SDS", "CDS"} _MAX_PAGES = 20 -def _headers() -> dict: +def _headers() -> dict[str, str]: return { "user-agent": ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " @@ -32,11 +34,12 @@ def _headers() -> dict: def _request_body( - station_crs: str, + from_code: str, + to_code: str, travel_date: str, conversation_token: str | None, later: bool, -) -> dict: +) -> dict[str, Any]: return { "IsNextOutward": False, "IsPreviousOutward": False, @@ -44,8 +47,8 @@ def _request_body( "IsPreviousReturn": False, "campaignCode": "", "validationCode": "", - "locfrom": f"GB{station_crs}", - "locto": _PAD_CODE, + "locfrom": from_code, + "locto": to_code, "datetimedepart": f"{travel_date}T00:00:00", "outwarddepartafter": True, "datetimereturn": None, @@ -67,7 +70,22 @@ def _request_body( } -def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): +def _station_code(station_crs: str) -> str: + return f"GB{station_crs}" + + +def _od_codes(station_crs: str, direction: str) -> tuple[str, str]: + if direction == "from_paddington": + return _PAD_CODE, _station_code(station_crs) + return _station_code(station_crs), _PAD_CODE + + +def _run_pages( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +) -> Generator[tuple[str, list[Any]], None, None]: """ Iterate all pages of GWR journey search results. 
@@ -78,14 +96,17 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): with httpx.Client(headers=_headers(), timeout=30) as client: conversation_token = None later = False + from_code, to_code = _od_codes(station_crs, direction) for _ in range(_MAX_PAGES): - body = _request_body(station_crs, travel_date, conversation_token, later) + body = _request_body( + from_code, to_code, travel_date, conversation_token, later + ) if first_class: body["firstclass"] = True body["standardclass"] = False resp = client.post(_API_URL, json=body) resp.raise_for_status() - data = resp.json().get("data", {}) + data = resp.json().get("data") or {} conversation_token = data.get("conversationToken") for journey in data.get("outwardOpenPureReturnFare", []): dep_iso = journey.get("departureTime", "") @@ -99,16 +120,59 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): later = True -def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: +def _run_pages_batched( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +) -> Generator[list[tuple[str, list[Any]]], None, None]: """ - Fetch GWR walk-on single fares from station_crs to London Paddington on travel_date. + Like _run_pages but yields one list of (dep_time, fares_list) per API page call, + allowing callers to stream results a page at a time. 
+ """ + seen: set[str] = set() + with httpx.Client(headers=_headers(), timeout=30) as client: + conversation_token = None + later = False + from_code, to_code = _od_codes(station_crs, direction) + for _ in range(_MAX_PAGES): + body = _request_body( + from_code, to_code, travel_date, conversation_token, later + ) + if first_class: + body["firstclass"] = True + body["standardclass"] = False + resp = client.post(_API_URL, json=body) + resp.raise_for_status() + data = resp.json().get("data") or {} + conversation_token = data.get("conversationToken") + batch = [] + for journey in data.get("outwardOpenPureReturnFare", []): + dep_iso = journey.get("departureTime", "") + dep_time = dep_iso[11:16] + if not dep_time or dep_time in seen: + continue + seen.add(dep_time) + batch.append((dep_time, journey.get("journeyFareDetails", []))) + if batch: + yield batch + if not data.get("showLaterOutward", False): + break + later = True + + +def fetch( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict[str, Any]]: + """ + Fetch GWR walk-on single fares for the selected Paddington direction. Returns {departure_time: {'ticket': name, 'price': float, 'code': code}} where price is in £ and only the cheapest available standard-class walk-on ticket per departure (with restrictions already applied by GWR) is kept. 
""" - result: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date): + result: dict[str, dict[str, Any]] = {} + for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -133,7 +197,9 @@ def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: return result -def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: +def fetch_advance( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict[str, Any]]: """ Fetch advance fares: cheapest standard advance and first-class advance per departure. @@ -141,8 +207,10 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: Returns {departure_time: {'advance_std': dict or None, 'advance_1st': dict or None}} where each sub-dict has keys 'ticket', 'price', 'code'. """ - std_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=False): + std_advance: dict[str, dict[str, Any]] = {} + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=False, direction=direction + ): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -165,8 +233,10 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: "code": cheapest["code"], } - first_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=True): + first_advance: dict[str, dict[str, Any]] = {} + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=True, direction=direction + ): cheapest = None for fare in fares: price_pence = fare.get("fare", 0) @@ -192,3 +262,75 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: } for t in all_times } + + +def fetch_advance_streaming( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> Generator[dict[str, dict[str, Any]], 
None, None]: + """ + Generator yielding partial advance fare dicts one GWR API page at a time. + + Each yield is {dep_time: {'advance_std': dict|None, 'advance_1st': dict|None}}. + Two passes are made (standard class then first class); each page of results is + yielded immediately so callers can stream prices to clients as they arrive. + """ + # Pass 1: standard class advance fares + for batch in _run_pages_batched( + station_crs, travel_date, first_class=False, direction=direction + ): + page: dict[str, dict[str, Any]] = {} + for dep_time, fares in batch: + cheapest = None + for fare in fares: + code = fare.get("ticketTypeCode") + if code in _WALKON_CODES: + continue + if not fare.get("isStandardClass"): + continue + price_pence = fare.get("fare", 0) + if cheapest is None or price_pence < cheapest["price_pence"]: + cheapest = { + "ticket": fare.get("ticketType", ""), + "price": price_pence / 100, + "price_pence": price_pence, + "code": code, + } + if cheapest: + page[dep_time] = { + "advance_std": { + "ticket": cheapest["ticket"], + "price": cheapest["price"], + "code": cheapest["code"], + }, + "advance_1st": None, + } + if page: + yield page + + # Pass 2: first class advance fares + for batch in _run_pages_batched( + station_crs, travel_date, first_class=True, direction=direction + ): + page = {} + for dep_time, fares in batch: + cheapest = None + for fare in fares: + price_pence = fare.get("fare", 0) + if cheapest is None or price_pence < cheapest["price_pence"]: + cheapest = { + "ticket": fare.get("ticketType", ""), + "price": price_pence / 100, + "price_pence": price_pence, + "code": fare.get("ticketTypeCode"), + } + if cheapest: + page[dep_time] = { + "advance_std": None, + "advance_1st": { + "ticket": cheapest["ticket"], + "price": cheapest["price"], + "code": cheapest["code"], + }, + } + if page: + yield page diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py index 9b5e936..75132f1 100644 --- a/scraper/realtime_trains.py +++ 
b/scraper/realtime_trains.py @@ -1,14 +1,18 @@ """ -Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. +Scrape direct trains between a selected station and London Paddington using +Realtime Trains. Two fetches: BRI/to/PAD → departure times from Bristol (div.time.plan.d) PAD/from/BRI → arrival times at Paddington (div.time.plan.a) Matched by train ID (div.tid). """ + import re +from typing import Any + import httpx -import lxml.html +import lxml.html # type: ignore[import-untyped] _TO_PAD_TMPL = ( "https://www.realtimetrains.co.uk/search/detailed/" @@ -20,6 +24,16 @@ _PAD_FROM_TMPL = ( "gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359" "?stp=WVS&show=pax-calls&order=wtt" ) +_PAD_TO_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) +_FROM_PAD_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) DEFAULT_UA = ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " @@ -27,7 +41,7 @@ DEFAULT_UA = ( ) -def _browser_headers(user_agent: str) -> dict: +def _browser_headers(user_agent: str) -> dict[str, str]: return { "User-Agent": user_agent, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8", @@ -44,7 +58,7 @@ def _browser_headers(user_agent: str) -> dict: def _fmt(hhmm: str) -> str: """Convert '0830' → '08:30'.""" - hhmm = re.sub(r'[^0-9]', '', hhmm) + hhmm = re.sub(r"[^0-9]", "", hhmm) if len(hhmm) == 4: return f"{hhmm[:2]}:{hhmm[2:]}" return hhmm @@ -53,12 +67,12 @@ def _fmt(hhmm: str) -> str: def _parse_services(html: str, time_selector: str) -> dict[str, str]: """Return {train_id: time_string} from a servicelist page.""" root = lxml.html.fromstring(html) - sl = root.cssselect('div.servicelist') + sl = root.cssselect("div.servicelist") if not sl: return {} result = {} - for svc in 
sl[0].cssselect('a.service'): - tid_els = svc.cssselect('div.tid') + for svc in sl[0].cssselect("a.service"): + tid_els = svc.cssselect("div.tid") time_els = svc.cssselect(time_selector) if tid_els and time_els: tid = tid_els[0].text_content().strip() @@ -68,48 +82,91 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]: return result -def _parse_arrivals(html: str) -> dict[str, dict]: - """Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page.""" +def _parse_arrivals(html: str) -> dict[str, dict[str, str]]: + """Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page.""" root = lxml.html.fromstring(html) - sl = root.cssselect('div.servicelist') + sl = root.cssselect("div.servicelist") if not sl: return {} result = {} - for svc in sl[0].cssselect('a.service'): - tid_els = svc.cssselect('div.tid') - time_els = svc.cssselect('div.time.plan.a') + for svc in sl[0].cssselect("a.service"): + tid_els = svc.cssselect("div.tid") + time_els = svc.cssselect("div.time.plan.a") if not (tid_els and time_els): continue time_text = time_els[0].text_content().strip() if not time_text: continue - plat_els = svc.cssselect('div.platform') - platform = plat_els[0].text_content().strip() if plat_els else '' + plat_els = svc.cssselect("div.platform") + platform = plat_els[0].text_content().strip() if plat_els else "" result[tid_els[0].text_content().strip()] = { - 'time': _fmt(time_text), - 'platform': platform, + "time": _fmt(time_text), + "platform": platform, } return result -def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]: - """Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}].""" +def fetch( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI" +) -> list[dict[str, Any]]: + """Fetch trains from station_crs to PAD.""" headers = _browser_headers(user_agent) with httpx.Client(headers=headers, 
follow_redirects=True, timeout=30) as client: r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date)) r_pad = client.get(_PAD_FROM_TMPL.format(crs=station_crs, date=date)) - departures = _parse_services(r_bri.text, 'div.time.plan.d') - arrivals = _parse_arrivals(r_pad.text) + departures = _parse_services(r_bri.text, "div.time.plan.d") + arrivals = _parse_arrivals(r_pad.text) trains = [ { - 'depart_bristol': dep, - 'arrive_paddington': arrivals[tid]['time'], - 'arrive_platform': arrivals[tid]['platform'], - 'headcode': tid, + "depart_bristol": dep, + "arrive_paddington": arrivals[tid]["time"], + "arrive_platform": arrivals[tid]["platform"], + "headcode": tid, } for tid, dep in departures.items() if tid in arrivals ] - return sorted(trains, key=lambda t: t['depart_bristol']) + return sorted(trains, key=lambda t: t["depart_bristol"]) + + +def fetch_to_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI" +) -> list[dict[str, Any]]: + """Fetch trains from station_crs to PAD using generic field names.""" + return [ + { + **train, + "depart_origin": train["depart_bristol"], + "arrive_paddington": train["arrive_paddington"], + "arrive_platform": train.get("arrive_platform", ""), + "headcode": train.get("headcode", ""), + } + for train in fetch(date, user_agent, station_crs) + ] + + +def fetch_from_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = "BRI" +) -> list[dict[str, Any]]: + """Fetch trains from PAD to station_crs.""" + headers = _browser_headers(user_agent) + with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: + r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date)) + r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date)) + + departures = _parse_services(r_pad.text, "div.time.plan.d") + arrivals = _parse_arrivals(r_station.text) + + trains = [ + { + "depart_paddington": dep, + "arrive_destination": arrivals[tid]["time"], + "arrive_platform": 
arrivals[tid]["platform"], + "headcode": tid, + } + for tid, dep in departures.items() + if tid in arrivals + ] + return sorted(trains, key=lambda t: t["depart_paddington"]) diff --git a/static/favicon.svg b/static/favicon.svg new file mode 100644 index 0000000..f3dacde --- /dev/null +++ b/static/favicon.svg @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/templates/base.html b/templates/base.html index 00e6724..5aeafa9 100644 --- a/templates/base.html +++ b/templates/base.html @@ -95,6 +95,10 @@ gap: 0.75rem; } + .destination-grid--eurostar { + grid-template-columns: repeat(6, minmax(0, 1fr)); + } + .destination-option { position: relative; } @@ -110,7 +114,7 @@ .destination-option label { display: block; min-height: 100%; - padding: 0.95rem 1rem; + padding: 0.8rem 0.85rem; border: 1px solid #cbd5e0; border-radius: 10px; background: linear-gradient(180deg, #ffffff 0%, #f8fbff 100%); @@ -121,7 +125,7 @@ .destination-option label strong { display: block; color: #0f172a; - font-size: 1rem; + font-size: 0.98rem; margin-bottom: 0.2rem; } @@ -182,10 +186,51 @@ } @media (max-width: 640px) { - .card { - padding: 1.25rem; + .card { padding: 1.25rem; } + + .destination-grid--eurostar { + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); } - .col-transfer { display: none; } + + /* Convert results table to a 2-column card layout per row */ + .results-table, .results-table tbody { display: block; } + .results-table thead { display: none; } + .results-table tr { + display: grid; + grid-template-columns: 1fr 1fr; + border-bottom: 2px solid #e2e8f0; + padding: 0.1rem 0; + } + .results-table td { padding: 0.35rem 0.45rem; font-size: 0.8rem; border-bottom: none; } + + /* First journey leg (NR outbound / Eurostar inbound) */ + .results-table td:nth-child(1) { grid-column: 1; grid-row: 1; } + /* Transfer column: hidden */ + .col-transfer { display: none !important; } + /* Second journey leg */ + .results-table td:nth-child(3) { grid-column: 2; grid-row: 1; } + /* 
Total: spans both columns, right-aligned */ + .results-table td:nth-child(4) { + grid-column: 1 / -1; grid-row: 2; + text-align: right; + border-top: 1px solid #e2e8f0; + padding: 0.25rem 0.45rem 0.3rem; + } + + /* Hide non-essential detail on mobile */ + .mobile-hide { display: none !important; } + .fare-seats { display: none !important; } + + /* Show connection time hint */ + .mobile-conn { display: block !important; } + + /* Flow arrow: hide on mobile */ + .results-table thead th.flow-step::after { display: none; } + .results-table thead th.flow-step { padding-right: 0; } + + /* Selection bar: smaller on mobile */ + #selection-bar { padding: 0.5rem 0.75rem; font-size: 0.8rem; } + .sel-totals { gap: 0.75rem; } } a { color: #00539f; } @@ -196,6 +241,22 @@ /* Form groups */ .form-group { margin-bottom: 1.2rem; } .form-group-lg { margin-bottom: 1.5rem; } + .form-row { + display: grid; + grid-template-columns: repeat(2, minmax(0, 1fr)); + gap: 1rem; + margin-bottom: 1.5rem; + } + .form-row .form-group, + .form-row .form-group-lg { + margin-bottom: 0; + } + @media (max-width: 640px) { + .form-row { + grid-template-columns: 1fr; + } + } + #return-date-group:has(input:disabled) { cursor: pointer; } /* Buttons */ .btn-primary { @@ -246,6 +307,7 @@ /* Results page layout */ .back-link { margin-bottom: 1rem; } .date-nav { display: flex; align-items: center; gap: 0.75rem; margin-bottom: 0.5rem; } + .date-nav-label { min-width: 6rem; font-weight: 600; font-size: 0.9rem; } .switcher-section { margin: 0.9rem 0 1rem; } .section-label { font-size: 0.9rem; font-weight: 600; margin-bottom: 0.45rem; } .filter-row { margin-top: 0.75rem; display: flex; gap: 1.5rem; align-items: center; flex-wrap: wrap; } @@ -269,11 +331,47 @@ .results-table th, .results-table td { padding: 0.6rem 0.8rem; } .results-table thead tr { border-bottom: 2px solid #e2e8f0; text-align: left; } + .results-table th { position: sticky; top: 0; background: #fff; z-index: 1; } .results-table tbody tr { 
border-bottom: 1px solid #e2e8f0; } .row-fast { background: #f0fff4; } .row-slow { background: #fff5f5; } .row-alt { background: #f7fafc; } .row-unreachable { background: #f7fafc; color: #a0aec0; } + .row-selected { background: #ebf8ff !important; } + tr.row-selectable { cursor: pointer; } + tr.row-selectable:hover:not(.row-selected) { filter: brightness(0.97); } + + /* Journey flow arrow between column headers */ + .results-table thead th.flow-step { position: relative; padding-right: 1.4rem; } + .results-table thead th.flow-step::after { + content: '›'; + position: absolute; right: 0.2rem; top: 50%; transform: translateY(-50%); + color: #cbd5e0; font-size: 1.5rem; font-weight: 300; line-height: 1; + } + + /* Mobile: hidden by default, shown on mobile */ + .mobile-conn { display: none; } + .fare-seats { display: inline; } + + /* Selection summary bar */ + #selection-bar { + display: none; position: fixed; bottom: 0; left: 0; right: 0; + background: #fff; border-top: 2px solid #00539f; + padding: 0.65rem 1rem; + box-shadow: 0 -2px 10px rgba(0,0,0,0.12); z-index: 200; + font-size: 0.88rem; + } + .sel-bar-inner { + max-width: 1100px; margin: 0 auto; + display: flex; justify-content: space-between; align-items: center; + flex-wrap: wrap; gap: 0.5rem; + } + .sel-totals { display: flex; gap: 1.25rem; align-items: center; flex-wrap: wrap; } + .sel-clear { + background: none; border: 1px solid #cbd5e0; border-radius: 4px; + padding: 0.2rem 0.6rem; font-size: 0.8rem; cursor: pointer; color: #718096; + } + .sel-clear:hover { background: #f0f4f8; } /* Empty state */ .empty-state { color: #4a5568; text-align: center; padding: 3rem 2rem; } @@ -281,6 +379,51 @@ .empty-state p:first-child { font-size: 1.1rem; margin-bottom: 0.5rem; } .empty-state p:last-child { font-size: 0.9rem; } + /* Ticket class button group */ + .btn-group { display: inline-flex; border: 1px solid #cbd5e0; border-radius: 4px; overflow: hidden; vertical-align: middle; } + .btn-group-option { padding: 0.28rem 
0.65rem; font-size: 0.82rem; background: #fff; border: none; border-right: 1px solid #cbd5e0; cursor: pointer; color: #374151; white-space: nowrap; } + .btn-group-option:last-child { border-right: none; } + .btn-group-option.active { background: #00539f; color: #fff; font-weight: 600; } + .btn-group-option:hover:not(.active) { background: #f0f4f8; } + + /* Flash animation for total price */ + @keyframes price-flash { 0%,100% { background-color: transparent; } 40% { background-color: #fef08a; } } + .price-flash { animation: price-flash 0.7s ease-out; border-radius: 3px; } + + /* Loading state */ + #advance-loading { font-size: 0.82rem; color: #718096; margin-left: 0.5rem; } + .loading-panel { + display: flex; + gap: 1rem; + align-items: flex-start; + margin-top: 1rem; + padding: 1rem; + border: 1px solid #cbd5e0; + border-radius: 6px; + background: #f8fbff; + } + .loading-panel p { margin: 0.25rem 0 0; } + .spinner { + width: 1.5rem; + height: 1.5rem; + border: 3px solid #cbd5e0; + border-top-color: #00539f; + border-radius: 50%; + flex: 0 0 auto; + animation: spin 0.8s linear infinite; + } + .spinner-inline { + width: 0.85rem; + height: 0.85rem; + border-width: 2px; + margin-right: 0.35rem; + } + @keyframes spin { to { transform: rotate(360deg); } } + + /* Fare lines — show all, dim inactive */ + .fare-line { display: block; line-height: 1.6; transition: opacity 0.15s; } + .fare-inactive { opacity: 0.4; } + /* Utilities */ .text-muted { color: #718096; } .text-dimmed { color: #a0aec0; } diff --git a/templates/index.html b/templates/index.html index 4e7140b..8d29dfe 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,8 +1,75 @@ {% extends "base.html" %} {% block content %} + +

Plan your journey

-
+ +
+
+ Journey type +
+
+ + +
+
+ + +
+
+ + +
+
+
+
Eurostar destination -
- {% for slug, name in destinations.items() %} - {% set city = name.replace(' Gare du Nord', '').replace(' Centraal', '').replace(' Midi', '').replace(' Europe', '') %} +
+ {% for destination in destination_options %}
- - + +
{% endfor %}
+
- - + Travel dates +
+
+ +
+ +
+
+
+
+ + +
-
- - +
+
+ + +
+ +
+ + +
-
- - -
- - + + +
{% endblock %} diff --git a/templates/results.html b/templates/results.html index 92642d0..7f3da71 100644 --- a/templates/results.html +++ b/templates/results.html @@ -1,8 +1,8 @@ {% extends "base.html" %} -{% block title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} -{% block og_title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} +{% block title %}{% if journey_type == 'inbound' %}{{ destination }} to {{ departure_station_name }} via Eurostar{% elif journey_type == 'return' %}{{ departure_station_name }} to {{ destination }} return via Eurostar{% else %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endif %}{% endblock %} +{% block og_title %}{{ self.title()|trim }}{% endblock %} {% block og_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} -{% block twitter_title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} +{% block twitter_title %}{{ self.title()|trim }}{% endblock %} {% block twitter_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} {% block content %} @@ -12,25 +12,46 @@

- {{ departure_station_name }} → {{ destination }} + {% if journey_type == 'inbound' %} + {{ destination }} → {{ departure_station_name }} + {% elif journey_type == 'return' %} + {{ departure_station_name }} ↔ {{ destination }} + {% else %} + {{ departure_station_name }} → {{ destination }} + {% endif %}

+ {% if journey_type == 'return' %}
- Outbound: + ← Prev + {{ travel_date_display }} + Next → +
+
+ Return: + ← Prev + {{ return_date_display }} + Next → +
+ {% else %} +
+ ← Prev {{ travel_date_display }} - Next →
+ {% endif %}
- {% for destination_slug, destination_name in destinations.items() %} + {% for destination_slug, destination_name, destination_url in destination_links %} {% if destination_slug == slug %} {{ destination_name }} {% else %} {{ destination_name }} {% endif %} {% endfor %} @@ -38,48 +59,645 @@
- - {% for mins in valid_min_connections %} {% endfor %}
- - {% for mins in valid_max_connections %} {% endfor %}
+ {% if journey_type == 'return' %} + {% for section in sections %} +
+ {{ 'Outbound' if section.direction == 'outbound' else 'Return' }}: + {% if section.direction == 'inbound' %} +
+ + +
+ {% endif %} +
+ NR: +
+ + + +
+
+
+ Eurostar: +
+ + +
+
+
+ {% endfor %} +
+ Loading fares + +
+ {% else %} + {% set section = sections[0] %} +
+
+ NR ticket: + + Load advance prices +
+ + + +
+ Loading fares + +
+
+ Eurostar: + +
+ + +
+
+
+ {% endif %}

- {{ gwr_count }} GWR service{{ 's' if gwr_count != 1 }} -  ·  - {{ eurostar_count }} Eurostar service{{ 's' if eurostar_count != 1 }} + {% if journey_type == 'return' %} + {% for section in sections %} + {% if section.direction == 'outbound' %}Outbound{% else %}Return{% endif %}: + {{ section.gwr_count }} National Rail service{{ 's' if section.gwr_count != 1 }}, + {{ section.eurostar_count }} Eurostar service{{ 's' if section.eurostar_count != 1 }} + {% if not loop.last %} · {% endif %} + {% endfor %} + {% else %} + {{ gwr_count }} National Rail service{{ 's' if gwr_count != 1 }} +  ·  + {{ eurostar_count }} Eurostar service{{ 's' if eurostar_count != 1 }} + {% endif %} {% if from_cache %}  ·  (cached) {% endif %} + {% if provisional_timetable %} +  ·  checking exact timetable + {% endif %}

{% if error %}
@@ -93,301 +711,41 @@ {% endif %}
-{% if trips or unreachable_morning_services %} -
- {% if trips %} -
-
- - -
-
- - -
- -
- {% endif %} - - - - - - - - - - - {% if trips %} - {% set best_mins = trips | map(attribute='total_minutes') | min %} - {% set worst_mins = trips | map(attribute='total_minutes') | max %} - {% endif %} - {% for row in result_rows %} - {% if row.row_type == 'trip' and row.total_minutes <= best_mins + 5 and trips | length > 1 %} - {% set row_class = 'row-fast' %} - {% elif row.row_type == 'trip' and row.total_minutes >= worst_mins - 5 and trips | length > 1 %} - {% set row_class = 'row-slow' %} - {% elif row.row_type == 'unreachable' %} - {% set row_class = 'row-unreachable' %} - {% elif loop.index is odd %} - {% set row_class = 'row-alt' %} - {% else %} - {% set row_class = '' %} - {% endif %} - - {% if row.row_type == 'trip' %} - - - - - {% else %} - - - - - {% endif %} - - {% endfor %} - -
National Rail
{{ departure_station_name }} → Paddington
Transfer
Paddington → St Pancras
Eurostar
St Pancras → {{ destination }}
Total
- {{ row.depart_bristol }} → {{ row.arrive_paddington }} - ({{ row.gwr_duration }}) - {% if row.headcode or row.arrive_platform %} -
- {%- if row.headcode %}{{ row.headcode }}{% endif %} - {%- if row.headcode and row.arrive_platform %} · {% endif %} - {%- if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} - - {% endif %} - {% if row.ticket_price is not none %} -
£{{ "%.2f"|format(row.ticket_price) }} - {{ row.ticket_name }} - {% else %} -
- {% endif %} - - -
- {{ row.connection_duration }}{% if row.connection_minutes < 80 %} ⚠️{% endif %} - {% if row.circle_services %} - {% set c = row.circle_services[0] %} -
Circle {{ c.depart }} → KX {{ c.arrive_kx }} · £{{ "%.2f"|format(c.fare) }} - {% if row.circle_services | length > 1 %} - {% set c2 = row.circle_services[1] %} -
next {{ c2.depart }} → KX {{ c2.arrive_kx }} · £{{ "%.2f"|format(c2.fare) }} - {% endif %} - {% endif %} -
- {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) - {% if row.eurostar_duration or row.train_number %} -
- {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} - {%- if row.eurostar_duration and row.train_number %} · {% endif %} - {%- if row.train_number %}{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} - - {% endif %} - {% if row.eurostar_price is not none %} -
£{{ "%.2f"|format(row.eurostar_price) }} - Std{% if row.eurostar_seats is not none %} · {{ row.eurostar_seats }}{% endif %} - {% else %} -
- {% endif %} - {% if row.eurostar_plus_price is not none %} -
£{{ "%.2f"|format(row.eurostar_plus_price) }} - Plus{% if row.eurostar_plus_seats is not none %} · {{ row.eurostar_plus_seats }}{% endif %} - {% endif %} -
- {% if row.total_minutes <= best_mins + 5 and trips | length > 1 %} - {{ row.total_duration }} ⚡ - {% elif row.total_minutes >= worst_mins - 5 and trips | length > 1 %} - {{ row.total_duration }} 🐢 - {% else %} - {{ row.total_duration }} - {% endif %} - - - Too early - - {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) - {% if row.eurostar_duration or row.train_number %} -
- {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} - {%- if row.eurostar_duration and row.train_number %} · {% endif %} - {%- if row.train_number %}{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} - - {% endif %} - {% if row.eurostar_price is not none %} -
£{{ "%.2f"|format(row.eurostar_price) }} - Std{% if row.eurostar_seats is not none %} · {{ row.eurostar_seats }}{% endif %} - {% else %} -
- {% endif %} - {% if row.eurostar_plus_price is not none %} -
£{{ "%.2f"|format(row.eurostar_plus_price) }} - Plus{% if row.eurostar_plus_seats is not none %} · {{ row.eurostar_plus_seats }}{% endif %} - {% endif %} -
-
+{% if sections %} + {% for section in sections %} + {% include "results_section.html" %} + {% endfor %} -

- Paddington → St Pancras connection: {{ min_connection }}–{{ max_connection }} min. - GWR walk-on single prices from - gwr.com. - Eurostar Standard and Plus prices are for 1 adult in GBP; always check - eurostar.com to book. -  ·  - {{ departure_station_name }} departures on RTT -  ·  - Paddington arrivals on RTT -

- - -{% else %} -
-

No valid journeys found.

-

- {% if gwr_count == 0 and eurostar_count == 0 %} - Could not retrieve train data. Check your network connection or try again. - {% elif gwr_count == 0 %} - No GWR trains found for this date. - {% elif eurostar_count == 0 %} - No Eurostar services found for {{ destination }} on this date. - {% else %} - No GWR + Eurostar combination has at least a {{ min_connection }}-minute connection at Paddington/St Pancras. - {% endif %} +

+ Connection windows: + {% for section in sections %} + {% if section.direction == 'inbound' %}return{% else %}outbound{% endif %} + {{ section.min_connection }}–{{ section.max_connection }} min{% if not loop.last %}; {% endif %} + {% endfor %}. + National Rail prices from gwr.com. + Eurostar prices are for 1 adult in GBP; return searches use Eurostar return-search prices. + Always check eurostar.com to book. +  ·  + {{ departure_station_name }} on RTT +  ·  + Paddington on RTT

-
{% endif %} +
+
+
+ + +
+
+ + + + +
+
+
+ {% endblock %} diff --git a/templates/results_loading.html b/templates/results_loading.html new file mode 100644 index 0000000..4f20c12 --- /dev/null +++ b/templates/results_loading.html @@ -0,0 +1,118 @@ +{% extends "base.html" %} +{% block title %}{% if journey_type == 'inbound' %}{{ destination }} to {{ departure_station_name }} via Eurostar{% elif journey_type == 'return' %}{{ departure_station_name }} to {{ destination }} return via Eurostar{% else %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endif %}{% endblock %} +{% block og_title %}{{ self.title()|trim }}{% endblock %} +{% block og_description %}Train options from {{ departure_station_name }} to {{ destination }} via Paddington, St Pancras, and Eurostar.{% endblock %} +{% block twitter_title %}{{ self.title()|trim }}{% endblock %} +{% block twitter_description %}Train options from {{ departure_station_name }} to {{ destination }} via Paddington, St Pancras, and Eurostar.{% endblock %} +{% block content %} + + + +
+

+ {% if journey_type == 'inbound' %} + {{ destination }} → {{ departure_station_name }} + {% elif journey_type == 'return' %} + {{ departure_station_name }} ↔ {{ destination }} + {% else %} + {{ departure_station_name }} → {{ destination }} + {% endif %} +

+

+ {{ travel_date_display }}{% if return_date_display %} to {{ return_date_display }}{% endif %} +

+
+ +
+ Loading train times and fares +

Fetching National Rail, Eurostar, and fare data. Results will appear here as soon as they are ready.

+
+
+ +
+ + + +{% endblock %} diff --git a/templates/results_section.html b/templates/results_section.html new file mode 100644 index 0000000..be0782b --- /dev/null +++ b/templates/results_section.html @@ -0,0 +1,172 @@ +
+

+ {% if section.direction == 'inbound' %} + Return: {{ destination }} → {{ departure_station_name }} + {% else %} + Outbound: {{ departure_station_name }} → {{ destination }} + {% endif %} +

+

{{ section.date_display }}

+ {% if section.rows %} + + + + {% if section.direction == 'inbound' %} + + + + {% else %} + + + + {% endif %} + + + + + {% set trip_rows = section.rows | selectattr('row_type', 'equalto', 'trip') | list %} + {% if trip_rows %} + {% set best_mins = trip_rows | map(attribute='total_minutes') | min %} + {% set worst_mins = trip_rows | map(attribute='total_minutes') | max %} + {% endif %} + {% for row in section.rows %} + {% if row.row_type == 'trip' and row.total_minutes <= best_mins + 5 and trip_rows | length > 1 %} + {% set row_class = 'row-fast' %} + {% elif row.row_type == 'trip' and row.total_minutes >= worst_mins - 5 and trip_rows | length > 1 %} + {% set row_class = 'row-slow' %} + {% elif row.row_type == 'unreachable' %} + {% set row_class = 'row-unreachable' %} + {% elif loop.index is odd %} + {% set row_class = 'row-alt' %} + {% else %} + {% set row_class = '' %} + {% endif %} + + {% if row.row_type == 'trip' %} + {% if section.direction == 'inbound' %} + + + + {% else %} + + + + {% endif %} + + {% else %} + + + + + {% endif %} + + {% endfor %} + +
Eurostar
{{ destination }} → St Pancras
Transfer
St Pancras → Paddington
National Rail
Paddington → {{ departure_station_name }}
National Rail
{{ departure_station_name }} → Paddington
Transfer
Paddington → St Pancras
Eurostar
St Pancras → {{ destination }}
Total
click row to select
+ (CET) {{ row.depart_destination }} → {{ row.arrive_st_pancras }} (UK) +
check in by {{ row.check_in_by }} + {% if row.eurostar_duration or row.train_number %} +
+ {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} + {%- if row.eurostar_duration and row.train_number %} · {% endif %} + {%- if row.train_number %}{{ row.train_number }}{% endif %} + + {% endif %} + Std{% if row.eurostar_price is not none %} £{{ "%.2f"|format(row.eurostar_price) }}{% endif %} + SP{% if row.eurostar_plus_price is not none %} £{{ "%.2f"|format(row.eurostar_plus_price) }}{% endif %} + then {{ row.connection_duration }} to station{% if row.connection_minutes < 45 %} ⚠️{% endif %} +
+ {{ row.connection_duration }}{% if row.connection_minutes < 45 %} ⚠️{% endif %} + {% if row.circle_services %} + {% if row.circle_services | length > 1 %} + {% set c_early = row.circle_services[0] %} + {% set c = row.circle_services[1] %} +
Circle {{ c_early.depart }} → PAD {{ c_early.arrive_pad }} · £{{ "%.2f"|format(c_early.fare) }} +
next {{ c.depart }} → PAD {{ c.arrive_pad }} + {% else %} + {% set c = row.circle_services[0] %} +
Circle {{ c.depart }} → PAD {{ c.arrive_pad }} · £{{ "%.2f"|format(c.fare) }} + {% endif %} + {% endif %} +
+ {{ row.depart_paddington }} → {{ row.arrive_uk_station }} + ({{ row.gwr_duration }}) + {% if row.headcode or row.arrive_platform %} +
{{ row.headcode }}{% if row.headcode and row.arrive_platform %} · {% endif %}{% if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} + {% endif %} + {% if row.ticket_price is not none %}£{{ "%.2f"|format(row.ticket_price) }}{% endif %} + + +
+ {{ row.depart_bristol }} → {{ row.arrive_paddington }} + ({{ row.gwr_duration }}) + {% if row.headcode or row.arrive_platform %} +
{{ row.headcode }}{% if row.headcode and row.arrive_platform %} · {% endif %}{% if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} + {% endif %} + {% if row.ticket_price is not none %}£{{ "%.2f"|format(row.ticket_price) }}{% endif %} + + + then {{ row.connection_duration }} to Eurostar{% if row.connection_minutes < 80 %} ⚠️{% endif %} +
+ {{ row.connection_duration }}{% if row.connection_minutes < 80 %} ⚠️{% endif %} + {% if row.circle_services %} + {% set c = row.circle_services[0] %} +
Circle {{ c.depart }} → KX {{ c.arrive_kx }} · £{{ "%.2f"|format(c.fare) }} + {% if row.circle_services | length > 1 %} + {% set c2 = row.circle_services[1] %} +
next {{ c2.depart }} → KX {{ c2.arrive_kx }} · £{{ "%.2f"|format(c2.fare) }} + {% endif %} + {% endif %} +
+ {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) + {% if row.eurostar_duration or row.train_number %} +
+ {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} + {%- if row.eurostar_duration and row.train_number %} · {% endif %} + {%- if row.train_number %}{{ row.train_number }}{% endif %} + + {% endif %} + Std{% if row.eurostar_price is not none %} £{{ "%.2f"|format(row.eurostar_price) }}{% endif %} + SP{% if row.eurostar_plus_price is not none %} £{{ "%.2f"|format(row.eurostar_plus_price) }}{% endif %} +
+ {% if row.total_minutes <= best_mins + 5 and trip_rows | length > 1 %} + {{ row.total_duration }} ⚡ + {% elif row.total_minutes >= worst_mins - 5 and trip_rows | length > 1 %} + {{ row.total_duration }} 🐢 + {% else %} + {{ row.total_duration }} + {% endif %} +
+
+ Too early + + {% if section.direction == 'inbound' %} + {{ row.depart_destination }} → {{ row.arrive_st_pancras }} + {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% else %} + {{ row.depart_st_pancras }} → {{ row.arrive_destination }} + {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% endif %} + + +
+ {% else %} +
+

No valid journeys found.

+

+ {% if section.gwr_count == 0 and section.eurostar_count == 0 %} + Could not retrieve train data. Check your network connection or try again. + {% elif section.gwr_count == 0 %} + No National Rail trains found for this date. + {% elif section.eurostar_count == 0 %} + No Eurostar services found for {{ destination }} on this date. + {% else %} + No National Rail + Eurostar combination has a {{ section.min_connection }}-{{ section.max_connection }} minute connection. + {% endif %} +

+
+ {% endif %} +
diff --git a/templates/results_shell.html b/templates/results_shell.html new file mode 100644 index 0000000..b3715c1 --- /dev/null +++ b/templates/results_shell.html @@ -0,0 +1,741 @@ +{% extends "base.html" %} +{% block title %}{% if journey_type == 'inbound' %}{{ destination }} to {{ departure_station_name }} via Eurostar{% elif journey_type == 'return' %}{{ departure_station_name }} to {{ destination }} return via Eurostar{% else %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endif %}{% endblock %} +{% block og_title %}{{ self.title()|trim }}{% endblock %} +{% block og_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} +{% block twitter_title %}{{ self.title()|trim }}{% endblock %} +{% block twitter_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} +{% block content %} + + + +
+

+ {% if journey_type == 'inbound' %} + {{ destination }} → {{ departure_station_name }} + {% elif journey_type == 'return' %} + {{ departure_station_name }} ↔ {{ destination }} + {% else %} + {{ departure_station_name }} → {{ destination }} + {% endif %} +

+ {% if journey_type == 'return' %} +
+ Outbound: + ← Prev + {{ travel_date_display }} + Next → +
+
+ Return: + ← Prev + {{ return_date_display }} + Next → +
+ {% else %} +
+ ← Prev + {{ travel_date_display }} + Next → +
+ {% endif %} +
+ +
+ {% for destination_slug, destination_name, destination_url in destination_links %} + {% if destination_slug == slug %} + {{ destination_name }} + {% else %} + {{ destination_name }} + {% endif %} + {% endfor %} +
+
+
+
+ + +
+
+ + +
+
+ {% if journey_type == 'return' %} + {% for section in sections %} +
+ {{ 'Outbound' if section.direction == 'outbound' else 'Return' }}: + {% if section.direction == 'inbound' %} +
+ + +
+ {% endif %} +
+ NR: +
+ + + +
+
+
+ Eurostar: +
+ + +
+
+
+ {% endfor %} +
+ Loading fares + +
+ {% else %} + {% set section = sections[0] %} +
+
+ NR ticket: + + Load advance prices +
+ + + +
+ Loading fares + +
+
+ Eurostar: + +
+ + +
+
+
+ {% endif %} + +

+ +

+
+
+ +{% for section in sections %} +
+
+ +
Loading {{ 'return' if section.direction == 'inbound' else 'outbound' }} results…
+
+
+{% endfor %} + +

+ Connection windows: + {% for section in sections %} + {% if section.direction == 'inbound' %}return{% else %}outbound{% endif %} + {{ section.min_connection }}–{{ section.max_connection }} min{% if not loop.last %}; {% endif %} + {% endfor %}. + National Rail prices from gwr.com. + Eurostar prices are for 1 adult in GBP; return searches use Eurostar return-search prices. + Always check eurostar.com to book. +  ·  + {{ departure_station_name }} on RTT +  ·  + Paddington on RTT +

+ +
+
+
+ + +
+
+ + + + +
+
+
+ +{% endblock %} diff --git a/tests/test_app.py b/tests/test_app.py index 08b97fd..b484e8b 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -1,337 +1,992 @@ +from datetime import datetime +from typing import Any + import app as app_module +import trip_planner as trip_planner_module + +rtt_scraper: Any = app_module.rtt_scraper # type: ignore[attr-defined] +gwr_fares_scraper: Any = app_module.gwr_fares_scraper # type: ignore[attr-defined] +eurostar_scraper: Any = app_module.eurostar_scraper # type: ignore[attr-defined] +circle_line: Any = trip_planner_module.circle_line # type: ignore[attr-defined] -def _client(): - app_module.app.config['TESTING'] = True +def _client() -> Any: + app_module.app.config["TESTING"] = True return app_module.app.test_client() -def _stub_data(monkeypatch, prices=None, gwr_fares=None): - monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) +def _stub_data(monkeypatch: Any, prices: Any = None, gwr_fares: Any = None) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) monkeypatch.setattr( - app_module.rtt_scraper, - 'fetch', - lambda travel_date, user_agent, station_crs='BRI': [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, ], ) monkeypatch.setattr( - app_module.gwr_fares_scraper, - 'fetch', - lambda station_crs, travel_date: gwr_fares or {'07:00': {'ticket': 'Anytime Day Single', 'price': 138.70, 'code': 'SDS'}}, + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: gwr_fares + or {"07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}}, ) - p = (prices or {}).get('10:01', {}) + p = (prices or 
{}).get("10:01", {}) monkeypatch.setattr( - app_module.eurostar_scraper, - 'fetch', + eurostar_scraper, + "fetch", lambda destination, travel_date: [ { - 'depart_st_pancras': '10:01', - 'arrive_destination': '13:34', - 'destination': destination, - 'train_number': 'ES 9014', - 'price': p.get('price') if isinstance(p, dict) else None, - 'seats': p.get('seats') if isinstance(p, dict) else None, - 'plus_price': p.get('plus_price') if isinstance(p, dict) else None, - 'plus_seats': p.get('plus_seats') if isinstance(p, dict) else None, + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": p.get("price") if isinstance(p, dict) else None, + "seats": p.get("seats") if isinstance(p, dict) else None, + "plus_price": p.get("plus_price") if isinstance(p, dict) else None, + "plus_seats": p.get("plus_seats") if isinstance(p, dict) else None, }, ], ) -def test_index_shows_station_dropdown_and_destination_radios(): +def test_index_shows_station_dropdown_and_destination_radios() -> None: client = _client() - resp = client.get('/') + resp = client.get("/") html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'Departure point' in html - assert 'Bristol Temple Meads' in html + assert "Departure point" in html + assert "Bristol Temple Meads" in html assert 'name="station_crs"' in html - assert html.count('type="radio"') == len(app_module.DESTINATIONS) - assert 'destination-rotterdam' in html + assert html.count('name="destination"') == len(app_module.DESTINATIONS) + assert 'id="dest-rotterdam"' in html + assert "Cologne HbfCologne Hbf" in html -def test_search_redirects_to_results_with_selected_params(): +def test_search_redirects_to_results_with_selected_params() -> None: client = _client() - resp = client.get('/search?destination=rotterdam&travel_date=2026-04-10&min_connection=60&max_connection=120&station_crs=BRI') + resp = client.get( + 
"/search?destination=rotterdam&travel_date=2026-04-10&min_connection=60&max_connection=120&station_crs=BRI" + ) assert resp.status_code == 302 - assert resp.headers['Location'].endswith( - '/results/BRI/rotterdam/2026-04-10?min_connection=60&max_connection=120' + assert resp.headers["Location"].endswith( + "/results/BRI/rotterdam/2026-04-10?min_connection=60&max_connection=120" ) -def test_results_shows_same_day_destination_switcher(monkeypatch): +def test_search_redirects_return_with_return_date() -> None: + client = _client() + + resp = client.get( + "/search?journey_type=return&destination=paris&travel_date=2026-04-10&return_date=2026-04-17&station_crs=BRI" + ) + + assert resp.status_code == 302 + assert resp.headers["Location"].endswith( + "/results/BRI/paris/2026-04-10/return/2026-04-17" + ) + + +def test_nr_weekday_cache_key_includes_timetable_period() -> None: + key = app_module._nr_weekday_cache_key("to_paddington", "BRI", "2026-06-22") + + assert key == "weekday_rtt_to_paddington_BRI_2026-05-17_2026-12-12_mon" + + +def test_results_shows_same_day_destination_switcher(monkeypatch: Any) -> None: _stub_data(monkeypatch) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'Switch destination for Friday 10 April 2026' in html + assert "Switch destination for Friday 10 April 2026" in html assert 'Paris Gare du Nord' in html - assert '/results/BRI/brussels/2026-04-10?min_connection=60&max_connection=120' in html - assert '/results/BRI/rotterdam/2026-04-10?min_connection=60&max_connection=120' in html - assert 'ES 9014' in html + assert ( + "/results/BRI/brussels/2026-04-10?min_connection=60&max_connection=120" + in html + ) + assert ( + "/results/BRI/rotterdam/2026-04-10?min_connection=60&max_connection=120" + in html + ) + assert "ES 
9014" in html -def test_results_title_and_social_meta_include_destination(monkeypatch): - _stub_data(monkeypatch) +def test_results_can_render_from_weekday_timetable_cache(monkeypatch: Any) -> None: + travel_date = "2026-06-22" + cache: dict[str, Any] = { + app_module._nr_weekday_cache_key("to_paddington", "BRI", travel_date): [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + app_module._eurostar_weekday_cache_key( + "outbound", travel_date, "Paris Gare du Nord" + ): [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": "Paris Gare du Nord", + "train_number": "ES 9014", + }, + ], + } + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: cache.get(key)) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda *args, **kwargs: (_ for _ in ()).throw( + AssertionError("should use weekday NR cache") + ), + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda *args, **kwargs: (_ for _ in ()).throw( + AssertionError("should use weekday Eurostar cache") + ), + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda *args, **kwargs: (_ for _ in ()).throw( + AssertionError("should stream prices later") + ), + ) client = _client() - resp = client.get('/results/BRI/lille/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-06-22?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'Bristol Temple Meads to Lille Europe via Eurostar' in html - assert '' in html + assert "07:00 → 08:45" in html + assert "10:01 → 13:34" in html + assert "ES 9014" in html + assert "checking exact timetable" in html + assert "/api/results_refresh/BRI/paris/2026-06-22" in html + assert "refreshFullResults()" in html + assert "window.location.reload()" not in html + assert "Checking Eurostar price" in 
html + assert "Eurostar prices not yet available" not in html + + +def test_eurostar_price_status_distinguishes_sold_out() -> None: + prices = app_module._eurostar_prices_by_row( + "outbound", + "outbound", + [ + { + "depart_st_pancras": "10:01", + "price": None, + "seats": 0, + "plus_price": None, + "plus_seats": None, + } + ], + ) + + assert prices["outbound:10:01"]["es_standard"] is None + assert prices["outbound:10:01"]["es_standard_status"] == "sold_out" + assert prices["outbound:10:01"]["es_plus_status"] == "price_not_returned" + + +def test_results_refresh_reloads_when_exact_timetable_differs(monkeypatch: Any) -> None: + travel_date = "2026-06-22" + cache: dict[str, Any] = { + app_module._nr_weekday_cache_key("to_paddington", "BRI", travel_date): [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + app_module._eurostar_weekday_cache_key( + "outbound", travel_date, "Paris Gare du Nord" + ): [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": "Paris Gare du Nord", + "train_number": "ES 9014", + }, + ], + } + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: cache.get(key)) + monkeypatch.setattr( + app_module, "set_cached", lambda key, data: cache.__setitem__(key, data) + ) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:05", + "arrive_paddington": "08:50", + "headcode": "1A24", + }, + ], + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda *args, **kwargs: (_ for _ in ()).throw( + AssertionError("reload should stop before fare fetch") + ), + ) + client = _client() + 
+ resp = client.get("/api/results_refresh/BRI/paris/2026-06-22") + body = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert '"type": "reload"' in body + assert ( + cache[app_module._nr_exact_cache_key("to_paddington", "BRI", travel_date)][0][ + "depart_bristol" + ] + == "07:05" + ) + assert ( + cache[app_module._nr_weekday_cache_key("to_paddington", "BRI", travel_date)][0][ + "depart_bristol" + ] + == "07:05" + ) + + +def test_results_refresh_streams_prices_when_timetable_matches( + monkeypatch: Any, +) -> None: + travel_date = "2026-06-22" + nr_timetable = [ + {"depart_bristol": "07:00", "arrive_paddington": "08:45", "headcode": "1A23"}, + ] + es_timetable = [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": "Paris Gare du Nord", + "train_number": "ES 9014", + }, + ] + cache: dict[str, Any] = { + app_module._nr_weekday_cache_key( + "to_paddington", "BRI", travel_date + ): nr_timetable, + app_module._eurostar_weekday_cache_key( + "outbound", travel_date, "Paris Gare du Nord" + ): es_timetable, + } + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: cache.get(key)) + monkeypatch.setattr( + app_module, "set_cached", lambda key, data: cache.__setitem__(key, data) + ) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": nr_timetable, + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + **es_timetable[0], + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: { + "07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}, + }, + ) + client = _client() + + resp = client.get("/api/results_refresh/BRI/paris/2026-06-22") + body = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert '"type": "reload"' not in body + assert '"type": "eurostar_prices"' in 
body + assert '"main:10:01"' in body + assert '"price": 59' in body + assert '"type": "walkon_fares"' in body + assert '"price": 138.7' in body + + +def test_results_progressive_shell_loads_without_scraping(monkeypatch: Any) -> None: + def fail_fetch(*args: Any, **kwargs: Any) -> None: + raise AssertionError("progressive shell should not fetch data") + + monkeypatch.setattr(rtt_scraper, "fetch", fail_fetch) + monkeypatch.setattr(eurostar_scraper, "fetch", fail_fetch) + monkeypatch.setattr(gwr_fares_scraper, "fetch", fail_fetch) + client = _client() + + resp = client.get("/results/BRI/paris/2026-04-10?progressive=1") + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert "Loading train times and fares" in html + assert "render=full" in html + + +def test_return_progressive_shell_formats_return_date(monkeypatch: Any) -> None: + def fail_fetch(*args: Any, **kwargs: Any) -> None: + raise AssertionError("progressive shell should not fetch data") + + monkeypatch.setattr(rtt_scraper, "fetch", fail_fetch) + monkeypatch.setattr(eurostar_scraper, "fetch_return", fail_fetch) + monkeypatch.setattr(gwr_fares_scraper, "fetch", fail_fetch) + client = _client() + + resp = client.get("/results/BRI/paris/2026-04-10/return/2026-04-17?progressive=1") + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert "Friday 10 April 2026 to Friday 17 April 2026" in html + assert "to 2026-04-17" not in html + + +def test_results_title_and_social_meta_include_destination(monkeypatch: Any) -> None: + _stub_data(monkeypatch) + client = _client() + + resp = client.get( + "/results/BRI/lille/2026-04-10?min_connection=60&max_connection=120" + ) + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert "Bristol Temple Meads to Lille Europe via Eurostar" in html + assert ( + '' + in html + ) assert ( '' ) in html - assert '' in html + assert ( + '' + in html + ) -def 
test_results_marks_trips_within_five_minutes_of_fastest_and_slowest(monkeypatch): - monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module.gwr_fares_scraper, 'fetch', lambda s, d: {}) +def test_results_marks_trips_within_five_minutes_of_fastest_and_slowest( + monkeypatch: Any, +) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr(gwr_fares_scraper, "fetch", lambda s, d: {}) monkeypatch.setattr( - app_module.rtt_scraper, - 'fetch', - lambda travel_date, user_agent, station_crs='BRI': [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:30', 'headcode': '1A01'}, - {'depart_bristol': '07:05', 'arrive_paddington': '08:36', 'headcode': '1A02'}, - {'depart_bristol': '07:10', 'arrive_paddington': '08:46', 'headcode': '1A03'}, - {'depart_bristol': '07:15', 'arrive_paddington': '08:56', 'headcode': '1A04'}, - {'depart_bristol': '07:20', 'arrive_paddington': '09:06', 'headcode': '1A05'}, + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:30", + "headcode": "1A01", + }, + { + "depart_bristol": "07:05", + "arrive_paddington": "08:36", + "headcode": "1A02", + }, + { + "depart_bristol": "07:10", + "arrive_paddington": "08:46", + "headcode": "1A03", + }, + { + "depart_bristol": "07:15", + "arrive_paddington": "08:56", + "headcode": "1A04", + }, + { + "depart_bristol": "07:20", + "arrive_paddington": "09:06", + "headcode": "1A05", + }, ], ) monkeypatch.setattr( - app_module.eurostar_scraper, - 'fetch', + eurostar_scraper, + "fetch", lambda destination, travel_date: [ - {'depart_st_pancras': '09:30', 'arrive_destination': '11:50', 'destination': destination, 'train_number': 'ES 1001', 'price': None, 'seats': None}, - {'depart_st_pancras': 
'09:40', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 1002', 'price': None, 'seats': None}, - {'depart_st_pancras': '09:50', 'arrive_destination': '12:20', 'destination': destination, 'train_number': 'ES 1003', 'price': None, 'seats': None}, - {'depart_st_pancras': '10:00', 'arrive_destination': '12:35', 'destination': destination, 'train_number': 'ES 1004', 'price': None, 'seats': None}, - {'depart_st_pancras': '10:10', 'arrive_destination': '12:45', 'destination': destination, 'train_number': 'ES 1005', 'price': None, 'seats': None}, + { + "depart_st_pancras": "09:30", + "arrive_destination": "11:50", + "destination": destination, + "train_number": "ES 1001", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "09:40", + "arrive_destination": "12:00", + "destination": destination, + "train_number": "ES 1002", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "09:50", + "arrive_destination": "12:20", + "destination": destination, + "train_number": "ES 1003", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "10:00", + "arrive_destination": "12:35", + "destination": destination, + "train_number": "ES 1004", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "10:10", + "arrive_destination": "12:45", + "destination": destination, + "train_number": "ES 1005", + "price": None, + "seats": None, + }, ], ) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 assert html.count('title="Fastest journey"') == 2 assert html.count('title="Slowest journey"') == 2 - assert '4h 50m ⚡' in html - assert '4h 55m ⚡' in html - assert '5h 20m 🐢' in html - assert '5h 25m 🐢' in html - assert '5h 10m ⚡' not in html - assert '5h 10m 🐢' not in html + assert "4h 50m ⚡" in html + assert 
"4h 55m ⚡" in html + assert "5h 20m 🐢" in html + assert "5h 25m 🐢" in html + assert "5h 10m ⚡" not in html + assert "5h 10m 🐢" not in html -def test_results_shows_only_pre_first_reachable_unreachable_services(monkeypatch): +def test_results_shows_only_pre_first_reachable_unreachable_services( + monkeypatch: Any, +) -> None: # GWR arrives 08:45; min=60 → earliest viable Eurostar 09:45; max=120 → latest 10:45. # 09:30 too early → shown as "Too early" # 10:15 reachable → shown as a trip (needs circle line XML, so not tested here) # 12:30 after first reachable → hidden - monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module.gwr_fares_scraper, 'fetch', lambda s, d: {}) + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr(gwr_fares_scraper, "fetch", lambda s, d: {}) monkeypatch.setattr( - app_module.rtt_scraper, - 'fetch', - lambda travel_date, user_agent, station_crs='BRI': [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, ], ) monkeypatch.setattr( - app_module.eurostar_scraper, - 'fetch', + eurostar_scraper, + "fetch", lambda destination, travel_date: [ - {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None}, - {'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': destination, 'train_number': 'ES 9002', 'price': None, 'seats': None}, - {'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': destination, 'train_number': 'ES 9003', 'price': None, 'seats': None}, + { + "depart_st_pancras": "09:30", 
+ "arrive_destination": "12:00", + "destination": destination, + "train_number": "ES 9001", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "10:15", + "arrive_destination": "13:40", + "destination": destination, + "train_number": "ES 9002", + "price": None, + "seats": None, + }, + { + "depart_st_pancras": "12:30", + "arrive_destination": "15:55", + "destination": destination, + "train_number": "ES 9003", + "price": None, + "seats": None, + }, ], ) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'ES 9001' in html # before first reachable → shown - assert 'Too early' in html - assert 'ES 9003' not in html # after first reachable → hidden + assert "ES 9001" in html # before first reachable → shown + assert "Too early" in html + assert "ES 9003" not in html # after first reachable → hidden -def test_results_shows_eurostar_price_and_total(monkeypatch): +def test_results_shows_eurostar_price_and_total(monkeypatch: Any) -> None: # 07:00 on Friday 2026-04-10 → Anytime £138.70 walk-on + ES £59.00 - _stub_data(monkeypatch, prices={'10:01': {'price': 59, 'seats': 42}}) + _stub_data(monkeypatch, prices={"10:01": {"price": 59, "seats": 42}}) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert '£59' in html # Eurostar Standard price - assert '£138.70' in html # Walk-on price shown in NR cell - # Total (£197.70) is computed client-side; verify data attributes carry the right values - assert 'data-walkon="138.7"' in html + assert "£59" in html # Eurostar Standard price in initial render + assert "£138.70" not in 
html # Walk-on price is streamed, not server-rendered assert 'data-es-std="59"' in html + assert "/api/walkon_fares/BRI/" in html # client will fetch walk-on fares -def test_results_shows_unreachable_service_when_no_trips(monkeypatch): +def test_results_uses_unique_row_keys_for_same_eurostar(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda s, d: { + "07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}, + "07:30": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}, + }, + ) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:30", + "headcode": "1A01", + }, + { + "depart_bristol": "07:30", + "arrive_paddington": "09:00", + "headcode": "1A02", + }, + ], + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + client = _client() + + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert 'data-row-key="main:07:00:10:01"' in html + assert 'data-row-key="main:07:30:10:01"' in html + assert '"main:07:00:10:01"' in html + assert '"main:07:30:10:01"' in html + + +def test_results_shows_unreachable_service_when_no_trips(monkeypatch: Any) -> None: # Only one Eurostar at 09:30; GWR arrives 08:45 with min=60 → unreachable. # No trips at all, so the unreachable service is shown as "Too early". 
- monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) - monkeypatch.setattr(app_module.gwr_fares_scraper, 'fetch', lambda s, d: {}) + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr(gwr_fares_scraper, "fetch", lambda s, d: {}) monkeypatch.setattr( - app_module.rtt_scraper, - 'fetch', - lambda travel_date, user_agent, station_crs='BRI': [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, ], ) monkeypatch.setattr( - app_module.eurostar_scraper, - 'fetch', + eurostar_scraper, + "fetch", lambda destination, travel_date: [ - {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': destination, 'train_number': 'ES 9001', 'price': None, 'seats': None}, + { + "depart_st_pancras": "09:30", + "arrive_destination": "12:00", + "destination": destination, + "train_number": "ES 9001", + "price": None, + "seats": None, + }, ], ) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'ES 9001' in html - assert 'Too early' in html - assert 'No valid journeys found.' not in html + assert "ES 9001" in html + assert "Too early" in html + assert "No valid journeys found." 
not in html -def test_results_shows_eurostar_plus_price(monkeypatch): - _stub_data(monkeypatch, prices={'10:01': {'price': 59, 'seats': 42, 'plus_price': 89, 'plus_seats': 5}}) +def test_results_shows_eurostar_plus_price(monkeypatch: Any) -> None: + _stub_data( + monkeypatch, + prices={"10:01": {"price": 59, "seats": 42, "plus_price": 89, "plus_seats": 5}}, + ) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert '£59' in html # Standard price - assert '£89' in html # Plus price - assert 'Plus' in html # Plus label + assert "£59" in html # Standard price + assert "£89" in html # Plus price + assert "Plus" in html # Plus label -def test_results_selectors_present(monkeypatch): +def test_results_selectors_present(monkeypatch: Any) -> None: _stub_data(monkeypatch) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 - assert 'nr-type-select' in html - assert 'es-type-select' in html - assert 'Load advance prices' in html - assert 'Plus' in html + assert "nr-type-select" in html + assert "es-type-select" in html + assert "Load advance prices" in html + assert "Plus" in html -def test_results_preloads_cached_advance_fares(monkeypatch): +def test_results_preloads_cached_advance_fares(monkeypatch: Any) -> None: advance_data = { - '07:00': { - 'advance_std': {'ticket': 'Advance Single', 'price': 45.0, 'code': 'ADV'}, - 'advance_1st': None, + "07:00": { + "advance_std": {"ticket": "Advance Single", "price": 45.0, "code": "ADV"}, + "advance_1st": None, } } - def fake_get_cached(key, ttl=None): - if 'gwr_advance' in key: + + def 
fake_get_cached(key: str, ttl: Any = None) -> Any: + if "gwr_advance" in key: return advance_data return None - monkeypatch.setattr(app_module, 'get_cached', fake_get_cached) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) + + monkeypatch.setattr(app_module, "get_cached", fake_get_cached) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) monkeypatch.setattr( - app_module.rtt_scraper, 'fetch', - lambda travel_date, user_agent, station_crs='BRI': [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, ], ) - monkeypatch.setattr(app_module.gwr_fares_scraper, 'fetch', lambda s, d: {}) + monkeypatch.setattr(gwr_fares_scraper, "fetch", lambda s, d: {}) monkeypatch.setattr( - app_module.eurostar_scraper, 'fetch', + eurostar_scraper, + "fetch", lambda destination, travel_date: [ - {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', - 'destination': destination, 'train_number': 'ES 9014', - 'price': None, 'seats': None, 'plus_price': None, 'plus_seats': None}, + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": None, + "seats": None, + "plus_price": None, + "plus_seats": None, + }, ], ) client = _client() - resp = client.get('/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120') + resp = client.get( + "/results/BRI/paris/2026-04-10?min_connection=60&max_connection=120" + ) html = resp.get_data(as_text=True) assert resp.status_code == 200 # Cached advance fares are embedded in the page JS assert '"advance_std"' in html - assert '45.0' in html + assert "45.0" in html # Button is absent (hidden via cachedAdvanceFares check in JS) # The JS will hide it on load; the data is present for applyAdvanceFares() - assert 
'cachedAdvanceFares' in html + assert "cachedAdvanceFares" in html -def test_api_advance_fares_returns_json(monkeypatch): - monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) +def test_results_inbound_uses_reverse_legs(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) monkeypatch.setattr( - app_module.gwr_fares_scraper, - 'fetch_advance', + rtt_scraper, + "fetch_from_paddington", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_paddington": "17:15", + "arrive_destination": "18:55", + "headcode": "1B99", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date, direction="to_paddington": { + "17:15": {"ticket": "Off-Peak Single", "price": 63.60, "code": "SVS"} + }, + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date, direction="outbound": [ + { + "depart_destination": "15:12", + "arrive_st_pancras": "16:30", + "destination": destination, + "train_number": "ES 9035", + "price": 49, + "seats": 43, + "plus_price": None, + "plus_seats": None, + }, + ], + ) + client = _client() + + resp = client.get("/results/BRI/paris/2026-04-10?journey_type=inbound") + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert "Paris Gare du Nord → Bristol Temple Meads" in html + assert "15:12 → 16:30" in html + assert "17:15 → 18:55" in html + assert "ES 9035" in html + + +def test_results_return_renders_outbound_and_inbound_tables(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + 
"arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + rtt_scraper, + "fetch_from_paddington", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_paddington": "17:15", + "arrive_destination": "18:55", + "headcode": "1B99", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date, direction="to_paddington": { + "07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}, + "17:15": {"ticket": "Off-Peak Single", "price": 63.60, "code": "SVS"}, + }, + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch_return", + lambda destination, outbound_date, return_date: { + "outbound": [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": None, + "plus_seats": None, + }, + ], + "inbound": [ + { + "depart_destination": "15:12", + "arrive_st_pancras": "16:30", + "destination": destination, + "train_number": "ES 9035", + "price": 49, + "seats": 43, + "plus_price": None, + "plus_seats": None, + }, + ], + }, + ) + monkeypatch.setattr( + circle_line, + "upcoming_services", + lambda earliest_board, count=2, direction="pad_to_kx", preceding=0: ( + [ + (datetime(2026, 4, 10, 9, 10), datetime(2026, 4, 10, 9, 25)), + (datetime(2026, 4, 10, 9, 15), datetime(2026, 4, 10, 9, 30)), + ] + if direction == "pad_to_kx" + else [ + (datetime(2026, 4, 17, 16, 40), datetime(2026, 4, 17, 16, 55)), + (datetime(2026, 4, 17, 16, 45), datetime(2026, 4, 17, 17, 0)), + ] + ), + ) + client = _client() + + resp = client.get("/results/BRI/paris/2026-04-10/return/2026-04-17") + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert "Outbound: Bristol Temple Meads → Paris Gare du Nord" in html + assert "Return: Paris Gare du Nord → Bristol Temple Meads" in html + assert "Friday 10 April 2026" in html + assert "Friday 17 April 2026" in html + assert 
"/results/BRI/paris/2026-04-09/return/2026-04-17" in html + assert "/results/BRI/paris/2026-04-11/return/2026-04-17" in html + assert "/results/BRI/paris/2026-04-10/return/2026-04-16" in html + assert "/results/BRI/paris/2026-04-10/return/2026-04-18" in html + assert "/results/BRI/paris/2026-04-10/return/2026-04-17" in html + assert "journey_type=return" not in html + assert "return_date=2026-04-17" not in html + assert "Circle 09:10 → KX 09:25" in html + assert "next 09:15 → KX 09:30" in html + assert "Circle 16:40 → PAD 16:55" in html + assert "next 16:45 → PAD 17:00" in html + assert 'title="Tight connection">⚠️' in html + assert "ES 9014" in html + assert "ES 9035" in html + + +def test_api_advance_fares_returns_json(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance", lambda station_crs, travel_date: { - '07:00': { - 'advance_std': {'ticket': 'Advance Single', 'price': 45.0, 'code': 'ADV'}, - 'advance_1st': {'ticket': '1st Advance', 'price': 65.0, 'code': 'AFA'}, + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 45.0, + "code": "ADV", + }, + "advance_1st": {"ticket": "1st Advance", "price": 65.0, "code": "AFA"}, } }, ) client = _client() - resp = client.get('/api/advance_fares/BRI/2026-04-10') + resp = client.get("/api/advance_fares/BRI/2026-04-10") data = resp.get_json() assert resp.status_code == 200 - assert '07:00' in data - assert data['07:00']['advance_std']['price'] == 45.0 - assert data['07:00']['advance_1st']['price'] == 65.0 + assert "07:00" in data + assert data["07:00"]["advance_std"]["price"] == 45.0 + assert data["07:00"]["advance_1st"]["price"] == 65.0 -def test_api_advance_fares_404_for_unknown_station(monkeypatch): +def test_api_advance_fares_404_for_unknown_station() -> None: client = _client() - resp = 
client.get('/api/advance_fares/XYZ/2026-04-10') + resp = client.get("/api/advance_fares/XYZ/2026-04-10") assert resp.status_code == 404 -def test_api_advance_fares_returns_error_on_scraper_failure(monkeypatch): - monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) - monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) +def test_api_advance_fares_returns_error_on_scraper_failure(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) monkeypatch.setattr( - app_module.gwr_fares_scraper, - 'fetch_advance', - lambda s, d: (_ for _ in ()).throw(Exception('network error')), + gwr_fares_scraper, + "fetch_advance", + lambda s, d: (_ for _ in ()).throw(Exception("network error")), ) client = _client() - resp = client.get('/api/advance_fares/BRI/2026-04-10') + resp = client.get("/api/advance_fares/BRI/2026-04-10") data = resp.get_json() assert resp.status_code == 500 - assert 'error' in data + assert "error" in data diff --git a/tests/test_cache.py b/tests/test_cache.py index 006af57..9434942 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -1,42 +1,53 @@ import os import time +from pathlib import Path +from typing import Any + import pytest from cache import get_cached, set_cached @pytest.fixture -def tmp_cache(tmp_path, monkeypatch): +def tmp_cache(tmp_path: Path, monkeypatch: Any) -> Path: import cache as cache_module - monkeypatch.setattr(cache_module, 'CACHE_DIR', str(tmp_path)) + + monkeypatch.setattr(cache_module, "CACHE_DIR", str(tmp_path)) return tmp_path -def test_get_cached_returns_none_for_missing_key(tmp_cache): - assert get_cached('no_such_key') is None +def test_get_cached_returns_none_for_missing_key(tmp_cache: Path) -> None: + assert get_cached("no_such_key") is None -def test_set_and_get_cached_roundtrip(tmp_cache): - set_cached('my_key', {'a': 1}) - assert get_cached('my_key') == {'a': 1} 
+def test_set_and_get_cached_roundtrip(tmp_cache: Path) -> None: + set_cached("my_key", {"a": 1}) + assert get_cached("my_key") == {"a": 1} -def test_get_cached_no_ttl_never_expires(tmp_cache): - set_cached('k', [1, 2, 3]) +def test_get_cached_no_ttl_never_expires(tmp_cache: Path) -> None: + set_cached("k", [1, 2, 3]) # Backdate the file by 2 days - path = tmp_cache / 'k.json' + path = tmp_cache / "k.json" old = time.time() - 2 * 86400 os.utime(path, (old, old)) - assert get_cached('k') == [1, 2, 3] + assert get_cached("k") == [1, 2, 3] -def test_get_cached_within_ttl(tmp_cache): - set_cached('k', 'fresh') - assert get_cached('k', ttl=3600) == 'fresh' +def test_get_cached_within_ttl(tmp_cache: Path) -> None: + set_cached("k", "fresh") + assert get_cached("k", ttl=3600) == "fresh" -def test_get_cached_expired_returns_none(tmp_cache): - set_cached('k', 'stale') - path = tmp_cache / 'k.json' +def test_get_cached_expired_returns_none(tmp_cache: Path) -> None: + set_cached("k", "stale") + path = tmp_cache / "k.json" old = time.time() - 25 * 3600 # 25 hours ago os.utime(path, (old, old)) - assert get_cached('k', ttl=24 * 3600) is None + assert get_cached("k", ttl=24 * 3600) is None + + +def test_get_cached_invalid_json_returns_none(tmp_cache: Path) -> None: + path = tmp_cache / "broken.json" + path.write_text('{"not": "finished"') + + assert get_cached("broken") is None diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py index b73597f..64eff91 100644 --- a/tests/test_eurostar_scraper.py +++ b/tests/test_eurostar_scraper.py @@ -1,30 +1,47 @@ +from typing import Any + import pytest -from scraper.eurostar import _parse_graphql, search_url +from scraper.eurostar import _parse_graphql, _parse_graphql_leg, search_url -def _gql_response(journeys: list) -> dict: - return {'data': {'journeySearch': {'outbound': {'journeys': journeys}}}} +def _gql_response(journeys: list[dict[str, Any]]) -> dict[str, Any]: + return {"data": {"journeySearch": {"outbound": 
{"journeys": journeys}}}} -def _journey(departs: str, arrives: str, price=None, seats=None, service_name='', carrier='ES', - plus_price=None, plus_seats=None) -> dict: - fares = [{ - 'classOfService': {'code': 'STANDARD'}, - 'prices': {'displayPrice': price}, - 'seats': seats, - 'legs': [{'serviceName': service_name, 'serviceType': {'code': carrier}}] - if service_name else [], - }] +def _journey( + departs: str, + arrives: str, + price: float | None = None, + seats: int | None = None, + service_name: str = "", + carrier: str = "ES", + plus_price: float | None = None, + plus_seats: int | None = None, +) -> dict[str, Any]: + fares: list[dict[str, Any]] = [ + { + "classOfService": {"code": "STANDARD"}, + "prices": {"displayPrice": price}, + "seats": seats, + "legs": ( + [{"serviceName": service_name, "serviceType": {"code": carrier}}] + if service_name + else [] + ), + } + ] if plus_price is not None or plus_seats is not None: - fares.append({ - 'classOfService': {'code': 'PLUS'}, - 'prices': {'displayPrice': plus_price}, - 'seats': plus_seats, - 'legs': [], - }) + fares.append( + { + "classOfService": {"code": "PLUS"}, + "prices": {"displayPrice": plus_price}, + "seats": plus_seats, + "legs": [], + } + ) return { - 'timing': {'departureTime': departs, 'arrivalTime': arrives}, - 'fares': fares, + "timing": {"departureTime": departs, "arrivalTime": arrives}, + "fares": fares, } @@ -32,91 +49,149 @@ def _journey(departs: str, arrives: str, price=None, seats=None, service_name='' # _parse_graphql # --------------------------------------------------------------------------- -def test_parse_graphql_single_journey(): - data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014')]) - services = _parse_graphql(data, 'Paris Gare du Nord') + +def test_parse_graphql_single_journey() -> None: + data = _gql_response( + [_journey("09:31", "12:55", price=156, seats=37, service_name="9014")] + ) + services = _parse_graphql(data, "Paris Gare du Nord") 
assert len(services) == 1 s = services[0] - assert s['depart_st_pancras'] == '09:31' - assert s['arrive_destination'] == '12:55' - assert s['destination'] == 'Paris Gare du Nord' - assert s['train_number'] == 'ES 9014' - assert s['price'] == 156.0 - assert s['seats'] == 37 - assert s['plus_price'] is None - assert s['plus_seats'] is None + assert s["depart_st_pancras"] == "09:31" + assert s["arrive_destination"] == "12:55" + assert s["destination"] == "Paris Gare du Nord" + assert s["train_number"] == "ES 9014" + assert s["price"] == 156.0 + assert s["seats"] == 37 + assert s["plus_price"] is None + assert s["plus_seats"] is None -def test_parse_graphql_standard_premier_price(): - data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='9014', - plus_price=220, plus_seats=12)]) - services = _parse_graphql(data, 'Paris Gare du Nord') +def test_parse_graphql_standard_premier_price() -> None: + data = _gql_response( + [ + _journey( + "09:31", + "12:55", + price=156, + seats=37, + service_name="9014", + plus_price=220, + plus_seats=12, + ) + ] + ) + services = _parse_graphql(data, "Paris Gare du Nord") assert len(services) == 1 s = services[0] - assert s['price'] == 156.0 - assert s['seats'] == 37 - assert s['plus_price'] == 220.0 - assert s['plus_seats'] == 12 + assert s["price"] == 156.0 + assert s["seats"] == 37 + assert s["plus_price"] == 220.0 + assert s["plus_seats"] == 12 -def test_parse_graphql_plus_price_none_when_not_returned(): - data = _gql_response([_journey('09:31', '12:55', price=156, seats=37)]) - services = _parse_graphql(data, 'Paris Gare du Nord') - assert services[0]['plus_price'] is None - assert services[0]['plus_seats'] is None +def test_parse_graphql_plus_price_none_when_not_returned() -> None: + data = _gql_response([_journey("09:31", "12:55", price=156, seats=37)]) + services = _parse_graphql(data, "Paris Gare du Nord") + assert services[0]["plus_price"] is None + assert services[0]["plus_seats"] is None -def 
test_parse_graphql_half_pound_price(): - data = _gql_response([_journey('09:01', '14:20', price=192.5, seats=25, service_name='9116')]) - services = _parse_graphql(data, 'Amsterdam Centraal') - assert services[0]['price'] == 192.5 +def test_parse_graphql_half_pound_price() -> None: + data = _gql_response( + [_journey("09:01", "14:20", price=192.5, seats=25, service_name="9116")] + ) + services = _parse_graphql(data, "Amsterdam Centraal") + assert services[0]["price"] == 192.5 -def test_parse_graphql_null_price(): - data = _gql_response([_journey('06:16', '11:09', price=None, seats=0)]) - services = _parse_graphql(data, 'Amsterdam Centraal') - assert services[0]['price'] is None - assert services[0]['seats'] == 0 +def test_parse_graphql_null_price() -> None: + data = _gql_response([_journey("06:16", "11:09", price=None, seats=0)]) + services = _parse_graphql(data, "Amsterdam Centraal") + assert services[0]["price"] is None + assert services[0]["seats"] == 0 -def test_parse_graphql_sorted_by_departure(): - data = _gql_response([ - _journey('10:31', '13:55'), - _journey('07:31', '10:59'), - ]) - services = _parse_graphql(data, 'Paris Gare du Nord') - assert services[0]['depart_st_pancras'] == '07:31' - assert services[1]['depart_st_pancras'] == '10:31' +def test_parse_graphql_sorted_by_departure() -> None: + data = _gql_response( + [ + _journey("10:31", "13:55"), + _journey("07:31", "10:59"), + ] + ) + services = _parse_graphql(data, "Paris Gare du Nord") + assert services[0]["depart_st_pancras"] == "07:31" + assert services[1]["depart_st_pancras"] == "10:31" -def test_parse_graphql_deduplicates_same_departure_time(): - data = _gql_response([ - _journey('06:16', '11:09', price=None, seats=0), - _journey('06:16', '11:09', price=None, seats=0), - _journey('06:16', '11:09', price=None, seats=0), - ]) - services = _parse_graphql(data, 'Amsterdam Centraal') +def test_parse_graphql_deduplicates_same_departure_time() -> None: + data = _gql_response( + [ + _journey("06:16", 
"11:09", price=None, seats=0), + _journey("06:16", "11:09", price=None, seats=0), + _journey("06:16", "11:09", price=None, seats=0), + ] + ) + services = _parse_graphql(data, "Amsterdam Centraal") assert len(services) == 1 -def test_parse_graphql_no_legs_gives_empty_train_number(): - data = _gql_response([_journey('09:31', '12:55', price=156, seats=37, service_name='')]) - services = _parse_graphql(data, 'Paris Gare du Nord') - assert services[0]['train_number'] == '' +def test_parse_graphql_no_legs_gives_empty_train_number() -> None: + data = _gql_response( + [_journey("09:31", "12:55", price=156, seats=37, service_name="")] + ) + services = _parse_graphql(data, "Paris Gare du Nord") + assert services[0]["train_number"] == "" -def test_parse_graphql_empty_journeys(): +def test_parse_graphql_empty_journeys() -> None: data = _gql_response([]) - assert _parse_graphql(data, 'Paris Gare du Nord') == [] + assert _parse_graphql(data, "Paris Gare du Nord") == [] + + +def test_parse_graphql_inbound_leg() -> None: + data: dict[str, Any] = { + "data": { + "journeySearch": { + "inbound": { + "journeys": [ + _journey( + "17:12", "18:30", price=49, seats=43, service_name="9035" + ) + ] + } + } + } + } + services = _parse_graphql_leg(data, "Paris Gare du Nord", "inbound", "inbound") + + assert services == [ + { + "depart_destination": "17:12", + "arrive_st_pancras": "18:30", + "destination": "Paris Gare du Nord", + "train_number": "ES 9035", + "price": 49.0, + "seats": 43, + "plus_price": None, + "plus_seats": None, + } + ] # --------------------------------------------------------------------------- # search_url # --------------------------------------------------------------------------- -def test_search_url(): - url = search_url('Paris Gare du Nord', '2026-04-10') + +def test_search_url() -> None: + url = search_url("Paris Gare du Nord", "2026-04-10") assert url == ( - 'https://www.eurostar.com/search/uk-en' - '?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10' 
+ "https://www.eurostar.com/search/uk-en" + "?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10" ) + + +def test_search_url_return() -> None: + url = search_url("Paris Gare du Nord", "2026-04-10", return_date="2026-04-17") + assert url.endswith("&outbound=2026-04-10&inbound=2026-04-17") diff --git a/tests/test_playwright_return_fares.py b/tests/test_playwright_return_fares.py new file mode 100644 index 0000000..01205ff --- /dev/null +++ b/tests/test_playwright_return_fares.py @@ -0,0 +1,560 @@ +import threading +from typing import Any, Generator + +import pytest +from werkzeug.serving import make_server + +import app as app_module + +playwright_sync = pytest.importorskip("playwright.sync_api") +sync_playwright = playwright_sync.sync_playwright + +rtt_scraper: Any = app_module.rtt_scraper # type: ignore[attr-defined] +gwr_fares_scraper: Any = app_module.gwr_fares_scraper # type: ignore[attr-defined] +eurostar_scraper: Any = app_module.eurostar_scraper # type: ignore[attr-defined] + + +def _stub_return_data(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + rtt_scraper, + "fetch_from_paddington", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_paddington": "17:15", + "arrive_destination": "18:55", + "headcode": "1B99", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date, direction="to_paddington": { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + "17:15": { + "ticket": "Off-Peak Single", + "price": 63.60, + "code": "SVS", + }, + }, + ) + + def fake_advance_streaming( + station_crs: str, + 
travel_date: str, + direction: str = "to_paddington", + ) -> Generator[dict[str, Any], None, None]: + if direction == "from_paddington": + yield { + "17:15": { + "advance_std": { + "ticket": "Advance Single", + "price": 25.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 45.0, + "code": "AFA", + }, + } + } + else: + yield { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 80.0, + "code": "AFA", + }, + } + } + + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance_streaming", + fake_advance_streaming, + ) + + def fake_advance( + station_crs: str, travel_date: str, direction: str = "to_paddington" + ) -> dict[str, Any]: + pages = list(fake_advance_streaming(station_crs, travel_date, direction)) + return pages[0] if pages else {} + + monkeypatch.setattr(gwr_fares_scraper, "fetch_advance", fake_advance) + monkeypatch.setattr( + eurostar_scraper, + "fetch_return", + lambda destination, outbound_date, return_date: { + "outbound": [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + "inbound": [ + { + "depart_destination": "15:12", + "arrive_st_pancras": "16:30", + "destination": destination, + "train_number": "ES 9035", + "price": 49, + "seats": 43, + "plus_price": 79, + "plus_seats": 6, + }, + ], + }, + ) + + +def _stub_single_data(monkeypatch: Any) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda 
station_crs, travel_date: { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + }, + ) + advance_fares: dict[str, Any] = { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 80.0, + "code": "AFA", + }, + }, + } + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance", + lambda station_crs, travel_date: advance_fares, + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance_streaming", + lambda station_crs, travel_date: iter([advance_fares]), + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + + +@pytest.fixture +def local_server(monkeypatch: Any) -> Generator[str, None, None]: + _stub_return_data(monkeypatch) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield f"http://127.0.0.1:{server.server_port}" + finally: + server.shutdown() + thread.join(timeout=5) + + +@pytest.fixture +def single_server(monkeypatch: Any) -> Generator[str, None, None]: + _stub_single_data(monkeypatch) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield f"http://127.0.0.1:{server.server_port}" + finally: + server.shutdown() + thread.join(timeout=5) + + +def _launch_browser(playwright: Any) -> Any: + try: + return playwright.chromium.launch(headless=True) + except Exception as exc: + pytest.skip(f"Chromium browser unavailable for Playwright: {exc}") + + +def 
test_single_advance_standard_totals_after_click(single_server: str) -> None: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"{single_server}/results/BRI/paris/2026-07-20", + wait_until="domcontentloaded", + ) + + page.get_by_role("button", name="Advance Std").click() + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£112.10'))", + timeout=10000, + ) + assert "nr_class=advance_std" in page.url + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£112.10"] + browser.close() + + +def test_single_next_date_advance_standard_labels_unreachable_rows( + monkeypatch: Any, +) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + }, + ) + advance_fares: dict[str, Any] = { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": None, + }, + } + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance", + lambda station_crs, travel_date: advance_fares, + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance_streaming", + lambda station_crs, travel_date: iter([advance_fares]), + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "09:30", + "arrive_destination": "12:30", + "destination": destination, + "train_number": "ES 9001", + "price": 59, + "seats": 42, + "plus_price": None, + 
"plus_seats": None, + }, + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": None, + "plus_seats": None, + }, + ], + ) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"http://127.0.0.1:{server.server_port}" + "/results/BRI/brussels/2026-06-16", + wait_until="domcontentloaded", + ) + page.get_by_role("link", name="Next →").click() + page.wait_for_url("**/2026-06-17**", timeout=10000) + page.get_by_role("button", name="Advance Std").click() + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£112.10'))", + timeout=10000, + ) + + assert page.get_by_text("No connection").count() == 1 + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£112.10"] + browser.close() + finally: + server.shutdown() + thread.join(timeout=5) + + +def test_single_advance_standard_premier_totals_on_initial_url( + single_server: str, +) -> None: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"{single_server}/results/BRI/paris/2026-07-20" + "?nr_class=advance_std&es_class=plus", + wait_until="domcontentloaded", + ) + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£142.10'))", + timeout=10000, + ) + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£142.10"] + browser.close() + + +def test_single_advance_first_falls_back_to_walkon_when_unavailable( + monkeypatch: Any, +) -> None: + monkeypatch.setattr(app_module, "get_cached", lambda key, 
ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: { + "07:00": {"ticket": "Anytime Day Single", "price": 138.70, "code": "SDS"}, + }, + ) + advance_fares: dict[str, Any] = { + "07:00": { + "advance_std": {"ticket": "Advance Single", "price": 50.0, "code": "ADV"}, + "advance_1st": None, + }, + } + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance", + lambda station_crs, travel_date: advance_fares, + ) + monkeypatch.setattr( + gwr_fares_scraper, + "fetch_advance_streaming", + lambda station_crs, travel_date: iter([advance_fares]), + ) + monkeypatch.setattr( + eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"http://127.0.0.1:{server.server_port}" + "/results/BRI/paris/2026-07-20?nr_class=advance_1st&es_class=standard", + wait_until="domcontentloaded", + ) + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£200.80'))", + timeout=10000, + ) + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£200.80"] + assert "No NR fare" not in " ".join(totals) + browser.close() + finally: + server.shutdown() + 
thread.join(timeout=5) + + +def test_return_advance_first_standard_premier_totals(local_server: str) -> None: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto(f"{local_server}/", wait_until="domcontentloaded") + page.locator("#journey-return").check(force=True) + page.locator("#destination-paris").check(force=True) + page.locator("#travel_date").fill("2026-07-20") + page.locator("#return_date").fill("2026-07-27") + page.locator('button[type="submit"]').click() + page.wait_for_url("**/results/**", timeout=10000) + + page.get_by_role("button", name="Advance 1st").click() + page.get_by_role("button", name="Standard Premier").click() + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£172.10'))", + timeout=10000, + ) + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£127.10'))", + timeout=10000, + ) + + assert "/results/BRI/paris/2026-07-20/return/2026-07-27" in page.url + assert "journey_type=return" not in page.url + assert "return_date=2026-07-27" not in page.url + assert "nr_class=advance_1st" in page.url + assert "es_class=plus" in page.url + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£172.10 💸", "£127.10 🪙"] + browser.close() + + +def test_return_calendar_selects_outbound_before_return(local_server: str) -> None: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto(f"{local_server}/", wait_until="domcontentloaded") + + page.locator("#journey-return").check(force=True) + assert page.locator("#cal-hint").inner_text() == "Select outbound date" + assert page.locator("#travel_date").input_value() == "" + assert page.locator("#return_date").input_value() == "" + + page.get_by_role("button", name="10 June 2026").click() + assert page.locator("#travel_date").input_value() == 
"2026-06-10" + assert page.locator("#return_date").input_value() == "" + assert "Now select return date" in page.locator("#cal-hint").inner_text() + + page.get_by_role("button", name="17 June 2026").click() + assert page.locator("#travel_date").input_value() == "2026-06-10" + assert page.locator("#return_date").input_value() == "2026-06-17" + assert "Return: Wed 17 Jun" in page.locator("#cal-hint").inner_text() + + page.locator('button[type="submit"]').click() + page.wait_for_url("**/results/BRI/paris/2026-06-10/return/2026-06-17", timeout=10000) + browser.close() + + +def test_return_advance_first_standard_premier_totals_on_initial_url( + local_server: str, +) -> None: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"{local_server}/results/BRI/paris/2026-07-20/return/2026-07-27" + "?nr_class=advance_1st&es_class=plus", + wait_until="domcontentloaded", + ) + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£172.10'))", + timeout=10000, + ) + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£127.10'))", + timeout=10000, + ) + + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£172.10 💸", "£127.10 🪙"] + browser.close() diff --git a/tests/test_rtt_scraper.py b/tests/test_rtt_scraper.py index 7bceb61..98590f6 100644 --- a/tests/test_rtt_scraper.py +++ b/tests/test_rtt_scraper.py @@ -1,71 +1,74 @@ import pytest from scraper.realtime_trains import _fmt, _parse_services - # --------------------------------------------------------------------------- # _fmt # --------------------------------------------------------------------------- -def test_fmt_four_digits(): - assert _fmt('0830') == '08:30' -def test_fmt_already_colon(): - assert _fmt('08:30') == '08:30' +def test_fmt_four_digits() -> None: + assert _fmt("0830") == "08:30" 
-def test_fmt_strips_non_digits(): - assert _fmt('08h30') == '08:30' + +def test_fmt_already_colon() -> None: + assert _fmt("08:30") == "08:30" + + +def test_fmt_strips_non_digits() -> None: + assert _fmt("08h30") == "08:30" # --------------------------------------------------------------------------- # _parse_services # --------------------------------------------------------------------------- + def _make_html(services: list[tuple[str, str]], time_class: str) -> str: """Build a minimal servicelist HTML with (train_id, time) pairs.""" - items = '' + items = "" for tid, time in services: - items += f''' + items += f"""
{tid}
{time}
-
''' + """ return f'
{items}
' -def test_parse_services_departures(): - html = _make_html([('1A23', '0700'), ('2B45', '0830')], 'd') - result = _parse_services(html, 'div.time.plan.d') - assert result == {'1A23': '07:00', '2B45': '08:30'} +def test_parse_services_departures() -> None: + html = _make_html([("1A23", "0700"), ("2B45", "0830")], "d") + result = _parse_services(html, "div.time.plan.d") + assert result == {"1A23": "07:00", "2B45": "08:30"} -def test_parse_services_arrivals(): - html = _make_html([('1A23', '0845')], 'a') - result = _parse_services(html, 'div.time.plan.a') - assert result == {'1A23': '08:45'} +def test_parse_services_arrivals() -> None: + html = _make_html([("1A23", "0845")], "a") + result = _parse_services(html, "div.time.plan.a") + assert result == {"1A23": "08:45"} -def test_parse_services_no_servicelist(): - assert _parse_services('', 'div.time.plan.d') == {} +def test_parse_services_no_servicelist() -> None: + assert _parse_services("", "div.time.plan.d") == {} -def test_parse_services_skips_missing_time(): - html = ''' +def test_parse_services_skips_missing_time() -> None: + html = """ ''' - result = _parse_services(html, 'div.time.plan.d') - assert '1A23' not in result - assert result == {'2B45': '09:00'} +
""" + result = _parse_services(html, "div.time.plan.d") + assert "1A23" not in result + assert result == {"2B45": "09:00"} -def test_parse_services_skips_empty_time(): - html = ''' +def test_parse_services_skips_empty_time() -> None: + html = """ ''' - result = _parse_services(html, 'div.time.plan.d') +
""" + result = _parse_services(html, "div.time.plan.d") assert result == {} diff --git a/tests/test_trip_planner.py b/tests/test_trip_planner.py index 306d391..91d8988 100644 --- a/tests/test_trip_planner.py +++ b/tests/test_trip_planner.py @@ -1,64 +1,85 @@ import pytest -from trip_planner import combine_trips, find_unreachable_morning_eurostars, _fmt_duration +from trip_planner import ( + combine_inbound_trips, + combine_trips, + find_unreachable_morning_eurostars, + _fmt_duration, +) -DATE = '2026-03-30' +DATE = "2026-03-30" # --------------------------------------------------------------------------- # _fmt_duration # --------------------------------------------------------------------------- -def test_fmt_duration_hours_and_minutes(): - assert _fmt_duration(95) == '1h 35m' -def test_fmt_duration_exact_hours(): - assert _fmt_duration(120) == '2h' +def test_fmt_duration_hours_and_minutes() -> None: + assert _fmt_duration(95) == "1h 35m" -def test_fmt_duration_minutes_only(): - assert _fmt_duration(45) == '45m' + +def test_fmt_duration_exact_hours() -> None: + assert _fmt_duration(120) == "2h" + + +def test_fmt_duration_minutes_only() -> None: + assert _fmt_duration(45) == "45m" # --------------------------------------------------------------------------- # combine_trips — basic pairing # --------------------------------------------------------------------------- -GWR_FAST = {'depart_bristol': '07:00', 'arrive_paddington': '08:45'} # 1h 45m -GWR_SLOW = {'depart_bristol': '07:00', 'arrive_paddington': '09:26'} # 2h 26m — connection too short for ES_PARIS +GWR_FAST = {"depart_bristol": "07:00", "arrive_paddington": "08:45"} # 1h 45m +GWR_SLOW = { + "depart_bristol": "07:00", + "arrive_paddington": "09:26", +} # 2h 26m — connection too short for ES_PARIS -ES_PARIS = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'} -ES_EARLY = {'depart_st_pancras': '09:00', 'arrive_destination': '12:00', 'destination': 'Paris Gare du 
Nord'} +ES_PARIS = { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": "Paris Gare du Nord", +} +ES_EARLY = { + "depart_st_pancras": "09:00", + "arrive_destination": "12:00", + "destination": "Paris Gare du Nord", +} -def test_valid_trip_is_returned(): +def test_valid_trip_is_returned() -> None: trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) assert len(trips) == 1 t = trips[0] - assert t['depart_bristol'] == '07:00' - assert t['arrive_paddington'] == '08:45' - assert t['depart_st_pancras'] == '10:01' - assert t['arrive_destination'] == '13:34' - assert t['destination'] == 'Paris Gare du Nord' + assert t["depart_bristol"] == "07:00" + assert t["arrive_paddington"] == "08:45" + assert t["depart_st_pancras"] == "10:01" + assert t["arrive_destination"] == "13:34" + assert t["destination"] == "Paris Gare du Nord" -def test_gwr_too_slow_excluded(): +def test_gwr_too_slow_excluded() -> None: # arrive 09:26, Eurostar 10:01 → 35 min connection < 50 min minimum trips = combine_trips([GWR_SLOW], [ES_PARIS], DATE) assert trips == [] -def test_eurostar_too_early_excluded(): +def test_eurostar_too_early_excluded() -> None: # Eurostar departs before min connection time has elapsed trips = combine_trips([GWR_FAST], [ES_EARLY], DATE) assert trips == [] -def test_no_trains_returns_empty(): +def test_no_trains_returns_empty() -> None: assert combine_trips([], [], DATE) == [] -def test_no_gwr_returns_empty(): + +def test_no_gwr_returns_empty() -> None: assert combine_trips([], [ES_PARIS], DATE) == [] -def test_no_eurostar_returns_empty(): + +def test_no_eurostar_returns_empty() -> None: assert combine_trips([GWR_FAST], [], DATE) == [] @@ -66,115 +87,211 @@ def test_no_eurostar_returns_empty(): # Connection window constraints # --------------------------------------------------------------------------- -def test_min_connection_enforced(): + +def test_min_connection_enforced() -> None: # Arrive Paddington 08:45, need 75 min → earliest St Pancras 
10:00 # ES at 09:59 should be excluded, 10:00 should be included - es_too_close = {'depart_st_pancras': '09:59', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'} - es_ok = {'depart_st_pancras': '10:00', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'} - assert combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == [] + es_too_close = { + "depart_st_pancras": "09:59", + "arrive_destination": "13:00", + "destination": "Paris Gare du Nord", + } + es_ok = { + "depart_st_pancras": "10:00", + "arrive_destination": "13:00", + "destination": "Paris Gare du Nord", + } + assert ( + combine_trips([GWR_FAST], [es_too_close], DATE, min_connection_minutes=75) == [] + ) trips = combine_trips([GWR_FAST], [es_ok], DATE, min_connection_minutes=75) assert len(trips) == 1 -def test_max_connection_enforced(): +def test_max_connection_enforced() -> None: # Arrive Paddington 08:45, max 140 min → latest St Pancras 11:05 - es_ok = {'depart_st_pancras': '11:05', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'} - es_too_late = {'depart_st_pancras': '11:06', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'} + es_ok = { + "depart_st_pancras": "11:05", + "arrive_destination": "14:00", + "destination": "Paris Gare du Nord", + } + es_too_late = { + "depart_st_pancras": "11:06", + "arrive_destination": "14:00", + "destination": "Paris Gare du Nord", + } trips = combine_trips([GWR_FAST], [es_ok], DATE, max_connection_minutes=140) assert len(trips) == 1 - assert combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == [] + assert ( + combine_trips([GWR_FAST], [es_too_late], DATE, max_connection_minutes=140) == [] + ) # --------------------------------------------------------------------------- # Only earliest valid Eurostar per GWR departure # --------------------------------------------------------------------------- -def test_only_earliest_eurostar_per_gwr(): - es1 = 
{'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'} - es2 = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'} + +def test_only_earliest_eurostar_per_gwr() -> None: + es1 = { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": "Paris Gare du Nord", + } + es2 = { + "depart_st_pancras": "11:01", + "arrive_destination": "14:34", + "destination": "Paris Gare du Nord", + } trips = combine_trips([GWR_FAST], [es1, es2], DATE) assert len(trips) == 1 - assert trips[0]['depart_st_pancras'] == '10:01' + assert trips[0]["depart_st_pancras"] == "10:01" # --------------------------------------------------------------------------- # Multiple GWR trains → multiple trips # --------------------------------------------------------------------------- -def test_multiple_gwr_trains(): - gwr2 = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'} - es = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'} + +def test_multiple_gwr_trains() -> None: + gwr2 = {"depart_bristol": "08:00", "arrive_paddington": "09:45"} + es = { + "depart_st_pancras": "11:01", + "arrive_destination": "14:34", + "destination": "Paris Gare du Nord", + } trips = combine_trips([GWR_FAST, gwr2], [es], DATE, max_connection_minutes=140) assert len(trips) == 2 - assert trips[0]['depart_bristol'] == '07:00' - assert trips[1]['depart_bristol'] == '08:00' + assert trips[0]["depart_bristol"] == "07:00" + assert trips[1]["depart_bristol"] == "08:00" # --------------------------------------------------------------------------- # Duration fields # --------------------------------------------------------------------------- -def test_gwr_duration_in_trip(): + +def test_gwr_duration_in_trip() -> None: trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) - assert trips[0]['gwr_duration'] == '1h 45m' + assert trips[0]["gwr_duration"] == "1h 45m" -def 
test_total_duration_in_trip(): +def test_total_duration_in_trip() -> None: # depart 07:00, arrive 13:34 → 6h 34m trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) - assert trips[0]['total_duration'] == '6h 34m' + assert trips[0]["total_duration"] == "6h 34m" -def test_connection_duration_in_trip(): +def test_connection_duration_in_trip() -> None: # arrive Paddington 08:45, depart St Pancras 10:01 → 1h 16m trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) - assert trips[0]['connection_duration'] == '1h 16m' + assert trips[0]["connection_duration"] == "1h 16m" -def test_find_unreachable_eurostars_excludes_connectable_services(): +def test_find_unreachable_eurostars_excludes_connectable_services() -> None: # GWR arrives 08:45; default min=50/max=110 → viable window 09:35–10:35. # 09:30 too early, 10:15 connectable, 12:30 beyond max connection. gwr = [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45'}, + {"depart_bristol": "07:00", "arrive_paddington": "08:45"}, ] eurostar = [ - {'depart_st_pancras': '09:30', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9001'}, - {'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9002'}, - {'depart_st_pancras': '12:30', 'arrive_destination': '15:55', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9003'}, + { + "depart_st_pancras": "09:30", + "arrive_destination": "12:00", + "destination": "Paris Gare du Nord", + "train_number": "ES 9001", + }, + { + "depart_st_pancras": "10:15", + "arrive_destination": "13:40", + "destination": "Paris Gare du Nord", + "train_number": "ES 9002", + }, + { + "depart_st_pancras": "12:30", + "arrive_destination": "15:55", + "destination": "Paris Gare du Nord", + "train_number": "ES 9003", + }, ] unreachable = find_unreachable_morning_eurostars(gwr, eurostar, DATE) - assert [s['depart_st_pancras'] for s in unreachable] == ['09:30', '12:30'] + assert [s["depart_st_pancras"] 
for s in unreachable] == ["09:30", "12:30"] -def test_combine_trips_includes_ticket_fields(): +def test_combine_trips_includes_ticket_fields() -> None: trips = combine_trips([GWR_FAST], [ES_PARIS], DATE) assert len(trips) == 1 t = trips[0] - assert 'ticket_name' in t - assert 'ticket_price' in t - assert 'ticket_code' in t + assert "ticket_name" in t + assert "ticket_price" in t + assert "ticket_code" in t -def test_combine_trips_uses_gwr_fares_when_provided(): - fares = {'07:00': {'ticket': 'Super Off-Peak Single', 'price': 49.30, 'code': 'SSS'}} + +def test_combine_trips_uses_gwr_fares_when_provided() -> None: + fares = { + "07:00": {"ticket": "Super Off-Peak Single", "price": 49.30, "code": "SSS"} + } trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares=fares) assert len(trips) == 1 - assert trips[0]['ticket_price'] == 49.30 - assert trips[0]['ticket_code'] == 'SSS' + assert trips[0]["ticket_price"] == 49.30 + assert trips[0]["ticket_code"] == "SSS" -def test_combine_trips_ticket_price_none_when_no_fares(): + +def test_combine_trips_ticket_price_none_when_no_fares() -> None: trips = combine_trips([GWR_FAST], [ES_PARIS], DATE, gwr_fares={}) assert len(trips) == 1 - assert trips[0]['ticket_price'] is None + assert trips[0]["ticket_price"] is None -def test_find_unreachable_eurostars_returns_empty_when_all_connectable(): +def test_find_unreachable_eurostars_returns_empty_when_all_connectable() -> None: gwr = [ - {'depart_bristol': '07:00', 'arrive_paddington': '08:45'}, + {"depart_bristol": "07:00", "arrive_paddington": "08:45"}, ] eurostar = [ - {'depart_st_pancras': '10:15', 'arrive_destination': '13:40', 'destination': 'Paris Gare du Nord', 'train_number': 'ES 9002'}, + { + "depart_st_pancras": "10:15", + "arrive_destination": "13:40", + "destination": "Paris Gare du Nord", + "train_number": "ES 9002", + }, ] assert find_unreachable_morning_eurostars(gwr, eurostar, DATE) == [] + + +def test_combine_inbound_trips_pairs_eurostar_to_paddington_departure() 
-> None: + eurostar = [ + { + "depart_destination": "15:12", + "arrive_st_pancras": "16:30", + "destination": "Paris Gare du Nord", + "train_number": "ES 9035", + } + ] + gwr = [ + { + "depart_paddington": "17:15", + "arrive_destination": "18:55", + "headcode": "1B99", + } + ] + fares = {"17:15": {"ticket": "Off-Peak Single", "price": 63.60, "code": "SVS"}} + + trips = combine_inbound_trips( + eurostar, + gwr, + DATE, + min_connection_minutes=30, + max_connection_minutes=120, + gwr_fares=fares, + ) + + assert len(trips) == 1 + assert trips[0]["depart_destination"] == "15:12" + assert trips[0]["arrive_st_pancras"] == "16:30" + assert trips[0]["depart_paddington"] == "17:15" + assert trips[0]["arrive_uk_station"] == "18:55" + assert trips[0]["ticket_price"] == 63.60 + assert trips[0]["check_in_by"] == "14:42" diff --git a/trip_planner.py b/trip_planner.py index 324a703..155eda5 100644 --- a/trip_planner.py +++ b/trip_planner.py @@ -3,23 +3,29 @@ Combine GWR station→Paddington trains with Eurostar St Pancras→destination t """ from datetime import datetime, timedelta +from typing import Any import circle_line from tfl_fare import circle_line_fare MIN_CONNECTION_MINUTES = 50 MAX_CONNECTION_MINUTES = 110 +INBOUND_MIN_CONNECTION_MINUTES = 30 +INBOUND_MAX_CONNECTION_MINUTES = 120 DATE_FMT = "%Y-%m-%d" TIME_FMT = "%H:%M" PAD_WALK_TO_UNDERGROUND_MINUTES = 8 # GWR platform → Paddington (H&C Line) platform +KX_WALK_TO_UNDERGROUND_MINUTES = ( + 10 # St Pancras arrivals → King's Cross St Pancras Underground +) def _parse_dt(date: str, time: str) -> datetime: return datetime.strptime(f"{date} {time}", f"{DATE_FMT} {TIME_FMT}") -def _circle_line_services(arrive_paddington: datetime) -> list[dict]: +def _circle_line_services(arrive_paddington: datetime) -> list[dict[str, Any]]: """ Given GWR arrival at Paddington, return up to 2 upcoming Circle line services as [{'depart': 'HH:MM', 'arrive_kx': 'HH:MM'}, ...]. 
@@ -30,7 +36,9 @@ def _circle_line_services(arrive_paddington: datetime) -> list[dict]: earliest_board = arrive_paddington + timedelta( minutes=PAD_WALK_TO_UNDERGROUND_MINUTES ) - services = circle_line.upcoming_services(earliest_board, count=2) + services = circle_line.upcoming_services( + earliest_board, count=2, direction="pad_to_kx" + ) return [ { "depart": dep.strftime(TIME_FMT), @@ -41,6 +49,42 @@ def _circle_line_services(arrive_paddington: datetime) -> list[dict]: ] +PAD_WALK_FROM_UNDERGROUND_MINUTES = ( + 5 # Circle line platform → GWR platform at Paddington +) +INBOUND_COMFORTABLE_MIN_CONN = ( + 40 # threshold above which we apply the platform walk buffer +) + + +def _circle_line_services_to_paddington( + arrive_st_pancras: datetime, + dep_paddington: datetime | None = None, + min_conn_minutes: int = INBOUND_MIN_CONNECTION_MINUTES, +) -> list[dict[str, Any]]: + earliest_board = arrive_st_pancras + timedelta( + minutes=KX_WALK_TO_UNDERGROUND_MINUTES + ) + if min_conn_minutes >= INBOUND_COMFORTABLE_MIN_CONN and dep_paddington is not None: + cutoff = dep_paddington - timedelta(minutes=PAD_WALK_FROM_UNDERGROUND_MINUTES) + candidates = circle_line.upcoming_services( + earliest_board, count=4, direction="kx_to_pad" + ) + services = [(dep, arr) for dep, arr in candidates if arr <= cutoff][:2] + else: + services = circle_line.upcoming_services( + earliest_board, count=1, direction="kx_to_pad", preceding=1 + ) + return [ + { + "depart": dep.strftime(TIME_FMT), + "arrive_pad": arr.strftime(TIME_FMT), + "fare": circle_line_fare(dep), + } + for dep, arr in services + ] + + def _fmt_duration(minutes: int) -> str: h, m = divmod(minutes, 60) if h and m: @@ -51,8 +95,8 @@ def _fmt_duration(minutes: int) -> str: def _is_viable_connection( - gwr: dict, - eurostar: dict, + gwr: dict[str, Any], + eurostar: dict[str, Any], travel_date: str, min_connection_minutes: int, max_connection_minutes: int, @@ -80,14 +124,45 @@ def _is_viable_connection( return dep_bri, arr_pad, 
dep_stp, arr_dest +def _is_viable_inbound_connection( + eurostar: dict[str, Any], + gwr: dict[str, Any], + travel_date: str, + min_connection_minutes: int, + max_connection_minutes: int, +) -> tuple[datetime, datetime, datetime, datetime] | None: + try: + dep_dest = _parse_dt(travel_date, eurostar["depart_destination"]) + arr_stp = _parse_dt(travel_date, eurostar["arrive_st_pancras"]) + dep_pad = _parse_dt(travel_date, gwr["depart_paddington"]) + arr_station = _parse_dt(travel_date, gwr["arrive_destination"]) + except (ValueError, KeyError): + return None + + if arr_stp < dep_dest: + arr_stp += timedelta(days=1) + if dep_pad < arr_stp: + dep_pad += timedelta(days=1) + if arr_station < dep_pad: + arr_station += timedelta(days=1) + + connection_minutes = (dep_pad - arr_stp).total_seconds() / 60 + if connection_minutes < min_connection_minutes: + return None + if connection_minutes > max_connection_minutes: + return None + + return dep_dest, arr_stp, dep_pad, arr_station + + def combine_trips( - gwr_trains: list[dict], - eurostar_trains: list[dict], + gwr_trains: list[dict[str, Any]], + eurostar_trains: list[dict[str, Any]], travel_date: str, min_connection_minutes: int = MIN_CONNECTION_MINUTES, max_connection_minutes: int = MAX_CONNECTION_MINUTES, - gwr_fares: dict | None = None, -) -> list[dict]: + gwr_fares: dict[str, Any] | None = None, +) -> list[dict[str, Any]]: """ Return a list of valid combined trips, sorted by Bristol departure time. @@ -117,8 +192,8 @@ def combine_trips( continue dep_bri, arr_pad, dep_stp, arr_dest = connection - total_mins = int((arr_dest - dep_bri).total_seconds() / 60) # Destination time is CET/CEST, departure is GMT/BST; Europe is always 1h ahead. 
+ total_mins = int((arr_dest - dep_bri).total_seconds() / 60) - 60 eurostar_mins = int((arr_dest - dep_stp).total_seconds() / 60) - 60 fare = (gwr_fares or {}).get(gwr["depart_bristol"]) circle_svcs = _circle_line_services(arr_pad) @@ -154,13 +229,79 @@ def combine_trips( return trips +def combine_inbound_trips( + eurostar_trains: list[dict[str, Any]], + gwr_trains: list[dict[str, Any]], + travel_date: str, + min_connection_minutes: int = INBOUND_MIN_CONNECTION_MINUTES, + max_connection_minutes: int = INBOUND_MAX_CONNECTION_MINUTES, + gwr_fares: dict[str, Any] | None = None, +) -> list[dict[str, Any]]: + """Return valid continent→UK combined trips.""" + trips = [] + + for es in eurostar_trains: + for gwr in gwr_trains: + connection = _is_viable_inbound_connection( + es, + gwr, + travel_date, + min_connection_minutes, + max_connection_minutes, + ) + if not connection: + continue + dep_dest, arr_stp, dep_pad, arr_station = connection + # Destination time is CET/CEST, arrival at London is GMT/BST; Europe is always 1h ahead. 
+ total_mins = int((arr_station - dep_dest).total_seconds() / 60) + 60 + eurostar_mins = int((arr_stp - dep_dest).total_seconds() / 60) + 60 + fare = (gwr_fares or {}).get(gwr["depart_paddington"]) + circle_svcs = _circle_line_services_to_paddington( + arr_stp, dep_pad, min_connection_minutes + ) + trips.append( + { + "direction": "inbound", + "depart_destination": es["depart_destination"], + "check_in_by": (dep_dest - timedelta(minutes=30)).strftime( + TIME_FMT + ), + "arrive_st_pancras": es["arrive_st_pancras"], + "depart_paddington": gwr["depart_paddington"], + "arrive_uk_station": gwr["arrive_destination"], + "arrive_platform": gwr.get("arrive_platform", ""), + "headcode": gwr.get("headcode", ""), + "gwr_duration": _fmt_duration( + int((arr_station - dep_pad).total_seconds() / 60) + ), + "connection_minutes": int((dep_pad - arr_stp).total_seconds() / 60), + "connection_duration": _fmt_duration( + int((dep_pad - arr_stp).total_seconds() / 60) + ), + "circle_services": circle_svcs, + "eurostar_duration": _fmt_duration(eurostar_mins), + "train_number": es.get("train_number", ""), + "total_duration": _fmt_duration(total_mins), + "total_minutes": total_mins, + "destination": es["destination"], + "ticket_name": fare["ticket"] if fare else None, + "ticket_price": fare["price"] if fare else None, + "ticket_code": fare["code"] if fare else None, + } + ) + break + + trips.sort(key=lambda t: (t["depart_destination"], t["depart_paddington"])) + return trips + + def find_unreachable_morning_eurostars( - gwr_trains: list[dict], - eurostar_trains: list[dict], + gwr_trains: list[dict[str, Any]], + eurostar_trains: list[dict[str, Any]], travel_date: str, min_connection_minutes: int = MIN_CONNECTION_MINUTES, max_connection_minutes: int = MAX_CONNECTION_MINUTES, -) -> list[dict]: +) -> list[dict[str, Any]]: unreachable = [] for es in eurostar_trains: @@ -184,3 +325,35 @@ def find_unreachable_morning_eurostars( unreachable.append({**es, "eurostar_duration": 
_fmt_duration(eurostar_mins)}) return sorted(unreachable, key=lambda s: s["depart_st_pancras"]) + + +def find_unreachable_inbound_eurostars( + eurostar_trains: list[dict[str, Any]], + gwr_trains: list[dict[str, Any]], + travel_date: str, + min_connection_minutes: int = INBOUND_MIN_CONNECTION_MINUTES, + max_connection_minutes: int = INBOUND_MAX_CONNECTION_MINUTES, +) -> list[dict[str, Any]]: + unreachable = [] + + for es in eurostar_trains: + if any( + _is_viable_inbound_connection( + es, + gwr, + travel_date, + min_connection_minutes, + max_connection_minutes, + ) + for gwr in gwr_trains + ): + continue + + dep_dest = _parse_dt(travel_date, es["depart_destination"]) + arr_stp = _parse_dt(travel_date, es["arrive_st_pancras"]) + if arr_stp < dep_dest: + arr_stp += timedelta(days=1) + eurostar_mins = int((arr_stp - dep_dest).total_seconds() / 60) + 60 + unreachable.append({**es, "eurostar_duration": _fmt_duration(eurostar_mins)}) + + return sorted(unreachable, key=lambda s: s["depart_destination"])