From 9691632f6571bc689c1b1d7d75dc35f6aea6d514 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 21 May 2026 08:46:35 +0100 Subject: [PATCH] Add return and inbound journey support --- app.py | 482 ++++++++++++++--------- circle_line.py | 75 ++-- scraper/eurostar.py | 150 +++++-- scraper/gwr_fares.py | 69 +++- scraper/realtime_trains.py | 58 ++- templates/index.html | 72 +++- templates/results.html | 541 +++++++++++++++----------- tests/test_app.py | 102 +++++ tests/test_eurostar_scraper.py | 25 +- tests/test_playwright_return_fares.py | 422 ++++++++++++++++++++ tests/test_trip_planner.py | 32 +- trip_planner.py | 145 ++++++- 12 files changed, 1687 insertions(+), 486 deletions(-) create mode 100644 tests/test_playwright_return_fares.py diff --git a/app.py b/app.py index 70b0884..98100a1 100644 --- a/app.py +++ b/app.py @@ -12,7 +12,14 @@ from cache import get_cached, set_cached import scraper.eurostar as eurostar_scraper import scraper.gwr_fares as gwr_fares_scraper import scraper.realtime_trains as rtt_scraper -from trip_planner import combine_trips, find_unreachable_morning_eurostars +from trip_planner import ( + INBOUND_MAX_CONNECTION_MINUTES, + INBOUND_MIN_CONNECTION_MINUTES, + combine_inbound_trips, + combine_trips, + find_unreachable_inbound_eurostars, + find_unreachable_morning_eurostars, +) RTT_PADDINGTON_URL = ( "https://www.realtimetrains.co.uk/search/detailed/" @@ -76,11 +83,15 @@ def index(): default_max_connection=default_max, valid_min_connections=sorted(VALID_MIN_CONNECTIONS), valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + default_return_date=(date.today() + timedelta(days=7)).isoformat(), ) VALID_MIN_CONNECTIONS = {45, 50, 60, 70, 80, 90, 100, 110, 120} VALID_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180} +VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50, 60, 70, 80, 90, 100, 110, 120} +VALID_INBOUND_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180} +VALID_JOURNEY_TYPES = 
{"outbound", "inbound", "return"} VALID_NR_CLASSES = {'walkon', 'advance_std', 'advance_1st'} VALID_ES_CLASSES = {'standard', 'plus'} DEFAULT_NR_CLASS = 'walkon' @@ -106,15 +117,24 @@ def _parse_connection(raw, default, valid_set): def search(): slug = request.args.get("destination", "") travel_date = request.args.get("travel_date", "") + return_date = request.args.get("return_date", "") + journey_type = request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" station_crs = request.args.get("station_crs", "BRI") if station_crs not in STATION_BY_CRS: station_crs = "BRI" - default_min, default_max = _get_defaults() + if journey_type == "inbound": + default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES + valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_conn = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_conn = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) if nr_class not in VALID_NR_CLASSES: @@ -122,13 +142,21 @@ def search(): es_class = request.args.get("es_class", DEFAULT_ES_CLASS) if es_class not in VALID_ES_CLASSES: es_class = DEFAULT_ES_CLASS - if slug in DESTINATIONS and travel_date: + if journey_type == "return": + try: + if return_date and date.fromisoformat(return_date) < date.fromisoformat(travel_date): + return_date = "" + except ValueError: + return_date = "" + if slug in DESTINATIONS and travel_date and (journey_type != "return" or return_date): return redirect( url_for( "results", station_crs=station_crs, slug=slug, 
travel_date=travel_date, + journey_type=None if journey_type == "outbound" else journey_type, + return_date=return_date if journey_type == "return" else None, min_connection=None if min_conn == default_min else min_conn, max_connection=None if max_conn == default_max else max_conn, nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class, @@ -147,12 +175,28 @@ def results(station_crs, slug, travel_date): if not destination or not travel_date: return redirect(url_for("index")) - default_min, default_max = _get_defaults() + journey_type = request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" + return_date = request.args.get("return_date") + if journey_type == "return": + try: + if not return_date or date.fromisoformat(return_date) < date.fromisoformat(travel_date): + return redirect(url_for("index")) + except ValueError: + return redirect(url_for("index")) + + if journey_type == "inbound": + default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES + valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_connection = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_connection = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) if nr_class not in VALID_NR_CLASSES: @@ -161,150 +205,207 @@ def results(station_crs, slug, travel_date): if es_class not in VALID_ES_CLASSES: es_class = DEFAULT_ES_CLASS - # Redirect to clean URL when all params are at their defaults - _clean_url_params = ["min_connection", "max_connection", "nr_class", "es_class"] - if 
any(k in request.args for k in _clean_url_params) and ( - min_connection == default_min - and max_connection == default_max - and nr_class == DEFAULT_NR_CLASS - and es_class == DEFAULT_ES_CLASS - ): - return redirect( - url_for("results", station_crs=station_crs, slug=slug, travel_date=travel_date) - ) - user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA) + error_messages = [] + from_cache_parts = [] - rtt_cache_key = f"rtt_{station_crs}_{travel_date}" - es_cache_key = f"eurostar_{travel_date}_{destination}" - gwr_fares_cache_key = f"gwr_fares_{station_crs}_{travel_date}" - gwr_advance_cache_key = f"gwr_advance_{station_crs}_{travel_date}" - - cached_rtt = get_cached(rtt_cache_key) - cached_es = get_cached(es_cache_key, ttl=24 * 3600) - cached_gwr_fares = get_cached(gwr_fares_cache_key, ttl=30 * 24 * 3600) - cached_advance_fares = get_cached(gwr_advance_cache_key, ttl=24 * 3600) - from_cache = bool(cached_rtt and cached_es) - - error = None - - if cached_rtt: - gwr_trains = cached_rtt - else: + def cached_fetch(key, ttl, fetcher, label): + cached = get_cached(key, ttl=ttl) + if cached is not None: + from_cache_parts.append(key) + return cached try: - gwr_trains = rtt_scraper.fetch(travel_date, user_agent, station_crs) - set_cached(rtt_cache_key, gwr_trains) + data = fetcher() + set_cached(key, data) + return data except Exception as e: - gwr_trains = [] - error = f"Could not fetch GWR trains: {e}" + error_messages.append(f"Could not fetch {label}: {e}") + return [] if label != "GWR fares" else {} - if cached_es: - eurostar_services = cached_es - else: - try: - eurostar_services = eurostar_scraper.fetch(destination, travel_date) - set_cached(es_cache_key, eurostar_services) - except Exception as e: - eurostar_services = [] - msg = f"Could not fetch Eurostar times: {e}" - error = f"{error}; {msg}" if error else msg + es_return = None + if journey_type == "return": + es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}" + 
es_return = cached_fetch( + es_return_key, + 24 * 3600, + lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date), + "Eurostar times", + ) + if not isinstance(es_return, dict): + es_return = {"outbound": [], "inbound": []} - if cached_gwr_fares: - gwr_fares = cached_gwr_fares - else: - try: - gwr_fares = gwr_fares_scraper.fetch(station_crs, travel_date) - set_cached(gwr_fares_cache_key, gwr_fares) - except Exception as e: - gwr_fares = {} - msg = f"Could not fetch GWR fares: {e}" - error = f"{error}; {msg}" if error else msg + def build_section(section_id, direction, section_date, eurostar_services=None): + section_min_connection = min_connection + section_max_connection = max_connection + if journey_type == "return" and direction == "inbound": + section_min_connection = INBOUND_MIN_CONNECTION_MINUTES + section_max_connection = INBOUND_MAX_CONNECTION_MINUTES + rtt_direction = "to_paddington" if direction == "outbound" else "from_paddington" + rtt_cache_key = f"rtt_{rtt_direction}_{station_crs}_{section_date}" + gwr_cache_key = f"gwr_fares_{rtt_direction}_{station_crs}_{section_date}" + advance_cache_key = f"gwr_advance_{rtt_direction}_{station_crs}_{section_date}" - eurostar_trains = eurostar_services - eurostar_prices = { - s["depart_st_pancras"]: { - "price": s.get("price"), - "seats": s.get("seats"), - "plus_price": s.get("plus_price"), - "plus_seats": s.get("plus_seats"), + if direction == "outbound": + trains = cached_fetch( + rtt_cache_key, + None, + lambda: rtt_scraper.fetch(section_date, user_agent, station_crs), + "GWR trains", + ) + else: + trains = cached_fetch( + rtt_cache_key, + None, + lambda: rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs), + "GWR trains", + ) + + if eurostar_services is None: + es_cache_key = f"eurostar_{direction}_{section_date}_{destination}" + es_fetcher = ( + (lambda: eurostar_scraper.fetch(destination, section_date)) + if direction == "outbound" + else (lambda: 
eurostar_scraper.fetch(destination, section_date, direction=direction)) + ) + eurostar_services = cached_fetch( + es_cache_key, + 24 * 3600, + es_fetcher, + "Eurostar times", + ) + + fare_direction = "to_paddington" if direction == "outbound" else "from_paddington" + gwr_fares = cached_fetch( + gwr_cache_key, + 30 * 24 * 3600, + ( + (lambda: gwr_fares_scraper.fetch(station_crs, section_date)) + if fare_direction == "to_paddington" + else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction)) + ), + "GWR fares", + ) + cached_advance = get_cached(advance_cache_key, ttl=24 * 3600) + + if direction == "outbound": + trips = combine_trips( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_morning_eurostars( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_st_pancras"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_st_pancras"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable], + key=lambda row: row["depart_st_pancras"], + ) + else: + trips = combine_inbound_trips( + eurostar_services, + trains, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_inbound_eurostars( + eurostar_services, + trains, + section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_destination"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_destination"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [{"row_type": "unreachable", "direction": direction, **svc} for svc in 
unreachable], + key=lambda row: row["depart_destination"], + ) + + es_by_key = { + (svc.get("depart_st_pancras") if direction == "outbound" else svc.get("depart_destination")): svc + for svc in eurostar_services } - for s in eurostar_services - } + for row in rows: + key = row.get("depart_st_pancras") if direction == "outbound" else row.get("depart_destination") + es = es_by_key.get(key, {}) + row["eurostar_price"] = es.get("price") + row["eurostar_seats"] = es.get("seats") + row["eurostar_plus_price"] = es.get("plus_price") + row["eurostar_plus_seats"] = es.get("plus_seats") + row["row_key"] = f"{section_id}:{key}" - trips = combine_trips( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - gwr_fares, - ) + dt = date.fromisoformat(section_date) + return { + "id": section_id, + "direction": direction, + "date": section_date, + "date_display": dt.strftime("%A %-d %B %Y"), + "rows": rows, + "trips": trips, + "gwr_count": len(trains), + "eurostar_count": len(eurostar_services), + "min_connection": section_min_connection, + "max_connection": section_max_connection, + "advance_fares": cached_advance, + "advance_api_url": url_for( + "api_advance_fares", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + "advance_stream_url": url_for( + "api_advance_fares_stream", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + } - # Annotate each trip with Eurostar prices and total cost (walk-on + standard) - for trip in trips: - es = eurostar_prices.get(trip["depart_st_pancras"], {}) - es_price = es.get("price") - trip["eurostar_price"] = es_price - trip["eurostar_seats"] = es.get("seats") - trip["eurostar_plus_price"] = es.get("plus_price") - trip["eurostar_plus_seats"] = es.get("plus_seats") - gwr_p = trip.get("ticket_price") - circle_svcs = trip.get("circle_services") - circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 - trip["total_price"] = ( - gwr_p + 
es_price + circle_fare - if (gwr_p is not None and es_price is not None) - else None - ) - - # If the API returned journeys but every price is None, tickets aren't on sale yet - no_prices_note = None - if eurostar_prices and all( - v.get("price") is None for v in eurostar_prices.values() - ): - no_prices_note = ( - "Eurostar prices not yet available — tickets may not be on sale yet." - ) - - unreachable_morning_services = find_unreachable_morning_eurostars( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - ) - for svc in unreachable_morning_services: - es = eurostar_prices.get(svc["depart_st_pancras"], {}) - svc["eurostar_price"] = es.get("price") - svc["eurostar_seats"] = es.get("seats") - svc["eurostar_plus_price"] = es.get("plus_price") - svc["eurostar_plus_seats"] = es.get("plus_seats") - - # Only keep unreachable services that depart before the first reachable Eurostar. - # Services after the first reachable one are omitted (they aren't "Too early"). 
- if trips: - first_es_depart = min(t["depart_st_pancras"] for t in trips) - unreachable_morning_services = [ - s - for s in unreachable_morning_services - if s["depart_st_pancras"] < first_es_depart + if journey_type == "return": + sections = [ + build_section("outbound", "outbound", travel_date, es_return.get("outbound", [])), + build_section("inbound", "inbound", return_date, es_return.get("inbound", [])), ] + else: + sections = [build_section("main", journey_type, travel_date)] - result_rows = sorted( - [{"row_type": "trip", **trip} for trip in trips] - + [ - {"row_type": "unreachable", **service} - for service in unreachable_morning_services - ], - key=lambda row: row["depart_st_pancras"], - ) + no_prices_note = None + all_es_prices = [ + row.get("eurostar_price") + for section in sections + for row in section["rows"] + if row.get("row_type") == "trip" + ] + if all_es_prices and all(price is None for price in all_es_prices): + no_prices_note = "Eurostar prices not yet available — tickets may not be on sale yet." 
dt = date.fromisoformat(travel_date) prev_date = (dt - timedelta(days=1)).isoformat() next_date = (dt + timedelta(days=1)).isoformat() travel_date_display = dt.strftime("%A %-d %B %Y") - eurostar_url = eurostar_scraper.search_url(destination, travel_date) + eurostar_url = eurostar_scraper.search_url( + destination, travel_date, direction=journey_type, return_date=return_date + ) rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date) rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date) @@ -313,55 +414,62 @@ def results(station_crs, slug, travel_date): url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class url_es = None if es_class == DEFAULT_ES_CLASS else es_class - # Build per-row fare data for JS consumption trip_fares = {} - for row in result_rows: - stp = row.get("depart_st_pancras") - if not stp: - continue - circle_svcs = row.get("circle_services") or [] - circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 - walkon = ( - {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} - if row.get("ticket_price") is not None - else None - ) - es_std = ( - {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} - if row.get("eurostar_price") is not None - else None - ) - es_plus = ( - {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")} - if row.get("eurostar_plus_price") is not None - else None - ) - trip_fares[stp] = { - "depart_bristol": row.get("depart_bristol"), - "walkon": walkon, - "es_standard": es_std, - "es_plus": es_plus, - "circle_fare": circle_fare, - } + advance_fares = {} + advance_api_urls = {} + advance_stream_urls = {} + for section in sections: + advance_fares[section["id"]] = section["advance_fares"] + advance_api_urls[section["id"]] = section["advance_api_url"] + advance_stream_urls[section["id"]] = section["advance_stream_url"] + for row in section["rows"]: + circle_svcs = row.get("circle_services") or [] + circle_fare = circle_svcs[0]["fare"] if 
circle_svcs else 0 + walkon = ( + {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} + if row.get("ticket_price") is not None + else None + ) + es_std = ( + {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} + if row.get("eurostar_price") is not None + else None + ) + es_plus = ( + {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")} + if row.get("eurostar_plus_price") is not None + else None + ) + trip_fares[row["row_key"]] = { + "section": section["id"], + "advance_key": row.get("depart_bristol") or row.get("depart_paddington"), + "walkon": walkon, + "es_standard": es_std, + "es_plus": es_plus, + "circle_fare": circle_fare, + } return render_template( "results.html", - trips=trips, - result_rows=result_rows, - unreachable_morning_services=unreachable_morning_services, + sections=sections, + trips=sections[0]["trips"] if sections else [], + result_rows=sections[0]["rows"] if sections else [], + unreachable_morning_services=[], destinations=DESTINATIONS, destination=destination, travel_date=travel_date, + return_date=return_date, + journey_type=journey_type, slug=slug, station_crs=station_crs, departure_station_name=departure_station_name, prev_date=prev_date, next_date=next_date, travel_date_display=travel_date_display, - gwr_count=len(gwr_trains), - eurostar_count=len(eurostar_trains), - from_cache=from_cache, - error=error, + gwr_count=sum(section["gwr_count"] for section in sections), + eurostar_count=sum(section["eurostar_count"] for section in sections), + from_cache=bool(from_cache_parts), + error="; ".join(error_messages) if error_messages else None, no_prices_note=no_prices_note, eurostar_url=eurostar_url, rtt_url=rtt_url, @@ -376,12 +484,15 @@ def results(station_crs, slug, travel_date): es_class=es_class, url_nr_class=url_nr, url_es_class=url_es, + url_journey_type=None if journey_type == "outbound" else journey_type, trip_fares_json=json.dumps(trip_fares), - 
advance_fares_json=json.dumps(cached_advance_fares), + advance_fares_json=json.dumps(advance_fares), + advance_api_urls_json=json.dumps(advance_api_urls), + advance_stream_urls_json=json.dumps(advance_stream_urls), advance_fares_api_url=url_for("api_advance_fares", station_crs=station_crs, travel_date=travel_date), advance_fares_stream_url=url_for("api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date), - valid_min_connections=sorted(VALID_MIN_CONNECTIONS), - valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + valid_min_connections=sorted(valid_min), + valid_max_connections=sorted(valid_max), ) @@ -389,12 +500,19 @@ def results(station_crs, slug, travel_date): def api_advance_fares(station_crs, travel_date): if station_crs not in STATION_BY_CRS: abort(404) - cache_key = f"gwr_advance_{station_crs}_{travel_date}" + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" cached = get_cached(cache_key, ttl=24 * 3600) if cached is not None: return jsonify(cached) try: - fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date) + fares = ( + gwr_fares_scraper.fetch_advance(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance(station_crs, travel_date, direction=direction) + ) set_cached(cache_key, fares) return jsonify(fares) except Exception as e: @@ -405,7 +523,10 @@ def api_advance_fares(station_crs, travel_date): def api_advance_fares_stream(station_crs, travel_date): if station_crs not in STATION_BY_CRS: abort(404) - cache_key = f"gwr_advance_{station_crs}_{travel_date}" + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" def generate(): cached = 
get_cached(cache_key, ttl=24 * 3600) @@ -416,7 +537,14 @@ def api_advance_fares_stream(station_crs, travel_date): accumulated: dict = {} try: - for page_fares in gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date): + stream = ( + gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance_streaming( + station_crs, travel_date, direction=direction + ) + ) + for page_fares in stream: for dep_time, fare_data in page_fares.items(): if dep_time not in accumulated: accumulated[dep_time] = {"advance_std": None, "advance_1st": None} diff --git a/circle_line.py b/circle_line.py index 6292f24..c0d945c 100644 --- a/circle_line.py +++ b/circle_line.py @@ -1,5 +1,5 @@ """ -Circle Line timetable: Paddington (H&C Line) → King's Cross St Pancras. +Circle Line timetable between Paddington (H&C Line) and King's Cross St Pancras. Parses the TransXChange XML file on first use and caches the result in memory. """ @@ -14,9 +14,9 @@ _KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras from config.default import CIRCLE_LINE_XML as _TXC_XML # overridden by app config after import _NS = {'t': 'http://www.transxchange.org.uk/'} -# Populated on first call to next_service(); maps day-type -> sorted list of -# (pad_depart_seconds, kxp_arrive_seconds) measured from midnight. -_timetable: dict[str, list[tuple[int, int]]] | None = None +# Populated on first call to next_service(); maps direction -> day-type -> sorted +# list of (origin_depart_seconds, destination_arrive_seconds) measured from midnight. 
+_timetable: dict[str, dict[str, list[tuple[int, int]]]] | None = None def _parse_duration(s: str | None) -> int: @@ -26,7 +26,7 @@ def _parse_duration(s: str | None) -> int: return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0) -def _load_timetable() -> dict[str, list[tuple[int, int]]]: +def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: tree = ET.parse(_TXC_XML) root = tree.getroot() @@ -66,8 +66,8 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: return elapsed return None - # Map JP id -> (pad_offset_secs, kxp_arrive_offset_secs) - jp_offsets: dict[str, tuple[int, int]] = {} + # Map JP id -> [(direction, origin_depart_offset_secs, destination_arrive_offset_secs)]. + jp_offsets: dict[str, list[tuple[str, int, int]]] = {} for svc in root.find('t:Services', _NS): for jp in svc.findall('.//t:JourneyPattern', _NS): jps_ref = jp.find('t:JourneyPatternSectionRefs', _NS) @@ -75,6 +75,7 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: continue links = jps_map.get(jps_ref.text, []) stops = [l[0] for l in links] + ([links[-1][1]] if links else []) + offsets = [] if ( _PAD_STOP in stops and _KXP_STOP in stops @@ -83,12 +84,30 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: pad_off = _seconds_to_depart(links, _PAD_STOP) kxp_off = _seconds_to_arrive(links, _KXP_STOP) if pad_off is not None and kxp_off is not None: - jp_offsets[jp.get('id')] = (pad_off, kxp_off) + offsets.append(('pad_to_kx', pad_off, kxp_off)) + if ( + _PAD_STOP in stops + and _KXP_STOP in stops + and stops.index(_KXP_STOP) < stops.index(_PAD_STOP) + ): + kxp_off = _seconds_to_depart(links, _KXP_STOP) + pad_off = _seconds_to_arrive(links, _PAD_STOP) + if kxp_off is not None and pad_off is not None: + offsets.append(('kx_to_pad', kxp_off, pad_off)) + if offsets: + jp_offsets[jp.get('id')] = offsets - result: dict[str, list[tuple[int, int]]] = { - 'MondayToFriday': [], - 'Saturday': [], - 'Sunday': [], + result: dict[str, 
dict[str, list[tuple[int, int]]]] = { + 'pad_to_kx': { + 'MondayToFriday': [], + 'Saturday': [], + 'Sunday': [], + }, + 'kx_to_pad': { + 'MondayToFriday': [], + 'Saturday': [], + 'Sunday': [], + }, } for vj in root.find('t:VehicleJourneys', _NS): @@ -97,7 +116,6 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: op = vj.find('t:OperatingProfile', _NS) if jp_ref is None or dep_time is None or jp_ref.text not in jp_offsets: continue - pad_off, kxp_off = jp_offsets[jp_ref.text] h, m, s = map(int, dep_time.text.split(':')) dep_secs = h * 3600 + m * 60 + s rdt = op.find('.//t:DaysOfWeek', _NS) if op is not None else None @@ -105,15 +123,20 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: continue for day_el in rdt: day_type = day_el.tag.split('}')[-1] - if day_type in result: - result[day_type].append((dep_secs + pad_off, dep_secs + kxp_off)) + for direction, origin_off, dest_off in jp_offsets[jp_ref.text]: + if day_type in result[direction]: + result[direction][day_type].append(( + dep_secs + origin_off, + dep_secs + dest_off, + )) - for key in result: - result[key].sort() + for direction in result: + for key in result[direction]: + result[direction][key].sort() return result -def _get_timetable() -> dict[str, list[tuple[int, int]]]: +def _get_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: global _timetable if _timetable is None: _timetable = _load_timetable() @@ -126,7 +149,9 @@ def _day_type(weekday: int) -> str: return 'Saturday' if weekday == 5 else 'Sunday' -def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: +def next_service( + earliest_board: datetime, direction: str = 'pad_to_kx' +) -> tuple[datetime, datetime] | None: """ Given the earliest time a passenger can board at Paddington (H&C Line), return (circle_line_depart, arrive_kings_cross) as datetimes, or None if @@ -135,20 +160,20 @@ def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: The caller is responsible for 
adding any walk time from the GWR platform before passing *earliest_board*. """ - services = upcoming_services(earliest_board, count=1) + services = upcoming_services(earliest_board, count=1, direction=direction) return services[0] if services else None def upcoming_services( - earliest_board: datetime, count: int = 2 + earliest_board: datetime, count: int = 2, direction: str = 'pad_to_kx' ) -> list[tuple[datetime, datetime]]: """ - Return up to *count* Circle line services from Paddington (H&C Line) to - King's Cross St Pancras, starting from *earliest_board*. + Return up to *count* Circle line services for *direction*, starting from + *earliest_board*. - Each element is (depart_paddington, arrive_kings_cross) as datetimes. + Each element is (depart_origin, arrive_destination) as datetimes. """ - timetable = _get_timetable()[_day_type(earliest_board.weekday())] + timetable = _get_timetable().get(direction, {})[_day_type(earliest_board.weekday())] board_secs = ( earliest_board.hour * 3600 + earliest_board.minute * 60 diff --git a/scraper/eurostar.py b/scraper/eurostar.py index 667decf..6e2f82a 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -16,7 +16,8 @@ DEFAULT_UA = ( "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" ) -ORIGIN_STATION_ID = '7015400' +ST_PANCRAS_STATION_ID = '7015400' +ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID DESTINATION_STATION_IDS = { 'Paris Gare du Nord': '8727100', @@ -35,11 +36,11 @@ _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' _GQL_QUERY = ( "query NewBookingSearch(" "$origin:String!,$destination:String!,$outbound:String!," - "$currency:Currency!,$adult:Int," + "$inbound:String,$currency:Currency!,$adult:Int," "$filteredClassesOfService:[ClassOfServiceEnum]" "){" "journeySearch(" - "outboundDate:$outbound origin:$origin destination:$destination" + "outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination" " adults:$adult currency:$currency" " productFamilies:[\"PUB\"] 
contractCode:\"EIL_ALL\"" " adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0" @@ -64,6 +65,22 @@ _GQL_QUERY = ( "}" "}" "}" + "inbound{" + "journeys(" + "hideIndirectTrainsWhenDisruptedAndCancelled:false" + " hideDepartedTrains:true" + " hideExternalCarrierTrains:true" + " hideDirectExternalCarrierTrains:true" + "){" + "timing{departureTime:departs arrivalTime:arrives}" + "fares(filteredClassesOfService:$filteredClassesOfService){" + "classOfService{code}" + "prices{displayPrice}" + "seats " + "legs{serviceName serviceType{code}}" + "}" + "}" + "}" "}" "}" ) @@ -72,11 +89,19 @@ _STANDARD = 'STANDARD' _STANDARD_PLUS = 'PLUS' -def search_url(destination: str, travel_date: str) -> str: +def search_url(destination: str, travel_date: str, direction: str = "outbound", return_date: str | None = None) -> str: dest_id = DESTINATION_STATION_IDS[destination] + origin = ST_PANCRAS_STATION_ID + destination_id = dest_id + outbound = travel_date + inbound = return_date + if direction == "inbound": + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + inbound = None return ( f'https://www.eurostar.com/search/uk-en' - f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}' + f'?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}' + + (f'&inbound={inbound}' if inbound else '') ) @@ -85,7 +110,7 @@ def _generate_cid() -> str: return 'SRCH-' + ''.join(random.choices(chars, k=22)) -def _parse_graphql(data: dict, destination: str) -> list[dict]: +def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> list[dict]: """ Parse a NewBookingSearch GraphQL response into a list of service dicts. @@ -97,7 +122,6 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329'). 
""" best: dict[str, dict] = {} - journeys = data['data']['journeySearch']['outbound']['journeys'] for journey in journeys: dep = journey['timing']['departureTime'] arr = journey['timing']['arrivalTime'] @@ -118,8 +142,21 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: std_price, std_seats = price, seats elif cos == _STANDARD_PLUS: plus_price, plus_seats = price, seats - if dep not in best or arr < best[dep]['arrive_destination']: - best[dep] = { + if direction == 'inbound': + service = { + 'depart_destination': dep, + 'arrive_st_pancras': arr, + 'destination': destination, + 'train_number': train_number, + 'price': std_price, + 'seats': std_seats, + 'plus_price': plus_price, + 'plus_seats': plus_seats, + } + key = dep + arrive_key = 'arrive_st_pancras' + else: + service = { 'depart_st_pancras': dep, 'arrive_destination': arr, 'destination': destination, @@ -129,18 +166,43 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: 'plus_price': plus_price, 'plus_seats': plus_seats, } - return sorted(best.values(), key=lambda s: s['depart_st_pancras']) + key = dep + arrive_key = 'arrive_destination' + if key not in best or arr < best[key][arrive_key]: + best[key] = service + sort_key = 'depart_destination' if direction == 'inbound' else 'depart_st_pancras' + return sorted(best.values(), key=lambda s: s[sort_key]) -def fetch(destination: str, travel_date: str) -> list[dict]: - """ - Return all Eurostar services for destination on travel_date. +def _parse_graphql(data: dict, destination: str) -> list[dict]: + journeys = data['data']['journeySearch']['outbound']['journeys'] + return _parse_journeys(journeys, destination, 'outbound') - Each dict contains timetable info (depart_st_pancras, arrive_destination, - train_number) plus pricing (price, seats) from a single GraphQL call. 
- """ - dest_id = DESTINATION_STATION_IDS[destination] - headers = { + +def _parse_graphql_leg(data: dict, destination: str, leg: str, direction: str) -> list[dict]: + journeys = data['data']['journeySearch'][leg]['journeys'] + return _parse_journeys(journeys, destination, direction) + + +def _payload(origin: str, destination_id: str, outbound: str, inbound: str | None = None) -> dict: + variables = { + 'origin': origin, + 'destination': destination_id, + 'outbound': outbound, + 'inbound': inbound, + 'currency': 'GBP', + 'adult': 1, + 'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS], + } + return { + 'operationName': 'NewBookingSearch', + 'variables': variables, + 'query': _GQL_QUERY, + } + + +def _headers() -> dict: + return { 'User-Agent': DEFAULT_UA, 'Content-Type': 'application/json', 'Accept': '*/*', @@ -151,18 +213,42 @@ def fetch(destination: str, travel_date: str) -> list[dict]: 'x-source-url': 'search-app/', 'cid': _generate_cid(), } - payload = { - 'operationName': 'NewBookingSearch', - 'variables': { - 'origin': ORIGIN_STATION_ID, - 'destination': dest_id, - 'outbound': travel_date, - 'currency': 'GBP', - 'adult': 1, - 'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS], - }, - 'query': _GQL_QUERY, - } - resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) + + +def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> list[dict]: + """ + Return all Eurostar services for destination on travel_date. + + Each dict contains timetable info (depart_st_pancras, arrive_destination, + train_number) plus pricing (price, seats) from a single GraphQL call. 
+ """ + dest_id = DESTINATION_STATION_IDS[destination] + if direction == 'inbound': + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + else: + origin, destination_id = ST_PANCRAS_STATION_ID, dest_id + resp = requests.post( + _GATEWAY_URL, + json=_payload(origin, destination_id, travel_date), + headers=_headers(), + timeout=20, + ) resp.raise_for_status() - return _parse_graphql(resp.json(), destination) + leg_direction = 'inbound' if direction == 'inbound' else 'outbound' + return _parse_graphql_leg(resp.json(), destination, 'outbound', leg_direction) + + +def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict[str, list[dict]]: + dest_id = DESTINATION_STATION_IDS[destination] + resp = requests.post( + _GATEWAY_URL, + json=_payload(ST_PANCRAS_STATION_ID, dest_id, outbound_date, return_date), + headers=_headers(), + timeout=20, + ) + resp.raise_for_status() + data = resp.json() + return { + 'outbound': _parse_graphql_leg(data, destination, 'outbound', 'outbound'), + 'inbound': _parse_graphql_leg(data, destination, 'inbound', 'inbound'), + } diff --git a/scraper/gwr_fares.py b/scraper/gwr_fares.py index 4d9f203..7d3cc26 100644 --- a/scraper/gwr_fares.py +++ b/scraper/gwr_fares.py @@ -32,7 +32,8 @@ def _headers() -> dict: def _request_body( - station_crs: str, + from_code: str, + to_code: str, travel_date: str, conversation_token: str | None, later: bool, @@ -44,8 +45,8 @@ def _request_body( "IsPreviousReturn": False, "campaignCode": "", "validationCode": "", - "locfrom": f"GB{station_crs}", - "locto": _PAD_CODE, + "locfrom": from_code, + "locto": to_code, "datetimedepart": f"{travel_date}T00:00:00", "outwarddepartafter": True, "datetimereturn": None, @@ -67,7 +68,22 @@ def _request_body( } -def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): +def _station_code(station_crs: str) -> str: + return f"GB{station_crs}" + + +def _od_codes(station_crs: str, direction: str) -> tuple[str, str]: + if direction == 
"from_paddington": + return _PAD_CODE, _station_code(station_crs) + return _station_code(station_crs), _PAD_CODE + + +def _run_pages( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +): """ Iterate all pages of GWR journey search results. @@ -78,8 +94,9 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): with httpx.Client(headers=_headers(), timeout=30) as client: conversation_token = None later = False + from_code, to_code = _od_codes(station_crs, direction) for _ in range(_MAX_PAGES): - body = _request_body(station_crs, travel_date, conversation_token, later) + body = _request_body(from_code, to_code, travel_date, conversation_token, later) if first_class: body["firstclass"] = True body["standardclass"] = False @@ -99,7 +116,12 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): later = True -def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = False): +def _run_pages_batched( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +): """ Like _run_pages but yields one list of (dep_time, fares_list) per API page call, allowing callers to stream results a page at a time. 
@@ -108,8 +130,9 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F with httpx.Client(headers=_headers(), timeout=30) as client: conversation_token = None later = False + from_code, to_code = _od_codes(station_crs, direction) for _ in range(_MAX_PAGES): - body = _request_body(station_crs, travel_date, conversation_token, later) + body = _request_body(from_code, to_code, travel_date, conversation_token, later) if first_class: body["firstclass"] = True body["standardclass"] = False @@ -132,16 +155,18 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F later = True -def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: +def fetch( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict]: """ - Fetch GWR walk-on single fares from station_crs to London Paddington on travel_date. + Fetch GWR walk-on single fares for the selected Paddington direction. Returns {departure_time: {'ticket': name, 'price': float, 'code': code}} where price is in £ and only the cheapest available standard-class walk-on ticket per departure (with restrictions already applied by GWR) is kept. """ result: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date): + for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -166,7 +191,9 @@ def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: return result -def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: +def fetch_advance( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict]: """ Fetch advance fares: cheapest standard advance and first-class advance per departure. @@ -175,7 +202,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: where each sub-dict has keys 'ticket', 'price', 'code'. 
""" std_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=False): + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=False, direction=direction + ): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -199,7 +228,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: } first_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=True): + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=True, direction=direction + ): cheapest = None for fare in fares: price_pence = fare.get("fare", 0) @@ -227,7 +258,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: } -def fetch_advance_streaming(station_crs: str, travel_date: str): +def fetch_advance_streaming( + station_crs: str, travel_date: str, direction: str = "to_paddington" +): """ Generator yielding partial advance fare dicts one GWR API page at a time. @@ -236,7 +269,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str): yielded immediately so callers can stream prices to clients as they arrive. 
""" # Pass 1: standard class advance fares - for batch in _run_pages_batched(station_crs, travel_date, first_class=False): + for batch in _run_pages_batched( + station_crs, travel_date, first_class=False, direction=direction + ): page: dict[str, dict] = {} for dep_time, fares in batch: cheapest = None @@ -267,7 +302,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str): yield page # Pass 2: first class advance fares - for batch in _run_pages_batched(station_crs, travel_date, first_class=True): + for batch in _run_pages_batched( + station_crs, travel_date, first_class=True, direction=direction + ): page = {} for dep_time, fares in batch: cheapest = None diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py index 9b5e936..6613bbf 100644 --- a/scraper/realtime_trains.py +++ b/scraper/realtime_trains.py @@ -1,5 +1,6 @@ """ -Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. +Scrape direct trains between a selected station and London Paddington using +Realtime Trains. 
Two fetches: BRI/to/PAD → departure times from Bristol (div.time.plan.d) @@ -20,6 +21,16 @@ _PAD_FROM_TMPL = ( "gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359" "?stp=WVS&show=pax-calls&order=wtt" ) +_PAD_TO_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) +_FROM_PAD_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) DEFAULT_UA = ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " @@ -69,7 +80,7 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]: def _parse_arrivals(html: str) -> dict[str, dict]: - """Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page.""" + """Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page.""" root = lxml.html.fromstring(html) sl = root.cssselect('div.servicelist') if not sl: @@ -93,7 +104,7 @@ def _parse_arrivals(html: str) -> dict[str, dict]: def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]: - """Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}].""" + """Fetch trains from station_crs to PAD.""" headers = _browser_headers(user_agent) with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date)) @@ -113,3 +124,44 @@ def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> if tid in arrivals ] return sorted(trains, key=lambda t: t['depart_bristol']) + + +def fetch_to_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI' +) -> list[dict]: + """Fetch trains from station_crs to PAD using generic field names.""" + return [ + { + **train, + "depart_origin": train["depart_bristol"], + "arrive_paddington": train["arrive_paddington"], + 
"arrive_platform": train.get("arrive_platform", ""), + "headcode": train.get("headcode", ""), + } + for train in fetch(date, user_agent, station_crs) + ] + + +def fetch_from_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI' +) -> list[dict]: + """Fetch trains from PAD to station_crs.""" + headers = _browser_headers(user_agent) + with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: + r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date)) + r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date)) + + departures = _parse_services(r_pad.text, 'div.time.plan.d') + arrivals = _parse_arrivals(r_station.text) + + trains = [ + { + "depart_paddington": dep, + "arrive_destination": arrivals[tid]["time"], + "arrive_platform": arrivals[tid]["platform"], + "headcode": tid, + } + for tid, dep in departures.items() + if tid in arrivals + ] + return sorted(trains, key=lambda t: t["depart_paddington"]) diff --git a/templates/index.html b/templates/index.html index 4e7140b..655d415 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,7 +2,7 @@ {% block content %}

Plan your journey

-
+
+
+ Journey type +
+
+ + +
+
+ + +
+
+ + +
+
+
+
Eurostar destination
@@ -36,13 +63,22 @@
+
+ + +
+
{% endblock %} diff --git a/templates/results.html b/templates/results.html index a439983..749048f 100644 --- a/templates/results.html +++ b/templates/results.html @@ -1,8 +1,8 @@ {% extends "base.html" %} -{% block title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} -{% block og_title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} +{% block title %}{% if journey_type == 'inbound' %}{{ destination }} to {{ departure_station_name }} via Eurostar{% elif journey_type == 'return' %}{{ departure_station_name }} to {{ destination }} return via Eurostar{% else %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endif %}{% endblock %} +{% block og_title %}{{ self.title()|trim }}{% endblock %} {% block og_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} -{% block twitter_title %}{{ departure_station_name }} to {{ destination }} via Eurostar{% endblock %} +{% block twitter_title %}{{ self.title()|trim }}{% endblock %} {% block twitter_description %}Train options from {{ departure_station_name }} to {{ destination }} on {{ travel_date_display }} via Paddington, St Pancras, and Eurostar.{% endblock %} {% block content %} @@ -12,13 +12,19 @@

- {{ departure_station_name }} → {{ destination }} + {% if journey_type == 'inbound' %} + {{ destination }} → {{ departure_station_name }} + {% elif journey_type == 'return' %} + {{ departure_station_name }} ↔ {{ destination }} + {% else %} + {{ departure_station_name }} → {{ destination }} + {% endif %}

@@ -30,7 +36,7 @@ {% else %} {{ destination_name }} {% endif %} {% endfor %} @@ -57,36 +63,44 @@
NR ticket: + + Load advance prices
- - - + + +
-
Eurostar: +
- - + +

- {{ gwr_count }} GWR service{{ 's' if gwr_count != 1 }} + {{ gwr_count }} National Rail service{{ 's' if gwr_count != 1 }}  ·  {{ eurostar_count }} Eurostar service{{ 's' if eurostar_count != 1 }} {% if from_cache %} @@ -278,135 +318,180 @@ {% endif %}

-{% if trips or unreachable_morning_services %} -
- - - - - - - - - - - {% if trips %} - {% set best_mins = trips | map(attribute='total_minutes') | min %} - {% set worst_mins = trips | map(attribute='total_minutes') | max %} - {% endif %} - {% for row in result_rows %} - {% if row.row_type == 'trip' and row.total_minutes <= best_mins + 5 and trips | length > 1 %} - {% set row_class = 'row-fast' %} - {% elif row.row_type == 'trip' and row.total_minutes >= worst_mins - 5 and trips | length > 1 %} - {% set row_class = 'row-slow' %} - {% elif row.row_type == 'unreachable' %} - {% set row_class = 'row-unreachable' %} - {% elif loop.index is odd %} - {% set row_class = 'row-alt' %} - {% else %} - {% set row_class = '' %} - {% endif %} - - {% if row.row_type == 'trip' %} - - - - +{% if sections %} + {% for section in sections %} +
+

+ {% if section.direction == 'inbound' %} + Return: {{ destination }} → {{ departure_station_name }} {% else %} -

- - - + Outbound: {{ departure_station_name }} → {{ destination }} {% endif %} - - {% endfor %} - -
National Rail
{{ departure_station_name }} → Paddington
Transfer
Paddington → St Pancras
Eurostar
St Pancras → {{ destination }}
Total
- {{ row.depart_bristol }} → {{ row.arrive_paddington }} - ({{ row.gwr_duration }}) - {% if row.headcode or row.arrive_platform %} -
- {%- if row.headcode %}{{ row.headcode }}{% endif %} - {%- if row.headcode and row.arrive_platform %} · {% endif %} - {%- if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} - - {% endif %} - - - -
- {{ row.connection_duration }}{% if row.connection_minutes < 80 %} ⚠️{% endif %} - {% if row.circle_services %} - {% set c = row.circle_services[0] %} -
Circle {{ c.depart }} → KX {{ c.arrive_kx }} · £{{ "%.2f"|format(c.fare) }} - {% if row.circle_services | length > 1 %} - {% set c2 = row.circle_services[1] %} -
next {{ c2.depart }} → KX {{ c2.arrive_kx }} · £{{ "%.2f"|format(c2.fare) }} - {% endif %} - {% endif %} -
- {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) - {% if row.eurostar_duration or row.train_number %} -
- {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} - {%- if row.eurostar_duration and row.train_number %} · {% endif %} - {%- if row.train_number %}{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} - - {% endif %} - - -
- {% if row.total_minutes <= best_mins + 5 and trips | length > 1 %} - {{ row.total_duration }} ⚡ - {% elif row.total_minutes >= worst_mins - 5 and trips | length > 1 %} - {{ row.total_duration }} 🐢 - {% else %} - {{ row.total_duration }} - {% endif %} -
-
- Too early - - {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) - {% if row.eurostar_duration or row.train_number %} -
- {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} - {%- if row.eurostar_duration and row.train_number %} · {% endif %} - {%- if row.train_number %}{% for part in row.train_number.split(' + ') %}{{ part }}{% if not loop.last %} + {% endif %}{% endfor %}{% endif %} - - {% endif %} - - -
-
+ +

{{ section.date_display }}

+ {% if section.rows %} + + + + {% if section.direction == 'inbound' %} + + + + {% else %} + + + + {% endif %} + + + + + {% set trip_rows = section.rows | selectattr('row_type', 'equalto', 'trip') | list %} + {% if trip_rows %} + {% set best_mins = trip_rows | map(attribute='total_minutes') | min %} + {% set worst_mins = trip_rows | map(attribute='total_minutes') | max %} + {% endif %} + {% for row in section.rows %} + {% if row.row_type == 'trip' and row.total_minutes <= best_mins + 5 and trip_rows | length > 1 %} + {% set row_class = 'row-fast' %} + {% elif row.row_type == 'trip' and row.total_minutes >= worst_mins - 5 and trip_rows | length > 1 %} + {% set row_class = 'row-slow' %} + {% elif row.row_type == 'unreachable' %} + {% set row_class = 'row-unreachable' %} + {% elif loop.index is odd %} + {% set row_class = 'row-alt' %} + {% else %} + {% set row_class = '' %} + {% endif %} + + {% if row.row_type == 'trip' %} + {% if section.direction == 'inbound' %} + + + + {% else %} + + + + {% endif %} + + {% else %} + + + + + {% endif %} + + {% endfor %} + +
Eurostar
{{ destination }} → St Pancras
Transfer
St Pancras → Paddington
National Rail
Paddington → {{ departure_station_name }}
National Rail
{{ departure_station_name }} → Paddington
Transfer
Paddington → St Pancras
Eurostar
St Pancras → {{ destination }}
Total
+ {{ row.depart_destination }} → {{ row.arrive_st_pancras }} (UK) +
check in by {{ row.check_in_by }} + {% if row.eurostar_duration or row.train_number %} +
+ {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} + {%- if row.eurostar_duration and row.train_number %} · {% endif %} + {%- if row.train_number %}{{ row.train_number }}{% endif %} + + {% endif %} + {% if row.eurostar_price is not none %}£{{ "%.2f"|format(row.eurostar_price) }}{% endif %} + {% if row.eurostar_plus_price is not none %}£{{ "%.2f"|format(row.eurostar_plus_price) }}{% endif %} +
+ {{ row.connection_duration }}{% if row.connection_minutes < 45 %} !{% endif %} + {% if row.circle_services %} + {% set c = row.circle_services[0] %} +
Circle {{ c.depart }} → PAD {{ c.arrive_pad }} · £{{ "%.2f"|format(c.fare) }} + {% endif %} +
+ {{ row.depart_paddington }} → {{ row.arrive_uk_station }} + ({{ row.gwr_duration }}) + {% if row.headcode or row.arrive_platform %} +
{{ row.headcode }}{% if row.headcode and row.arrive_platform %} · {% endif %}{% if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} + {% endif %} + {% if row.ticket_price is not none %}£{{ "%.2f"|format(row.ticket_price) }}{% endif %} + + +
+ {{ row.depart_bristol }} → {{ row.arrive_paddington }} + ({{ row.gwr_duration }}) + {% if row.headcode or row.arrive_platform %} +
{{ row.headcode }}{% if row.headcode and row.arrive_platform %} · {% endif %}{% if row.arrive_platform %}Plat {{ row.arrive_platform }}{% endif %} + {% endif %} + {% if row.ticket_price is not none %}£{{ "%.2f"|format(row.ticket_price) }}{% endif %} + + +
+ {{ row.connection_duration }}{% if row.connection_minutes < 80 %} !{% endif %} + {% if row.circle_services %} + {% set c = row.circle_services[0] %} +
Circle {{ c.depart }} → KX {{ c.arrive_kx }} · £{{ "%.2f"|format(c.fare) }} + {% endif %} +
+ {{ row.depart_st_pancras }} → {{ row.arrive_destination }} (CET) + {% if row.eurostar_duration or row.train_number %} +
+ {%- if row.eurostar_duration %}({{ row.eurostar_duration }}){% endif %} + {%- if row.eurostar_duration and row.train_number %} · {% endif %} + {%- if row.train_number %}{{ row.train_number }}{% endif %} + + {% endif %} + {% if row.eurostar_price is not none %}£{{ "%.2f"|format(row.eurostar_price) }}{% endif %} + {% if row.eurostar_plus_price is not none %}£{{ "%.2f"|format(row.eurostar_plus_price) }}{% endif %} +
+ {% if row.total_minutes <= best_mins + 5 and trip_rows | length > 1 %} + {{ row.total_duration }} ⚡ + {% elif row.total_minutes >= worst_mins - 5 and trip_rows | length > 1 %} + {{ row.total_duration }} 🐢 + {% else %} + {{ row.total_duration }} + {% endif %} +
+
+ Too early + + {% if section.direction == 'inbound' %} + {{ row.depart_destination }} → {{ row.arrive_st_pancras }} + {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% else %} + {{ row.depart_st_pancras }} → {{ row.arrive_destination }} + {% if row.train_number %}
{{ row.train_number }}{% endif %} + {% endif %} +
No connection
+ {% else %} +
+

No valid journeys found.

+

+ {% if section.gwr_count == 0 and section.eurostar_count == 0 %} + Could not retrieve train data. Check your network connection or try again. + {% elif section.gwr_count == 0 %} + No National Rail trains found for this date. + {% elif section.eurostar_count == 0 %} + No Eurostar services found for {{ destination }} on this date. + {% else %} + No National Rail + Eurostar combination has a {{ section.min_connection }}-{{ section.max_connection }} minute connection. + {% endif %} +

+
+ {% endif %} +
+ {% endfor %} -

- Paddington → St Pancras connection: {{ min_connection }}–{{ max_connection }} min. - GWR walk-on and advance prices from - gwr.com. - Eurostar Standard and Standard Premier prices are for 1 adult in GBP; always check - eurostar.com to book. -  ·  - {{ departure_station_name }} departures on RTT -  ·  - Paddington arrivals on RTT -

- -{% else %} -
-

No valid journeys found.

-

- {% if gwr_count == 0 and eurostar_count == 0 %} - Could not retrieve train data. Check your network connection or try again. - {% elif gwr_count == 0 %} - No GWR trains found for this date. - {% elif eurostar_count == 0 %} - No Eurostar services found for {{ destination }} on this date. - {% else %} - No GWR + Eurostar combination has at least a {{ min_connection }}-minute connection at Paddington/St Pancras. - {% endif %} +

+ Connection windows: + {% for section in sections %} + {% if section.direction == 'inbound' %}return{% else %}outbound{% endif %} + {{ section.min_connection }}–{{ section.max_connection }} min{% if not loop.last %}; {% endif %} + {% endfor %}. + National Rail prices from gwr.com. + Eurostar prices are for 1 adult in GBP; return searches use Eurostar return-search prices. + Always check eurostar.com to book. +  ·  + {{ departure_station_name }} on RTT +  ·  + Paddington on RTT

-
{% endif %} {% endblock %} diff --git a/tests/test_app.py b/tests/test_app.py index 08b97fd..404b92b 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -65,6 +65,17 @@ def test_search_redirects_to_results_with_selected_params(): ) +def test_search_redirects_return_with_return_date(): + client = _client() + + resp = client.get('/search?journey_type=return&destination=paris&travel_date=2026-04-10&return_date=2026-04-17&station_crs=BRI') + + assert resp.status_code == 302 + assert resp.headers['Location'].endswith( + '/results/BRI/paris/2026-04-10?journey_type=return&return_date=2026-04-17' + ) + + def test_results_shows_same_day_destination_switcher(monkeypatch): _stub_data(monkeypatch) client = _client() @@ -290,6 +301,97 @@ def test_results_preloads_cached_advance_fares(monkeypatch): assert 'cachedAdvanceFares' in html +def test_results_inbound_uses_reverse_legs(monkeypatch): + monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) + monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) + monkeypatch.setattr( + app_module.rtt_scraper, + 'fetch_from_paddington', + lambda travel_date, user_agent, station_crs='BRI': [ + {'depart_paddington': '17:15', 'arrive_destination': '18:55', 'headcode': '1B99'}, + ], + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + 'fetch', + lambda station_crs, travel_date, direction='to_paddington': { + '17:15': {'ticket': 'Off-Peak Single', 'price': 63.60, 'code': 'SVS'} + }, + ) + monkeypatch.setattr( + app_module.eurostar_scraper, + 'fetch', + lambda destination, travel_date, direction='outbound': [ + {'depart_destination': '15:12', 'arrive_st_pancras': '16:30', + 'destination': destination, 'train_number': 'ES 9035', + 'price': 49, 'seats': 43, 'plus_price': None, 'plus_seats': None}, + ], + ) + client = _client() + + resp = client.get('/results/BRI/paris/2026-04-10?journey_type=inbound') + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert 'Paris Gare du 
Nord → Bristol Temple Meads' in html + assert '15:12 → 16:30' in html + assert '17:15 → 18:55' in html + assert 'ES 9035' in html + + +def test_results_return_renders_outbound_and_inbound_tables(monkeypatch): + monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) + monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) + monkeypatch.setattr( + app_module.rtt_scraper, + 'fetch', + lambda travel_date, user_agent, station_crs='BRI': [ + {'depart_bristol': '07:00', 'arrive_paddington': '08:45', 'headcode': '1A23'}, + ], + ) + monkeypatch.setattr( + app_module.rtt_scraper, + 'fetch_from_paddington', + lambda travel_date, user_agent, station_crs='BRI': [ + {'depart_paddington': '17:15', 'arrive_destination': '18:55', 'headcode': '1B99'}, + ], + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + 'fetch', + lambda station_crs, travel_date, direction='to_paddington': { + '07:00': {'ticket': 'Anytime Day Single', 'price': 138.70, 'code': 'SDS'}, + '17:15': {'ticket': 'Off-Peak Single', 'price': 63.60, 'code': 'SVS'}, + }, + ) + monkeypatch.setattr( + app_module.eurostar_scraper, + 'fetch_return', + lambda destination, outbound_date, return_date: { + 'outbound': [ + {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', + 'destination': destination, 'train_number': 'ES 9014', + 'price': 59, 'seats': 42, 'plus_price': None, 'plus_seats': None}, + ], + 'inbound': [ + {'depart_destination': '15:12', 'arrive_st_pancras': '16:30', + 'destination': destination, 'train_number': 'ES 9035', + 'price': 49, 'seats': 43, 'plus_price': None, 'plus_seats': None}, + ], + }, + ) + client = _client() + + resp = client.get('/results/BRI/paris/2026-04-10?journey_type=return&return_date=2026-04-17') + html = resp.get_data(as_text=True) + + assert resp.status_code == 200 + assert 'Outbound: Bristol Temple Meads → Paris Gare du Nord' in html + assert 'Return: Paris Gare du Nord → Bristol Temple Meads' in html + assert 'ES 9014' in html + assert 
'ES 9035' in html + + def test_api_advance_fares_returns_json(monkeypatch): monkeypatch.setattr(app_module, 'get_cached', lambda key, ttl=None: None) monkeypatch.setattr(app_module, 'set_cached', lambda key, data: None) diff --git a/tests/test_eurostar_scraper.py b/tests/test_eurostar_scraper.py index b73597f..3e1475b 100644 --- a/tests/test_eurostar_scraper.py +++ b/tests/test_eurostar_scraper.py @@ -1,5 +1,5 @@ import pytest -from scraper.eurostar import _parse_graphql, search_url +from scraper.eurostar import _parse_graphql, _parse_graphql_leg, search_url def _gql_response(journeys: list) -> dict: @@ -110,6 +110,24 @@ def test_parse_graphql_empty_journeys(): assert _parse_graphql(data, 'Paris Gare du Nord') == [] +def test_parse_graphql_inbound_leg(): + data = {'data': {'journeySearch': {'inbound': {'journeys': [ + _journey('17:12', '18:30', price=49, seats=43, service_name='9035') + ]}}}} + services = _parse_graphql_leg(data, 'Paris Gare du Nord', 'inbound', 'inbound') + + assert services == [{ + 'depart_destination': '17:12', + 'arrive_st_pancras': '18:30', + 'destination': 'Paris Gare du Nord', + 'train_number': 'ES 9035', + 'price': 49.0, + 'seats': 43, + 'plus_price': None, + 'plus_seats': None, + }] + + # --------------------------------------------------------------------------- # search_url # --------------------------------------------------------------------------- @@ -120,3 +138,8 @@ def test_search_url(): 'https://www.eurostar.com/search/uk-en' '?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10' ) + + +def test_search_url_return(): + url = search_url('Paris Gare du Nord', '2026-04-10', return_date='2026-04-17') + assert url.endswith('&outbound=2026-04-10&inbound=2026-04-17') diff --git a/tests/test_playwright_return_fares.py b/tests/test_playwright_return_fares.py new file mode 100644 index 0000000..d550d9a --- /dev/null +++ b/tests/test_playwright_return_fares.py @@ -0,0 +1,422 @@ +import threading + +import pytest +from 
werkzeug.serving import make_server + +import app as app_module + +playwright_sync = pytest.importorskip("playwright.sync_api") +sync_playwright = playwright_sync.sync_playwright + + +def _stub_return_data(monkeypatch): + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + app_module.rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + app_module.rtt_scraper, + "fetch_from_paddington", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_paddington": "17:15", + "arrive_destination": "18:55", + "headcode": "1B99", + }, + ], + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date, direction="to_paddington": { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + "17:15": { + "ticket": "Off-Peak Single", + "price": 63.60, + "code": "SVS", + }, + }, + ) + + def fake_advance_streaming(station_crs, travel_date, direction="to_paddington"): + if direction == "from_paddington": + yield { + "17:15": { + "advance_std": { + "ticket": "Advance Single", + "price": 25.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 45.0, + "code": "AFA", + }, + } + } + else: + yield { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 80.0, + "code": "AFA", + }, + } + } + + monkeypatch.setattr( + app_module.gwr_fares_scraper, + "fetch_advance_streaming", + fake_advance_streaming, + ) + + def fake_advance(station_crs, travel_date, direction="to_paddington"): + pages = list(fake_advance_streaming(station_crs, travel_date, direction)) + return pages[0] if pages else {} + + 
monkeypatch.setattr(app_module.gwr_fares_scraper, "fetch_advance", fake_advance) + monkeypatch.setattr( + app_module.eurostar_scraper, + "fetch_return", + lambda destination, outbound_date, return_date: { + "outbound": [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + "inbound": [ + { + "depart_destination": "15:12", + "arrive_st_pancras": "16:30", + "destination": destination, + "train_number": "ES 9035", + "price": 49, + "seats": 43, + "plus_price": 79, + "plus_seats": 6, + }, + ], + }, + ) + + +def _stub_single_data(monkeypatch): + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + app_module.rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + }, + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + "fetch_advance", + lambda station_crs, travel_date: { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": { + "ticket": "1st Advance", + "price": 80.0, + "code": "AFA", + }, + }, + }, + ) + monkeypatch.setattr( + app_module.eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": 89, + "plus_seats": 5, + }, + ], + ) + + +@pytest.fixture +def local_server(monkeypatch): + _stub_return_data(monkeypatch) + 
app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield f"http://127.0.0.1:{server.server_port}" + finally: + server.shutdown() + thread.join(timeout=5) + + +@pytest.fixture +def single_server(monkeypatch): + _stub_single_data(monkeypatch) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + yield f"http://127.0.0.1:{server.server_port}" + finally: + server.shutdown() + thread.join(timeout=5) + + +def _launch_browser(playwright): + try: + return playwright.chromium.launch(headless=True) + except Exception as exc: + pytest.skip(f"Chromium browser unavailable for Playwright: {exc}") + + +def test_single_advance_standard_totals_after_click(single_server): + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"{single_server}/results/BRI/paris/2026-07-20", + wait_until="domcontentloaded", + ) + + page.get_by_role("button", name="Advance Std").click() + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£112.10'))", + timeout=10000, + ) + assert "nr_class=advance_std" in page.url + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£112.10"] + browser.close() + + +def test_single_next_date_advance_standard_labels_unreachable_rows(monkeypatch): + monkeypatch.setattr(app_module, "get_cached", lambda key, ttl=None: None) + monkeypatch.setattr(app_module, "set_cached", lambda key, data: None) + monkeypatch.setattr( + app_module.rtt_scraper, + "fetch", + lambda travel_date, user_agent, station_crs="BRI": [ + { + "depart_bristol": "07:00", + "arrive_paddington": "08:45", + "headcode": "1A23", + }, + ], + ) + monkeypatch.setattr( + 
app_module.gwr_fares_scraper, + "fetch", + lambda station_crs, travel_date: { + "07:00": { + "ticket": "Anytime Day Single", + "price": 138.70, + "code": "SDS", + }, + }, + ) + monkeypatch.setattr( + app_module.gwr_fares_scraper, + "fetch_advance", + lambda station_crs, travel_date: { + "07:00": { + "advance_std": { + "ticket": "Advance Single", + "price": 50.0, + "code": "ADV", + }, + "advance_1st": None, + }, + }, + ) + monkeypatch.setattr( + app_module.eurostar_scraper, + "fetch", + lambda destination, travel_date: [ + { + "depart_st_pancras": "09:30", + "arrive_destination": "12:30", + "destination": destination, + "train_number": "ES 9001", + "price": 59, + "seats": 42, + "plus_price": None, + "plus_seats": None, + }, + { + "depart_st_pancras": "10:01", + "arrive_destination": "13:34", + "destination": destination, + "train_number": "ES 9014", + "price": 59, + "seats": 42, + "plus_price": None, + "plus_seats": None, + }, + ], + ) + app_module.app.config["TESTING"] = True + server = make_server("127.0.0.1", 0, app_module.app) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + with sync_playwright() as p: + browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"http://127.0.0.1:{server.server_port}" + "/results/BRI/brussels/2026-06-16", + wait_until="domcontentloaded", + ) + page.get_by_role("link", name="Next →").click() + page.wait_for_url("**/2026-06-17**", timeout=10000) + page.get_by_role("button", name="Advance Std").click() + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£112.10'))", + timeout=10000, + ) + + assert page.get_by_text("No connection").count() == 1 + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£112.10"] + browser.close() + finally: + server.shutdown() + thread.join(timeout=5) + + +def 
def test_return_advance_first_standard_premier_totals(local_server):
    """Drive a return search through the form and check the displayed totals.

    Fills in the search form, switches to "Advance 1st" and "Standard
    Premier", waits until both expected totals appear, then checks the
    query string and the exact text of every ``.total-price`` element.
    """
    totals_visible = (
        "Array.from(document.querySelectorAll('.total-price'))"
        ".some(el => el.textContent.includes('£172.10'))",
        "Array.from(document.querySelectorAll('.total-price'))"
        ".some(el => el.textContent.includes('£127.10'))",
    )
    expected_in_url = (
        "journey_type=return",
        "return_date=2026-07-27",
        "nr_class=advance_1st",
        "es_class=plus",
    )

    with sync_playwright() as playwright:
        browser = _launch_browser(playwright)
        page = browser.new_page()
        page.goto(f"{local_server}/", wait_until="domcontentloaded")

        # Fill in and submit the search form.
        for selector in ("#journey-return", "#destination-paris"):
            page.locator(selector).check(force=True)
        for selector, value in (
            ("#travel_date", "2026-07-20"),
            ("#return_date", "2026-07-27"),
        ):
            page.locator(selector).fill(value)
        page.locator('button[type="submit"]').click()
        page.wait_for_url("**/results/**", timeout=10000)

        # Switch fare classes on the results page.
        for button_label in ("Advance 1st", "Standard Premier"):
            page.get_by_role("button", name=button_label).click()

        for js_predicate in totals_visible:
            page.wait_for_function(js_predicate, timeout=10000)

        for fragment in expected_in_url:
            assert fragment in page.url
        totals = [el.inner_text() for el in page.locator(".total-price").all()]
        assert totals == ["£172.10 high", "£127.10 low"]
        browser.close()
browser = _launch_browser(p) + page = browser.new_page() + page.goto( + f"{local_server}/results/BRI/paris/2026-07-20" + "?journey_type=return&return_date=2026-07-27" + "&nr_class=advance_1st&es_class=plus", + wait_until="domcontentloaded", + ) + + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£172.10'))", + timeout=10000, + ) + page.wait_for_function( + "Array.from(document.querySelectorAll('.total-price'))" + ".some(el => el.textContent.includes('£127.10'))", + timeout=10000, + ) + + totals = [el.inner_text() for el in page.locator(".total-price").all()] + assert totals == ["£172.10 high", "£127.10 low"] + browser.close() diff --git a/tests/test_trip_planner.py b/tests/test_trip_planner.py index 306d391..7b6c43d 100644 --- a/tests/test_trip_planner.py +++ b/tests/test_trip_planner.py @@ -1,5 +1,10 @@ import pytest -from trip_planner import combine_trips, find_unreachable_morning_eurostars, _fmt_duration +from trip_planner import ( + combine_inbound_trips, + combine_trips, + find_unreachable_morning_eurostars, + _fmt_duration, +) DATE = '2026-03-30' @@ -178,3 +183,28 @@ def test_find_unreachable_eurostars_returns_empty_when_all_connectable(): ] assert find_unreachable_morning_eurostars(gwr, eurostar, DATE) == [] + + +def test_combine_inbound_trips_pairs_eurostar_to_paddington_departure(): + eurostar = [{ + 'depart_destination': '15:12', + 'arrive_st_pancras': '16:30', + 'destination': 'Paris Gare du Nord', + 'train_number': 'ES 9035', + }] + gwr = [{ + 'depart_paddington': '17:15', + 'arrive_destination': '18:55', + 'headcode': '1B99', + }] + fares = {'17:15': {'ticket': 'Off-Peak Single', 'price': 63.60, 'code': 'SVS'}} + + trips = combine_inbound_trips(eurostar, gwr, DATE, min_connection_minutes=30, max_connection_minutes=120, gwr_fares=fares) + + assert len(trips) == 1 + assert trips[0]['depart_destination'] == '15:12' + assert trips[0]['arrive_st_pancras'] == '16:30' + assert 
def _circle_line_services_to_paddington(arrive_st_pancras: datetime) -> list[dict]:
    """List Underground options from King's Cross St Pancras towards Paddington.

    The earliest boarding time is the St Pancras arrival plus
    ``KX_WALK_TO_UNDERGROUND_MINUTES``. Two services are requested from
    ``circle_line.upcoming_services`` in the ``'kx_to_pad'`` direction.

    Returns one dict per service with the formatted ``depart`` and
    ``arrive_pad`` times and the ``fare`` for that departure.
    """
    walk_time = timedelta(minutes=KX_WALK_TO_UNDERGROUND_MINUTES)
    upcoming = circle_line.upcoming_services(
        arrive_st_pancras + walk_time, count=2, direction='kx_to_pad'
    )

    options: list[dict] = []
    for departs, arrives in upcoming:
        options.append(
            {
                "depart": departs.strftime(TIME_FMT),
                "arrive_pad": arrives.strftime(TIME_FMT),
                "fare": circle_line_fare(departs),
            }
        )
    return options
def combine_inbound_trips(
    eurostar_trains: list[dict],
    gwr_trains: list[dict],
    travel_date: str,
    min_connection_minutes: int = INBOUND_MIN_CONNECTION_MINUTES,
    max_connection_minutes: int = INBOUND_MAX_CONNECTION_MINUTES,
    gwr_fares: dict | None = None,
) -> list[dict]:
    """Return valid continent→UK combined trips.

    Each Eurostar service is paired with the first train in ``gwr_trains``
    (in the order given) whose Paddington departure falls inside the
    connection window after the St Pancras arrival. Eurostars with no such
    train are omitted.

    Args:
        eurostar_trains: Dicts with ``depart_destination``,
            ``arrive_st_pancras`` and ``destination``; ``train_number`` is
            optional.
        gwr_trains: Dicts with ``depart_paddington`` and
            ``arrive_destination``; ``arrive_platform`` and ``headcode`` are
            optional.
        travel_date: Date string used to anchor every "HH:MM" time.
        min_connection_minutes: Shortest accepted St Pancras → Paddington gap.
        max_connection_minutes: Longest accepted St Pancras → Paddington gap.
        gwr_fares: Optional mapping of Paddington departure time to a fare
            dict with ``ticket``, ``price`` and ``code``.

    Returns:
        Trip dicts sorted by continental departure time, then by Paddington
        departure time.
    """
    fares = gwr_fares or {}
    # Eurostar departure times are on the continental clock (CET/CEST), one
    # hour ahead of the UK clock (GMT/BST) used for every later timestamp.
    # Any duration that starts at the continental departure therefore needs
    # this hour added to reflect real elapsed time.
    clock_offset_minutes = 60
    trips = []

    for es in eurostar_trains:
        for gwr in gwr_trains:
            connection = _is_viable_inbound_connection(
                es,
                gwr,
                travel_date,
                min_connection_minutes,
                max_connection_minutes,
            )
            if not connection:
                continue
            dep_dest, arr_stp, dep_pad, arr_station = connection

            eurostar_mins = (
                int((arr_stp - dep_dest).total_seconds() / 60) + clock_offset_minutes
            )
            connection_mins = int((dep_pad - arr_stp).total_seconds() / 60)
            gwr_mins = int((arr_station - dep_pad).total_seconds() / 60)
            # Previously the total omitted the clock offset, so it came out
            # an hour shorter than the sum of its own legs.
            total_mins = (
                int((arr_station - dep_dest).total_seconds() / 60)
                + clock_offset_minutes
            )

            fare = fares.get(gwr["depart_paddington"])
            circle_svcs = _circle_line_services_to_paddington(arr_stp)
            trips.append(
                {
                    "direction": "inbound",
                    "depart_destination": es["depart_destination"],
                    # Check-in is shown as 30 minutes before departure.
                    "check_in_by": (dep_dest - timedelta(minutes=30)).strftime(TIME_FMT),
                    "arrive_st_pancras": es["arrive_st_pancras"],
                    "depart_paddington": gwr["depart_paddington"],
                    "arrive_uk_station": gwr["arrive_destination"],
                    "arrive_platform": gwr.get("arrive_platform", ""),
                    "headcode": gwr.get("headcode", ""),
                    "gwr_duration": _fmt_duration(gwr_mins),
                    "connection_minutes": connection_mins,
                    "connection_duration": _fmt_duration(connection_mins),
                    "circle_services": circle_svcs,
                    "eurostar_duration": _fmt_duration(eurostar_mins),
                    "train_number": es.get("train_number", ""),
                    "total_duration": _fmt_duration(total_mins),
                    "total_minutes": total_mins,
                    "destination": es["destination"],
                    "ticket_name": fare["ticket"] if fare else None,
                    "ticket_price": fare["price"] if fare else None,
                    "ticket_code": fare["code"] if fare else None,
                }
            )
            # Only the first viable onward train is kept for each Eurostar.
            break

    trips.sort(key=lambda t: (t["depart_destination"], t["depart_paddington"]))
    return trips
_is_viable_inbound_connection( + es, + gwr, + travel_date, + min_connection_minutes, + max_connection_minutes, + ) + for gwr in gwr_trains + ): + continue + + dep_dest = _parse_dt(travel_date, es["depart_destination"]) + arr_stp = _parse_dt(travel_date, es["arrive_st_pancras"]) + if arr_stp < dep_dest: + arr_stp += timedelta(days=1) + eurostar_mins = int((arr_stp - dep_dest).total_seconds() / 60) + 60 + unreachable.append({**es, "eurostar_duration": _fmt_duration(eurostar_mins)}) + + return sorted(unreachable, key=lambda s: s["depart_destination"])