diff --git a/app.py b/app.py index 70b0884..98100a1 100644 --- a/app.py +++ b/app.py @@ -12,7 +12,14 @@ from cache import get_cached, set_cached import scraper.eurostar as eurostar_scraper import scraper.gwr_fares as gwr_fares_scraper import scraper.realtime_trains as rtt_scraper -from trip_planner import combine_trips, find_unreachable_morning_eurostars +from trip_planner import ( + INBOUND_MAX_CONNECTION_MINUTES, + INBOUND_MIN_CONNECTION_MINUTES, + combine_inbound_trips, + combine_trips, + find_unreachable_inbound_eurostars, + find_unreachable_morning_eurostars, +) RTT_PADDINGTON_URL = ( "https://www.realtimetrains.co.uk/search/detailed/" @@ -76,11 +83,15 @@ def index(): default_max_connection=default_max, valid_min_connections=sorted(VALID_MIN_CONNECTIONS), valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + default_return_date=(date.today() + timedelta(days=7)).isoformat(), ) VALID_MIN_CONNECTIONS = {45, 50, 60, 70, 80, 90, 100, 110, 120} VALID_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180} +VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50, 60, 70, 80, 90, 100, 110, 120} +VALID_INBOUND_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180} +VALID_JOURNEY_TYPES = {"outbound", "inbound", "return"} VALID_NR_CLASSES = {'walkon', 'advance_std', 'advance_1st'} VALID_ES_CLASSES = {'standard', 'plus'} DEFAULT_NR_CLASS = 'walkon' @@ -106,15 +117,24 @@ def _parse_connection(raw, default, valid_set): def search(): slug = request.args.get("destination", "") travel_date = request.args.get("travel_date", "") + return_date = request.args.get("return_date", "") + journey_type = request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" station_crs = request.args.get("station_crs", "BRI") if station_crs not in STATION_BY_CRS: station_crs = "BRI" - default_min, default_max = _get_defaults() + if journey_type == "inbound": + default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES + valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_conn = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_conn = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) if nr_class not in VALID_NR_CLASSES: @@ -122,13 +142,21 @@ def search(): es_class = request.args.get("es_class", DEFAULT_ES_CLASS) if es_class not in VALID_ES_CLASSES: es_class = DEFAULT_ES_CLASS - if slug in DESTINATIONS and travel_date: + if journey_type == "return": + try: + if return_date and date.fromisoformat(return_date) < date.fromisoformat(travel_date): + return_date = "" + except ValueError: + return_date = "" + if slug in DESTINATIONS and travel_date and (journey_type != "return" or return_date): return redirect( url_for( "results", station_crs=station_crs, slug=slug, travel_date=travel_date, + journey_type=None if journey_type == "outbound" else journey_type, + return_date=return_date if journey_type == "return" else None, min_connection=None if min_conn == default_min else min_conn, max_connection=None if max_conn == default_max else max_conn, nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class, @@ -147,12 +175,28 @@ def results(station_crs, slug, travel_date): if not destination or not travel_date: return redirect(url_for("index")) - default_min, default_max = _get_defaults() + journey_type = request.args.get("journey_type", "outbound") + if journey_type not in VALID_JOURNEY_TYPES: + journey_type = "outbound" + return_date = request.args.get("return_date") + if journey_type == "return": + try: + if not return_date or date.fromisoformat(return_date) < date.fromisoformat(travel_date): + return redirect(url_for("index")) + except ValueError: + return redirect(url_for("index")) + + if journey_type == "inbound": + default_min, default_max = INBOUND_MIN_CONNECTION_MINUTES, INBOUND_MAX_CONNECTION_MINUTES + valid_min, valid_max = VALID_INBOUND_MIN_CONNECTIONS, VALID_INBOUND_MAX_CONNECTIONS + else: + default_min, default_max = _get_defaults() + valid_min, valid_max = VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS min_connection = _parse_connection( - request.args.get("min_connection"), default_min, VALID_MIN_CONNECTIONS + request.args.get("min_connection"), default_min, valid_min ) max_connection = _parse_connection( - request.args.get("max_connection"), default_max, VALID_MAX_CONNECTIONS + request.args.get("max_connection"), default_max, valid_max ) nr_class = request.args.get("nr_class", DEFAULT_NR_CLASS) if nr_class not in VALID_NR_CLASSES: @@ -161,150 +205,207 @@ def results(station_crs, slug, travel_date): if es_class not in VALID_ES_CLASSES: es_class = DEFAULT_ES_CLASS - # Redirect to clean URL when all params are at their defaults - _clean_url_params = ["min_connection", "max_connection", "nr_class", "es_class"] - if any(k in request.args for k in _clean_url_params) and ( - min_connection == default_min - and max_connection == default_max - and nr_class == DEFAULT_NR_CLASS - and es_class == DEFAULT_ES_CLASS - ): - return redirect( - url_for("results", station_crs=station_crs, slug=slug, travel_date=travel_date) - ) - user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA) + error_messages = [] + from_cache_parts = [] - rtt_cache_key = f"rtt_{station_crs}_{travel_date}" - es_cache_key = f"eurostar_{travel_date}_{destination}" - gwr_fares_cache_key = f"gwr_fares_{station_crs}_{travel_date}" - gwr_advance_cache_key = f"gwr_advance_{station_crs}_{travel_date}" - - cached_rtt = get_cached(rtt_cache_key) - cached_es = get_cached(es_cache_key, ttl=24 * 3600) - cached_gwr_fares = get_cached(gwr_fares_cache_key, ttl=30 * 24 * 3600) - cached_advance_fares = get_cached(gwr_advance_cache_key, ttl=24 * 3600) - from_cache = bool(cached_rtt and cached_es) - - error = None - - if cached_rtt: - gwr_trains = cached_rtt - else: + def cached_fetch(key, ttl, fetcher, label): + cached = get_cached(key, ttl=ttl) + if cached is not None: + from_cache_parts.append(key) + return cached try: - gwr_trains = rtt_scraper.fetch(travel_date, user_agent, station_crs) - set_cached(rtt_cache_key, gwr_trains) + data = fetcher() + set_cached(key, data) + return data except Exception as e: - gwr_trains = [] - error = f"Could not fetch GWR trains: {e}" + error_messages.append(f"Could not fetch {label}: {e}") + return [] if label != "GWR fares" else {} - if cached_es: - eurostar_services = cached_es - else: - try: - eurostar_services = eurostar_scraper.fetch(destination, travel_date) - set_cached(es_cache_key, eurostar_services) - except Exception as e: - eurostar_services = [] - msg = f"Could not fetch Eurostar times: {e}" - error = f"{error}; {msg}" if error else msg + es_return = None + if journey_type == "return": + es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}" + es_return = cached_fetch( + es_return_key, + 24 * 3600, + lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date), + "Eurostar times", + ) + if not isinstance(es_return, dict): + es_return = {"outbound": [], "inbound": []} - if cached_gwr_fares: - gwr_fares = cached_gwr_fares - else: - try: - gwr_fares = gwr_fares_scraper.fetch(station_crs, travel_date) - set_cached(gwr_fares_cache_key, gwr_fares) - except Exception as e: - gwr_fares = {} - msg = f"Could not fetch GWR fares: {e}" - error = f"{error}; {msg}" if error else msg + def build_section(section_id, direction, section_date, eurostar_services=None): + section_min_connection = min_connection + section_max_connection = max_connection + if journey_type == "return" and direction == "inbound": + section_min_connection = INBOUND_MIN_CONNECTION_MINUTES + section_max_connection = INBOUND_MAX_CONNECTION_MINUTES + rtt_direction = "to_paddington" if direction == "outbound" else "from_paddington" + rtt_cache_key = f"rtt_{rtt_direction}_{station_crs}_{section_date}" + gwr_cache_key = f"gwr_fares_{rtt_direction}_{station_crs}_{section_date}" + advance_cache_key = f"gwr_advance_{rtt_direction}_{station_crs}_{section_date}" - eurostar_trains = eurostar_services - eurostar_prices = { - s["depart_st_pancras"]: { - "price": s.get("price"), - "seats": s.get("seats"), - "plus_price": s.get("plus_price"), - "plus_seats": s.get("plus_seats"), + if direction == "outbound": + trains = cached_fetch( + rtt_cache_key, + None, + lambda: rtt_scraper.fetch(section_date, user_agent, station_crs), + "GWR trains", + ) + else: + trains = cached_fetch( + rtt_cache_key, + None, + lambda: rtt_scraper.fetch_from_paddington(section_date, user_agent, station_crs), + "GWR trains", + ) + + if eurostar_services is None: + es_cache_key = f"eurostar_{direction}_{section_date}_{destination}" + es_fetcher = ( + (lambda: eurostar_scraper.fetch(destination, section_date)) + if direction == "outbound" + else (lambda: eurostar_scraper.fetch(destination, section_date, direction=direction)) + ) + eurostar_services = cached_fetch( + es_cache_key, + 24 * 3600, + es_fetcher, + "Eurostar times", + ) + + fare_direction = "to_paddington" if direction == "outbound" else "from_paddington" + gwr_fares = cached_fetch( + gwr_cache_key, + 30 * 24 * 3600, + ( + (lambda: gwr_fares_scraper.fetch(station_crs, section_date)) + if fare_direction == "to_paddington" + else (lambda: gwr_fares_scraper.fetch(station_crs, section_date, direction=fare_direction)) + ), + "GWR fares", + ) + cached_advance = get_cached(advance_cache_key, ttl=24 * 3600) + + if direction == "outbound": + trips = combine_trips( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_morning_eurostars( + trains, + eurostar_services, + section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_st_pancras"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_st_pancras"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable], + key=lambda row: row["depart_st_pancras"], + ) + else: + trips = combine_inbound_trips( + eurostar_services, + trains, + section_date, + section_min_connection, + section_max_connection, + gwr_fares, + ) + unreachable = find_unreachable_inbound_eurostars( + eurostar_services, + trains, + section_date, + section_min_connection, + section_max_connection, + ) + if trips: + first_es_depart = min(t["depart_destination"] for t in trips) + unreachable = [ + s for s in unreachable if s["depart_destination"] < first_es_depart + ] + rows = sorted( + [{"row_type": "trip", "direction": direction, **trip} for trip in trips] + + [{"row_type": "unreachable", "direction": direction, **svc} for svc in unreachable], + key=lambda row: row["depart_destination"], + ) + + es_by_key = { + (svc.get("depart_st_pancras") if direction == "outbound" else svc.get("depart_destination")): svc + for svc in eurostar_services } - for s in eurostar_services - } + for row in rows: + key = row.get("depart_st_pancras") if direction == "outbound" else row.get("depart_destination") + es = es_by_key.get(key, {}) + row["eurostar_price"] = es.get("price") + row["eurostar_seats"] = es.get("seats") + row["eurostar_plus_price"] = es.get("plus_price") + row["eurostar_plus_seats"] = es.get("plus_seats") + row["row_key"] = f"{section_id}:{key}" - trips = combine_trips( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - gwr_fares, - ) + dt = date.fromisoformat(section_date) + return { + "id": section_id, + "direction": direction, + "date": section_date, + "date_display": dt.strftime("%A %-d %B %Y"), + "rows": rows, + "trips": trips, + "gwr_count": len(trains), + "eurostar_count": len(eurostar_services), + "min_connection": section_min_connection, + "max_connection": section_max_connection, + "advance_fares": cached_advance, + "advance_api_url": url_for( + "api_advance_fares", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + "advance_stream_url": url_for( + "api_advance_fares_stream", + station_crs=station_crs, + travel_date=section_date, + direction=fare_direction, + ), + } - # Annotate each trip with Eurostar prices and total cost (walk-on + standard) - for trip in trips: - es = eurostar_prices.get(trip["depart_st_pancras"], {}) - es_price = es.get("price") - trip["eurostar_price"] = es_price - trip["eurostar_seats"] = es.get("seats") - trip["eurostar_plus_price"] = es.get("plus_price") - trip["eurostar_plus_seats"] = es.get("plus_seats") - gwr_p = trip.get("ticket_price") - circle_svcs = trip.get("circle_services") - circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 - trip["total_price"] = ( - gwr_p + es_price + circle_fare - if (gwr_p is not None and es_price is not None) - else None - ) - - # If the API returned journeys but every price is None, tickets aren't on sale yet - no_prices_note = None - if eurostar_prices and all( - v.get("price") is None for v in eurostar_prices.values() - ): - no_prices_note = ( - "Eurostar prices not yet available — tickets may not be on sale yet." - ) - - unreachable_morning_services = find_unreachable_morning_eurostars( - gwr_trains, - eurostar_trains, - travel_date, - min_connection, - max_connection, - ) - for svc in unreachable_morning_services: - es = eurostar_prices.get(svc["depart_st_pancras"], {}) - svc["eurostar_price"] = es.get("price") - svc["eurostar_seats"] = es.get("seats") - svc["eurostar_plus_price"] = es.get("plus_price") - svc["eurostar_plus_seats"] = es.get("plus_seats") - - # Only keep unreachable services that depart before the first reachable Eurostar. - # Services after the first reachable one are omitted (they aren't "Too early"). - if trips: - first_es_depart = min(t["depart_st_pancras"] for t in trips) - unreachable_morning_services = [ - s - for s in unreachable_morning_services - if s["depart_st_pancras"] < first_es_depart + if journey_type == "return": + sections = [ + build_section("outbound", "outbound", travel_date, es_return.get("outbound", [])), + build_section("inbound", "inbound", return_date, es_return.get("inbound", [])), ] + else: + sections = [build_section("main", journey_type, travel_date)] - result_rows = sorted( - [{"row_type": "trip", **trip} for trip in trips] - + [ - {"row_type": "unreachable", **service} - for service in unreachable_morning_services - ], - key=lambda row: row["depart_st_pancras"], - ) + no_prices_note = None + all_es_prices = [ + row.get("eurostar_price") + for section in sections + for row in section["rows"] + if row.get("row_type") == "trip" + ] + if all_es_prices and all(price is None for price in all_es_prices): + no_prices_note = "Eurostar prices not yet available — tickets may not be on sale yet." dt = date.fromisoformat(travel_date) prev_date = (dt - timedelta(days=1)).isoformat() next_date = (dt + timedelta(days=1)).isoformat() travel_date_display = dt.strftime("%A %-d %B %Y") - eurostar_url = eurostar_scraper.search_url(destination, travel_date) + eurostar_url = eurostar_scraper.search_url( + destination, travel_date, direction=journey_type, return_date=return_date + ) rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date) rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date) @@ -313,55 +414,62 @@ def results(station_crs, slug, travel_date): url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class url_es = None if es_class == DEFAULT_ES_CLASS else es_class - # Build per-row fare data for JS consumption trip_fares = {} - for row in result_rows: - stp = row.get("depart_st_pancras") - if not stp: - continue - circle_svcs = row.get("circle_services") or [] - circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 - walkon = ( - {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} - if row.get("ticket_price") is not None - else None - ) - es_std = ( - {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} - if row.get("eurostar_price") is not None - else None - ) - es_plus = ( - {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")} - if row.get("eurostar_plus_price") is not None - else None - ) - trip_fares[stp] = { - "depart_bristol": row.get("depart_bristol"), - "walkon": walkon, - "es_standard": es_std, - "es_plus": es_plus, - "circle_fare": circle_fare, - } + advance_fares = {} + advance_api_urls = {} + advance_stream_urls = {} + for section in sections: + advance_fares[section["id"]] = section["advance_fares"] + advance_api_urls[section["id"]] = section["advance_api_url"] + advance_stream_urls[section["id"]] = section["advance_stream_url"] + for row in section["rows"]: + circle_svcs = row.get("circle_services") or [] + circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0 + walkon = ( + {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")} + if row.get("ticket_price") is not None + else None + ) + es_std = ( + {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")} + if row.get("eurostar_price") is not None + else None + ) + es_plus = ( + {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")} + if row.get("eurostar_plus_price") is not None + else None + ) + trip_fares[row["row_key"]] = { + "section": section["id"], + "advance_key": row.get("depart_bristol") or row.get("depart_paddington"), + "walkon": walkon, + "es_standard": es_std, + "es_plus": es_plus, + "circle_fare": circle_fare, + } return render_template( "results.html", - trips=trips, - result_rows=result_rows, - unreachable_morning_services=unreachable_morning_services, + sections=sections, + trips=sections[0]["trips"] if sections else [], + result_rows=sections[0]["rows"] if sections else [], + unreachable_morning_services=[], destinations=DESTINATIONS, destination=destination, travel_date=travel_date, + return_date=return_date, + journey_type=journey_type, slug=slug, station_crs=station_crs, departure_station_name=departure_station_name, prev_date=prev_date, next_date=next_date, travel_date_display=travel_date_display, - gwr_count=len(gwr_trains), - eurostar_count=len(eurostar_trains), - from_cache=from_cache, - error=error, + gwr_count=sum(section["gwr_count"] for section in sections), + eurostar_count=sum(section["eurostar_count"] for section in sections), + from_cache=bool(from_cache_parts), + error="; ".join(error_messages) if error_messages else None, no_prices_note=no_prices_note, eurostar_url=eurostar_url, rtt_url=rtt_url, @@ -376,12 +484,15 @@ def results(station_crs, slug, travel_date): es_class=es_class, url_nr_class=url_nr, url_es_class=url_es, + url_journey_type=None if journey_type == "outbound" else journey_type, trip_fares_json=json.dumps(trip_fares), - advance_fares_json=json.dumps(cached_advance_fares), + advance_fares_json=json.dumps(advance_fares), + advance_api_urls_json=json.dumps(advance_api_urls), + advance_stream_urls_json=json.dumps(advance_stream_urls), advance_fares_api_url=url_for("api_advance_fares", station_crs=station_crs, travel_date=travel_date), advance_fares_stream_url=url_for("api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date), - valid_min_connections=sorted(VALID_MIN_CONNECTIONS), - valid_max_connections=sorted(VALID_MAX_CONNECTIONS), + valid_min_connections=sorted(valid_min), + valid_max_connections=sorted(valid_max), ) @@ -389,12 +500,19 @@ def results(station_crs, slug, travel_date): def api_advance_fares(station_crs, travel_date): if station_crs not in STATION_BY_CRS: abort(404) - cache_key = f"gwr_advance_{station_crs}_{travel_date}" + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" cached = get_cached(cache_key, ttl=24 * 3600) if cached is not None: return jsonify(cached) try: - fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date) + fares = ( + gwr_fares_scraper.fetch_advance(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance(station_crs, travel_date, direction=direction) + ) set_cached(cache_key, fares) return jsonify(fares) except Exception as e: @@ -405,7 +523,10 @@ def api_advance_fares(station_crs, travel_date): def api_advance_fares_stream(station_crs, travel_date): if station_crs not in STATION_BY_CRS: abort(404) - cache_key = f"gwr_advance_{station_crs}_{travel_date}" + direction = request.args.get("direction", "to_paddington") + if direction not in {"to_paddington", "from_paddington"}: + direction = "to_paddington" + cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}" def generate(): cached = get_cached(cache_key, ttl=24 * 3600) @@ -416,7 +537,14 @@ def api_advance_fares_stream(station_crs, travel_date): accumulated: dict = {} try: - for page_fares in gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date): + stream = ( + gwr_fares_scraper.fetch_advance_streaming(station_crs, travel_date) + if direction == "to_paddington" + else gwr_fares_scraper.fetch_advance_streaming( + station_crs, travel_date, direction=direction + ) + ) + for page_fares in stream: for dep_time, fare_data in page_fares.items(): if dep_time not in accumulated: accumulated[dep_time] = {"advance_std": None, "advance_1st": None} diff --git a/circle_line.py b/circle_line.py index 6292f24..c0d945c 100644 --- a/circle_line.py +++ b/circle_line.py @@ -1,5 +1,5 @@ """ -Circle Line timetable: Paddington (H&C Line) → King's Cross St Pancras. +Circle Line timetable between Paddington (H&C Line) and King's Cross St Pancras. Parses the TransXChange XML file on first use and caches the result in memory. """ @@ -14,9 +14,9 @@ _KXP_STOP = '9400ZZLUKSX3' # King's Cross St Pancras from config.default import CIRCLE_LINE_XML as _TXC_XML # overridden by app config after import _NS = {'t': 'http://www.transxchange.org.uk/'} -# Populated on first call to next_service(); maps day-type -> sorted list of -# (pad_depart_seconds, kxp_arrive_seconds) measured from midnight. -_timetable: dict[str, list[tuple[int, int]]] | None = None +# Populated on first call to next_service(); maps direction -> day-type -> sorted +# list of (origin_depart_seconds, destination_arrive_seconds) measured from midnight. +_timetable: dict[str, dict[str, list[tuple[int, int]]]] | None = None def _parse_duration(s: str | None) -> int: @@ -26,7 +26,7 @@ def _parse_duration(s: str | None) -> int: return int(m.group(1) or 0) * 3600 + int(m.group(2) or 0) * 60 + int(m.group(3) or 0) -def _load_timetable() -> dict[str, list[tuple[int, int]]]: +def _load_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: tree = ET.parse(_TXC_XML) root = tree.getroot() @@ -66,8 +66,8 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: return elapsed return None - # Map JP id -> (pad_offset_secs, kxp_arrive_offset_secs) - jp_offsets: dict[str, tuple[int, int]] = {} + # Map JP id -> [(direction, origin_depart_offset_secs, destination_arrive_offset_secs)]. + jp_offsets: dict[str, list[tuple[str, int, int]]] = {} for svc in root.find('t:Services', _NS): for jp in svc.findall('.//t:JourneyPattern', _NS): jps_ref = jp.find('t:JourneyPatternSectionRefs', _NS) @@ -75,6 +75,7 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: continue links = jps_map.get(jps_ref.text, []) stops = [l[0] for l in links] + ([links[-1][1]] if links else []) + offsets = [] if ( _PAD_STOP in stops and _KXP_STOP in stops @@ -83,12 +84,30 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: pad_off = _seconds_to_depart(links, _PAD_STOP) kxp_off = _seconds_to_arrive(links, _KXP_STOP) if pad_off is not None and kxp_off is not None: - jp_offsets[jp.get('id')] = (pad_off, kxp_off) + offsets.append(('pad_to_kx', pad_off, kxp_off)) + if ( + _PAD_STOP in stops + and _KXP_STOP in stops + and stops.index(_KXP_STOP) < stops.index(_PAD_STOP) + ): + kxp_off = _seconds_to_depart(links, _KXP_STOP) + pad_off = _seconds_to_arrive(links, _PAD_STOP) + if kxp_off is not None and pad_off is not None: + offsets.append(('kx_to_pad', kxp_off, pad_off)) + if offsets: + jp_offsets[jp.get('id')] = offsets - result: dict[str, list[tuple[int, int]]] = { - 'MondayToFriday': [], - 'Saturday': [], - 'Sunday': [], + result: dict[str, dict[str, list[tuple[int, int]]]] = { + 'pad_to_kx': { + 'MondayToFriday': [], + 'Saturday': [], + 'Sunday': [], + }, + 'kx_to_pad': { + 'MondayToFriday': [], + 'Saturday': [], + 'Sunday': [], + }, } for vj in root.find('t:VehicleJourneys', _NS): @@ -97,7 +116,6 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: op = vj.find('t:OperatingProfile', _NS) if jp_ref is None or dep_time is None or jp_ref.text not in jp_offsets: continue - pad_off, kxp_off = jp_offsets[jp_ref.text] h, m, s = map(int, dep_time.text.split(':')) dep_secs = h * 3600 + m * 60 + s rdt = op.find('.//t:DaysOfWeek', _NS) if op is not None else None @@ -105,15 +123,20 @@ def _load_timetable() -> dict[str, list[tuple[int, int]]]: continue for day_el in rdt: day_type = day_el.tag.split('}')[-1] - if day_type in result: - result[day_type].append((dep_secs + pad_off, dep_secs + kxp_off)) + for direction, origin_off, dest_off in jp_offsets[jp_ref.text]: + if day_type in result[direction]: + result[direction][day_type].append(( + dep_secs + origin_off, + dep_secs + dest_off, + )) - for key in result: - result[key].sort() + for direction in result: + for key in result[direction]: + result[direction][key].sort() return result -def _get_timetable() -> dict[str, list[tuple[int, int]]]: +def _get_timetable() -> dict[str, dict[str, list[tuple[int, int]]]]: global _timetable if _timetable is None: _timetable = _load_timetable() @@ -126,7 +149,9 @@ def _day_type(weekday: int) -> str: return 'Saturday' if weekday == 5 else 'Sunday' -def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: +def next_service( + earliest_board: datetime, direction: str = 'pad_to_kx' +) -> tuple[datetime, datetime] | None: """ Given the earliest time a passenger can board at Paddington (H&C Line), return (circle_line_depart, arrive_kings_cross) as datetimes, or None if @@ -135,20 +160,20 @@ def next_service(earliest_board: datetime) -> tuple[datetime, datetime] | None: The caller is responsible for adding any walk time from the GWR platform before passing *earliest_board*. """ - services = upcoming_services(earliest_board, count=1) + services = upcoming_services(earliest_board, count=1, direction=direction) return services[0] if services else None def upcoming_services( - earliest_board: datetime, count: int = 2 + earliest_board: datetime, count: int = 2, direction: str = 'pad_to_kx' ) -> list[tuple[datetime, datetime]]: """ - Return up to *count* Circle line services from Paddington (H&C Line) to - King's Cross St Pancras, starting from *earliest_board*. + Return up to *count* Circle line services for *direction*, starting from + *earliest_board*. - Each element is (depart_paddington, arrive_kings_cross) as datetimes. + Each element is (depart_origin, arrive_destination) as datetimes. """ - timetable = _get_timetable()[_day_type(earliest_board.weekday())] + timetable = _get_timetable().get(direction, {})[_day_type(earliest_board.weekday())] board_secs = ( earliest_board.hour * 3600 + earliest_board.minute * 60 diff --git a/scraper/eurostar.py b/scraper/eurostar.py index 667decf..6e2f82a 100644 --- a/scraper/eurostar.py +++ b/scraper/eurostar.py @@ -16,7 +16,8 @@ DEFAULT_UA = ( "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36" ) -ORIGIN_STATION_ID = '7015400' +ST_PANCRAS_STATION_ID = '7015400' +ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID DESTINATION_STATION_IDS = { 'Paris Gare du Nord': '8727100', @@ -35,11 +36,11 @@ _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' _GQL_QUERY = ( "query NewBookingSearch(" "$origin:String!,$destination:String!,$outbound:String!," - "$currency:Currency!,$adult:Int," + "$inbound:String,$currency:Currency!,$adult:Int," "$filteredClassesOfService:[ClassOfServiceEnum]" "){" "journeySearch(" - "outboundDate:$outbound origin:$origin destination:$destination" + "outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination" " adults:$adult currency:$currency" " productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\"" " adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0" @@ -64,6 +65,22 @@ _GQL_QUERY = ( "}" "}" "}" + "inbound{" + "journeys(" + "hideIndirectTrainsWhenDisruptedAndCancelled:false" + " hideDepartedTrains:true" + " hideExternalCarrierTrains:true" + " hideDirectExternalCarrierTrains:true" + "){" + "timing{departureTime:departs arrivalTime:arrives}" + "fares(filteredClassesOfService:$filteredClassesOfService){" + "classOfService{code}" + "prices{displayPrice}" + "seats " + "legs{serviceName serviceType{code}}" + "}" + "}" + "}" "}" "}" ) @@ -72,11 +89,19 @@ _STANDARD = 'STANDARD' _STANDARD_PLUS = 'PLUS' -def search_url(destination: str, travel_date: str) -> str: +def search_url(destination: str, travel_date: str, direction: str = "outbound", return_date: str | None = None) -> str: dest_id = DESTINATION_STATION_IDS[destination] + origin = ST_PANCRAS_STATION_ID + destination_id = dest_id + outbound = travel_date + inbound = return_date + if direction == "inbound": + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + inbound = None return ( f'https://www.eurostar.com/search/uk-en' - f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}' + f'?adult=1&origin={origin}&destination={destination_id}&outbound={outbound}' + + (f'&inbound={inbound}' if inbound else '') ) @@ -85,7 +110,7 @@ def _generate_cid() -> str: return 'SRCH-' + ''.join(random.choices(chars, k=22)) -def _parse_graphql(data: dict, destination: str) -> list[dict]: +def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> list[dict]: """ Parse a NewBookingSearch GraphQL response into a list of service dicts. @@ -97,7 +122,6 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329'). """ best: dict[str, dict] = {} - journeys = data['data']['journeySearch']['outbound']['journeys'] for journey in journeys: dep = journey['timing']['departureTime'] arr = journey['timing']['arrivalTime'] @@ -118,8 +142,21 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: std_price, std_seats = price, seats elif cos == _STANDARD_PLUS: plus_price, plus_seats = price, seats - if dep not in best or arr < best[dep]['arrive_destination']: - best[dep] = { + if direction == 'inbound': + service = { + 'depart_destination': dep, + 'arrive_st_pancras': arr, + 'destination': destination, + 'train_number': train_number, + 'price': std_price, + 'seats': std_seats, + 'plus_price': plus_price, + 'plus_seats': plus_seats, + } + key = dep + arrive_key = 'arrive_st_pancras' + else: + service = { 'depart_st_pancras': dep, 'arrive_destination': arr, 'destination': destination, @@ -129,18 +166,43 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]: 'plus_price': plus_price, 'plus_seats': plus_seats, } - return sorted(best.values(), key=lambda s: s['depart_st_pancras']) + key = dep + arrive_key = 'arrive_destination' + if key not in best or arr < best[key][arrive_key]: + best[key] = service + sort_key = 'depart_destination' if direction == 'inbound' else 'depart_st_pancras' + return sorted(best.values(), key=lambda s: s[sort_key]) -def fetch(destination: str, travel_date: str) -> list[dict]: - """ - Return all Eurostar services for destination on travel_date. +def _parse_graphql(data: dict, destination: str) -> list[dict]: + journeys = data['data']['journeySearch']['outbound']['journeys'] + return _parse_journeys(journeys, destination, 'outbound') - Each dict contains timetable info (depart_st_pancras, arrive_destination, - train_number) plus pricing (price, seats) from a single GraphQL call. - """ - dest_id = DESTINATION_STATION_IDS[destination] - headers = { + +def _parse_graphql_leg(data: dict, destination: str, leg: str, direction: str) -> list[dict]: + journeys = data['data']['journeySearch'][leg]['journeys'] + return _parse_journeys(journeys, destination, direction) + + +def _payload(origin: str, destination_id: str, outbound: str, inbound: str | None = None) -> dict: + variables = { + 'origin': origin, + 'destination': destination_id, + 'outbound': outbound, + 'inbound': inbound, + 'currency': 'GBP', + 'adult': 1, + 'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS], + } + return { + 'operationName': 'NewBookingSearch', + 'variables': variables, + 'query': _GQL_QUERY, + } + + +def _headers() -> dict: + return { 'User-Agent': DEFAULT_UA, 'Content-Type': 'application/json', 'Accept': '*/*', @@ -151,18 +213,42 @@ def fetch(destination: str, travel_date: str) -> list[dict]: 'x-source-url': 'search-app/', 'cid': _generate_cid(), } - payload = { - 'operationName': 'NewBookingSearch', - 'variables': { - 'origin': ORIGIN_STATION_ID, - 'destination': dest_id, - 'outbound': travel_date, - 'currency': 'GBP', - 'adult': 1, - 'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS], - }, - 'query': _GQL_QUERY, - } - resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) + + +def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> list[dict]: + """ + Return all Eurostar services for destination on travel_date. + + Each dict contains timetable info (depart_st_pancras, arrive_destination, + train_number) plus pricing (price, seats) from a single GraphQL call. + """ + dest_id = DESTINATION_STATION_IDS[destination] + if direction == 'inbound': + origin, destination_id = dest_id, ST_PANCRAS_STATION_ID + else: + origin, destination_id = ST_PANCRAS_STATION_ID, dest_id + resp = requests.post( + _GATEWAY_URL, + json=_payload(origin, destination_id, travel_date), + headers=_headers(), + timeout=20, + ) resp.raise_for_status() - return _parse_graphql(resp.json(), destination) + leg_direction = 'inbound' if direction == 'inbound' else 'outbound' + return _parse_graphql_leg(resp.json(), destination, 'outbound', leg_direction) + + +def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict[str, list[dict]]: + dest_id = DESTINATION_STATION_IDS[destination] + resp = requests.post( + _GATEWAY_URL, + json=_payload(ST_PANCRAS_STATION_ID, dest_id, outbound_date, return_date), + headers=_headers(), + timeout=20, + ) + resp.raise_for_status() + data = resp.json() + return { + 'outbound': _parse_graphql_leg(data, destination, 'outbound', 'outbound'), + 'inbound': _parse_graphql_leg(data, destination, 'inbound', 'inbound'), + } diff --git a/scraper/gwr_fares.py b/scraper/gwr_fares.py index 4d9f203..7d3cc26 100644 --- a/scraper/gwr_fares.py +++ b/scraper/gwr_fares.py @@ -32,7 +32,8 @@ def _headers() -> dict: def _request_body( - station_crs: str, + from_code: str, + to_code: str, travel_date: str, conversation_token: str | None, later: bool, @@ -44,8 +45,8 @@ def _request_body( "IsPreviousReturn": False, "campaignCode": "", "validationCode": "", - "locfrom": f"GB{station_crs}", - "locto": _PAD_CODE, + "locfrom": from_code, + "locto": to_code, "datetimedepart": f"{travel_date}T00:00:00", "outwarddepartafter": True, "datetimereturn": None, @@ -67,7 +68,22 @@ def _request_body( } -def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): +def _station_code(station_crs: str) -> str: + return f"GB{station_crs}" + + +def _od_codes(station_crs: str, direction: str) -> tuple[str, str]: + if direction == "from_paddington": + return _PAD_CODE, _station_code(station_crs) + return _station_code(station_crs), _PAD_CODE + + +def _run_pages( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +): """ Iterate all pages of GWR journey search results. @@ -78,8 +94,9 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): with httpx.Client(headers=_headers(), timeout=30) as client: conversation_token = None later = False + from_code, to_code = _od_codes(station_crs, direction) for _ in range(_MAX_PAGES): - body = _request_body(station_crs, travel_date, conversation_token, later) + body = _request_body(from_code, to_code, travel_date, conversation_token, later) if first_class: body["firstclass"] = True body["standardclass"] = False @@ -99,7 +116,12 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False): later = True -def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = False): +def _run_pages_batched( + station_crs: str, + travel_date: str, + first_class: bool = False, + direction: str = "to_paddington", +): """ Like _run_pages but yields one list of (dep_time, fares_list) per API page call, allowing callers to stream results a page at a time. @@ -108,8 +130,9 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F with httpx.Client(headers=_headers(), timeout=30) as client: conversation_token = None later = False + from_code, to_code = _od_codes(station_crs, direction) for _ in range(_MAX_PAGES): - body = _request_body(station_crs, travel_date, conversation_token, later) + body = _request_body(from_code, to_code, travel_date, conversation_token, later) if first_class: body["firstclass"] = True body["standardclass"] = False @@ -132,16 +155,18 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F later = True -def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: +def fetch( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict]: """ - Fetch GWR walk-on single fares from station_crs to London Paddington on travel_date. + Fetch GWR walk-on single fares for the selected Paddington direction. Returns {departure_time: {'ticket': name, 'price': float, 'code': code}} where price is in £ and only the cheapest available standard-class walk-on ticket per departure (with restrictions already applied by GWR) is kept. """ result: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date): + for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -166,7 +191,9 @@ def fetch(station_crs: str, travel_date: str) -> dict[str, dict]: return result -def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: +def fetch_advance( + station_crs: str, travel_date: str, direction: str = "to_paddington" +) -> dict[str, dict]: """ Fetch advance fares: cheapest standard advance and first-class advance per departure. @@ -175,7 +202,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: where each sub-dict has keys 'ticket', 'price', 'code'. """ std_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=False): + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=False, direction=direction + ): cheapest = None for fare in fares: code = fare.get("ticketTypeCode") @@ -199,7 +228,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: } first_advance: dict[str, dict] = {} - for dep_time, fares in _run_pages(station_crs, travel_date, first_class=True): + for dep_time, fares in _run_pages( + station_crs, travel_date, first_class=True, direction=direction + ): cheapest = None for fare in fares: price_pence = fare.get("fare", 0) @@ -227,7 +258,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]: } -def fetch_advance_streaming(station_crs: str, travel_date: str): +def fetch_advance_streaming( + station_crs: str, travel_date: str, direction: str = "to_paddington" +): """ Generator yielding partial advance fare dicts one GWR API page at a time. @@ -236,7 +269,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str): yielded immediately so callers can stream prices to clients as they arrive. """ # Pass 1: standard class advance fares - for batch in _run_pages_batched(station_crs, travel_date, first_class=False): + for batch in _run_pages_batched( + station_crs, travel_date, first_class=False, direction=direction + ): page: dict[str, dict] = {} for dep_time, fares in batch: cheapest = None @@ -267,7 +302,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str): yield page # Pass 2: first class advance fares - for batch in _run_pages_batched(station_crs, travel_date, first_class=True): + for batch in _run_pages_batched( + station_crs, travel_date, first_class=True, direction=direction + ): page = {} for dep_time, fares in batch: cheapest = None diff --git a/scraper/realtime_trains.py b/scraper/realtime_trains.py index 9b5e936..6613bbf 100644 --- a/scraper/realtime_trains.py +++ b/scraper/realtime_trains.py @@ -1,5 +1,6 @@ """ -Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains. +Scrape direct trains between a selected station and London Paddington using +Realtime Trains. Two fetches: BRI/to/PAD → departure times from Bristol (div.time.plan.d) @@ -20,6 +21,16 @@ _PAD_FROM_TMPL = ( "gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359" "?stp=WVS&show=pax-calls&order=wtt" ) +_PAD_TO_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) +_FROM_PAD_TMPL = ( + "https://www.realtimetrains.co.uk/search/detailed/" + "gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359" + "?stp=WVS&show=pax-calls&order=wtt" +) DEFAULT_UA = ( "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 " @@ -69,7 +80,7 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]: def _parse_arrivals(html: str) -> dict[str, dict]: - """Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page.""" + """Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page.""" root = lxml.html.fromstring(html) sl = root.cssselect('div.servicelist') if not sl: @@ -93,7 +104,7 @@ def _parse_arrivals(html: str) -> dict[str, dict]: def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]: - """Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}].""" + """Fetch trains from station_crs to PAD.""" headers = _browser_headers(user_agent) with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date)) @@ -113,3 +124,44 @@ def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> if tid in arrivals ] return sorted(trains, key=lambda t: t['depart_bristol']) + + +def fetch_to_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI' +) -> list[dict]: + """Fetch trains from station_crs to PAD using generic field names.""" + return [ + { + **train, + "depart_origin": train["depart_bristol"], + "arrive_paddington": train["arrive_paddington"], + "arrive_platform": train.get("arrive_platform", ""), + "headcode": train.get("headcode", ""), + } + for train in fetch(date, user_agent, station_crs) + ] + + +def fetch_from_paddington( + date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI' +) -> list[dict]: + """Fetch trains from PAD to station_crs.""" + headers = _browser_headers(user_agent) + with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client: + r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date)) + r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date)) + + departures = _parse_services(r_pad.text, 'div.time.plan.d') + arrivals = _parse_arrivals(r_station.text) + + trains = [ + { + "depart_paddington": dep, + "arrive_destination": arrivals[tid]["time"], + "arrive_platform": arrivals[tid]["platform"], + "headcode": tid, + } + for tid, dep in departures.items() + if tid in arrivals + ] + return sorted(trains, key=lambda t: t["depart_paddington"]) diff --git a/templates/index.html b/templates/index.html index 4e7140b..655d415 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,7 +2,7 @@ {% block content %}