"""
Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.

Flask application exposing:

* ``/``            – search form
* ``/search``      – validates the form and redirects to a canonical results URL
* ``/results/...`` – combined GWR + Eurostar itineraries
* ``/api/advance_fares...`` – JSON / server-sent-event endpoints for GWR
  advance fares
"""
import json
import os
from datetime import date, timedelta
from pathlib import Path

from flask import (
    Flask,
    Response,
    abort,
    jsonify,
    redirect,
    render_template,
    request,
    stream_with_context,
    url_for,
)

import cache
from cache import get_cached, set_cached
import scraper.eurostar as eurostar_scraper
import scraper.gwr_fares as gwr_fares_scraper
import scraper.realtime_trains as rtt_scraper
from trip_planner import (
    INBOUND_MAX_CONNECTION_MINUTES,
    INBOUND_MIN_CONNECTION_MINUTES,
    combine_inbound_trips,
    combine_trips,
    find_unreachable_inbound_eurostars,
    find_unreachable_morning_eurostars,
)
import circle_line

# Realtime Trains search: services at Paddington coming from {crs}.
RTT_PADDINGTON_URL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)

# Realtime Trains search: services at {crs} going to Paddington.
RTT_STATION_URL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:{crs}/to/gb-nr:PAD/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)

app = Flask(__name__, instance_relative_config=False)
app.config.from_object("config.default")
# Optional per-machine overrides living next to this file.
_local = os.path.join(os.path.dirname(__file__), "config", "local.py")
if os.path.exists(_local):
    app.config.from_pyfile(_local)

# Push configured paths into the helper modules.
cache.CACHE_DIR = app.config["CACHE_DIR"]
circle_line._TXC_XML = app.config["CIRCLE_LINE_XML"]


def _load_stations():
    """Read ``data/direct_to_paddington.tsv`` into a name-sorted list.

    Each usable line is ``<name>\\t<crs>``; lines without a tab are skipped.

    Returns:
        list[tuple[str, str]]: ``(name, crs)`` pairs sorted by name.
    """
    tsv = Path(__file__).parent / "data" / "direct_to_paddington.tsv"
    stations = []
    # Explicit encoding so the result does not depend on the host locale.
    for line in tsv.read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if "\t" in line:
            name, crs = line.split("\t", 1)
            stations.append((name, crs))
    return sorted(stations, key=lambda station: station[0])


STATIONS = _load_stations()
STATION_BY_CRS = {crs: name for name, crs in STATIONS}

# URL slug -> destination name passed to the Eurostar scraper.
DESTINATIONS = {
    "paris": "Paris Gare du Nord",
    "brussels": "Brussels Midi",
    "lille": "Lille Europe",
    "amsterdam": "Amsterdam Centraal",
    "rotterdam": "Rotterdam Centraal",
    "cologne": "Cologne Hbf",
}

# Connection times (minutes) the UI is allowed to request.
VALID_MIN_CONNECTIONS = {45, 50, 60, 70, 80, 90, 100, 110, 120}
VALID_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180}
VALID_INBOUND_MIN_CONNECTIONS = {20, 30, 40, 45, 50, 60, 70, 80, 90, 100, 110, 120}
VALID_INBOUND_MAX_CONNECTIONS = {60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180}

VALID_JOURNEY_TYPES = {"outbound", "inbound", "return"}
VALID_NR_CLASSES = {'walkon', 'advance_std', 'advance_1st'}
VALID_ES_CLASSES = {'standard', 'plus'}
DEFAULT_NR_CLASS = 'walkon'
DEFAULT_ES_CLASS = 'standard'

# Values accepted for the ``direction`` query parameter of the fares API.
VALID_FARE_DIRECTIONS = {"to_paddington", "from_paddington"}
DEFAULT_FARE_DIRECTION = "to_paddington"


def _get_defaults():
    """Return the configured ``(min, max)`` default connection minutes."""
    return (
        app.config["DEFAULT_MIN_CONNECTION"],
        app.config["DEFAULT_MAX_CONNECTION"],
    )


def _parse_connection(raw, default, valid_set):
    """Parse a connection-time query value.

    Returns ``int(raw)`` when it converts cleanly and is a member of
    *valid_set*; otherwise returns *default*.
    """
    try:
        val = int(raw)
    except (TypeError, ValueError):
        return default
    return val if val in valid_set else default


def _parse_choice(raw, valid, default):
    """Return *raw* if it is a member of *valid*, else *default*."""
    return raw if raw in valid else default


def _parse_iso_date(raw):
    """Return ``date.fromisoformat(raw)``, or ``None`` if *raw* is not parseable."""
    try:
        return date.fromisoformat(raw)
    except (TypeError, ValueError):
        return None


def _format_long_date(day):
    """Format *day* like ``Monday 3 March 2025``.

    Avoids ``%-d``, which is a platform-specific strftime extension.
    """
    return f"{day:%A} {day.day} {day:%B %Y}"


def _connection_settings(journey_type):
    """Return ``(default_min, default_max, valid_min, valid_max)`` for a journey type.

    ``"inbound"`` uses the inbound constants; every other journey type uses
    the configured defaults and the outbound valid sets.
    """
    if journey_type == "inbound":
        return (
            INBOUND_MIN_CONNECTION_MINUTES,
            INBOUND_MAX_CONNECTION_MINUTES,
            VALID_INBOUND_MIN_CONNECTIONS,
            VALID_INBOUND_MAX_CONNECTIONS,
        )
    default_min, default_max = _get_defaults()
    return default_min, default_max, VALID_MIN_CONNECTIONS, VALID_MAX_CONNECTIONS


@app.route("/")
def index():
    """Render the search form."""
    today = date.today()
    default_min, default_max = _get_defaults()
    return render_template(
        "index.html",
        destinations=DESTINATIONS,
        today=today.isoformat(),
        stations=STATIONS,
        default_min_connection=default_min,
        default_max_connection=default_max,
        valid_min_connections=sorted(VALID_MIN_CONNECTIONS),
        valid_max_connections=sorted(VALID_MAX_CONNECTIONS),
        default_return_date=(today + timedelta(days=7)).isoformat(),
    )


@app.route("/search")
def search():
    """Validate the search form and redirect to the canonical results URL.

    Unknown or malformed values fall back to their defaults; values equal to
    the default are omitted from the redirect URL. If the destination or the
    dates are unusable the user is sent back to the index page.
    """
    slug = request.args.get("destination", "")
    travel_date = request.args.get("travel_date", "")
    return_date = request.args.get("return_date", "")
    journey_type = _parse_choice(
        request.args.get("journey_type"), VALID_JOURNEY_TYPES, "outbound"
    )
    station_crs = _parse_choice(request.args.get("station_crs"), STATION_BY_CRS, "BRI")

    default_min, default_max, valid_min, valid_max = _connection_settings(journey_type)
    min_conn = _parse_connection(
        request.args.get("min_connection"), default_min, valid_min
    )
    max_conn = _parse_connection(
        request.args.get("max_connection"), default_max, valid_max
    )
    nr_class = _parse_choice(
        request.args.get("nr_class"), VALID_NR_CLASSES, DEFAULT_NR_CLASS
    )
    es_class = _parse_choice(
        request.args.get("es_class"), VALID_ES_CLASSES, DEFAULT_ES_CLASS
    )

    travel_day = _parse_iso_date(travel_date)
    if journey_type == "return" and return_date:
        return_day = _parse_iso_date(return_date)
        # A return leg must parse and must not precede the outbound leg.
        if travel_day is None or return_day is None or return_day < travel_day:
            return_date = ""

    if (
        slug in DESTINATIONS
        and travel_day is not None
        and (journey_type != "return" or return_date)
    ):
        return redirect(
            url_for(
                "results",
                station_crs=station_crs,
                slug=slug,
                travel_date=travel_date,
                journey_type=None if journey_type == "outbound" else journey_type,
                return_date=return_date if journey_type == "return" else None,
                min_connection=None if min_conn == default_min else min_conn,
                max_connection=None if max_conn == default_max else max_conn,
                nr_class=None if nr_class == DEFAULT_NR_CLASS else nr_class,
                es_class=None if es_class == DEFAULT_ES_CLASS else es_class,
            )
        )
    return redirect(url_for("index"))


@app.route("/results/<station_crs>/<slug>/<travel_date>")
def results(station_crs, slug, travel_date):
    """Render combined GWR + Eurostar itineraries.

    Args:
        station_crs: CRS code of the departure station; 404 if unknown.
        slug: key into ``DESTINATIONS``; unknown slugs redirect to the index.
        travel_date: ISO date of the (outbound) journey; malformed dates
            redirect to the index.

    Query parameters ``journey_type``, ``return_date``, ``min_connection``,
    ``max_connection``, ``nr_class`` and ``es_class`` are validated and fall
    back to defaults (or, for an unusable ``return_date`` on a return
    journey, redirect to the index).
    """
    departure_station_name = STATION_BY_CRS.get(station_crs)
    if departure_station_name is None:
        abort(404)
    destination = DESTINATIONS.get(slug)
    if not destination or not travel_date:
        return redirect(url_for("index"))
    # Validate before any scraping so a bad URL cannot trigger fetches or a 500.
    travel_day = _parse_iso_date(travel_date)
    if travel_day is None:
        return redirect(url_for("index"))

    journey_type = _parse_choice(
        request.args.get("journey_type"), VALID_JOURNEY_TYPES, "outbound"
    )
    return_date = request.args.get("return_date")
    if journey_type == "return":
        return_day = _parse_iso_date(return_date) if return_date else None
        if return_day is None or return_day < travel_day:
            return redirect(url_for("index"))

    default_min, default_max, valid_min, valid_max = _connection_settings(journey_type)
    min_connection = _parse_connection(
        request.args.get("min_connection"), default_min, valid_min
    )
    max_connection = _parse_connection(
        request.args.get("max_connection"), default_max, valid_max
    )
    nr_class = _parse_choice(
        request.args.get("nr_class"), VALID_NR_CLASSES, DEFAULT_NR_CLASS
    )
    es_class = _parse_choice(
        request.args.get("es_class"), VALID_ES_CLASSES, DEFAULT_ES_CLASS
    )

    user_agent = request.headers.get("User-Agent", rtt_scraper.DEFAULT_UA)

    error_messages = []
    from_cache_parts = []

    def cached_fetch(key, ttl, fetcher, label, empty_factory=list):
        """Return cached data for *key*, or call *fetcher* and cache its result.

        On any failure the error is logged, recorded in ``error_messages``
        for display, and ``empty_factory()`` is returned so the page can
        still render.
        """
        cached = get_cached(key, ttl=ttl)
        if cached is not None:
            from_cache_parts.append(key)
            return cached
        try:
            data = fetcher()
            set_cached(key, data)
            return data
        except Exception as e:  # boundary around third-party scraping
            app.logger.exception("Could not fetch %s", label)
            error_messages.append(f"Could not fetch {label}: {e}")
            return empty_factory()

    es_return = None
    if journey_type == "return":
        es_return_key = f"eurostar_return_{travel_date}_{return_date}_{destination}"
        es_return = cached_fetch(
            es_return_key,
            24 * 3600,
            lambda: eurostar_scraper.fetch_return(destination, travel_date, return_date),
            "Eurostar times",
        )
        # Covers both a failed fetch and an unexpected cached shape.
        if not isinstance(es_return, dict):
            es_return = {"outbound": [], "inbound": []}

    def build_section(section_id, direction, section_date, eurostar_services=None):
        """Build the template data for one direction of travel.

        Args:
            section_id: identifier used in row keys and the per-section maps.
            direction: ``"outbound"`` or ``"inbound"``.
            section_date: ISO date string for this leg.
            eurostar_services: pre-fetched Eurostar services; fetched here
                when ``None``.
        """
        is_outbound = direction == "outbound"

        section_min_connection = min_connection
        section_max_connection = max_connection
        # The inbound half of a return journey always uses the inbound constants.
        if journey_type == "return" and not is_outbound:
            section_min_connection = INBOUND_MIN_CONNECTION_MINUTES
            section_max_connection = INBOUND_MAX_CONNECTION_MINUTES

        leg_direction = "to_paddington" if is_outbound else "from_paddington"
        # Field on trips/services holding the Eurostar departure for this direction.
        es_depart_key = "depart_st_pancras" if is_outbound else "depart_destination"

        rtt_cache_key = f"rtt_{leg_direction}_{station_crs}_{section_date}"
        gwr_cache_key = f"gwr_fares_{leg_direction}_{station_crs}_{section_date}"
        advance_cache_key = f"gwr_advance_{leg_direction}_{station_crs}_{section_date}"

        if is_outbound:
            train_fetcher = lambda: rtt_scraper.fetch(
                section_date, user_agent, station_crs
            )
        else:
            train_fetcher = lambda: rtt_scraper.fetch_from_paddington(
                section_date, user_agent, station_crs
            )
        trains = cached_fetch(rtt_cache_key, None, train_fetcher, "GWR trains")

        if eurostar_services is None:
            es_cache_key = f"eurostar_{direction}_{section_date}_{destination}"
            if is_outbound:
                es_fetcher = lambda: eurostar_scraper.fetch(destination, section_date)
            else:
                es_fetcher = lambda: eurostar_scraper.fetch(
                    destination, section_date, direction=direction
                )
            eurostar_services = cached_fetch(
                es_cache_key, 24 * 3600, es_fetcher, "Eurostar times"
            )

        if is_outbound:
            fares_fetcher = lambda: gwr_fares_scraper.fetch(station_crs, section_date)
        else:
            fares_fetcher = lambda: gwr_fares_scraper.fetch(
                station_crs, section_date, direction=leg_direction
            )
        gwr_fares = cached_fetch(
            gwr_cache_key,
            30 * 24 * 3600,
            fares_fetcher,
            "GWR fares",
            empty_factory=dict,
        )

        # Advance fares are only read from cache here; the API endpoints fill it.
        cached_advance = get_cached(advance_cache_key, ttl=24 * 3600)

        if is_outbound:
            trips = combine_trips(
                trains,
                eurostar_services,
                section_date,
                section_min_connection,
                section_max_connection,
                gwr_fares,
            )
            unreachable = find_unreachable_morning_eurostars(
                trains,
                eurostar_services,
                section_date,
                section_min_connection,
                section_max_connection,
            )
        else:
            trips = combine_inbound_trips(
                eurostar_services,
                trains,
                section_date,
                section_min_connection,
                section_max_connection,
                gwr_fares,
            )
            unreachable = find_unreachable_inbound_eurostars(
                eurostar_services,
                trains,
                section_date,
                section_min_connection,
                section_max_connection,
            )

        if trips:
            # Only keep unreachable services that leave before the first usable one.
            first_es_depart = min(trip[es_depart_key] for trip in trips)
            unreachable = [
                svc for svc in unreachable if svc[es_depart_key] < first_es_depart
            ]

        rows = sorted(
            [{"row_type": "trip", "direction": direction, **trip} for trip in trips]
            + [
                {"row_type": "unreachable", "direction": direction, **svc}
                for svc in unreachable
            ],
            key=lambda row: row[es_depart_key],
        )

        # Attach Eurostar price/seat data to each row by departure time.
        es_by_key = {svc.get(es_depart_key): svc for svc in eurostar_services}
        for row in rows:
            key = row.get(es_depart_key)
            es = es_by_key.get(key, {})
            row["eurostar_price"] = es.get("price")
            row["eurostar_seats"] = es.get("seats")
            row["eurostar_plus_price"] = es.get("plus_price")
            row["eurostar_plus_seats"] = es.get("plus_seats")
            row["row_key"] = f"{section_id}:{key}"

        return {
            "id": section_id,
            "direction": direction,
            "date": section_date,
            "date_display": _format_long_date(date.fromisoformat(section_date)),
            "rows": rows,
            "trips": trips,
            "gwr_count": len(trains),
            "eurostar_count": len(eurostar_services),
            "min_connection": section_min_connection,
            "max_connection": section_max_connection,
            "advance_fares": cached_advance,
            "advance_api_url": url_for(
                "api_advance_fares",
                station_crs=station_crs,
                travel_date=section_date,
                direction=leg_direction,
            ),
            "advance_stream_url": url_for(
                "api_advance_fares_stream",
                station_crs=station_crs,
                travel_date=section_date,
                direction=leg_direction,
            ),
        }

    if journey_type == "return":
        sections = [
            build_section("outbound", "outbound", travel_date, es_return.get("outbound", [])),
            build_section("inbound", "inbound", return_date, es_return.get("inbound", [])),
        ]
    else:
        sections = [build_section("main", journey_type, travel_date)]

    no_prices_note = None
    all_es_prices = [
        row.get("eurostar_price")
        for section in sections
        for row in section["rows"]
        if row.get("row_type") == "trip"
    ]
    if all_es_prices and all(price is None for price in all_es_prices):
        no_prices_note = "Eurostar prices not yet available — tickets may not be on sale yet."

    prev_date = (travel_day - timedelta(days=1)).isoformat()
    next_date = (travel_day + timedelta(days=1)).isoformat()
    travel_date_display = _format_long_date(travel_day)

    eurostar_url = eurostar_scraper.search_url(
        destination, travel_date, direction=journey_type, return_date=return_date
    )
    rtt_url = RTT_PADDINGTON_URL.format(crs=station_crs, date=travel_date)
    rtt_station_url = RTT_STATION_URL.format(crs=station_crs, date=travel_date)

    # ``None`` values are dropped by url_for, keeping default settings out of URLs.
    url_min = None if min_connection == default_min else min_connection
    url_max = None if max_connection == default_max else max_connection
    url_nr = None if nr_class == DEFAULT_NR_CLASS else nr_class
    url_es = None if es_class == DEFAULT_ES_CLASS else es_class

    trip_fares = {}
    advance_fares = {}
    advance_api_urls = {}
    advance_stream_urls = {}
    for section in sections:
        section_id = section["id"]
        advance_fares[section_id] = section["advance_fares"]
        advance_api_urls[section_id] = section["advance_api_url"]
        advance_stream_urls[section_id] = section["advance_stream_url"]
        for row in section["rows"]:
            circle_svcs = row.get("circle_services") or []
            circle_fare = circle_svcs[0]["fare"] if circle_svcs else 0
            walkon = (
                {"price": row["ticket_price"], "ticket": row.get("ticket_name", "")}
                if row.get("ticket_price") is not None
                else None
            )
            es_std = (
                {"price": row["eurostar_price"], "seats": row.get("eurostar_seats")}
                if row.get("eurostar_price") is not None
                else None
            )
            es_plus = (
                {"price": row["eurostar_plus_price"], "seats": row.get("eurostar_plus_seats")}
                if row.get("eurostar_plus_price") is not None
                else None
            )
            trip_fares[row["row_key"]] = {
                "section": section_id,
                "advance_key": row.get("depart_bristol") or row.get("depart_paddington"),
                "walkon": walkon,
                "es_standard": es_std,
                "es_plus": es_plus,
                "circle_fare": circle_fare,
            }

    return render_template(
        "results.html",
        sections=sections,
        trips=sections[0]["trips"] if sections else [],
        result_rows=sections[0]["rows"] if sections else [],
        unreachable_morning_services=[],
        destinations=DESTINATIONS,
        destination=destination,
        travel_date=travel_date,
        return_date=return_date,
        journey_type=journey_type,
        slug=slug,
        station_crs=station_crs,
        departure_station_name=departure_station_name,
        prev_date=prev_date,
        next_date=next_date,
        travel_date_display=travel_date_display,
        gwr_count=sum(section["gwr_count"] for section in sections),
        eurostar_count=sum(section["eurostar_count"] for section in sections),
        from_cache=bool(from_cache_parts),
        error="; ".join(error_messages) if error_messages else None,
        no_prices_note=no_prices_note,
        eurostar_url=eurostar_url,
        rtt_url=rtt_url,
        rtt_station_url=rtt_station_url,
        min_connection=min_connection,
        max_connection=max_connection,
        default_min_connection=default_min,
        default_max_connection=default_max,
        url_min_connection=url_min,
        url_max_connection=url_max,
        nr_class=nr_class,
        es_class=es_class,
        url_nr_class=url_nr,
        url_es_class=url_es,
        url_journey_type=None if journey_type == "outbound" else journey_type,
        trip_fares_json=json.dumps(trip_fares),
        advance_fares_json=json.dumps(advance_fares),
        advance_api_urls_json=json.dumps(advance_api_urls),
        advance_stream_urls_json=json.dumps(advance_stream_urls),
        advance_fares_api_url=url_for(
            "api_advance_fares", station_crs=station_crs, travel_date=travel_date
        ),
        advance_fares_stream_url=url_for(
            "api_advance_fares_stream", station_crs=station_crs, travel_date=travel_date
        ),
        valid_min_connections=sorted(valid_min),
        valid_max_connections=sorted(valid_max),
    )


@app.route("/api/advance_fares/<station_crs>/<travel_date>")
def api_advance_fares(station_crs, travel_date):
    """Return GWR advance fares for a station and date as JSON.

    Responds 404 for an unknown station or a malformed date, and 500 with
    ``{"error": ...}`` if the fetch fails. Results are cached for 24 hours.
    The ``direction`` query parameter falls back to ``"to_paddington"``.
    """
    if station_crs not in STATION_BY_CRS:
        abort(404)
    # The date becomes part of a cache key and a scraper request; reject junk.
    if _parse_iso_date(travel_date) is None:
        abort(404)
    direction = _parse_choice(
        request.args.get("direction"), VALID_FARE_DIRECTIONS, DEFAULT_FARE_DIRECTION
    )
    cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"
    cached = get_cached(cache_key, ttl=24 * 3600)
    if cached is not None:
        return jsonify(cached)
    try:
        if direction == "to_paddington":
            fares = gwr_fares_scraper.fetch_advance(station_crs, travel_date)
        else:
            fares = gwr_fares_scraper.fetch_advance(
                station_crs, travel_date, direction=direction
            )
        set_cached(cache_key, fares)
        return jsonify(fares)
    except Exception as e:  # boundary around third-party scraping
        app.logger.exception("Could not fetch advance fares")
        return jsonify({"error": str(e)}), 500


@app.route("/api/advance_fares_stream/<station_crs>/<travel_date>")
def api_advance_fares_stream(station_crs, travel_date):
    """Stream GWR advance fares as server-sent events.

    Emits ``{"type": "fares", "fares": ...}`` events followed by a final
    ``{"type": "done"}``; a failure emits ``{"type": "error", ...}`` instead
    and nothing is cached. Responds 404 for an unknown station or a
    malformed date.
    """
    if station_crs not in STATION_BY_CRS:
        abort(404)
    if _parse_iso_date(travel_date) is None:
        abort(404)
    direction = _parse_choice(
        request.args.get("direction"), VALID_FARE_DIRECTIONS, DEFAULT_FARE_DIRECTION
    )
    cache_key = f"gwr_advance_{direction}_{station_crs}_{travel_date}"

    def generate():
        """Yield SSE ``data:`` frames, from cache if possible."""
        cached = get_cached(cache_key, ttl=24 * 3600)
        if cached is not None:
            yield f"data: {json.dumps({'type': 'fares', 'fares': cached})}\n\n"
            yield f"data: {json.dumps({'type': 'done'})}\n\n"
            return

        accumulated: dict = {}
        try:
            if direction == "to_paddington":
                stream = gwr_fares_scraper.fetch_advance_streaming(
                    station_crs, travel_date
                )
            else:
                stream = gwr_fares_scraper.fetch_advance_streaming(
                    station_crs, travel_date, direction=direction
                )
            for page_fares in stream:
                # Merge each page so the complete set can be cached at the end;
                # a later page only overwrites a class when it has a truthy value.
                for dep_time, fare_data in page_fares.items():
                    merged = accumulated.setdefault(
                        dep_time, {"advance_std": None, "advance_1st": None}
                    )
                    for fare_class in ("advance_std", "advance_1st"):
                        if fare_data.get(fare_class):
                            merged[fare_class] = fare_data[fare_class]
                yield f"data: {json.dumps({'type': 'fares', 'fares': page_fares})}\n\n"
        except Exception as e:  # boundary around third-party scraping
            app.logger.exception("Advance fares stream failed")
            yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"
            return

        set_cached(cache_key, accumulated)
        yield f"data: {json.dumps({'type': 'done'})}\n\n"

    return Response(
        stream_with_context(generate()),
        mimetype="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )


if __name__ == "__main__":
    # NOTE(review): debug=True together with host="0.0.0.0" exposes the
    # interactive debugger on every network interface. Development only;
    # never run this entry point on an untrusted network.
    app.run(debug=True, host="0.0.0.0")