""" Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API. Timetable: route-specific pages are Next.js SSR — all departure data is embedded in ', html, re.DOTALL) if not m: return [] data = json.loads(m.group(1)) departures = data['props']['pageProps']['pageData']['liveDepartures'] services = [] for dep in departures: dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime']) arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime']) if dep_time and arr_time: carrier = dep.get('model', {}).get('carrier', 'ES') number = dep.get('model', {}).get('trainNumber', '') services.append({ 'depart_st_pancras': dep_time, 'arrive_destination': arr_time, 'destination': destination, 'train_number': f"{carrier} {number}" if number else '', }) return sorted(services, key=lambda s: s['depart_st_pancras']) def fetch(destination: str, travel_date: str, user_agent: str = DEFAULT_UA) -> list[dict]: url = timetable_url(destination) headers = { 'User-Agent': user_agent, 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-GB,en;q=0.9', } with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client: r = client.get(url, params={'date': travel_date}) r.raise_for_status() return _parse(r.text, destination) # --------------------------------------------------------------------------- # Price fetching via site-api.eurostar.com GraphQL # --------------------------------------------------------------------------- _GATEWAY_URL = 'https://site-api.eurostar.com/gateway' # Minimal query requesting only timing + Eurostar Standard fare price. # Variable names and inline argument names match what the site sends so the # server-side query planner sees a familiar shape. _GQL_PRICES = ( "query NewBookingSearch(" "$origin:String!,$destination:String!,$outbound:String!," "$currency:Currency!,$adult:Int," "$filteredClassesOfService:[ClassOfServiceEnum]" "){" "journeySearch(" "outboundDate:$outbound origin:$origin destination:$destination" " adults:$adult currency:$currency" " productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\"" " adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0" " infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0" " wheelchairCompanions:0 nonWheelchairCompanions:0" " isAftersales:false multipleFlexibility:true showAllSummatedFares:false" " seniorsAges:[] prioritiseShortHaulODTrains:true" "){" "outbound{" "journeys(" "hideIndirectTrainsWhenDisruptedAndCancelled:false" " hideDepartedTrains:true" " hideExternalCarrierTrains:true" " hideDirectExternalCarrierTrains:true" "){" "timing{departureTime:departs __typename}" "fares(filteredClassesOfService:$filteredClassesOfService){" "classOfService{code __typename}" "prices{displayPrice __typename}" "seats __typename" "}" "__typename" "}" "__typename" "}" "__typename" "}" "}" ) def _generate_cid() -> str: chars = string.ascii_letters + string.digits return 'SRCH-' + ''.join(random.choices(chars, k=22)) def fetch_prices(destination: str, travel_date: str) -> dict[str, int | None]: """ Return Eurostar Standard prices for every departure on travel_date. Result: {depart_st_pancras: price_gbp_int_or_None} None means the class is sold out or unavailable for that departure. """ dest_id = DESTINATION_STATION_IDS[destination] headers = { 'User-Agent': DEFAULT_UA, 'Content-Type': 'application/json', 'Accept': '*/*', 'Accept-Language': 'en-GB', 'Referer': 'https://www.eurostar.com/', 'x-platform': 'web', 'x-market-code': 'uk', 'x-source-url': 'search-app/', 'cid': _generate_cid(), } payload = { 'operationName': 'NewBookingSearch', 'variables': { 'origin': ORIGIN_STATION_ID, 'destination': dest_id, 'outbound': travel_date, 'currency': 'GBP', 'adult': 1, 'filteredClassesOfService': ['STANDARD'], }, 'query': _GQL_PRICES, } resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20) resp.raise_for_status() data = resp.json() prices: dict[str, int | None] = {} journeys = data['data']['journeySearch']['outbound']['journeys'] for journey in journeys: dep = journey['timing']['departureTime'] price = None for fare in journey['fares']: if fare['classOfService']['code'] == 'STANDARD': p = fare.get('prices') if p and p.get('displayPrice'): price = int(p['displayPrice']) break prices[dep] = price return prices