Show Eurostar Standard prices and total journey cost on results page

Fetches prices via the site-api.eurostar.com GraphQL gateway
(NewBookingSearch operation, discovered with Playwright). Adds
fetch_prices() to scraper/eurostar.py using requests, caches results,
annotates each trip with eurostar_price and total_price, and shows an
ES Std column plus total cost (duration + price) in the results table.
The Transfer column is hidden on small screens for mobile usability.

Closes #4

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-04-04 10:38:09 +01:00
parent 804fcedfad
commit 0dee942e16
4 changed files with 191 additions and 12 deletions

View file

@ -1,7 +1,7 @@
"""
Scrape Eurostar timetable via httpx.
Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API.
The route-specific timetable pages are Next.js SSR — all departure data is
Timetable: route-specific pages are Next.js SSR — all departure data is
embedded in <script id="__NEXT_DATA__"> as JSON, so no browser / JS needed.
URL pattern:
@ -12,10 +12,19 @@ Data path: props.pageProps.pageData.liveDepartures[]
.origin.model.scheduledDepartureDateTime London departure
.destination.model.scheduledArrivalDateTime destination arrival
(already filtered to the requested stop, not the final stop)
Prices: POST https://site-api.eurostar.com/gateway (GraphQL, operationName
NewBookingSearch). The `journeys[].fares[]` array contains one entry per
class of service; we extract the Eurostar Standard (classOfService.code ==
"STANDARD") displayPrice for 1 adult, in GBP.
"""
import json
import random
import re
import string
import httpx
import requests
DEFAULT_UA = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
@ -90,3 +99,105 @@ def fetch(destination: str, travel_date: str,
r = client.get(url, params={'date': travel_date})
r.raise_for_status()
return _parse(r.text, destination)
# ---------------------------------------------------------------------------
# Price fetching via site-api.eurostar.com GraphQL
# ---------------------------------------------------------------------------
# GraphQL endpoint that fetch_prices() POSTs its JSON payload to.
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
# Minimal query requesting only timing + Eurostar Standard fare price.
# Variable names and inline argument names match what the site sends so the
# server-side query planner sees a familiar shape.
#
# The query is built from adjacent string literals, so it is one single-line
# string with no newlines; the leading space inside a fragment is the only
# separator between arguments.  Do not reflow fragments without keeping those
# spaces.
_GQL_PRICES = (
# Operation signature: these six variables are the ones fetch_prices() fills
# in under payload['variables'].
"query NewBookingSearch("
"$origin:String!,$destination:String!,$outbound:String!,"
"$currency:Currency!,$adult:Int,"
"$filteredClassesOfService:[ClassOfServiceEnum]"
"){"
# journeySearch arguments: date, stations, adult count and currency come from
# variables; the remaining passenger-type counts are hard-coded to 0 and the
# other flags are fixed literals.
"journeySearch("
"outboundDate:$outbound origin:$origin destination:$destination"
" adults:$adult currency:$currency"
" productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
" adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
" infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0"
" wheelchairCompanions:0 nonWheelchairCompanions:0"
" isAftersales:false multipleFlexibility:true showAllSummatedFares:false"
" seniorsAges:[] prioritiseShortHaulODTrains:true"
"){"
"outbound{"
"journeys("
"hideIndirectTrainsWhenDisruptedAndCancelled:false"
" hideDepartedTrains:true"
" hideExternalCarrierTrains:true"
" hideDirectExternalCarrierTrains:true"
"){"
# GraphQL alias: the `departs` field is returned under the key
# `departureTime`, which is the key fetch_prices() reads.
"timing{departureTime:departs __typename}"
"fares(filteredClassesOfService:$filteredClassesOfService){"
# fetch_prices() reads classOfService.code and prices.displayPrice; `seats`
# and the `__typename` fields are requested but not read by fetch_prices().
"classOfService{code __typename}"
"prices{displayPrice __typename}"
"seats __typename"
"}"
"__typename"
"}"
"__typename"
"}"
"__typename"
"}"
"}"
)
def _generate_cid() -> str:
    """Return a fresh value for the `cid` request header.

    The value is the literal prefix ``SRCH-`` followed by 22 characters drawn
    at random (with replacement) from ASCII letters and digits.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = ''.join(random.choices(alphabet, k=22))
    return f'SRCH-{suffix}'
def _standard_price(journey: dict) -> int | None:
    """Return the Eurostar Standard price of one journey, or None if unpriced."""
    # `fares` may be null or absent in a partial GraphQL result; treat that
    # the same as "no fares" instead of crashing on iteration.
    for fare in journey.get('fares') or []:
        class_of_service = fare.get('classOfService') or {}
        if class_of_service.get('code') != 'STANDARD':
            continue
        # The first STANDARD fare decides the result, whether priced or not.
        display_price = (fare.get('prices') or {}).get('displayPrice')
        return int(display_price) if display_price else None
    return None


def fetch_prices(destination: str, travel_date: str) -> dict[str, int | None]:
    """
    Return Eurostar Standard prices for every departure on travel_date.

    Sends a single NewBookingSearch GraphQL query (1 adult, GBP, STANDARD
    class only) to the Eurostar gateway and reads the price of each returned
    journey.

    Args:
        destination: key into DESTINATION_STATION_IDS.
        travel_date: outbound date, passed through unchanged as the
            `outbound` query variable (presumably an ISO date string --
            confirm against the caller).

    Result: {depart_st_pancras: price_gbp_int_or_None}
    The key is the journey's `departureTime` value from the response.
    None means the class is sold out or unavailable for that departure.
    If two journeys share a departure time, the later one wins.

    Raises:
        KeyError: destination is not in DESTINATION_STATION_IDS, or the
            response lacks the expected journeySearch/outbound/journeys path.
        requests.HTTPError: the gateway answered with a non-2xx status.
        ValueError: the gateway answered 2xx but the GraphQL body carries
            `errors` and no `data`.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    headers = {
        'User-Agent': DEFAULT_UA,
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'Accept-Language': 'en-GB',
        'Referer': 'https://www.eurostar.com/',
        'x-platform': 'web',
        'x-market-code': 'uk',
        'x-source-url': 'search-app/',
        'cid': _generate_cid(),
    }
    payload = {
        'operationName': 'NewBookingSearch',
        'variables': {
            'origin': ORIGIN_STATION_ID,
            'destination': dest_id,
            'outbound': travel_date,
            'currency': 'GBP',
            'adult': 1,
            'filteredClassesOfService': ['STANDARD'],
        },
        'query': _GQL_PRICES,
    }
    resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
    resp.raise_for_status()
    data = resp.json()

    # GraphQL reports query failures inside the body of an HTTP 200 response
    # (`errors` set, `data` null), so raise_for_status() cannot detect them.
    # Surface the server's message instead of failing later with an opaque
    # "'NoneType' object is not subscriptable".
    if data.get('errors') and not data.get('data'):
        messages = '; '.join(
            str(err.get('message', err)) if isinstance(err, dict) else str(err)
            for err in data['errors']
        )
        raise ValueError(f'Eurostar price query failed: {messages}')

    journeys = data['data']['journeySearch']['outbound']['journeys']
    # A null `journeys` list means no departures were returned.
    return {
        journey['timing']['departureTime']: _standard_price(journey)
        for journey in journeys or []
    }