bristol-eurostar/scraper/eurostar.py
Edward Betts c22a3ea0fc Consolidate to single GraphQL call; show indirect trains; fix price formatting
Replace two-step Eurostar fetch (HTML timetable + GraphQL prices) with a
single GraphQL call that returns timing, train numbers, prices, and seats.
Support indirect services (e.g. Amsterdam) by joining multi-leg train numbers
with ' + ' and keeping the earliest arrival per departure time.
Fix half-pound prices by casting displayPrice to float instead of int.
Wrap each train number segment in white-space:nowrap so 'ES 9132 + ER 9363'
never breaks mid-segment.
Format Eurostar prices with two decimal places.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 14:46:22 +01:00

156 lines
5.7 KiB
Python

"""
Fetch Eurostar timetable, prices, and seat availability via the GraphQL API.
A single POST to https://site-api.eurostar.com/gateway (operationName
NewBookingSearch) returns departure time, arrival time, train number,
Eurostar Standard fare price, and seats remaining at that price for every
service on the requested date.
"""
import random
import string
import requests
DEFAULT_UA = (
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
ORIGIN_STATION_ID = '7015400'
DESTINATION_STATION_IDS = {
'Paris Gare du Nord': '8727100',
'Brussels Midi': '8814001',
'Lille Europe': '8722326',
'Amsterdam Centraal': '8400058',
'Rotterdam Centraal': '8400530',
}
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
# Query requesting timing, train identity, and Standard fare price + seats.
# Variable names and argument names match the site's own query so the
# server-side query planner sees a familiar shape.
_GQL_QUERY = (
"query NewBookingSearch("
"$origin:String!,$destination:String!,$outbound:String!,"
"$currency:Currency!,$adult:Int,"
"$filteredClassesOfService:[ClassOfServiceEnum]"
"){"
"journeySearch("
"outboundDate:$outbound origin:$origin destination:$destination"
" adults:$adult currency:$currency"
" productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
" adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
" infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0"
" wheelchairCompanions:0 nonWheelchairCompanions:0"
" isAftersales:false multipleFlexibility:true showAllSummatedFares:false"
" seniorsAges:[] prioritiseShortHaulODTrains:true"
"){"
"outbound{"
"journeys("
"hideIndirectTrainsWhenDisruptedAndCancelled:false"
" hideDepartedTrains:true"
" hideExternalCarrierTrains:true"
" hideDirectExternalCarrierTrains:true"
"){"
"timing{departureTime:departs arrivalTime:arrives}"
"fares(filteredClassesOfService:$filteredClassesOfService){"
"classOfService{code}"
"prices{displayPrice}"
"seats "
"legs{serviceName serviceType{code}}"
"}"
"}"
"}"
"}"
"}"
)
def search_url(destination: str, travel_date: str) -> str:
dest_id = DESTINATION_STATION_IDS[destination]
return (
f'https://www.eurostar.com/search/uk-en'
f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}'
)
def _generate_cid() -> str:
chars = string.ascii_letters + string.digits
return 'SRCH-' + ''.join(random.choices(chars, k=22))
def _parse_graphql(data: dict, destination: str) -> list[dict]:
"""
Parse a NewBookingSearch GraphQL response into a list of service dicts.
Each dict contains: depart_st_pancras, arrive_destination, destination,
train_number, price (float or None), seats (int or None).
The same St Pancras departure can appear multiple times (different
connecting trains); we keep the entry with the earliest arrival.
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
"""
best: dict[str, dict] = {}
journeys = data['data']['journeySearch']['outbound']['journeys']
for journey in journeys:
dep = journey['timing']['departureTime']
arr = journey['timing']['arrivalTime']
for fare in journey['fares']:
if fare['classOfService']['code'] == 'STANDARD':
p = fare.get('prices')
price = float(p['displayPrice']) if p and p.get('displayPrice') else None
seats = fare.get('seats')
legs = fare.get('legs') or []
train_number = ' + '.join(
f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}"
for leg in legs if leg.get('serviceName')
)
if dep not in best or arr < best[dep]['arrive_destination']:
best[dep] = {
'depart_st_pancras': dep,
'arrive_destination': arr,
'destination': destination,
'train_number': train_number,
'price': price,
'seats': seats,
}
break
return sorted(best.values(), key=lambda s: s['depart_st_pancras'])
def fetch(destination: str, travel_date: str) -> list[dict]:
"""
Return all Eurostar services for destination on travel_date.
Each dict contains timetable info (depart_st_pancras, arrive_destination,
train_number) plus pricing (price, seats) from a single GraphQL call.
"""
dest_id = DESTINATION_STATION_IDS[destination]
headers = {
'User-Agent': DEFAULT_UA,
'Content-Type': 'application/json',
'Accept': '*/*',
'Accept-Language':'en-GB',
'Referer': 'https://www.eurostar.com/',
'x-platform': 'web',
'x-market-code': 'uk',
'x-source-url': 'search-app/',
'cid': _generate_cid(),
}
payload = {
'operationName': 'NewBookingSearch',
'variables': {
'origin': ORIGIN_STATION_ID,
'destination': dest_id,
'outbound': travel_date,
'currency': 'GBP',
'adult': 1,
'filteredClassesOfService': ['STANDARD'],
},
'query': _GQL_QUERY,
}
resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
resp.raise_for_status()
return _parse_graphql(resp.json(), destination)