Consolidate to single GraphQL call; show indirect trains; fix price formatting
Replace two-step Eurostar fetch (HTML timetable + GraphQL prices) with a single GraphQL call that returns timing, train numbers, prices, and seats. Support indirect services (e.g. Amsterdam) by joining multi-leg train numbers with ' + ' and keeping the earliest arrival per departure time. Fix half-pound prices by casting displayPrice to float instead of int. Wrap each train number segment in white-space:nowrap so 'ES 9132 + ER 9363' never breaks mid-segment. Format Eurostar prices with two decimal places. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
05eec29b7d
commit
c22a3ea0fc
5 changed files with 182 additions and 320 deletions
|
|
@ -1,29 +1,14 @@
|
|||
"""
|
||||
Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API.
|
||||
Fetch Eurostar timetable, prices, and seat availability via the GraphQL API.
|
||||
|
||||
Timetable: route-specific pages are Next.js SSR — all departure data is
|
||||
embedded in <script id="__NEXT_DATA__"> as JSON, so no browser / JS needed.
|
||||
|
||||
URL pattern:
|
||||
https://www.eurostar.com/uk-en/travel-info/timetable/
|
||||
{origin_id}/{dest_id}/{origin_slug}/{dest_slug}?date=YYYY-MM-DD
|
||||
|
||||
Data path: props.pageProps.pageData.liveDepartures[]
|
||||
.origin.model.scheduledDepartureDateTime → London departure
|
||||
.destination.model.scheduledArrivalDateTime → destination arrival
|
||||
(already filtered to the requested stop, not the final stop)
|
||||
|
||||
Prices: POST https://site-api.eurostar.com/gateway (GraphQL, operationName
|
||||
NewBookingSearch). The `journeys[].fares[]` array contains one entry per
|
||||
class of service; we extract the Eurostar Standard (classOfService.code ==
|
||||
"STANDARD") displayPrice for 1 adult, in GBP.
|
||||
A single POST to https://site-api.eurostar.com/gateway (operationName
|
||||
NewBookingSearch) returns departure time, arrival time, train number,
|
||||
Eurostar Standard fare price, and seats remaining at that price for every
|
||||
service on the requested date.
|
||||
"""
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
DEFAULT_UA = (
|
||||
|
|
@ -32,93 +17,21 @@ DEFAULT_UA = (
|
|||
)
|
||||
|
||||
ORIGIN_STATION_ID = '7015400'
|
||||
ORIGIN_STATION_SLUG = 'london-st-pancras-intl'
|
||||
TIMETABLE_BASE_URL = 'https://www.eurostar.com/uk-en/travel-info/timetable'
|
||||
|
||||
# Eurostar station identifiers keyed by the display name used throughout
# this module; the values are passed as the `destination` argument of
# search URLs and GraphQL queries.
DESTINATION_STATION_IDS = {
    'Paris Gare du Nord': '8727100',
    'Brussels Midi': '8814001',
    'Lille Europe': '8722326',
    'Amsterdam Centraal': '8400058',
    'Rotterdam Centraal': '8400530',
}
|
||||
|
||||
|
||||
def _slugify_station_name(name: str) -> str:
|
||||
return re.sub(r'[^a-z0-9]+', '-', name.lower()).strip('-')
|
||||
|
||||
|
||||
def search_url(destination: str, travel_date: str) -> str:
    """Build the eurostar.com booking-search URL for one adult.

    Args:
        destination: A key of ``DESTINATION_STATION_IDS``.
        travel_date: Outbound date, inserted verbatim as the ``outbound``
            query parameter.

    Raises:
        KeyError: If ``destination`` is not in ``DESTINATION_STATION_IDS``.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    return (
        f'https://www.eurostar.com/search/uk-en'
        f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}'
    )
|
||||
|
||||
|
||||
def timetable_url(destination: str) -> str:
    """Build the route-specific timetable page URL for a destination.

    The URL has the form
    ``{TIMETABLE_BASE_URL}/{origin_id}/{dest_id}/{origin_slug}/{dest_slug}``;
    the travel date is not part of it.

    Raises:
        KeyError: If ``destination`` is not in ``DESTINATION_STATION_IDS``.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    dest_slug = _slugify_station_name(destination)
    return (
        f'{TIMETABLE_BASE_URL}/{ORIGIN_STATION_ID}/{dest_id}/'
        f'{ORIGIN_STATION_SLUG}/{dest_slug}'
    )
|
||||
|
||||
|
||||
def _hhmm(dt_str: str | None) -> str | None:
|
||||
"""'2026-03-30 09:34:00' → '09:34'"""
|
||||
if not dt_str:
|
||||
return None
|
||||
m = re.search(r'(\d{2}):(\d{2}):\d{2}$', dt_str)
|
||||
return f"{m.group(1)}:{m.group(2)}" if m else None
|
||||
|
||||
|
||||
def _parse(html: str, destination: str) -> list[dict]:
    """Extract departures from the ``__NEXT_DATA__`` JSON of a timetable page.

    Args:
        html: Full HTML text of a timetable page.
        destination: Display name copied into every returned service dict.

    Returns:
        Service dicts with keys ``depart_st_pancras``, ``arrive_destination``,
        ``destination`` and ``train_number``, sorted by departure time.
        An empty list when the page contains no ``__NEXT_DATA__`` script tag.
        Departures lacking a parseable departure or arrival time are skipped.

    Raises:
        json.JSONDecodeError: If the script payload is not valid JSON.
        KeyError: If the expected
            ``props.pageProps.pageData.liveDepartures`` path is missing.
    """
    m = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
    if not m:
        return []
    data = json.loads(m.group(1))
    departures = data['props']['pageProps']['pageData']['liveDepartures']
    services = []
    for dep in departures:
        dep_time = _hhmm(dep['origin']['model']['scheduledDepartureDateTime'])
        arr_time = _hhmm(dep['destination']['model']['scheduledArrivalDateTime'])
        if not (dep_time and arr_time):
            continue
        # `or {}` also covers an explicit JSON null for "model", which
        # dict.get's default argument alone would not.
        model = dep.get('model') or {}
        carrier = model.get('carrier', 'ES')
        number = model.get('trainNumber', '')
        services.append({
            'depart_st_pancras': dep_time,
            'arrive_destination': arr_time,
            'destination': destination,
            'train_number': f"{carrier} {number}" if number else '',
        })
    return sorted(services, key=lambda s: s['depart_st_pancras'])
|
||||
|
||||
|
||||
def fetch(destination: str, travel_date: str,
          user_agent: str = DEFAULT_UA) -> list[dict]:
    """Download the timetable page for a destination and parse its departures.

    Args:
        destination: A key of ``DESTINATION_STATION_IDS``.
        travel_date: Sent as the ``date`` query parameter.
        user_agent: Value for the ``User-Agent`` request header.

    Returns:
        The service dicts produced by ``_parse`` for the fetched page.

    Raises:
        httpx.HTTPStatusError: If the response status is 4xx/5xx.
    """
    url = timetable_url(destination)
    headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-GB,en;q=0.9',
    }
    with httpx.Client(headers=headers, follow_redirects=True, timeout=20) as client:
        r = client.get(url, params={'date': travel_date})
        r.raise_for_status()
        return _parse(r.text, destination)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Price fetching via site-api.eurostar.com GraphQL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
|
||||
|
||||
# Minimal query requesting only timing + Eurostar Standard fare price.
|
||||
# Variable names and inline argument names match what the site sends so the
|
||||
# Query requesting timing, train identity, and Standard fare price + seats.
|
||||
# Variable names and argument names match the site's own query so the
|
||||
# server-side query planner sees a familiar shape.
|
||||
_GQL_PRICES = (
|
||||
_GQL_QUERY = (
|
||||
"query NewBookingSearch("
|
||||
"$origin:String!,$destination:String!,$outbound:String!,"
|
||||
"$currency:Currency!,$adult:Int,"
|
||||
|
|
@ -141,74 +54,103 @@ _GQL_PRICES = (
|
|||
" hideExternalCarrierTrains:true"
|
||||
" hideDirectExternalCarrierTrains:true"
|
||||
"){"
|
||||
"timing{departureTime:departs __typename}"
|
||||
"timing{departureTime:departs arrivalTime:arrives}"
|
||||
"fares(filteredClassesOfService:$filteredClassesOfService){"
|
||||
"classOfService{code __typename}"
|
||||
"prices{displayPrice __typename}"
|
||||
"seats __typename"
|
||||
"classOfService{code}"
|
||||
"prices{displayPrice}"
|
||||
"seats "
|
||||
"legs{serviceName serviceType{code}}"
|
||||
"}"
|
||||
"__typename"
|
||||
"}"
|
||||
"__typename"
|
||||
"}"
|
||||
"__typename"
|
||||
"}"
|
||||
"}"
|
||||
)
|
||||
|
||||
|
||||
def search_url(destination: str, travel_date: str) -> str:
    """Build the eurostar.com booking-search URL for one adult.

    Args:
        destination: A key of ``DESTINATION_STATION_IDS``.
        travel_date: Outbound date, inserted verbatim as the ``outbound``
            query parameter.

    Raises:
        KeyError: If ``destination`` is not in ``DESTINATION_STATION_IDS``.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    return (
        f'https://www.eurostar.com/search/uk-en'
        f'?adult=1&origin={ORIGIN_STATION_ID}&destination={dest_id}&outbound={travel_date}'
    )
|
||||
|
||||
|
||||
def _generate_cid() -> str:
|
||||
chars = string.ascii_letters + string.digits
|
||||
return 'SRCH-' + ''.join(random.choices(chars, k=22))
|
||||
|
||||
|
||||
def fetch_prices(destination: str, travel_date: str) -> dict[str, dict]:
|
||||
def _parse_graphql(data: dict, destination: str) -> list[dict]:
|
||||
"""
|
||||
Return Eurostar Standard price and seat availability for every departure on travel_date.
|
||||
Parse a NewBookingSearch GraphQL response into a list of service dicts.
|
||||
|
||||
Result: {depart_st_pancras: {'price': int_or_None, 'seats': int_or_None}}
|
||||
price is None when unavailable/not yet on sale; seats is the number of
|
||||
Standard seats currently available for sale.
|
||||
Each dict contains: depart_st_pancras, arrive_destination, destination,
|
||||
train_number, price (float or None), seats (int or None).
|
||||
|
||||
The same St Pancras departure can appear multiple times (different
|
||||
connecting trains); we keep the entry with the earliest arrival.
|
||||
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
|
||||
"""
|
||||
best: dict[str, dict] = {}
|
||||
journeys = data['data']['journeySearch']['outbound']['journeys']
|
||||
for journey in journeys:
|
||||
dep = journey['timing']['departureTime']
|
||||
arr = journey['timing']['arrivalTime']
|
||||
for fare in journey['fares']:
|
||||
if fare['classOfService']['code'] == 'STANDARD':
|
||||
p = fare.get('prices')
|
||||
price = float(p['displayPrice']) if p and p.get('displayPrice') else None
|
||||
seats = fare.get('seats')
|
||||
legs = fare.get('legs') or []
|
||||
train_number = ' + '.join(
|
||||
f"{(leg.get('serviceType') or {}).get('code', 'ES')} {leg['serviceName']}"
|
||||
for leg in legs if leg.get('serviceName')
|
||||
)
|
||||
if dep not in best or arr < best[dep]['arrive_destination']:
|
||||
best[dep] = {
|
||||
'depart_st_pancras': dep,
|
||||
'arrive_destination': arr,
|
||||
'destination': destination,
|
||||
'train_number': train_number,
|
||||
'price': price,
|
||||
'seats': seats,
|
||||
}
|
||||
break
|
||||
return sorted(best.values(), key=lambda s: s['depart_st_pancras'])
|
||||
|
||||
|
||||
def fetch(destination: str, travel_date: str) -> list[dict]:
    """
    Return all Eurostar services for destination on travel_date.

    Each dict contains timetable info (depart_st_pancras, arrive_destination,
    train_number) plus pricing (price, seats) from a single GraphQL call.

    Raises:
        KeyError: If ``destination`` is not in ``DESTINATION_STATION_IDS``.
        requests.HTTPError: If the gateway responds with a 4xx/5xx status.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    headers = {
        'User-Agent': DEFAULT_UA,
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'Accept-Language': 'en-GB',
        'Referer': 'https://www.eurostar.com/',
        'x-platform': 'web',
        'x-market-code': 'uk',
        'x-source-url': 'search-app/',
        # Fresh random id generated for every request.
        'cid': _generate_cid(),
    }
    payload = {
        'operationName': 'NewBookingSearch',
        'variables': {
            'origin': ORIGIN_STATION_ID,
            'destination': dest_id,
            'outbound': travel_date,
            'currency': 'GBP',
            'adult': 1,
            'filteredClassesOfService': ['STANDARD'],
        },
        'query': _GQL_QUERY,
    }
    resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
    resp.raise_for_status()
    return _parse_graphql(resp.json(), destination)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue