Show Eurostar Standard prices and total journey cost on results page
Fetches prices via the site-api.eurostar.com GraphQL gateway (NewBookingSearch operation, discovered with Playwright). Adds fetch_prices() to scraper/eurostar.py using requests, caches results, annotates each trip with eurostar_price and total_price, and shows an ES Std column plus total cost (duration + price) in the results table. The Transfer column is hidden on small screens for mobile usability. Closes #4 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
804fcedfad
commit
0dee942e16
4 changed files with 191 additions and 12 deletions
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
Scrape Eurostar timetable via httpx.
|
||||
Scrape Eurostar timetable via httpx and fetch prices via the GraphQL API.
|
||||
|
||||
The route-specific timetable pages are Next.js SSR — all departure data is
|
||||
Timetable: route-specific pages are Next.js SSR — all departure data is
|
||||
embedded in <script id="__NEXT_DATA__"> as JSON, so no browser / JS needed.
|
||||
|
||||
URL pattern:
|
||||
|
|
@ -12,10 +12,19 @@ Data path: props.pageProps.pageData.liveDepartures[]
|
|||
.origin.model.scheduledDepartureDateTime → London departure
|
||||
.destination.model.scheduledArrivalDateTime → destination arrival
|
||||
(already filtered to the requested stop, not the final stop)
|
||||
|
||||
Prices: POST https://site-api.eurostar.com/gateway (GraphQL, operationName
|
||||
NewBookingSearch). The `journeys[].fares[]` array contains one entry per
|
||||
class of service; we extract the Eurostar Standard (classOfService.code ==
|
||||
"STANDARD") displayPrice for 1 adult, in GBP.
|
||||
"""
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
import httpx
|
||||
import requests
|
||||
|
||||
DEFAULT_UA = (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
|
|
@ -90,3 +99,105 @@ def fetch(destination: str, travel_date: str,
|
|||
r = client.get(url, params={'date': travel_date})
|
||||
r.raise_for_status()
|
||||
return _parse(r.text, destination)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Price fetching via site-api.eurostar.com GraphQL
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# GraphQL gateway endpoint; fetch_prices() POSTs the NewBookingSearch query here.
_GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
|
||||
|
||||
# Minimal query requesting only timing + Eurostar Standard fare price.
|
||||
# Variable names and inline argument names match what the site sends so the
|
||||
# server-side query planner sees a familiar shape.
|
||||
_GQL_PRICES = (
    # Operation signature: declares the variables that fetch_prices() supplies
    # in the `variables` part of its request payload.
    "query NewBookingSearch("
    "$origin:String!,$destination:String!,$outbound:String!,"
    "$currency:Currency!,$adult:Int,"
    "$filteredClassesOfService:[ClassOfServiceEnum]"
    "){"
    # journeySearch arguments: date, stations, currency and the adult count
    # come from variables; every other passenger count is hard-coded to zero
    # and the remaining flags are passed as inline literals.
    "journeySearch("
    "outboundDate:$outbound origin:$origin destination:$destination"
    " adults:$adult currency:$currency"
    " productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
    " adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
    " infants:0 adultsWheelchair:0 childrenWheelchair:0 guideDogs:0"
    " wheelchairCompanions:0 nonWheelchairCompanions:0"
    " isAftersales:false multipleFlexibility:true showAllSummatedFares:false"
    " seniorsAges:[] prioritiseShortHaulODTrains:true"
    "){"
    "outbound{"
    # journeys() filter flags, also inline literals.
    "journeys("
    "hideIndirectTrainsWhenDisruptedAndCancelled:false"
    " hideDepartedTrains:true"
    " hideExternalCarrierTrains:true"
    " hideDirectExternalCarrierTrains:true"
    "){"
    # GraphQL alias: the `departs` field is returned under the key
    # `departureTime`, which is what fetch_prices() reads.
    "timing{departureTime:departs __typename}"
    # Fares limited to the classes of service passed in the variable; only
    # the class code, display price and seats fields are selected.
    "fares(filteredClassesOfService:$filteredClassesOfService){"
    "classOfService{code __typename}"
    "prices{displayPrice __typename}"
    "seats __typename"
    "}"
    "__typename"
    "}"
    "__typename"
    "}"
    "__typename"
    "}"
    "}"
)
|
||||
|
||||
|
||||
def _generate_cid() -> str:
    """Build a fresh value for the ``cid`` request header.

    The result is the literal prefix ``SRCH-`` followed by 22 characters
    drawn at random (with replacement) from ASCII letters and digits.
    """
    alphabet = string.ascii_letters + string.digits
    suffix = ''.join(random.choices(alphabet, k=22))
    return f'SRCH-{suffix}'
|
||||
|
||||
|
||||
def _standard_price(journey: dict) -> int | None:
    """Return the Eurostar Standard price for one journey, or None if absent."""
    # NOTE(review): the gateway schema is not visible here. A null or missing
    # `fares`, `classOfService` or `prices` is treated as "no price" so that
    # one incomplete journey cannot abort the whole lookup.
    for fare in journey.get('fares') or []:
        class_of_service = fare.get('classOfService') or {}
        if class_of_service.get('code') != 'STANDARD':
            continue
        fare_prices = fare.get('prices') or {}
        display_price = fare_prices.get('displayPrice')
        # The first STANDARD fare decides the result; a falsy displayPrice
        # (null, 0, empty string) is reported as None.
        return int(display_price) if display_price else None
    return None


def fetch_prices(destination: str, travel_date: str) -> dict[str, int | None]:
    """
    Return Eurostar Standard prices for every departure on travel_date.

    Sends one NewBookingSearch GraphQL request (1 adult, GBP, STANDARD class
    only) to the gateway and maps each returned journey to its price.

    Args:
        destination: key into DESTINATION_STATION_IDS.
        travel_date: sent unchanged as the query's `outbound` variable
            (presumably an ISO date string; confirm against callers).

    Returns:
        {departure_time: price_gbp_int_or_None}, keyed by each journey's
        `timing.departureTime` value. None means no Standard price was
        offered for that departure (sold out or unavailable). An empty dict
        is returned when the response lists no journeys.

    Raises:
        KeyError: destination is not in DESTINATION_STATION_IDS, or the
            response lacks the expected data.journeySearch.outbound path.
        TypeError: part of that path is null, e.g. a GraphQL-level failure
            that returns `data: null`.
        requests.HTTPError: the gateway answered with an error status.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    headers = {
        'User-Agent': DEFAULT_UA,
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'Accept-Language': 'en-GB',
        'Referer': 'https://www.eurostar.com/',
        'x-platform': 'web',
        'x-market-code': 'uk',
        'x-source-url': 'search-app/',
        'cid': _generate_cid(),
    }
    payload = {
        'operationName': 'NewBookingSearch',
        'variables': {
            'origin': ORIGIN_STATION_ID,
            'destination': dest_id,
            'outbound': travel_date,
            'currency': 'GBP',
            'adult': 1,
            'filteredClassesOfService': ['STANDARD'],
        },
        'query': _GQL_PRICES,
    }
    resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
    resp.raise_for_status()
    data = resp.json()
    # A null `journeys` list is treated as "no departures" rather than an error.
    journeys = data['data']['journeySearch']['outbound']['journeys'] or []
    # If two journeys share a departure time, the later one wins.
    return {
        journey['timing']['departureTime']: _standard_price(journey)
        for journey in journeys
    }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue