Add return and inbound journey support
This commit is contained in:
parent
6ba71447ef
commit
9691632f65
12 changed files with 1687 additions and 486 deletions
|
|
@ -16,7 +16,8 @@ DEFAULT_UA = (
|
|||
"(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
ORIGIN_STATION_ID = '7015400'
|
||||
ST_PANCRAS_STATION_ID = '7015400'
|
||||
ORIGIN_STATION_ID = ST_PANCRAS_STATION_ID
|
||||
|
||||
DESTINATION_STATION_IDS = {
|
||||
'Paris Gare du Nord': '8727100',
|
||||
|
|
@ -35,11 +36,11 @@ _GATEWAY_URL = 'https://site-api.eurostar.com/gateway'
|
|||
_GQL_QUERY = (
|
||||
"query NewBookingSearch("
|
||||
"$origin:String!,$destination:String!,$outbound:String!,"
|
||||
"$currency:Currency!,$adult:Int,"
|
||||
"$inbound:String,$currency:Currency!,$adult:Int,"
|
||||
"$filteredClassesOfService:[ClassOfServiceEnum]"
|
||||
"){"
|
||||
"journeySearch("
|
||||
"outboundDate:$outbound origin:$origin destination:$destination"
|
||||
"outboundDate:$outbound inboundDate:$inbound origin:$origin destination:$destination"
|
||||
" adults:$adult currency:$currency"
|
||||
" productFamilies:[\"PUB\"] contractCode:\"EIL_ALL\""
|
||||
" adults16Plus:0 children:0 youths:0 children4Only:0 children5To11:0"
|
||||
|
|
@ -64,6 +65,22 @@ _GQL_QUERY = (
|
|||
"}"
|
||||
"}"
|
||||
"}"
|
||||
"inbound{"
|
||||
"journeys("
|
||||
"hideIndirectTrainsWhenDisruptedAndCancelled:false"
|
||||
" hideDepartedTrains:true"
|
||||
" hideExternalCarrierTrains:true"
|
||||
" hideDirectExternalCarrierTrains:true"
|
||||
"){"
|
||||
"timing{departureTime:departs arrivalTime:arrives}"
|
||||
"fares(filteredClassesOfService:$filteredClassesOfService){"
|
||||
"classOfService{code}"
|
||||
"prices{displayPrice}"
|
||||
"seats "
|
||||
"legs{serviceName serviceType{code}}"
|
||||
"}"
|
||||
"}"
|
||||
"}"
|
||||
"}"
|
||||
"}"
|
||||
)
|
||||
|
|
@ -72,11 +89,19 @@ _STANDARD = 'STANDARD'
|
|||
_STANDARD_PLUS = 'PLUS'
|
||||
|
||||
|
||||
def search_url(
    destination: str,
    travel_date: str,
    direction: str = "outbound",
    return_date: str | None = None,
) -> str:
    """
    Build the eurostar.com search-page URL for a journey.

    Args:
        destination: Key of DESTINATION_STATION_IDS (e.g. 'Paris Gare du Nord').
        travel_date: Value placed in the `outbound` query parameter.
        direction: "inbound" swaps origin and destination so the search runs
            from the destination station to St Pancras; any other value
            searches from St Pancras to the destination.
        return_date: When truthy, appended as the `inbound` query parameter.
            Ignored for direction == "inbound", which is always a
            single-leg search.

    Raises:
        KeyError: If destination is not in DESTINATION_STATION_IDS.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    if direction == "inbound":
        origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
        inbound = None
    else:
        origin, destination_id = ST_PANCRAS_STATION_ID, dest_id
        inbound = return_date

    url = (
        'https://www.eurostar.com/search/uk-en'
        f'?adult=1&origin={origin}&destination={destination_id}&outbound={travel_date}'
    )
    if inbound:
        url += f'&inbound={inbound}'
    return url
|
||||
|
||||
|
||||
|
|
@ -85,7 +110,7 @@ def _generate_cid() -> str:
|
|||
return 'SRCH-' + ''.join(random.choices(chars, k=22))
|
||||
|
||||
|
||||
def _parse_graphql(data: dict, destination: str) -> list[dict]:
|
||||
def _parse_journeys(journeys: list[dict], destination: str, direction: str) -> list[dict]:
|
||||
"""
|
||||
Parse a NewBookingSearch GraphQL response into a list of service dicts.
|
||||
|
||||
|
|
@ -97,7 +122,6 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]:
|
|||
Multi-leg train numbers are joined with ' + ' (e.g. 'ES 9116 + ER 9329').
|
||||
"""
|
||||
best: dict[str, dict] = {}
|
||||
journeys = data['data']['journeySearch']['outbound']['journeys']
|
||||
for journey in journeys:
|
||||
dep = journey['timing']['departureTime']
|
||||
arr = journey['timing']['arrivalTime']
|
||||
|
|
@ -118,8 +142,21 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]:
|
|||
std_price, std_seats = price, seats
|
||||
elif cos == _STANDARD_PLUS:
|
||||
plus_price, plus_seats = price, seats
|
||||
if dep not in best or arr < best[dep]['arrive_destination']:
|
||||
best[dep] = {
|
||||
if direction == 'inbound':
|
||||
service = {
|
||||
'depart_destination': dep,
|
||||
'arrive_st_pancras': arr,
|
||||
'destination': destination,
|
||||
'train_number': train_number,
|
||||
'price': std_price,
|
||||
'seats': std_seats,
|
||||
'plus_price': plus_price,
|
||||
'plus_seats': plus_seats,
|
||||
}
|
||||
key = dep
|
||||
arrive_key = 'arrive_st_pancras'
|
||||
else:
|
||||
service = {
|
||||
'depart_st_pancras': dep,
|
||||
'arrive_destination': arr,
|
||||
'destination': destination,
|
||||
|
|
@ -129,18 +166,43 @@ def _parse_graphql(data: dict, destination: str) -> list[dict]:
|
|||
'plus_price': plus_price,
|
||||
'plus_seats': plus_seats,
|
||||
}
|
||||
return sorted(best.values(), key=lambda s: s['depart_st_pancras'])
|
||||
key = dep
|
||||
arrive_key = 'arrive_destination'
|
||||
if key not in best or arr < best[key][arrive_key]:
|
||||
best[key] = service
|
||||
sort_key = 'depart_destination' if direction == 'inbound' else 'depart_st_pancras'
|
||||
return sorted(best.values(), key=lambda s: s[sort_key])
|
||||
|
||||
|
||||
def fetch(destination: str, travel_date: str) -> list[dict]:
|
||||
"""
|
||||
Return all Eurostar services for destination on travel_date.
|
||||
def _parse_graphql(data: dict, destination: str) -> list[dict]:
    """
    Parse the outbound leg of a NewBookingSearch GraphQL response.

    Reads data['data']['journeySearch']['outbound']['journeys'] and hands the
    journeys to _parse_journeys with direction 'outbound'.
    """
    journeys = data['data']['journeySearch']['outbound']['journeys']
    return _parse_journeys(journeys, destination, 'outbound')
|
||||
|
||||
Each dict contains timetable info (depart_st_pancras, arrive_destination,
|
||||
train_number) plus pricing (price, seats) from a single GraphQL call.
|
||||
"""
|
||||
dest_id = DESTINATION_STATION_IDS[destination]
|
||||
headers = {
|
||||
|
||||
def _parse_graphql_leg(data: dict, destination: str, leg: str, direction: str) -> list[dict]:
    """
    Parse one leg of a NewBookingSearch GraphQL response.

    Args:
        data: Decoded JSON response body.
        destination: Destination name copied into each service dict.
        leg: Key under data['data']['journeySearch'] to read
            ('outbound' or 'inbound' in the callers in this module).
        direction: Passed through to _parse_journeys, which uses it to choose
            the field names of the returned service dicts.
    """
    journeys = data['data']['journeySearch'][leg]['journeys']
    return _parse_journeys(journeys, destination, direction)
|
||||
|
||||
|
||||
def _payload(origin: str, destination_id: str, outbound: str, inbound: str | None = None) -> dict:
    """
    Build the JSON body for a NewBookingSearch GraphQL request.

    Args:
        origin: Station id used as the search origin.
        destination_id: Station id used as the search destination.
        outbound: Outbound travel date.
        inbound: Return travel date, or None for a single-leg search. The key
            is always present in `variables`; None is sent as-is.

    Returns:
        Dict with 'operationName', 'variables' and 'query', for one adult
        priced in GBP, restricted to the Standard and Standard Plus classes.
    """
    return {
        'operationName': 'NewBookingSearch',
        'variables': {
            'origin': origin,
            'destination': destination_id,
            'outbound': outbound,
            'inbound': inbound,
            'currency': 'GBP',
            'adult': 1,
            'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS],
        },
        'query': _GQL_QUERY,
    }
|
||||
|
||||
|
||||
def _headers() -> dict:
|
||||
return {
|
||||
'User-Agent': DEFAULT_UA,
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': '*/*',
|
||||
|
|
@ -151,18 +213,42 @@ def fetch(destination: str, travel_date: str) -> list[dict]:
|
|||
'x-source-url': 'search-app/',
|
||||
'cid': _generate_cid(),
|
||||
}
|
||||
payload = {
|
||||
'operationName': 'NewBookingSearch',
|
||||
'variables': {
|
||||
'origin': ORIGIN_STATION_ID,
|
||||
'destination': dest_id,
|
||||
'outbound': travel_date,
|
||||
'currency': 'GBP',
|
||||
'adult': 1,
|
||||
'filteredClassesOfService': [_STANDARD, _STANDARD_PLUS],
|
||||
},
|
||||
'query': _GQL_QUERY,
|
||||
}
|
||||
resp = requests.post(_GATEWAY_URL, json=payload, headers=headers, timeout=20)
|
||||
|
||||
|
||||
def fetch(destination: str, travel_date: str, direction: str = 'outbound') -> list[dict]:
    """
    Return all Eurostar services for destination on travel_date.

    Args:
        destination: Key of DESTINATION_STATION_IDS.
        travel_date: Date of travel, sent as the `outbound` search variable.
        direction: 'inbound' searches from the destination station to
            St Pancras; any other value searches from St Pancras to the
            destination.

    Returns:
        Service dicts built by _parse_journeys from a single GraphQL call.
        Outbound services carry depart_st_pancras / arrive_destination;
        inbound services carry depart_destination / arrive_st_pancras. Both
        also carry train_number plus pricing (price, seats).

    Raises:
        KeyError: If destination is not in DESTINATION_STATION_IDS.
        requests.HTTPError: If the gateway answers with an error status.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    if direction == 'inbound':
        origin, destination_id = dest_id, ST_PANCRAS_STATION_ID
    else:
        origin, destination_id = ST_PANCRAS_STATION_ID, dest_id

    resp = requests.post(
        _GATEWAY_URL,
        json=_payload(origin, destination_id, travel_date),
        headers=_headers(),
        timeout=20,
    )
    resp.raise_for_status()

    # No return date is sent, so the journeys are read from the response's
    # 'outbound' leg even when the stations were swapped for an inbound
    # search; only the field naming of the result depends on the direction.
    leg_direction = 'inbound' if direction == 'inbound' else 'outbound'
    return _parse_graphql_leg(resp.json(), destination, 'outbound', leg_direction)
|
||||
|
||||
|
||||
def fetch_return(destination: str, outbound_date: str, return_date: str) -> dict[str, list[dict]]:
    """
    Fetch both legs of a St Pancras return journey in one GraphQL call.

    Args:
        destination: Key of DESTINATION_STATION_IDS.
        outbound_date: Date of the St Pancras -> destination leg.
        return_date: Date of the destination -> St Pancras leg.

    Returns:
        {'outbound': [...], 'inbound': [...]}, each list parsed from the
        matching leg of the same response.

    Raises:
        KeyError: If destination is not in DESTINATION_STATION_IDS.
        requests.HTTPError: If the gateway answers with an error status.
    """
    dest_id = DESTINATION_STATION_IDS[destination]
    resp = requests.post(
        _GATEWAY_URL,
        json=_payload(ST_PANCRAS_STATION_ID, dest_id, outbound_date, return_date),
        headers=_headers(),
        timeout=20,
    )
    resp.raise_for_status()
    data = resp.json()
    return {
        leg: _parse_graphql_leg(data, destination, leg, leg)
        for leg in ('outbound', 'inbound')
    }
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ def _headers() -> dict:
|
|||
|
||||
|
||||
def _request_body(
|
||||
station_crs: str,
|
||||
from_code: str,
|
||||
to_code: str,
|
||||
travel_date: str,
|
||||
conversation_token: str | None,
|
||||
later: bool,
|
||||
|
|
@ -44,8 +45,8 @@ def _request_body(
|
|||
"IsPreviousReturn": False,
|
||||
"campaignCode": "",
|
||||
"validationCode": "",
|
||||
"locfrom": f"GB{station_crs}",
|
||||
"locto": _PAD_CODE,
|
||||
"locfrom": from_code,
|
||||
"locto": to_code,
|
||||
"datetimedepart": f"{travel_date}T00:00:00",
|
||||
"outwarddepartafter": True,
|
||||
"datetimereturn": None,
|
||||
|
|
@ -67,7 +68,22 @@ def _request_body(
|
|||
}
|
||||
|
||||
|
||||
def _run_pages(station_crs: str, travel_date: str, first_class: bool = False):
|
||||
def _station_code(station_crs: str) -> str:
    """Return the location code for a CRS code: the CRS prefixed with 'GB'."""
    return f"GB{station_crs}"
|
||||
|
||||
|
||||
def _od_codes(station_crs: str, direction: str) -> tuple[str, str]:
    """
    Return the (from_code, to_code) pair for a Paddington journey search.

    direction == "from_paddington" yields (Paddington, station); any other
    value, including the default "to_paddington" used by callers, yields
    (station, Paddington).
    """
    station_code = _station_code(station_crs)
    if direction == "from_paddington":
        return _PAD_CODE, station_code
    return station_code, _PAD_CODE
|
||||
|
||||
|
||||
def _run_pages(
|
||||
station_crs: str,
|
||||
travel_date: str,
|
||||
first_class: bool = False,
|
||||
direction: str = "to_paddington",
|
||||
):
|
||||
"""
|
||||
Iterate all pages of GWR journey search results.
|
||||
|
||||
|
|
@ -78,8 +94,9 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False):
|
|||
with httpx.Client(headers=_headers(), timeout=30) as client:
|
||||
conversation_token = None
|
||||
later = False
|
||||
from_code, to_code = _od_codes(station_crs, direction)
|
||||
for _ in range(_MAX_PAGES):
|
||||
body = _request_body(station_crs, travel_date, conversation_token, later)
|
||||
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
|
||||
if first_class:
|
||||
body["firstclass"] = True
|
||||
body["standardclass"] = False
|
||||
|
|
@ -99,7 +116,12 @@ def _run_pages(station_crs: str, travel_date: str, first_class: bool = False):
|
|||
later = True
|
||||
|
||||
|
||||
def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = False):
|
||||
def _run_pages_batched(
|
||||
station_crs: str,
|
||||
travel_date: str,
|
||||
first_class: bool = False,
|
||||
direction: str = "to_paddington",
|
||||
):
|
||||
"""
|
||||
Like _run_pages but yields one list of (dep_time, fares_list) per API page call,
|
||||
allowing callers to stream results a page at a time.
|
||||
|
|
@ -108,8 +130,9 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F
|
|||
with httpx.Client(headers=_headers(), timeout=30) as client:
|
||||
conversation_token = None
|
||||
later = False
|
||||
from_code, to_code = _od_codes(station_crs, direction)
|
||||
for _ in range(_MAX_PAGES):
|
||||
body = _request_body(station_crs, travel_date, conversation_token, later)
|
||||
body = _request_body(from_code, to_code, travel_date, conversation_token, later)
|
||||
if first_class:
|
||||
body["firstclass"] = True
|
||||
body["standardclass"] = False
|
||||
|
|
@ -132,16 +155,18 @@ def _run_pages_batched(station_crs: str, travel_date: str, first_class: bool = F
|
|||
later = True
|
||||
|
||||
|
||||
def fetch(station_crs: str, travel_date: str) -> dict[str, dict]:
|
||||
def fetch(
|
||||
station_crs: str, travel_date: str, direction: str = "to_paddington"
|
||||
) -> dict[str, dict]:
|
||||
"""
|
||||
Fetch GWR walk-on single fares from station_crs to London Paddington on travel_date.
|
||||
Fetch GWR walk-on single fares for the selected Paddington direction.
|
||||
|
||||
Returns {departure_time: {'ticket': name, 'price': float, 'code': code}}
|
||||
where price is in £ and only the cheapest available standard-class walk-on
|
||||
ticket per departure (with restrictions already applied by GWR) is kept.
|
||||
"""
|
||||
result: dict[str, dict] = {}
|
||||
for dep_time, fares in _run_pages(station_crs, travel_date):
|
||||
for dep_time, fares in _run_pages(station_crs, travel_date, direction=direction):
|
||||
cheapest = None
|
||||
for fare in fares:
|
||||
code = fare.get("ticketTypeCode")
|
||||
|
|
@ -166,7 +191,9 @@ def fetch(station_crs: str, travel_date: str) -> dict[str, dict]:
|
|||
return result
|
||||
|
||||
|
||||
def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]:
|
||||
def fetch_advance(
|
||||
station_crs: str, travel_date: str, direction: str = "to_paddington"
|
||||
) -> dict[str, dict]:
|
||||
"""
|
||||
Fetch advance fares: cheapest standard advance and first-class advance per departure.
|
||||
|
||||
|
|
@ -175,7 +202,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]:
|
|||
where each sub-dict has keys 'ticket', 'price', 'code'.
|
||||
"""
|
||||
std_advance: dict[str, dict] = {}
|
||||
for dep_time, fares in _run_pages(station_crs, travel_date, first_class=False):
|
||||
for dep_time, fares in _run_pages(
|
||||
station_crs, travel_date, first_class=False, direction=direction
|
||||
):
|
||||
cheapest = None
|
||||
for fare in fares:
|
||||
code = fare.get("ticketTypeCode")
|
||||
|
|
@ -199,7 +228,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]:
|
|||
}
|
||||
|
||||
first_advance: dict[str, dict] = {}
|
||||
for dep_time, fares in _run_pages(station_crs, travel_date, first_class=True):
|
||||
for dep_time, fares in _run_pages(
|
||||
station_crs, travel_date, first_class=True, direction=direction
|
||||
):
|
||||
cheapest = None
|
||||
for fare in fares:
|
||||
price_pence = fare.get("fare", 0)
|
||||
|
|
@ -227,7 +258,9 @@ def fetch_advance(station_crs: str, travel_date: str) -> dict[str, dict]:
|
|||
}
|
||||
|
||||
|
||||
def fetch_advance_streaming(station_crs: str, travel_date: str):
|
||||
def fetch_advance_streaming(
|
||||
station_crs: str, travel_date: str, direction: str = "to_paddington"
|
||||
):
|
||||
"""
|
||||
Generator yielding partial advance fare dicts one GWR API page at a time.
|
||||
|
||||
|
|
@ -236,7 +269,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str):
|
|||
yielded immediately so callers can stream prices to clients as they arrive.
|
||||
"""
|
||||
# Pass 1: standard class advance fares
|
||||
for batch in _run_pages_batched(station_crs, travel_date, first_class=False):
|
||||
for batch in _run_pages_batched(
|
||||
station_crs, travel_date, first_class=False, direction=direction
|
||||
):
|
||||
page: dict[str, dict] = {}
|
||||
for dep_time, fares in batch:
|
||||
cheapest = None
|
||||
|
|
@ -267,7 +302,9 @@ def fetch_advance_streaming(station_crs: str, travel_date: str):
|
|||
yield page
|
||||
|
||||
# Pass 2: first class advance fares
|
||||
for batch in _run_pages_batched(station_crs, travel_date, first_class=True):
|
||||
for batch in _run_pages_batched(
|
||||
station_crs, travel_date, first_class=True, direction=direction
|
||||
):
|
||||
page = {}
|
||||
for dep_time, fares in batch:
|
||||
cheapest = None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""
|
||||
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
|
||||
Scrape direct trains between a selected station and London Paddington using
|
||||
Realtime Trains.
|
||||
|
||||
Two fetches:
|
||||
BRI/to/PAD → departure times from Bristol (div.time.plan.d)
|
||||
|
|
@ -20,6 +21,16 @@ _PAD_FROM_TMPL = (
|
|||
"gb-nr:PAD/from/gb-nr:{crs}/{date}/0000-2359"
|
||||
"?stp=WVS&show=pax-calls&order=wtt"
|
||||
)
|
||||
# Search page at PAD for services going to {crs}; fetch_from_paddington reads
# the planned departure times from it.
_PAD_TO_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:PAD/to/gb-nr:{crs}/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)

# Search page at {crs} for services coming from PAD; fetch_from_paddington
# reads the arrival times and platforms from it.
_FROM_PAD_TMPL = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:{crs}/from/gb-nr:PAD/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)
|
||||
|
||||
DEFAULT_UA = (
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
|
||||
|
|
@ -69,7 +80,7 @@ def _parse_services(html: str, time_selector: str) -> dict[str, str]:
|
|||
|
||||
|
||||
def _parse_arrivals(html: str) -> dict[str, dict]:
|
||||
"""Return {train_id: {'time': ..., 'platform': ...}} from a PAD arrivals page."""
|
||||
"""Return {train_id: {'time': ..., 'platform': ...}} from an arrivals page."""
|
||||
root = lxml.html.fromstring(html)
|
||||
sl = root.cssselect('div.servicelist')
|
||||
if not sl:
|
||||
|
|
@ -93,7 +104,7 @@ def _parse_arrivals(html: str) -> dict[str, dict]:
|
|||
|
||||
|
||||
def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') -> list[dict]:
|
||||
"""Fetch trains from station_crs to PAD; returns [{'depart_bristol', 'arrive_paddington', 'headcode', 'arrive_platform'}]."""
|
||||
"""Fetch trains from station_crs to PAD."""
|
||||
headers = _browser_headers(user_agent)
|
||||
with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
|
||||
r_bri = client.get(_TO_PAD_TMPL.format(crs=station_crs, date=date))
|
||||
|
|
@ -113,3 +124,44 @@ def fetch(date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI') ->
|
|||
if tid in arrivals
|
||||
]
|
||||
return sorted(trains, key=lambda t: t['depart_bristol'])
|
||||
|
||||
|
||||
def fetch_to_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """
    Fetch trains from station_crs to PAD using generic field names.

    Wraps fetch() and returns its trains in the same order. Each dict keeps
    every key fetch() produced (including 'depart_bristol') and adds
    'depart_origin' as a station-neutral alias for the departure time.
    'arrive_platform' and 'headcode' default to '' when fetch() omitted them.

    Raises:
        KeyError: If a train from fetch() lacks 'depart_bristol' or
            'arrive_paddington'.
    """
    return [
        {
            **train,
            "depart_origin": train["depart_bristol"],
            # Redundant with **train when present; kept so a missing key
            # still fails loudly here.
            "arrive_paddington": train["arrive_paddington"],
            "arrive_platform": train.get("arrive_platform", ""),
            "headcode": train.get("headcode", ""),
        }
        for train in fetch(date, user_agent, station_crs)
    ]
|
||||
|
||||
|
||||
def fetch_from_paddington(
    date: str, user_agent: str = DEFAULT_UA, station_crs: str = 'BRI'
) -> list[dict]:
    """
    Fetch trains from PAD to station_crs.

    Two pages are fetched with one HTTP client: the PAD page for services to
    station_crs (planned departure times) and the station_crs page for
    services from PAD (arrival time and platform). Services are joined on the
    train id; a service missing from the arrivals page is dropped.

    Returns:
        List of {'depart_paddington', 'arrive_destination', 'arrive_platform',
        'headcode'} dicts sorted by 'depart_paddington'.
    """
    headers = _browser_headers(user_agent)
    with httpx.Client(headers=headers, follow_redirects=True, timeout=30) as client:
        # NOTE(review): HTTP status is not checked here; an error page is
        # handed straight to the parsers. Confirm that is intended.
        r_pad = client.get(_PAD_TO_TMPL.format(crs=station_crs, date=date))
        r_station = client.get(_FROM_PAD_TMPL.format(crs=station_crs, date=date))

    departures = _parse_services(r_pad.text, 'div.time.plan.d')
    arrivals = _parse_arrivals(r_station.text)

    trains = [
        {
            "depart_paddington": dep,
            "arrive_destination": arrivals[tid]["time"],
            "arrive_platform": arrivals[tid]["platform"],
            "headcode": tid,
        }
        for tid, dep in departures.items()
        if tid in arrivals
    ]
    return sorted(trains, key=lambda t: t["depart_paddington"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue