paddington-eurostar/tests/test_eurostar_scraper.py
Edward Betts 13c4341f3a Add full type annotations and black formatting across all modules
Annotated all functions with mypy --strict-compatible types (-> None, dict[str,
Any], Generator types, etc.), added # type: ignore for untyped third-party libs
(lxml), and reformatted with black. All 18 source files now pass mypy --strict
with zero errors.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-25 21:48:53 +01:00

197 lines
5.9 KiB
Python

from typing import Any
import pytest
from scraper.eurostar import _parse_graphql, _parse_graphql_leg, search_url
def _gql_response(journeys: list[dict[str, Any]]) -> dict[str, Any]:
return {"data": {"journeySearch": {"outbound": {"journeys": journeys}}}}
def _journey(
departs: str,
arrives: str,
price: float | None = None,
seats: int | None = None,
service_name: str = "",
carrier: str = "ES",
plus_price: float | None = None,
plus_seats: int | None = None,
) -> dict[str, Any]:
fares: list[dict[str, Any]] = [
{
"classOfService": {"code": "STANDARD"},
"prices": {"displayPrice": price},
"seats": seats,
"legs": (
[{"serviceName": service_name, "serviceType": {"code": carrier}}]
if service_name
else []
),
}
]
if plus_price is not None or plus_seats is not None:
fares.append(
{
"classOfService": {"code": "PLUS"},
"prices": {"displayPrice": plus_price},
"seats": plus_seats,
"legs": [],
}
)
return {
"timing": {"departureTime": departs, "arrivalTime": arrives},
"fares": fares,
}
# ---------------------------------------------------------------------------
# _parse_graphql
# ---------------------------------------------------------------------------
def test_parse_graphql_single_journey() -> None:
    """One outbound journey yields one service with every field populated."""
    data = _gql_response(
        [_journey("09:31", "12:55", price=156, seats=37, service_name="9014")]
    )
    services = _parse_graphql(data, "Paris Gare du Nord")
    assert len(services) == 1
    s = services[0]
    assert s["depart_st_pancras"] == "09:31"
    assert s["arrive_destination"] == "12:55"
    assert s["destination"] == "Paris Gare du Nord"
    # The carrier code and service name are expected joined by a space.
    assert s["train_number"] == "ES 9014"
    assert s["price"] == 156.0
    assert s["seats"] == 37
    # No PLUS fare was supplied, so both PLUS fields must be None.
    assert s["plus_price"] is None
    assert s["plus_seats"] is None
def test_parse_graphql_standard_premier_price() -> None:
    """A PLUS fare is reported as plus_price/plus_seats next to the standard fare."""
    data = _gql_response(
        [
            _journey(
                "09:31",
                "12:55",
                price=156,
                seats=37,
                service_name="9014",
                plus_price=220,
                plus_seats=12,
            )
        ]
    )
    services = _parse_graphql(data, "Paris Gare du Nord")
    assert len(services) == 1
    s = services[0]
    assert s["price"] == 156.0
    assert s["seats"] == 37
    assert s["plus_price"] == 220.0
    assert s["plus_seats"] == 12
def test_parse_graphql_plus_price_none_when_not_returned() -> None:
    """Without a PLUS fare in the payload, both PLUS fields are None."""
    data = _gql_response([_journey("09:31", "12:55", price=156, seats=37)])
    services = _parse_graphql(data, "Paris Gare du Nord")
    assert services[0]["plus_price"] is None
    assert services[0]["plus_seats"] is None
def test_parse_graphql_half_pound_price() -> None:
    """A fractional display price (192.5) is preserved rather than truncated."""
    data = _gql_response(
        [_journey("09:01", "14:20", price=192.5, seats=25, service_name="9116")]
    )
    services = _parse_graphql(data, "Amsterdam Centraal")
    assert services[0]["price"] == 192.5
def test_parse_graphql_null_price() -> None:
    """A null display price stays None while a seat count of 0 is kept as 0."""
    data = _gql_response([_journey("06:16", "11:09", price=None, seats=0)])
    services = _parse_graphql(data, "Amsterdam Centraal")
    assert services[0]["price"] is None
    assert services[0]["seats"] == 0
def test_parse_graphql_sorted_by_departure() -> None:
    """Services come back ordered by departure time, not payload order."""
    # The later departure is deliberately listed first in the payload.
    data = _gql_response(
        [
            _journey("10:31", "13:55"),
            _journey("07:31", "10:59"),
        ]
    )
    services = _parse_graphql(data, "Paris Gare du Nord")
    assert services[0]["depart_st_pancras"] == "07:31"
    assert services[1]["depart_st_pancras"] == "10:31"
def test_parse_graphql_deduplicates_same_departure_time() -> None:
    """Three identical journeys (same departure time) collapse into one service."""
    data = _gql_response(
        [
            _journey("06:16", "11:09", price=None, seats=0),
            _journey("06:16", "11:09", price=None, seats=0),
            _journey("06:16", "11:09", price=None, seats=0),
        ]
    )
    services = _parse_graphql(data, "Amsterdam Centraal")
    assert len(services) == 1
def test_parse_graphql_no_legs_gives_empty_train_number() -> None:
    """A fare whose ``legs`` list is empty produces an empty train number."""
    # An empty service_name makes _journey emit a fare with no legs.
    data = _gql_response(
        [_journey("09:31", "12:55", price=156, seats=37, service_name="")]
    )
    services = _parse_graphql(data, "Paris Gare du Nord")
    assert services[0]["train_number"] == ""
def test_parse_graphql_empty_journeys() -> None:
    """An empty journey list parses to an empty list of services."""
    data = _gql_response([])
    assert _parse_graphql(data, "Paris Gare du Nord") == []
def test_parse_graphql_inbound_leg() -> None:
    """Parsing the inbound leg yields destination-to-St-Pancras keyed fields."""
    # Built by hand because _gql_response only produces an "outbound" envelope.
    data: dict[str, Any] = {
        "data": {
            "journeySearch": {
                "inbound": {
                    "journeys": [
                        _journey(
                            "17:12", "18:30", price=49, seats=43, service_name="9035"
                        )
                    ]
                }
            }
        }
    }
    services = _parse_graphql_leg(data, "Paris Gare du Nord", "inbound", "inbound")
    # Unlike the outbound tests, the time keys here are "depart_destination"
    # and "arrive_st_pancras".
    assert services == [
        {
            "depart_destination": "17:12",
            "arrive_st_pancras": "18:30",
            "destination": "Paris Gare du Nord",
            "train_number": "ES 9035",
            "price": 49.0,
            "seats": 43,
            "plus_price": None,
            "plus_seats": None,
        }
    ]
# ---------------------------------------------------------------------------
# search_url
# ---------------------------------------------------------------------------
def test_search_url() -> None:
    """A one-way search URL carries adult, origin, destination and outbound date."""
    url = search_url("Paris Gare du Nord", "2026-04-10")
    assert url == (
        "https://www.eurostar.com/search/uk-en"
        "?adult=1&origin=7015400&destination=8727100&outbound=2026-04-10"
    )
def test_search_url_return() -> None:
    """With a return date, the URL ends with the outbound then inbound dates."""
    url = search_url("Paris Gare du Nord", "2026-04-10", return_date="2026-04-17")
    assert url.endswith("&outbound=2026-04-10&inbound=2026-04-17")