426 lines
14 KiB
Python
Executable file
426 lines
14 KiB
Python
Executable file
#!/usr/bin/python3
|
|
"""Load YAML data to ensure validity."""
|
|
|
|
import os
|
|
import sys
|
|
import typing
|
|
from datetime import date, datetime, timedelta
|
|
from typing import Tuple, TypeVar, cast
|
|
|
|
import yaml
|
|
from geopy.distance import distance # type: ignore[import-untyped]
|
|
from rich.pretty import pprint
|
|
|
|
import agenda
|
|
import agenda.conference
|
|
import agenda.data
|
|
import agenda.travel
|
|
import agenda.trip
|
|
import agenda.types
|
|
|
|
# Import the project's default configuration by dotted name. Passing a
# non-empty ``fromlist`` makes ``__import__`` return the ``config.default``
# submodule itself instead of the top-level ``config`` package.
config = __import__("config.default", fromlist=[""])
# Directory containing the personal YAML data files that the checks read.
data_dir = config.PERSONAL_DATA

# Currency codes accepted by the currency checks: the configured list plus GBP.
currencies = set(config.CURRENCIES + ["GBP"])

# A coordinate pair in (latitude, longitude) order.
LatLon = Tuple[float, float]
|
|
|
|
|
|
def check_currency(item: agenda.types.StrDict) -> None:
    """Terminate the program if *item* names a currency that is not configured.

    An item with no ``currency`` key, or with a falsy value for it, is
    accepted. Otherwise the currency must be a member of the module-level
    ``currencies`` set; if it is not, the item and an explanation are printed
    and the process exits through ``sys.exit(-1)``.
    """
    code = item.get("currency")
    if code and code not in currencies:
        pprint(item)
        print(f"currency {code!r} not in {currencies!r}")
        sys.exit(-1)
    return None
|
|
|
|
|
|
def get_coords(item: agenda.types.StrDict) -> LatLon | None:
    """Extract the ``(latitude, longitude)`` pair from *item*, if it has one.

    Returns ``None`` unless both keys are present. When they are, each value
    is asserted to be an ``int`` or ``float`` and is converted to ``float``.
    """
    if not ("latitude" in item and "longitude" in item):
        return None

    lat, lon = item["latitude"], item["longitude"]
    assert isinstance(lat, (int, float))
    assert isinstance(lon, (int, float))
    return (float(lat), float(lon))
|
|
|
|
|
|
T = TypeVar("T")
|
|
|
|
|
|
def remove_nones(items: list[T | None]) -> list[T]:
|
|
"""Return a new list with None values removed."""
|
|
return [item for item in items if item is not None]
|
|
|
|
|
|
def distance_km(a: LatLon, b: LatLon) -> float:
    """Return the distance in kilometres between points *a* and *b*.

    Both arguments are ``(latitude, longitude)`` tuples. The calculation is
    delegated to ``geopy.distance.distance`` and its ``km`` attribute is
    returned.
    """
    separation = distance(a, b)
    return cast(float, separation.km)
|
|
|
|
|
|
def parse_datetime_value(value: typing.Any) -> datetime | None:
|
|
"""Return naive datetime for supported input types."""
|
|
if value is None:
|
|
return None
|
|
if isinstance(value, str):
|
|
try:
|
|
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
|
|
except ValueError as exc:
|
|
raise ValueError(f"Invalid ISO datetime string: {value}") from exc
|
|
return parsed.replace(tzinfo=None)
|
|
if isinstance(value, datetime):
|
|
return value.replace(tzinfo=None)
|
|
if isinstance(value, date):
|
|
return datetime.combine(value, datetime.min.time())
|
|
raise TypeError(f"Unsupported datetime value type: {type(value)}")
|
|
|
|
|
|
def ranges_overlap(
    start_a: datetime, end_a: datetime, start_b: datetime, end_b: datetime
) -> bool:
    """Report whether range A and range B share any span of time.

    The comparison is strict: ranges that only touch at an endpoint (one ends
    exactly when the other starts) are not treated as overlapping.
    """
    a_finishes_first = end_a <= start_b
    b_finishes_first = end_b <= start_a
    return not (a_finishes_first or b_finishes_first)
|
|
|
|
|
|
def _country_key(item: agenda.types.StrDict) -> str | None:
    """Return the item's ``country`` lower-cased, or None when missing or falsy."""
    country = item.get("country")
    return str(country).lower() if country else None


def _check_trip_order(trips_data: list[agenda.types.StrDict]) -> None:
    """Raise AssertionError if a trip is dated before the one listed above it."""
    prev_trip = None
    prev_trip_data = None
    for trip_data in trips_data:
        current_trip = normalize_datetime(trip_data["trip"])
        if prev_trip and current_trip < prev_trip:
            assert prev_trip_data is not None
            print("Out of order trip found:")
            print(
                f" Previous: {prev_trip_data.get('trip')} - "
                + f"{prev_trip_data.get('name', 'No name')}"
            )
            print(
                f" Current: {trip_data.get('trip')} - "
                + f"{trip_data.get('name', 'No name')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError("Trips are not in chronological order by trip date.")
        prev_trip = current_trip
        prev_trip_data = trip_data


def _dated_stays(
    accommodation: typing.Iterable[agenda.types.StrDict],
) -> list[tuple[agenda.types.StrDict, LatLon, datetime, datetime]]:
    """Return ``(stay, coords, start, end)`` for stays with coordinates and dates."""
    entries: list[tuple[agenda.types.StrDict, LatLon, datetime, datetime]] = []
    for stay in accommodation:
        coords = get_coords(stay)
        if coords is None:
            continue
        start_dt = parse_datetime_value(stay.get("from"))
        end_dt = parse_datetime_value(stay.get("to"))
        if start_dt is None or end_dt is None:
            continue
        entries.append((stay, coords, start_dt, end_dt))
    return entries


def _check_conference_distances(
    conferences: typing.Iterable[agenda.types.StrDict],
    stays: list[tuple[agenda.types.StrDict, LatLon, datetime, datetime]],
) -> None:
    """Raise AssertionError if a conference has no overlapping stay close enough."""
    limit_km = config.ACCOMODATION_MAX_DISTANCE_KM
    for conference in conferences:
        if conference.get("online"):
            continue
        conference_coords = get_coords(conference)
        if conference_coords is None:
            continue
        start_dt = parse_datetime_value(conference.get("start"))
        # A conference without an end is treated as ending when it starts.
        end_value = conference.get("end") or conference.get("start")
        end_dt = parse_datetime_value(end_value)
        if start_dt is None or end_dt is None:
            continue

        conference_country = _country_key(conference)
        overlapping_distances = []
        for stay, stay_coords, stay_start, stay_end in stays:
            stay_country = _country_key(stay)
            # Only skip on country when both sides state one and they differ.
            if (
                conference_country
                and stay_country
                and stay_country != conference_country
            ):
                continue
            if not ranges_overlap(stay_start, stay_end, start_dt, end_dt):
                continue
            overlapping_distances.append(distance_km(conference_coords, stay_coords))

        # Conferences with no overlapping stay are not checked at all.
        if not overlapping_distances:
            continue

        nearest_km = min(overlapping_distances)
        if not nearest_km < limit_km:
            pprint(conference)
            raise AssertionError(
                f"Nearest overlapping accommodation is {nearest_km:.1f} km from "
                f"the conference; the limit is {limit_km} km."
            )


def check_trips() -> None:
    """Check trips and ensure they are in chronological order.

    Steps, in order:

    1. Load ``trips.yaml`` from ``data_dir`` and verify that each entry's
       ``trip`` date is not earlier than the previous entry's.
    2. Build the trip list with ``agenda.trip.build_trip_list`` and print how
       many trips there are.
    3. For every trip that has both accommodation and conferences, verify that
       each conference that is not online and has coordinates and dates is
       less than ``config.ACCOMODATION_MAX_DISTANCE_KM`` from the nearest
       accommodation whose stay overlaps it. Accommodation in a different
       country is ignored when both records name a country.
    4. Build coordinates and routes with
       ``agenda.trip.get_coordinates_and_routes`` and print their counts.

    Raises:
        AssertionError: if trips are out of order or a conference is too far
            from every overlapping accommodation.
    """
    filepath = os.path.join(data_dir, "trips.yaml")
    # Use a context manager so the file handle is closed deterministically;
    # an empty YAML file loads as None, which is treated as "no trips".
    with open(filepath, "r") as trips_file:
        trips_data = yaml.safe_load(trips_file) or []

    _check_trip_order(trips_data)

    trip_list = agenda.trip.build_trip_list(data_dir)
    print(len(trip_list), "trips")

    for trip in trip_list:
        if not trip.accommodation or not trip.conferences:
            continue
        stays = _dated_stays(trip.accommodation)
        if not stays:
            continue
        _check_conference_distances(trip.conferences, stays)

    coords, routes = agenda.trip.get_coordinates_and_routes(trip_list, data_dir)
    print(len(coords), "coords")
    print(len(routes), "routes")
|
|
|
|
|
|
def check_flights(airlines: set[str]) -> None:
    """Validate flight bookings and print summary counts.

    For every booking loaded from the ``flights`` YAML data this asserts that
    a ``trip`` key is present, that each flight's ``airline`` is in
    *airlines*, and that the first flight departs strictly later than the
    first flight of the previous booking. The booking's currency is checked
    with ``check_currency``. A booking is pretty-printed once for each of its
    flights that lacks a ``co2_kg`` value.

    Args:
        airlines: the airline codes that flights are allowed to reference.
    """
    bookings = agenda.travel.parse_yaml("flights", data_dir)

    total_flights = 0
    flights_with_co2 = 0
    last_first_depart = None

    for booking in bookings:
        # Show the offending booking before the assertion aborts the run.
        if "trip" not in booking:
            pprint(booking)
        assert "trip" in booking

        flights = booking["flights"]
        assert all(flight["airline"] in airlines for flight in flights)

        total_flights += len(flights)
        for flight in flights:
            if "co2_kg" in flight:
                flights_with_co2 += 1
            else:
                pprint(booking)
        check_currency(booking)

        first_depart = flights[0]["depart"]
        if last_first_depart:
            assert (
                first_depart > last_first_depart
            ), "Bookings are not in chronological order by first flight's departure."
        last_first_depart = first_depart

    print(
        f"{len(bookings)} flight bookings, {total_flights} flights, "
        f"{flights_with_co2} with CO2 numbers"
    )
|
|
|
|
|
|
def normalize_datetime(dt_value: date | datetime) -> datetime:
|
|
"""Convert date or datetime to datetime for comparison, removing timezone info."""
|
|
if isinstance(dt_value, datetime):
|
|
return dt_value.replace(tzinfo=None)
|
|
if isinstance(dt_value, date):
|
|
return datetime.combine(dt_value, datetime.min.time())
|
|
raise TypeError(f"Unsupported datetime value type: {type(dt_value)}")
|
|
|
|
|
|
def check_trains() -> None:
    """Verify that train journeys are listed in chronological order.

    Journeys are loaded from the ``trains`` YAML data and compared by their
    ``depart`` value after normalisation to naive datetimes. Equal departure
    times are allowed. On the first journey that departs earlier than its
    predecessor, both journeys are printed and an assertion fails. The number
    of trains is printed at the end.
    """
    trains = agenda.travel.parse_yaml("trains", data_dir)

    earlier_depart = None
    earlier_train = None
    for train in trains:
        depart = normalize_datetime(train["depart"])
        if earlier_depart and depart < earlier_depart:
            assert earlier_train is not None
            print("Out of order train found:")
            for label, leg in (("Previous", earlier_train), ("Current", train)):
                print(
                    f" {label}: {leg.get('depart')} "
                    f"{leg.get('from', '')} -> {leg.get('to', '')}"
                )
            assert False, "Trains are not in chronological order by departure time."
        earlier_depart, earlier_train = depart, train

    print(len(trains), "trains")
|
|
|
|
|
|
def check_conferences() -> None:
    """Check conferences and ensure they are in chronological order.

    Loads ``conferences.yaml`` from ``data_dir``, constructs an
    ``agenda.conference.Conference`` from every entry (which fails if an entry
    has fields the class does not accept), verifies each conference's currency
    against the module-level ``currencies`` set, and checks that entries are
    ordered by ``start``. Equal start times are allowed. The number of
    conferences is printed at the end.

    Exits via ``sys.exit(-1)`` on an unknown currency.

    Raises:
        AssertionError: if a conference starts before the one listed above it.
    """
    filepath = os.path.join(data_dir, "conferences.yaml")
    # Use a context manager so the file handle is closed deterministically;
    # an empty YAML file loads as None, which is treated as "no conferences".
    with open(filepath, "r") as conferences_file:
        conferences_data = yaml.safe_load(conferences_file) or []
    conferences = [agenda.conference.Conference(**conf) for conf in conferences_data]

    prev_start = None
    prev_conf_data = None
    # The raw dicts and the constructed objects are parallel lists.
    for conf_data, conf in zip(conferences_data, conferences):
        if conf.currency and conf.currency not in currencies:
            pprint(conf)
            print(f"currency {conf.currency!r} not in {currencies!r}")
            sys.exit(-1)

        current_start = normalize_datetime(conf_data["start"])
        if prev_start and current_start < prev_start:
            assert prev_conf_data is not None
            print("Out of order conference found:")
            print(
                f" Previous: {prev_conf_data.get('start')} - {prev_conf_data.get('name', 'No name')}"
            )
            print(
                f" Current: {conf_data.get('start')} - {conf_data.get('name', 'No name')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Conferences are not in chronological order by start time."
            )
        prev_start = current_start
        prev_conf_data = conf_data

    print(len(conferences), "conferences")
|
|
|
|
|
|
def check_events() -> None:
    """Read the events in a window around today and print how many there are.

    The window starts 365 days before today and ends 730 days after it.
    """
    today = date.today()
    window_start = today - timedelta(days=365)
    window_end = today + timedelta(days=2 * 365)

    # NOTE(review): agenda.events_yaml is not among the imports visible at the
    # top of this file; presumably it is loaded as a side effect of another
    # agenda import. Confirm.
    events = agenda.events_yaml.read(data_dir, window_start, window_end)
    print(len(events), "events")
|
|
|
|
|
|
def check_coordinates(item: agenda.types.StrDict) -> None:
    """Assert that *item* has either no coordinates or a complete numeric pair.

    An item with neither ``latitude`` nor ``longitude`` passes. If only one of
    the two is present, or either value is not an ``int`` or ``float``, an
    assertion fails.
    """
    has_latitude = "latitude" in item
    has_longitude = "longitude" in item
    if not (has_latitude or has_longitude):
        return

    assert has_latitude and has_longitude
    for key in ("latitude", "longitude"):
        assert isinstance(item[key], (int, float))
|
|
|
|
|
|
def check_accommodation() -> None:
    """Check accommodation and ensure they are in chronological order.

    Loads ``accommodation.yaml`` from ``data_dir`` and, for every stay:

    * requires the fields ``type``, ``name``, ``country``, ``location``,
      ``trip``, ``from`` and ``to``;
    * validates coordinates with ``check_coordinates``;
    * validates the currency with ``check_currency`` (which exits the program
      on an unknown currency);
    * requires ``from`` to be no earlier than the previous stay's ``from``.

    The number of stays is printed at the end.

    Raises:
        AssertionError: if a stay is missing a required field, has invalid
            coordinates, or is listed out of chronological order.
    """
    filepath = os.path.join(data_dir, "accommodation.yaml")
    # Use a context manager so the file handle is closed deterministically;
    # an empty YAML file loads as None, which is treated as "no stays".
    with open(filepath) as accommodation_file:
        accommodation_list = yaml.safe_load(accommodation_file) or []

    required_fields = ["type", "name", "country", "location", "trip", "from", "to"]

    prev_from = None
    prev_stay = None
    for stay in accommodation_list:
        try:
            missing = [field for field in required_fields if field not in stay]
            if missing:
                # Name the missing fields so the failure is actionable.
                raise AssertionError(f"Missing required fields: {missing}")
            check_coordinates(stay)
        except AssertionError:
            # Show the offending record before the error propagates.
            pprint(stay)
            raise

        check_currency(stay)

        current_from = normalize_datetime(stay["from"])
        if prev_from and current_from < prev_from:
            assert prev_stay is not None
            print("Out of order accommodation found:")
            print(
                f" Previous: {prev_stay.get('from')} - {prev_stay.get('name', 'No name')} ({prev_stay.get('location', '')})"
            )
            print(
                f" Current: {stay.get('from')} - {stay.get('name', 'No name')} ({stay.get('location', '')})"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Accommodation is not in chronological order by check-in time."
            )
        prev_from = current_from
        prev_stay = stay

    print(len(accommodation_list), "stays")
|
|
|
|
|
|
def check_airports() -> None:
    """Verify that every airport record names a recognised country.

    Airports are loaded from the ``airports`` YAML data as a mapping and their
    count is printed. Each value must contain a ``country`` key for which
    ``agenda.get_country`` returns a truthy result.
    """
    loaded = agenda.travel.parse_yaml("airports", data_dir)
    airports = typing.cast(dict[str, agenda.types.StrDict], loaded)
    print(len(airports), "airports")

    for airport in airports.values():
        assert "country" in airport
        country = agenda.get_country(airport["country"])
        assert country
|
|
|
|
|
|
def check_stations() -> None:
    """Verify that every station record names a recognised country.

    Stations are loaded from the ``stations`` YAML data and their count is
    printed. Each entry must contain a ``country`` key for which
    ``agenda.get_country`` returns a truthy result.
    """
    stations = agenda.travel.parse_yaml("stations", data_dir)
    print(len(stations), "stations")

    for station in stations:
        assert "country" in station
        country = agenda.get_country(station["country"])
        assert country
|
|
|
|
|
|
def check_ferries() -> None:
    """Verify that ferry crossings are chronological and use known currencies.

    Crossings are loaded from the ``ferries`` YAML data and compared by their
    ``depart`` value after normalisation to naive datetimes. Equal departure
    times are allowed. On the first crossing that departs earlier than its
    predecessor, both crossings are printed and an assertion fails. Each
    crossing's currency is checked with ``check_currency``. The number of
    ferries is printed at the end.
    """
    ferries = agenda.travel.parse_yaml("ferries", data_dir)

    earlier_depart = None
    earlier_ferry = None
    for ferry in ferries:
        depart = normalize_datetime(ferry["depart"])
        if earlier_depart and depart < earlier_depart:
            assert earlier_ferry is not None
            print("Out of order ferry found:")
            for label, leg in (("Previous", earlier_ferry), ("Current", ferry)):
                print(
                    f" {label}: {leg.get('depart')} "
                    f"{leg.get('from', '')} -> {leg.get('to', '')}"
                )
            assert False, "Ferries are not in chronological order by departure time."
        earlier_depart, earlier_ferry = depart, ferry
        check_currency(ferry)

    print(len(ferries), "ferries")
|
|
|
|
|
|
def check_airlines() -> list[agenda.types.StrDict]:
    """Validate the airline records and return them.

    Airlines are loaded from the ``airlines`` YAML data and their count is
    printed. Every record must:

    * have exactly the keys ``icao``, ``iata`` and ``name``, optionally plus
      ``flight_number_prefer_icao``;
    * have a two-character ``iata`` code whose first character is an
      upper-case letter and whose second is an upper-case letter or a digit;
    * have a three-character upper-case ``icao`` code;
    * use a ``bool`` for ``flight_number_prefer_icao`` when it is present.

    A record that fails is dumped as YAML before the error propagates.

    Returns:
        The airline records as loaded.

    Raises:
        AssertionError: if any record violates the rules above.
    """
    airlines = agenda.travel.parse_yaml("airlines", data_dir)
    print(len(airlines), "airlines")
    for airline in airlines:
        try:
            keys = set(airline.keys())
            keys.discard("flight_number_prefer_icao")
            assert keys == {"icao", "iata", "name"}
            iata, icao = airline["iata"], airline["icao"]
            # Check lengths before indexing, so a code that is too short fails
            # with an AssertionError (and gets printed) instead of IndexError.
            assert len(iata) == 2 and len(icao) == 3
            # Parenthesised on purpose: ``and`` binds tighter than ``or``, so
            # without the parentheses any code with a digit in second place
            # passed regardless of its first character.
            assert iata[0].isupper() and (iata[1].isupper() or iata[1].isdigit())
            assert icao.isupper()
            if "flight_number_prefer_icao" in airline:
                assert isinstance(airline["flight_number_prefer_icao"], bool)
        except AssertionError:
            print(yaml.dump([airline]))
            raise

    return airlines
|
|
|
|
|
|
def check() -> None:
    """Run every validation step over the personal data YAML files.

    Airlines are validated first because the flight check needs the set of
    airline IATA codes. The remaining checks run in a fixed order and the
    first failure stops the run.
    """
    airlines = check_airlines()
    check_trips()

    iata_codes = {airline["iata"] for airline in airlines}
    check_flights(iata_codes)

    remaining_checks = (
        check_trains,
        check_ferries,
        check_conferences,
        check_events,
        check_accommodation,
        check_airports,
        check_stations,
    )
    for run_check in remaining_checks:
        run_check()


# Run the validation only when executed as a script, not when imported.
if __name__ == "__main__":
    check()
|