#!/usr/bin/python3
"""Load YAML data to ensure validity."""

import os
import sys
import typing
from datetime import date, datetime, timedelta
from typing import Tuple, TypeVar, cast

import yaml
from geopy.distance import distance  # type: ignore[import-untyped]
from rich.pretty import pprint

import agenda
import agenda.conference
import agenda.data
import agenda.travel
import agenda.trip
import agenda.types

config = __import__("config.default", fromlist=[""])
data_dir = config.PERSONAL_DATA

# GBP is always accepted in addition to the configured currencies.
currencies = set(config.CURRENCIES + ["GBP"])

LatLon = Tuple[float, float]

T = TypeVar("T")


def _load_yaml(filename: str) -> typing.Any:
    """Parse *filename* from the personal data directory with ``yaml.safe_load``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes instead of being left for the garbage collector.
    """
    filepath = os.path.join(data_dir, filename)
    with open(filepath, encoding="utf-8") as stream:
        return yaml.safe_load(stream)


def check_currency(item: agenda.types.StrDict) -> None:
    """Exit the program if the item's currency is not in the known set.

    Items without a (truthy) ``currency`` value are accepted. On failure the
    item is pretty-printed and the process exits with status -1.
    """
    currency = item.get("currency")
    if not currency or currency in currencies:
        return None

    pprint(item)
    print(f"currency {currency!r} not in {currencies!r}")
    sys.exit(-1)


def get_coords(item: agenda.types.StrDict) -> LatLon | None:
    """Return the (latitude, longitude) tuple when both keys are present.

    Returns ``None`` when either key is missing. Asserts that both values are
    numeric before converting them to ``float``.
    """
    if "latitude" in item and "longitude" in item:
        latitude = item["latitude"]
        longitude = item["longitude"]
        assert isinstance(latitude, (int, float))
        assert isinstance(longitude, (int, float))
        return (float(latitude), float(longitude))
    return None


def remove_nones(items: list[T | None]) -> list[T]:
    """Return a new list with None values removed."""
    return [item for item in items if item is not None]


def distance_km(a: LatLon, b: LatLon) -> float:
    """Return the distance in km between two (lat, lon) points.

    The value comes from geopy's ``distance`` (geodesic by default).
    """
    return cast(float, distance(a, b).km)


def normalize_datetime(dt_value: date | datetime) -> datetime:
    """Convert date or datetime to a naive datetime for comparison.

    A ``datetime`` has its timezone info dropped (the wall-clock value is kept,
    no conversion is performed); a plain ``date`` becomes midnight of that day.

    Raises:
        TypeError: if *dt_value* is neither a ``date`` nor a ``datetime``.
    """
    # datetime must be tested first: it is a subclass of date.
    if isinstance(dt_value, datetime):
        return dt_value.replace(tzinfo=None)
    if isinstance(dt_value, date):
        return datetime.combine(dt_value, datetime.min.time())
    raise TypeError(f"Unsupported datetime value type: {type(dt_value)}")


def parse_datetime_value(value: typing.Any) -> datetime | None:
    """Return a naive datetime for supported input types.

    ``None`` is passed through. Strings are parsed as ISO 8601 (a ``Z`` suffix
    is treated as ``+00:00``) and then stripped of timezone info. ``date`` and
    ``datetime`` values are handled by :func:`normalize_datetime`.

    Raises:
        ValueError: if a string cannot be parsed as an ISO datetime.
        TypeError: if *value* is of an unsupported type.
    """
    if value is None:
        return None
    if isinstance(value, str):
        try:
            parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        except ValueError as exc:
            raise ValueError(f"Invalid ISO datetime string: {value}") from exc
        return parsed.replace(tzinfo=None)
    # Shares the date/datetime handling (and the TypeError) with the
    # ordering checks so both code paths normalise values the same way.
    return normalize_datetime(value)


def ranges_overlap(
    start_a: datetime, end_a: datetime, start_b: datetime, end_b: datetime
) -> bool:
    """Return True when two datetime ranges overlap.

    The comparison is strict, so ranges that merely touch at an endpoint do
    not count as overlapping.
    """
    return start_a < end_b and start_b < end_a


def check_trips() -> None:
    """Check trips and ensure they are in chronological order.

    Also builds the trip list and, for every trip that has both accommodation
    and conferences, asserts that each located, non-online conference has an
    overlapping stay within ``config.ACCOMODATION_MAX_DISTANCE_KM`` (when any
    stay overlaps it at all). Finally builds coordinates and routes.

    Raises:
        AssertionError: if trips are out of order or accommodation is too far
            from a conference.
    """
    trips_data = _load_yaml("trips.yaml")

    prev_trip = None
    prev_trip_data = None
    for trip_data in trips_data:
        current_trip = normalize_datetime(trip_data["trip"])
        if prev_trip is not None and current_trip < prev_trip:
            assert prev_trip_data is not None
            print("Out of order trip found:")
            print(
                f" Previous: {prev_trip_data.get('trip')} - "
                + f"{prev_trip_data.get('name', 'No name')}"
            )
            print(
                f" Current: {trip_data.get('trip')} - "
                + f"{trip_data.get('name', 'No name')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError("Trips are not in chronological order by trip date.")
        prev_trip = current_trip
        prev_trip_data = trip_data

    trip_list = agenda.trip.build_trip_list(data_dir)
    print(len(trip_list), "trips")

    for trip in trip_list:
        if not trip.accommodation or not trip.conferences:
            continue

        # Only stays with coordinates and both dates can be compared.
        accommodation_entries: list[
            tuple[agenda.types.StrDict, LatLon, datetime, datetime]
        ] = []
        for accommodation in trip.accommodation:
            accommodation_coords = get_coords(accommodation)
            if accommodation_coords is None:
                continue
            start_dt = parse_datetime_value(accommodation.get("from"))
            end_dt = parse_datetime_value(accommodation.get("to"))
            if start_dt is None or end_dt is None:
                continue
            accommodation_entries.append(
                (accommodation, accommodation_coords, start_dt, end_dt)
            )

        if not accommodation_entries:
            continue

        for conference in trip.conferences:
            if conference.get("online"):
                continue
            conference_coords = get_coords(conference)
            if conference_coords is None:
                continue

            start_dt = parse_datetime_value(conference.get("start"))
            # A conference without an end is treated as ending at its start.
            end_value = conference.get("end") or conference.get("start")
            end_dt = parse_datetime_value(end_value)
            if start_dt is None or end_dt is None:
                continue

            conference_country = (
                str(conference.get("country")).lower()
                if conference.get("country")
                else None
            )

            overlapping_distances = []
            for (
                accommodation_item,
                accommodation_coords,
                accommodation_start,
                accommodation_end,
            ) in accommodation_entries:
                accommodation_country = (
                    str(accommodation_item.get("country")).lower()
                    if accommodation_item.get("country")
                    else None
                )
                # Stays known to be in a different country are not candidates.
                if (
                    conference_country
                    and accommodation_country
                    and accommodation_country != conference_country
                ):
                    continue
                if not ranges_overlap(
                    accommodation_start, accommodation_end, start_dt, end_dt
                ):
                    continue
                overlapping_distances.append(
                    distance_km(conference_coords, accommodation_coords)
                )

            if not overlapping_distances:
                continue
            assert min(overlapping_distances) < config.ACCOMODATION_MAX_DISTANCE_KM

    coords, routes = agenda.trip.get_coordinates_and_routes(trip_list, data_dir)
    print(len(coords), "coords")
    print(len(routes), "routes")


def check_flights(airlines: set[str]) -> None:
    """Check flights and ensure they are in chronological order.

    Args:
        airlines: the set of airline codes that flights may reference (the
            caller passes the IATA codes from the airlines file).

    Every booking must have a ``trip`` key, every flight's airline must be in
    *airlines*, the booking currency must be known, and bookings must be
    strictly ordered by the departure of their first flight. Bookings that
    contain flights without ``co2_kg`` are pretty-printed (once per such
    flight) but do not fail the check.
    """
    bookings = agenda.travel.parse_yaml("flights", data_dir)

    flight_count = 0
    co2_flight_count = 0

    prev_first_depart = None
    for booking in bookings:
        if "trip" not in booking:
            pprint(booking)
        assert "trip" in booking

        flights = booking["flights"]
        assert all(flight["airline"] in airlines for flight in flights)
        flight_count += len(flights)
        co2_flight_count += sum("co2_kg" in flight for flight in flights)

        for flight in flights:
            if "co2_kg" not in flight:
                pprint(booking)

        check_currency(booking)

        if prev_first_depart:
            assert (
                flights[0]["depart"] > prev_first_depart
            ), "Bookings are not in chronological order by first flight's departure."
        prev_first_depart = flights[0]["depart"]

    print(
        f"{len(bookings)} flight bookings, {flight_count} flights, "
        f"{co2_flight_count} with CO2 numbers"
    )


def check_trains() -> None:
    """Check trains and ensure they are in chronological order.

    Raises:
        AssertionError: if a train departs before the one listed before it.
    """
    trains = agenda.travel.parse_yaml("trains", data_dir)

    prev_depart = None
    prev_train = None
    for train in trains:
        current_depart = normalize_datetime(train["depart"])
        if prev_depart is not None and current_depart < prev_depart:
            assert prev_train is not None
            print("Out of order train found:")
            print(
                f" Previous: {prev_train.get('depart')} "
                f"{prev_train.get('from', '')} -> {prev_train.get('to', '')}"
            )
            print(
                f" Current: {train.get('depart')} "
                f"{train.get('from', '')} -> {train.get('to', '')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Trains are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_train = train

    print(len(trains), "trains")


def check_conferences() -> None:
    """Check conferences and ensure they are in chronological order.

    Each entry is also used to construct an ``agenda.conference.Conference``
    and its currency is validated against the known set; an unknown currency
    exits the process with status -1.

    Raises:
        AssertionError: if a conference starts before the one listed before it.
    """
    conferences_data = _load_yaml("conferences.yaml")
    conferences = [agenda.conference.Conference(**conf) for conf in conferences_data]

    prev_start = None
    prev_conf_data = None
    for conf, conf_data in zip(conferences, conferences_data):
        if conf.currency and conf.currency not in currencies:
            pprint(conf)
            print(f"currency {conf.currency!r} not in {currencies!r}")
            sys.exit(-1)

        current_start = normalize_datetime(conf_data["start"])
        if prev_start is not None and current_start < prev_start:
            assert prev_conf_data is not None
            print("Out of order conference found:")
            print(
                f" Previous: {prev_conf_data.get('start')} - "
                f"{prev_conf_data.get('name', 'No name')}"
            )
            print(
                f" Current: {conf_data.get('start')} - "
                f"{conf_data.get('name', 'No name')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Conferences are not in chronological order by start time."
            )
        prev_start = current_start
        prev_conf_data = conf_data

    print(len(conferences), "conferences")


def check_events() -> None:
    """Check events by reading them for a window around today.

    The window runs from 365 days ago to 730 days ahead.
    """
    today = date.today()
    last_year = today - timedelta(days=365)
    next_year = today + timedelta(days=2 * 365)

    events = agenda.events_yaml.read(data_dir, last_year, next_year)
    print(len(events), "events")


def check_coordinates(item: agenda.types.StrDict) -> None:
    """Check coordinates are valid.

    An item with neither ``latitude`` nor ``longitude`` is accepted. Otherwise
    both keys must be present and both values must be numeric.

    Raises:
        AssertionError: if only one key is present or a value is not numeric.
    """
    if "latitude" not in item and "longitude" not in item:
        return
    assert "latitude" in item and "longitude" in item
    assert all(isinstance(item[key], (int, float)) for key in ("latitude", "longitude"))


def check_accommodation() -> None:
    """Check accommodation and ensure stays are in chronological order.

    Every stay must have all required fields, valid coordinates (if any) and
    a known currency.

    Raises:
        AssertionError: if a stay is missing a field, has bad coordinates, or
            starts before the stay listed before it.
    """
    accommodation_list = _load_yaml("accommodation.yaml")

    required_fields = ["type", "name", "country", "location", "trip", "from", "to"]

    prev_from = None
    prev_stay = None
    for stay in accommodation_list:
        try:
            assert all(field in stay for field in required_fields)
            check_coordinates(stay)
        except AssertionError:
            # Show the offending record before propagating the failure.
            pprint(stay)
            raise

        check_currency(stay)

        current_from = normalize_datetime(stay["from"])
        if prev_from is not None and current_from < prev_from:
            assert prev_stay is not None
            print("Out of order accommodation found:")
            print(
                f" Previous: {prev_stay.get('from')} - "
                f"{prev_stay.get('name', 'No name')} "
                f"({prev_stay.get('location', '')})"
            )
            print(
                f" Current: {stay.get('from')} - "
                f"{stay.get('name', 'No name')} "
                f"({stay.get('location', '')})"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Accommodation is not in chronological order by check-in time."
            )
        prev_from = current_from
        prev_stay = stay

    print(len(accommodation_list), "stays")


def check_airports() -> None:
    """Check every airport has a country that ``agenda.get_country`` accepts."""
    airports = typing.cast(
        dict[str, agenda.types.StrDict], agenda.travel.parse_yaml("airports", data_dir)
    )
    print(len(airports), "airports")
    for airport in airports.values():
        assert "country" in airport
        assert agenda.get_country(airport["country"])


def check_stations() -> None:
    """Check every station has a country that ``agenda.get_country`` accepts."""
    stations = agenda.travel.parse_yaml("stations", data_dir)
    print(len(stations), "stations")
    for station in stations:
        assert "country" in station
        assert agenda.get_country(station["country"])


def check_ferries() -> None:
    """Check ferries and ensure they are in chronological order.

    Each ferry's currency is validated as well.

    Raises:
        AssertionError: if a ferry departs before the one listed before it.
    """
    ferries = agenda.travel.parse_yaml("ferries", data_dir)

    prev_depart = None
    prev_ferry = None
    for ferry in ferries:
        current_depart = normalize_datetime(ferry["depart"])
        if prev_depart is not None and current_depart < prev_depart:
            assert prev_ferry is not None
            print("Out of order ferry found:")
            print(
                f" Previous: {prev_ferry.get('depart')} "
                f"{prev_ferry.get('from', '')} -> {prev_ferry.get('to', '')}"
            )
            print(
                f" Current: {ferry.get('depart')} "
                f"{ferry.get('from', '')} -> {ferry.get('to', '')}"
            )
            # Raised explicitly so the check still fires under ``python -O``.
            raise AssertionError(
                "Ferries are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_ferry = ferry

        check_currency(ferry)

    print(len(ferries), "ferries")


def check_airlines() -> list[agenda.types.StrDict]:
    """Check airlines and return the parsed list.

    Each airline must have exactly the keys ``icao``, ``iata`` and ``name``
    (plus an optional boolean ``flight_number_prefer_icao``). The IATA code
    must be two characters: an uppercase letter followed by an uppercase
    letter or a digit. The ICAO code must be three characters and uppercase.

    Raises:
        AssertionError: if any airline fails validation; the offending record
            is dumped as YAML first.
    """
    airlines = agenda.travel.parse_yaml("airlines", data_dir)
    print(len(airlines), "airlines")
    for airline in airlines:
        try:
            keys = set(airline.keys())
            keys.discard("flight_number_prefer_icao")
            assert keys == {"icao", "iata", "name"}
            iata, icao = airline["iata"], airline["icao"]
            # Lengths are checked before indexing so a short code fails the
            # assertion (and is dumped below) rather than raising IndexError.
            assert len(iata) == 2 and len(icao) == 3
            # Parenthesised: without them ``and`` binds tighter than ``or``
            # and any code ending in a digit would pass regardless of its
            # first character.
            assert iata[0].isupper() and (iata[1].isupper() or iata[1].isdigit())
            assert icao.isupper()
            if "flight_number_prefer_icao" in airline:
                assert isinstance(airline["flight_number_prefer_icao"], bool)
        except AssertionError:
            print(yaml.dump([airline]))
            raise

    return airlines


def check() -> None:
    """Validate personal data YAML files."""
    airlines = check_airlines()
    check_trips()
    check_flights({airline["iata"] for airline in airlines})
    check_trains()
    check_ferries()
    check_conferences()
    check_events()
    check_accommodation()
    check_airports()
    check_stations()


if __name__ == "__main__":
    check()