#!/usr/bin/python3
"""Load YAML data to ensure validity."""

import os
import sys
import typing
from datetime import date, timedelta, datetime

import yaml
from rich.pretty import pprint

import agenda
import agenda.conference
import agenda.data
import agenda.travel
import agenda.trip
import agenda.types

config = __import__("config.default", fromlist=[""])
data_dir = config.PERSONAL_DATA

# Currencies accepted by check_currency: the configured list plus GBP.
currencies = set(config.CURRENCIES + ["GBP"])


def _load_yaml(filename: str) -> typing.Any:
    """Parse a YAML file from the personal data directory.

    The file is opened in a ``with`` block so the handle is closed as soon as
    parsing finishes instead of being left for the garbage collector.
    """
    filepath = os.path.join(data_dir, filename)
    with open(filepath, "r") as yaml_file:
        return yaml.safe_load(yaml_file)


def _fail_out_of_order(
    kind: str, previous: str, current: str, message: str
) -> typing.NoReturn:
    """Print the two offending records and raise AssertionError(message).

    An explicit raise is used rather than ``assert False`` so the check still
    fires when Python runs with -O, which strips assert statements.
    """
    print(f"Out of order {kind} found:")
    print(f" Previous: {previous}")
    print(f" Current: {current}")
    raise AssertionError(message)


def _describe_journey(item: agenda.types.StrDict) -> str:
    """Return 'depart from -> to' for a train or ferry record."""
    return f"{item.get('depart')} {item.get('from', '')} -> {item.get('to', '')}"


def check_currency(item: agenda.types.StrDict) -> None:
    """Exit with an error if the item's currency is not in config.

    Items with no (or an empty) "currency" value are accepted. On failure the
    item and the allowed set are printed and the process exits with -1.
    """
    currency = item.get("currency")
    if not currency or currency in currencies:
        return None

    pprint(item)
    print(f"currency {currency!r} not in {currencies!r}")
    sys.exit(-1)


def check_trips() -> None:
    """Check trips and ensure they are in chronological order.

    Raises AssertionError if a trip's "trip" date is earlier than that of the
    trip before it. Afterwards builds the trip list plus coordinates and
    routes and prints their counts.
    """
    trips_data = _load_yaml("trips.yaml")

    prev_trip = None
    prev_trip_data = None
    for trip_data in trips_data:
        current_trip = normalize_datetime(trip_data["trip"])
        if prev_trip is not None and current_trip < prev_trip:
            _fail_out_of_order(
                "trip",
                f"{prev_trip_data.get('trip')} - "
                f"{prev_trip_data.get('name', 'No name')}",
                f"{trip_data.get('trip')} - {trip_data.get('name', 'No name')}",
                "Trips are not in chronological order by trip date.",
            )
        prev_trip = current_trip
        prev_trip_data = trip_data

    trip_list = agenda.trip.build_trip_list(data_dir)
    print(len(trip_list), "trips")

    coords, routes = agenda.trip.get_coordinates_and_routes(trip_list, data_dir)
    print(len(coords), "coords")
    print(len(routes), "routes")


def check_flights(airlines: set[str]) -> None:
    """Check flights and ensure they are in chronological order.

    Every booking must have a "trip" key, every flight's "airline" must be in
    *airlines*, and each booking's first departure must be strictly later
    than the previous booking's first departure.
    """
    bookings = agenda.travel.parse_yaml("flights", data_dir)

    flight_count = 0
    co2_flight_count = 0

    prev_first_depart = None
    for booking in bookings:
        if "trip" not in booking:
            pprint(booking)
        assert "trip" in booking
        assert all(flight["airline"] in airlines for flight in booking["flights"])
        flight_count += len(booking["flights"])
        co2_flight_count += sum(
            1 for flight in booking["flights"] if "co2_kg" in flight
        )
        # Show the booking once for each flight that lacks a CO2 figure.
        for flight in booking["flights"]:
            if "co2_kg" not in flight:
                pprint(booking)
        check_currency(booking)

        # Departures are compared as loaded (not via normalize_datetime), so
        # timezone-aware values keep their real ordering.
        first_depart = booking["flights"][0]["depart"]
        if prev_first_depart is not None:
            assert (
                first_depart > prev_first_depart
            ), "Bookings are not in chronological order by first flight's departure."
        prev_first_depart = first_depart

    print(
        f"{len(bookings)} flight bookings, {flight_count} flights, "
        f"{co2_flight_count} with CO2 numbers"
    )


def normalize_datetime(dt_value: typing.Any) -> typing.Any:
    """Convert date or datetime to datetime for comparison, removing timezone info.

    A plain date becomes midnight on that day, a datetime has its tzinfo
    dropped, and any other value is returned unchanged.
    """
    # datetime is a subclass of date, so it has to be excluded explicitly here.
    if isinstance(dt_value, date) and not isinstance(dt_value, datetime):
        return datetime.combine(dt_value, datetime.min.time())
    if isinstance(dt_value, datetime):
        # Remove timezone info to allow comparison between naive and aware datetimes
        return dt_value.replace(tzinfo=None)
    return dt_value


def check_trains() -> None:
    """Check trains and ensure they are in chronological order.

    Raises AssertionError if a train departs earlier than the one before it.
    """
    trains = agenda.travel.parse_yaml("trains", data_dir)

    prev_depart = None
    prev_train = None
    for train in trains:
        current_depart = normalize_datetime(train["depart"])
        if prev_depart is not None and current_depart < prev_depart:
            _fail_out_of_order(
                "train",
                _describe_journey(prev_train),
                _describe_journey(train),
                "Trains are not in chronological order by departure time.",
            )
        prev_depart = current_depart
        prev_train = train

    print(len(trains), "trains")


def check_conferences() -> None:
    """Check conferences and ensure they are in chronological order.

    Each record is first used to construct an agenda.conference.Conference.
    An unknown currency exits the process with -1; an out-of-order "start"
    raises AssertionError.
    """
    conferences_data = _load_yaml("conferences.yaml")
    conferences = [agenda.conference.Conference(**conf) for conf in conferences_data]

    prev_start = None
    prev_conf_data = None
    for conf, conf_data in zip(conferences, conferences_data):
        if conf.currency and conf.currency not in currencies:
            pprint(conf)
            print(f"currency {conf.currency!r} not in {currencies!r}")
            sys.exit(-1)

        current_start = normalize_datetime(conf_data["start"])
        if prev_start is not None and current_start < prev_start:
            _fail_out_of_order(
                "conference",
                f"{prev_conf_data.get('start')} - "
                f"{prev_conf_data.get('name', 'No name')}",
                f"{conf_data.get('start')} - {conf_data.get('name', 'No name')}",
                "Conferences are not in chronological order by start time.",
            )
        prev_start = current_start
        prev_conf_data = conf_data

    print(len(conferences), "conferences")


def check_events() -> None:
    """Check events.

    Reads events from 365 days before today to 730 days after today and
    prints how many were found.
    """
    today = date.today()
    last_year = today - timedelta(days=365)
    next_year = today + timedelta(days=2 * 365)

    # NOTE(review): agenda.events_yaml is not imported explicitly in this file;
    # presumably one of the agenda imports above loads it - confirm.
    events = agenda.events_yaml.read(data_dir, last_year, next_year)
    print(len(events), "events")


def check_coordinates(item: agenda.types.StrDict) -> None:
    """Check coordinates are valid.

    An item with neither key is accepted. Otherwise both "latitude" and
    "longitude" must be present and each must be an int or float.
    """
    if "latitude" not in item and "longitude" not in item:
        return
    assert "latitude" in item and "longitude" in item
    assert all(isinstance(item[key], (int, float)) for key in ("latitude", "longitude"))


def check_accommodation() -> None:
    """Check accommodation and ensure they are in chronological order.

    Every stay needs the required fields, valid coordinates (if any) and a
    known currency, and must not start before the previous stay's "from".
    """
    accommodation_list = _load_yaml("accommodation.yaml")

    required_fields = ["type", "name", "country", "location", "trip", "from", "to"]

    prev_from = None
    prev_stay = None
    for stay in accommodation_list:
        try:
            assert all(field in stay for field in required_fields)
            check_coordinates(stay)
        except AssertionError:
            # Show the offending record before letting the failure propagate.
            pprint(stay)
            raise

        check_currency(stay)

        current_from = normalize_datetime(stay["from"])
        if prev_from is not None and current_from < prev_from:
            _fail_out_of_order(
                "accommodation",
                f"{prev_stay.get('from')} - {prev_stay.get('name', 'No name')} "
                f"({prev_stay.get('location', '')})",
                f"{stay.get('from')} - {stay.get('name', 'No name')} "
                f"({stay.get('location', '')})",
                "Accommodation is not in chronological order by check-in time.",
            )
        prev_from = current_from
        prev_stay = stay

    print(len(accommodation_list), "stays")


def check_airports() -> None:
    """Check airports.

    Every airport must have a "country" for which agenda.get_country returns
    a truthy value.
    """
    airports = typing.cast(
        dict[str, agenda.types.StrDict], agenda.travel.parse_yaml("airports", data_dir)
    )
    print(len(airports), "airports")
    for airport in airports.values():
        assert "country" in airport
        assert agenda.get_country(airport["country"])


def check_stations() -> None:
    """Check stations.

    Every station must have a "country" for which agenda.get_country returns
    a truthy value.
    """
    stations = agenda.travel.parse_yaml("stations", data_dir)
    print(len(stations), "stations")
    for station in stations:
        assert "country" in station
        assert agenda.get_country(station["country"])


def check_ferries() -> None:
    """Check ferries and ensure they are in chronological order.

    Raises AssertionError if a ferry departs earlier than the one before it;
    each ferry's currency is also validated.
    """
    ferries = agenda.travel.parse_yaml("ferries", data_dir)

    prev_depart = None
    prev_ferry = None
    for ferry in ferries:
        current_depart = normalize_datetime(ferry["depart"])
        if prev_depart is not None and current_depart < prev_depart:
            _fail_out_of_order(
                "ferry",
                _describe_journey(prev_ferry),
                _describe_journey(ferry),
                "Ferries are not in chronological order by departure time.",
            )
        prev_depart = current_depart
        prev_ferry = ferry

        check_currency(ferry)

    print(len(ferries), "ferries")


def check_airlines() -> list[agenda.types.StrDict]:
    """Check airlines.

    Each airline must have exactly the keys icao, iata and name (plus an
    optional boolean flight_number_prefer_icao), a two-character IATA code
    and a three-character upper-case ICAO code. Returns the airline list.
    """
    airlines = agenda.travel.parse_yaml("airlines", data_dir)
    print(len(airlines), "airlines")
    for airline in airlines:
        try:
            keys = set(airline.keys())
            keys.discard("flight_number_prefer_icao")
            assert keys == {"icao", "iata", "name"}
            iata, icao = airline["iata"], airline["icao"]
            # Lengths are checked before indexing so a short code fails with
            # AssertionError (and gets printed below) instead of IndexError.
            assert len(iata) == 2 and len(icao) == 3
            # Parenthesised on purpose: without the brackets "and" binds
            # tighter than "or", letting any code ending in a digit through.
            assert iata[0].isupper() and (iata[1].isupper() or iata[1].isdigit())
            assert icao.isupper()
            if "flight_number_prefer_icao" in airline:
                assert isinstance(airline["flight_number_prefer_icao"], bool)
        except AssertionError:
            print(yaml.dump([airline]))
            raise

    return airlines


def check() -> None:
    """Validate personal data YAML files."""
    airlines = check_airlines()
    check_trips()
    check_flights({airline["iata"] for airline in airlines})
    check_trains()
    check_ferries()
    check_conferences()
    check_events()
    check_accommodation()
    check_airports()
    check_stations()


if __name__ == "__main__":
    check()