338 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			338 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/python3
 | 
						|
"""Load YAML data to ensure validity."""
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
import typing
 | 
						|
from datetime import date, datetime, timedelta
 | 
						|
from typing import Tuple, TypeVar
 | 
						|
 | 
						|
import yaml
 | 
						|
from geopy.distance import distance
 | 
						|
from rich.pretty import pprint
 | 
						|
 | 
						|
import agenda
 | 
						|
import agenda.conference
 | 
						|
import agenda.data
 | 
						|
import agenda.travel
 | 
						|
import agenda.trip
 | 
						|
import agenda.types
 | 
						|
 | 
						|
# Load the ``config.default`` module.  A non-empty ``fromlist`` makes
# ``__import__`` return the submodule itself rather than the top-level package.
config = __import__("config.default", fromlist=[""])
 | 
						|
# Directory holding the YAML files that the checks in this script read.
data_dir = config.PERSONAL_DATA
 | 
						|
 | 
						|
# Currency codes accepted by the checks: the configured list plus GBP.
currencies = set(config.CURRENCIES + ["GBP"])
 | 
						|
 | 
						|
 | 
						|
def check_currency(item: agenda.types.StrDict) -> None:
    """Exit the program if the item's currency is not an accepted one.

    Items without a ``currency`` key (or with an empty value) are accepted.
    For an unrecognised currency the item and the accepted set are printed
    and the process exits with status -1.
    """
    currency = item.get("currency")
    if currency and currency not in currencies:
        pprint(item)
        print(f"currency {currency!r} not in {currencies!r}")
        sys.exit(-1)
 | 
						|
 | 
						|
 | 
						|
def get_coords(item):
    """Return ``(latitude, longitude)`` from item, or None if either key is absent."""
    if not ("latitude" in item and "longitude" in item):
        return None
    return (item["latitude"], item["longitude"])
 | 
						|
 | 
						|
 | 
						|
T = TypeVar("T")
 | 
						|
 | 
						|
 | 
						|
def remove_nones(items: list[T | None]) -> list[T]:
 | 
						|
    """Return a new list with None values removed."""
 | 
						|
    return [item for item in items if item is not None]
 | 
						|
 | 
						|
 | 
						|
# A (latitude, longitude) pair.
LatLon = Tuple[float, float]


def distance_km(a: LatLon, b: LatLon) -> float:
    """Return the distance in kilometres between two (lat, lon) points.

    The value is computed by geopy's default ``distance`` class.

    NOTE(review): in current geopy releases that default is the geodesic
    (ellipsoidal) distance rather than a spherical great-circle distance;
    confirm against the installed geopy version.
    """
    return distance(a, b).km
 | 
						|
 | 
						|
 | 
						|
def check_trips() -> None:
    """Check trips and ensure they are in chronological order.

    Three things are verified:

    * entries in ``trips.yaml`` are ordered by their ``trip`` date (equal
      dates are accepted);
    * for every trip that has exactly one accommodation with coordinates and
      exactly one conference with coordinates, the two are closer than
      ``config.ACCOMODATION_MAX_DISTANCE_KM``;
    * the trip list and its coordinates/routes can be built; their counts
      are printed.

    Raises:
        AssertionError: if trips are out of order or an accommodation is too
            far from its conference.
    """
    filepath = os.path.join(data_dir, "trips.yaml")
    # Use a context manager so the file handle is closed instead of leaked.
    with open(filepath, "r") as trips_file:
        trips_data = yaml.safe_load(trips_file)

    prev_trip = None
    prev_trip_data = None
    for trip_data in trips_data:
        current_trip = normalize_datetime(trip_data["trip"])
        if prev_trip and current_trip < prev_trip:
            print("Out of order trip found:")
            print(
                f"  Previous: {prev_trip_data.get('trip')} - "
                + f"{prev_trip_data.get('name', 'No name')}"
            )
            print(
                f"  Current:  {trip_data.get('trip')} - "
                + f"{trip_data.get('name', 'No name')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError("Trips are not in chronological order by trip date.")
        prev_trip = current_trip
        prev_trip_data = trip_data

    trip_list = agenda.trip.build_trip_list(data_dir)
    print(len(trip_list), "trips")

    max_dist = config.ACCOMODATION_MAX_DISTANCE_KM
    for trip in trip_list:
        if not trip.accommodation or not trip.conferences:
            continue
        accommodation_coords = remove_nones([get_coords(a) for a in trip.accommodation])
        conference_coords = remove_nones([get_coords(c) for c in trip.conferences])
        # Only the unambiguous case (one located stay, one located conference)
        # is compared.
        if len(accommodation_coords) != 1 or len(conference_coords) != 1:
            continue
        dist = distance_km(conference_coords[0], accommodation_coords[0])
        if not dist < max_dist:
            raise AssertionError(
                f"Accommodation at {accommodation_coords[0]} is {dist:.1f} km from "
                f"conference at {conference_coords[0]} (limit {max_dist} km)"
            )

    coords, routes = agenda.trip.get_coordinates_and_routes(trip_list, data_dir)
    print(len(coords), "coords")
    print(len(routes), "routes")
 | 
						|
 | 
						|
 | 
						|
def check_flights(airlines: set[str]) -> None:
    """Check flights and ensure they are in chronological order.

    Every booking must have a ``trip`` key, every flight's ``airline`` must be
    in *airlines*, the booking currency must be accepted, and bookings must be
    strictly ordered by the departure of their first flight.  A booking is
    pretty-printed once for each of its flights lacking ``co2_kg``.  Summary
    counts are printed at the end.
    """
    bookings = agenda.travel.parse_yaml("flights", data_dir)

    flight_count = 0
    co2_flight_count = 0
    prev_first_depart = None

    for booking in bookings:
        # Show the offending booking before the assertion below fires.
        if "trip" not in booking:
            pprint(booking)
        assert "trip" in booking

        flights = booking["flights"]
        assert all(flight["airline"] in airlines for flight in flights)

        flight_count += len(flights)
        co2_flight_count += sum(1 for flight in flights if "co2_kg" in flight)
        for flight in flights:
            if "co2_kg" not in flight:
                pprint(booking)
        check_currency(booking)

        first_depart = flights[0]["depart"]
        if prev_first_depart:
            assert (
                first_depart > prev_first_depart
            ), "Bookings are not in chronological order by first flight's departure."
        prev_first_depart = first_depart

    print(
        f"{len(bookings)} flight bookings, {flight_count} flights, "
        f"{co2_flight_count} with CO2 numbers"
    )
 | 
						|
 | 
						|
 | 
						|
def normalize_datetime(dt_value):
    """Return dt_value as a naive datetime so dates and datetimes compare.

    A datetime has its tzinfo dropped (the clock time is kept as-is, not
    converted); a plain date becomes midnight of that day; any other value is
    returned unchanged.
    """
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(dt_value, datetime):
        return dt_value.replace(tzinfo=None)
    if isinstance(dt_value, date):
        return datetime.combine(dt_value, datetime.min.time())
    return dt_value
 | 
						|
 | 
						|
 | 
						|
def check_trains() -> None:
    """Check trains and ensure they are in chronological order.

    Departures are compared after passing through ``normalize_datetime``.
    Equal departure times are accepted.  The number of trains is printed on
    success.

    Raises:
        AssertionError: if a train departs before the one listed ahead of it.
    """
    trains = agenda.travel.parse_yaml("trains", data_dir)

    prev_depart = None
    prev_train = None
    for train in trains:
        current_depart = normalize_datetime(train["depart"])
        if prev_depart and current_depart < prev_depart:
            print("Out of order train found:")
            print(
                f"  Previous: {prev_train.get('depart')} "
                f"{prev_train.get('from', '')} -> {prev_train.get('to', '')}"
            )
            print(
                f"  Current:  {train.get('depart')} "
                f"{train.get('from', '')} -> {train.get('to', '')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Trains are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_train = train

    print(len(trains), "trains")
 | 
						|
 | 
						|
 | 
						|
def check_conferences() -> None:
    """Check conferences and ensure they are in chronological order.

    Every entry of ``conferences.yaml`` is first turned into an
    ``agenda.conference.Conference``.  Each conference currency must be
    accepted (otherwise the process exits with status -1), and entries must be
    ordered by ``start`` (equal starts are accepted).  The number of
    conferences is printed on success.

    Raises:
        AssertionError: if a conference starts before the one listed ahead of it.
    """
    filepath = os.path.join(data_dir, "conferences.yaml")
    # Use a context manager so the file handle is closed instead of leaked.
    with open(filepath, "r") as conferences_file:
        conferences_data = yaml.safe_load(conferences_file)
    conferences = [agenda.conference.Conference(**conf) for conf in conferences_data]

    prev_start = None
    prev_conf_data = None
    # Walk the parsed objects and their raw dicts in lockstep.
    for conf, conf_data in zip(conferences, conferences_data):
        if conf.currency and conf.currency not in currencies:
            pprint(conf)
            print(f"currency {conf.currency!r} not in {currencies!r}")
            sys.exit(-1)

        current_start = normalize_datetime(conf_data["start"])
        if prev_start and current_start < prev_start:
            print("Out of order conference found:")
            print(
                f"  Previous: {prev_conf_data.get('start')} - "
                f"{prev_conf_data.get('name', 'No name')}"
            )
            print(
                f"  Current:  {conf_data.get('start')} - "
                f"{conf_data.get('name', 'No name')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Conferences are not in chronological order by start time."
            )
        prev_start = current_start
        prev_conf_data = conf_data

    print(len(conferences), "conferences")
 | 
						|
 | 
						|
 | 
						|
def check_events() -> None:
    """Read events for a window around today and print how many there are.

    The window runs from 365 days before today to 730 days after it.
    """
    # NOTE(review): this file never imports ``agenda.events_yaml`` explicitly;
    # presumably it is loaded as a side effect of another agenda import - confirm.
    one_year = timedelta(days=365)
    today = date.today()
    window_start = today - one_year
    window_end = today + 2 * one_year

    events = agenda.events_yaml.read(data_dir, window_start, window_end)
    print(len(events), "events")
 | 
						|
 | 
						|
 | 
						|
def check_coordinates(item: agenda.types.StrDict) -> None:
    """Check that coordinates, if given, are a complete numeric pair.

    An item with neither ``latitude`` nor ``longitude`` passes.  Otherwise
    both keys must be present and both values must be int or float.
    """
    coord_keys = ("latitude", "longitude")
    if not any(key in item for key in coord_keys):
        return
    assert all(key in item for key in coord_keys)
    for key in coord_keys:
        assert isinstance(item[key], (int, float))
 | 
						|
 | 
						|
 | 
						|
def check_accommodation() -> None:
    """Check accommodation and ensure they are in chronological order.

    Every stay in ``accommodation.yaml`` must contain all required fields,
    pass ``check_coordinates`` and ``check_currency``, and stays must be
    ordered by ``from`` (equal check-in times are accepted).  The number of
    stays is printed on success.

    Raises:
        AssertionError: if a stay lacks a required field, has invalid
            coordinates, or is listed out of order.
    """
    filepath = os.path.join(data_dir, "accommodation.yaml")
    # Use a context manager so the file handle is closed instead of leaked.
    with open(filepath) as accommodation_file:
        accommodation_list = yaml.safe_load(accommodation_file)

    required_fields = ["type", "name", "country", "location", "trip", "from", "to"]

    prev_from = None
    prev_stay = None
    for stay in accommodation_list:
        try:
            missing = [field for field in required_fields if field not in stay]
            if missing:
                raise AssertionError(f"missing required fields: {missing}")
            check_coordinates(stay)
        except AssertionError:
            # Show the offending stay, then let the failure propagate.
            pprint(stay)
            raise

        check_currency(stay)

        current_from = normalize_datetime(stay["from"])
        if prev_from and current_from < prev_from:
            print("Out of order accommodation found:")
            print(
                f"  Previous: {prev_stay.get('from')} - "
                f"{prev_stay.get('name', 'No name')} ({prev_stay.get('location', '')})"
            )
            print(
                f"  Current:  {stay.get('from')} - "
                f"{stay.get('name', 'No name')} ({stay.get('location', '')})"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Accommodation is not in chronological order by check-in time."
            )
        prev_from = current_from
        prev_stay = stay

    print(len(accommodation_list), "stays")
 | 
						|
 | 
						|
 | 
						|
def check_airports() -> None:
    """Check that every airport has a country accepted by ``agenda.get_country``.

    Prints the number of airports, then asserts that each airport has a
    ``country`` key for which ``agenda.get_country`` returns a truthy value.
    """
    parsed = agenda.travel.parse_yaml("airports", data_dir)
    airports = typing.cast(dict[str, agenda.types.StrDict], parsed)
    print(len(airports), "airports")
    for details in airports.values():
        assert "country" in details
        assert agenda.get_country(details["country"])
 | 
						|
 | 
						|
 | 
						|
def check_stations() -> None:
    """Check that every station has a country accepted by ``agenda.get_country``.

    Prints the number of stations, then asserts that each station has a
    ``country`` key for which ``agenda.get_country`` returns a truthy value.
    """
    stations = agenda.travel.parse_yaml("stations", data_dir)
    station_count = len(stations)
    print(station_count, "stations")
    for entry in stations:
        assert "country" in entry
        assert agenda.get_country(entry["country"])
 | 
						|
 | 
						|
 | 
						|
def check_ferries() -> None:
    """Check ferries and ensure they are in chronological order.

    Departures are compared after passing through ``normalize_datetime``.
    Equal departure times are accepted.  Each ferry's currency is also checked
    with ``check_currency``.  The number of ferries is printed on success.

    Raises:
        AssertionError: if a ferry departs before the one listed ahead of it.
    """
    ferries = agenda.travel.parse_yaml("ferries", data_dir)

    prev_depart = None
    prev_ferry = None
    for ferry in ferries:
        current_depart = normalize_datetime(ferry["depart"])
        if prev_depart and current_depart < prev_depart:
            print("Out of order ferry found:")
            print(
                f"  Previous: {prev_ferry.get('depart')} "
                f"{prev_ferry.get('from', '')} -> {prev_ferry.get('to', '')}"
            )
            print(
                f"  Current:  {ferry.get('depart')} "
                f"{ferry.get('from', '')} -> {ferry.get('to', '')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Ferries are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_ferry = ferry
        check_currency(ferry)

    print(len(ferries), "ferries")
 | 
						|
 | 
						|
 | 
						|
def check_airlines() -> list[agenda.types.StrDict]:
    """Check airlines and return the parsed list.

    Every airline must have exactly the keys ``icao``, ``iata`` and ``name``,
    plus an optional boolean ``flight_number_prefer_icao``.  The IATA code
    must be two characters: an uppercase letter followed by an uppercase
    letter or a digit.  The ICAO code must be three characters and pass
    ``str.isupper()``.

    Returns:
        The airline records as returned by ``agenda.travel.parse_yaml``.

    Raises:
        AssertionError: on the first invalid airline, after dumping it as YAML.
    """
    airlines = agenda.travel.parse_yaml("airlines", data_dir)
    print(len(airlines), "airlines")
    for airline in airlines:
        try:
            keys = set(airline) - {"flight_number_prefer_icao"}
            assert keys == {"icao", "iata", "name"}
            iata, icao = airline["iata"], airline["icao"]
            # Check lengths before indexing, so a too-short code fails the
            # assertion (and is dumped below) instead of raising IndexError.
            assert len(iata) == 2 and len(icao) == 3
            # Parentheses are required: ``and`` binds tighter than ``or``, so
            # without them any code whose second character is a digit passed
            # regardless of the first character.
            # NOTE(review): codes starting with a digit are rejected here, as
            # they mostly were before - confirm that is intended.
            assert iata[0].isupper() and (iata[1].isupper() or iata[1].isdigit())
            assert icao.isupper()
            if "flight_number_prefer_icao" in airline:
                assert isinstance(airline["flight_number_prefer_icao"], bool)
        except AssertionError:
            # Show the offending record, then let the failure propagate.
            print(yaml.dump([airline]))
            raise

    return airlines
 | 
						|
 | 
						|
 | 
						|
def check() -> None:
    """Run every personal-data check in a fixed order.

    Airlines are checked first because their IATA codes are passed to the
    flight check.
    """
    airlines = check_airlines()
    check_trips()
    known_iata = {airline["iata"] for airline in airlines}
    check_flights(known_iata)

    # The remaining checks take no arguments; run them in the listed order.
    remaining_checks = (
        check_trains,
        check_ferries,
        check_conferences,
        check_events,
        check_accommodation,
        check_airports,
        check_stations,
    )
    for run_check in remaining_checks:
        run_check()
 | 
						|
 | 
						|
 | 
						|
# Run all checks when this file is executed as a script (not when imported).
if __name__ == "__main__":
 | 
						|
    check()
 |