agenda/validate_yaml.py

301 lines
10 KiB
Python
Executable file

#!/usr/bin/python3
"""Load YAML data to ensure validity."""
import os
import sys
import typing
from datetime import date, timedelta, datetime
import yaml
from rich.pretty import pprint
import agenda
import agenda.conference
import agenda.data
import agenda.travel
import agenda.trip
import agenda.types
# Load the default configuration module by dotted name. The non-empty
# ``fromlist`` makes ``__import__`` return the ``config.default`` submodule
# itself instead of the top-level ``config`` package.
config = __import__("config.default", fromlist=[""])
# Directory containing the personal YAML data files read by the checks below.
data_dir = config.PERSONAL_DATA
# Currency codes accepted by the currency checks: the configured list plus "GBP".
currencies = set(config.CURRENCIES + ["GBP"])
def check_currency(item: agenda.types.StrDict) -> None:
    """Exit the program if the item's currency is not an accepted one.

    An item whose ``currency`` entry is missing or falsy is accepted without
    further checks. Otherwise the value must be a member of the module-level
    ``currencies`` set; if it is not, the item and an explanatory message are
    printed and the process exits with status ``-1``.
    """
    code = item.get("currency")
    if code and code not in currencies:
        pprint(item)
        print(f"currency {code!r} not in {currencies!r}")
        sys.exit(-1)
def check_trips() -> None:
    """Check trips and ensure they are in chronological order.

    Loads ``trips.yaml`` from the data directory and verifies that each
    entry's ``trip`` value is not earlier than that of the entry before it.
    Afterwards the trip list and its coordinates/routes are built through
    ``agenda.trip`` and their counts are printed.

    Raises:
        AssertionError: If an entry is dated before the entry preceding it.
    """
    filepath = os.path.join(data_dir, "trips.yaml")
    # Context manager so the file handle is closed as soon as parsing is done.
    with open(filepath, "r") as trips_file:
        trips_data = yaml.safe_load(trips_file)

    prev_trip = None
    prev_trip_data = None
    for trip_data in trips_data:
        # Normalise so plain dates and (possibly tz-aware) datetimes compare.
        current_trip = normalize_datetime(trip_data["trip"])
        if prev_trip and current_trip < prev_trip:
            print("Out of order trip found:")
            print(
                f" Previous: {prev_trip_data.get('trip')} - {prev_trip_data.get('name', 'No name')}"
            )
            print(
                f" Current: {trip_data.get('trip')} - {trip_data.get('name', 'No name')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``,
            # which would let out-of-order data pass silently.
            raise AssertionError("Trips are not in chronological order by trip date.")
        prev_trip = current_trip
        prev_trip_data = trip_data

    trip_list = agenda.trip.build_trip_list(data_dir)
    print(len(trip_list), "trips")

    coords, routes = agenda.trip.get_coordinates_and_routes(trip_list, data_dir)
    print(len(coords), "coords")
    print(len(routes), "routes")
def check_flights(airlines: set[str]) -> None:
    """Validate flight bookings and their chronological ordering.

    Every booking must have a ``trip`` key, every flight's ``airline`` must be
    a member of ``airlines``, and each booking's first departure must be later
    than the previous booking's first departure. Bookings containing a flight
    without ``co2_kg`` are pretty-printed (once per such flight). A summary
    line with the booking, flight and CO2 counts is printed at the end.
    """
    bookings = agenda.travel.parse_yaml("flights", data_dir)

    total_flights = 0
    total_with_co2 = 0
    last_first_depart = None

    for booking in bookings:
        # Show the offending booking before the assertion aborts the run.
        if "trip" not in booking:
            pprint(booking)
        assert "trip" in booking

        legs = booking["flights"]
        assert all(leg["airline"] in airlines for leg in legs)

        total_flights += len(legs)
        total_with_co2 += sum(1 for leg in legs if "co2_kg" in leg)

        # Informational only: surface bookings that still lack CO2 figures.
        for leg in legs:
            if "co2_kg" not in leg:
                pprint(booking)

        check_currency(booking)

        first_depart = legs[0]["depart"]
        if last_first_depart:
            assert (
                first_depart > last_first_depart
            ), "Bookings are not in chronological order by first flight's departure."
        last_first_depart = first_depart

    print(
        f"{len(bookings)} flight bookings, {total_flights} flights, "
        f"{total_with_co2} with CO2 numbers"
    )
def normalize_datetime(dt_value):
    """Return a naive ``datetime`` for a date or datetime so values compare.

    A ``datetime`` has its ``tzinfo`` dropped (the clock fields are left
    unchanged, no conversion is applied). A plain ``date`` becomes midnight of
    that day. Any other value is returned unchanged.
    """
    # ``datetime`` is a subclass of ``date``, so it has to be tested first.
    if isinstance(dt_value, datetime):
        return dt_value.replace(tzinfo=None)
    if isinstance(dt_value, date):
        return datetime.combine(dt_value, datetime.min.time())
    return dt_value
def check_trains() -> None:
    """Check trains and ensure they are in chronological order.

    Loads the ``trains`` data through ``agenda.travel.parse_yaml`` and
    verifies that each entry's ``depart`` value is not earlier than that of
    the entry before it, then prints the number of entries.

    Raises:
        AssertionError: If an entry departs before the entry preceding it.
    """
    trains = agenda.travel.parse_yaml("trains", data_dir)

    prev_depart = None
    prev_train = None
    for train in trains:
        # Normalise so plain dates and (possibly tz-aware) datetimes compare.
        current_depart = normalize_datetime(train["depart"])
        if prev_depart and current_depart < prev_depart:
            print("Out of order train found:")
            print(
                f" Previous: {prev_train.get('depart')} {prev_train.get('from', '')} -> {prev_train.get('to', '')}"
            )
            print(
                f" Current: {train.get('depart')} {train.get('from', '')} -> {train.get('to', '')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Trains are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_train = train

    print(len(trains), "trains")
def check_conferences() -> None:
    """Check conferences and ensure they are in chronological order.

    Loads ``conferences.yaml`` from the data directory, constructs an
    ``agenda.conference.Conference`` from every entry, checks each
    conference's currency against the module-level ``currencies`` set and
    verifies that the ``start`` values never go backwards. Prints the number
    of conferences at the end.

    Raises:
        AssertionError: If an entry starts before the entry preceding it.
        SystemExit: If a conference has a currency outside ``currencies``.
    """
    filepath = os.path.join(data_dir, "conferences.yaml")
    # Context manager so the file handle is closed as soon as parsing is done.
    with open(filepath, "r") as conferences_file:
        conferences_data = yaml.safe_load(conferences_file)

    # Build every object up front so construction errors surface before any
    # ordering check runs.
    conferences = [agenda.conference.Conference(**conf) for conf in conferences_data]

    prev_start = None
    prev_conf_data = None
    for conf, conf_data in zip(conferences, conferences_data):
        if conf.currency and conf.currency not in currencies:
            pprint(conf)
            print(f"currency {conf.currency!r} not in {currencies!r}")
            sys.exit(-1)

        # Normalise so plain dates and (possibly tz-aware) datetimes compare.
        current_start = normalize_datetime(conf_data["start"])
        if prev_start and current_start < prev_start:
            print("Out of order conference found:")
            print(
                f" Previous: {prev_conf_data.get('start')} - {prev_conf_data.get('name', 'No name')}"
            )
            print(
                f" Current: {conf_data.get('start')} - {conf_data.get('name', 'No name')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Conferences are not in chronological order by start time."
            )
        prev_start = current_start
        prev_conf_data = conf_data

    print(len(conferences), "conferences")
def check_events() -> None:
    """Read the events data for a window around today and print the count.

    The window passed to ``agenda.events_yaml.read`` runs from 365 days
    before today to 730 days after today.
    """
    # NOTE(review): ``agenda.events_yaml`` is not imported explicitly in this
    # file; presumably another ``agenda`` import loads it - confirm.
    today = date.today()
    window_start = today - timedelta(days=365)
    window_end = today + timedelta(days=2 * 365)
    events = agenda.events_yaml.read(data_dir, window_start, window_end)
    print(len(events), "events")
def check_coordinates(item: agenda.types.StrDict) -> None:
    """Assert that an item's coordinates, if present, are well formed.

    An item with neither ``latitude`` nor ``longitude`` passes. Otherwise both
    keys must be present and both values must be ``int`` or ``float``.
    """
    has_latitude = "latitude" in item
    has_longitude = "longitude" in item
    if not (has_latitude or has_longitude):
        return

    # One coordinate without the other is an error.
    assert has_latitude and has_longitude
    for key in ("latitude", "longitude"):
        assert isinstance(item[key], (int, float))
def check_accommodation() -> None:
    """Check accommodation and ensure they are in chronological order.

    Loads ``accommodation.yaml`` from the data directory. Every stay must
    contain all required fields, have valid coordinates (if any), use an
    accepted currency, and have a ``from`` value that is not earlier than that
    of the stay before it. Prints the number of stays at the end.

    Raises:
        AssertionError: If a stay lacks a required field, has malformed
            coordinates, or begins before the stay preceding it.
    """
    filepath = os.path.join(data_dir, "accommodation.yaml")
    # Context manager so the file handle is closed as soon as parsing is done.
    with open(filepath) as accommodation_file:
        accommodation_list = yaml.safe_load(accommodation_file)

    required_fields = ["type", "name", "country", "location", "trip", "from", "to"]

    prev_from = None
    prev_stay = None
    for stay in accommodation_list:
        try:
            assert all(field in stay for field in required_fields)
            check_coordinates(stay)
        except AssertionError:
            # Show which stay failed before propagating the error.
            pprint(stay)
            raise

        check_currency(stay)

        # Normalise so plain dates and (possibly tz-aware) datetimes compare.
        current_from = normalize_datetime(stay["from"])
        if prev_from and current_from < prev_from:
            print("Out of order accommodation found:")
            print(
                f" Previous: {prev_stay.get('from')} - {prev_stay.get('name', 'No name')} ({prev_stay.get('location', '')})"
            )
            print(
                f" Current: {stay.get('from')} - {stay.get('name', 'No name')} ({stay.get('location', '')})"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Accommodation is not in chronological order by check-in time."
            )
        prev_from = current_from
        prev_stay = stay

    print(len(accommodation_list), "stays")
def check_airports() -> None:
    """Print the airport count and assert each airport has a known country.

    Each value of the ``airports`` mapping must contain a ``country`` key for
    which ``agenda.get_country`` returns a truthy result.
    """
    loaded = agenda.travel.parse_yaml("airports", data_dir)
    airports = typing.cast(dict[str, agenda.types.StrDict], loaded)
    print(len(airports), "airports")

    for details in airports.values():
        assert "country" in details
        country_code = details["country"]
        assert agenda.get_country(country_code)
def check_stations() -> None:
    """Print the station count and assert each station has a known country.

    Each station entry must contain a ``country`` key for which
    ``agenda.get_country`` returns a truthy result.
    """
    stations = agenda.travel.parse_yaml("stations", data_dir)
    print(len(stations), "stations")

    for entry in stations:
        assert "country" in entry
        country_code = entry["country"]
        assert agenda.get_country(country_code)
def check_ferries() -> None:
    """Check ferries and ensure they are in chronological order.

    Loads the ``ferries`` data through ``agenda.travel.parse_yaml``, verifies
    that each entry's ``depart`` value is not earlier than that of the entry
    before it, and checks each entry's currency. Prints the number of entries
    at the end.

    Raises:
        AssertionError: If an entry departs before the entry preceding it.
    """
    ferries = agenda.travel.parse_yaml("ferries", data_dir)

    prev_depart = None
    prev_ferry = None
    for ferry in ferries:
        # Normalise so plain dates and (possibly tz-aware) datetimes compare.
        current_depart = normalize_datetime(ferry["depart"])
        if prev_depart and current_depart < prev_depart:
            print("Out of order ferry found:")
            print(
                f" Previous: {prev_ferry.get('depart')} {prev_ferry.get('from', '')} -> {prev_ferry.get('to', '')}"
            )
            print(
                f" Current: {ferry.get('depart')} {ferry.get('from', '')} -> {ferry.get('to', '')}"
            )
            # Raise explicitly: ``assert False`` is removed under ``python -O``.
            raise AssertionError(
                "Ferries are not in chronological order by departure time."
            )
        prev_depart = current_depart
        prev_ferry = ferry

        check_currency(ferry)

    print(len(ferries), "ferries")
def check_airlines() -> list[agenda.types.StrDict]:
    """Check airlines and return the loaded airline records.

    Every airline must have exactly the keys ``icao``, ``iata`` and ``name``
    (plus the optional boolean ``flight_number_prefer_icao``). The IATA code
    must be two characters: an uppercase letter followed by an uppercase
    letter or a digit. The ICAO code must be three characters and uppercase.

    Returns:
        The airline records as returned by ``agenda.travel.parse_yaml``.

    Raises:
        AssertionError: If a record fails validation; the offending record is
            dumped as YAML before the error propagates.
    """
    airlines = agenda.travel.parse_yaml("airlines", data_dir)
    print(len(airlines), "airlines")
    for airline in airlines:
        try:
            keys = set(airline.keys())
            keys.discard("flight_number_prefer_icao")
            assert keys == {"icao", "iata", "name"}
            iata, icao = airline["iata"], airline["icao"]
            # Check lengths before indexing so a short code fails as an
            # AssertionError (and gets dumped below) rather than an IndexError.
            assert len(iata) == 2 and len(icao) == 3
            # Parentheses matter: without them ``and`` binds tighter than
            # ``or`` and any code whose second character is a digit passes,
            # whatever the first character is.
            assert iata[0].isupper() and (iata[1].isupper() or iata[1].isdigit())
            assert icao.isupper()
            if "flight_number_prefer_icao" in airline:
                assert isinstance(airline["flight_number_prefer_icao"], bool)
        except AssertionError:
            print(yaml.dump([airline]))
            raise

    return airlines
def check() -> None:
    """Run every validation over the personal data YAML files, in order."""
    airline_records = check_airlines()
    check_trips()

    # The flight check needs the set of known airline IATA codes.
    iata_codes = {record["iata"] for record in airline_records}
    check_flights(iata_codes)

    remaining_checks = (
        check_trains,
        check_ferries,
        check_conferences,
        check_events,
        check_accommodation,
        check_airports,
        check_stations,
    )
    for run_check in remaining_checks:
        run_check()


if __name__ == "__main__":
    check()