From 7ec36a5e807c19dd8f1fd7be60e2f4d8a0dd8a5b Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 22 Jun 2026 09:06:09 +0100 Subject: [PATCH] Document personal data YAML formats --- docs/personal-data-yaml.md | 720 +++++++++++++++++++++++++++++++++++++ validate_yaml.py | 62 +++- 2 files changed, 776 insertions(+), 6 deletions(-) create mode 100644 docs/personal-data-yaml.md diff --git a/docs/personal-data-yaml.md b/docs/personal-data-yaml.md new file mode 100644 index 0000000..572ece6 --- /dev/null +++ b/docs/personal-data-yaml.md @@ -0,0 +1,720 @@ +# Personal Data YAML Formats + +This document describes the YAML files read from `../personal-data/`. It is intended for humans and LLMs generating new entries. + +## General Rules + +- Use YAML lists for most files. `airports.yaml` is a mapping keyed by IATA code. +- Use ISO-like YAML dates and datetimes: + - Date: `2026-03-14` + - Datetime with timezone: `2026-03-14 09:30:00+01:00` +- Use lowercase ISO 3166-1 alpha-2 country codes, for example `gb`, `be`, `us`. +- Use quoted strings for prices and identifiers that might otherwise be parsed as numbers: `'154.34'`, `'06525269'`, `'0042'`. +- Currencies must be in `config.CURRENCIES` or `GBP`. +- Travel and trip-related entries are grouped by the `trip` date. That date should match an entry in `trips.yaml` when a named trip is needed, but trip groups can also be created from travel/accommodation/conference entries. +- Keep chronological files sorted by their natural start field. `validate_yaml.py` checks ordering for trips, flights, trains, ferries, conferences, and accommodation. +- Coordinates are `latitude` then `longitude`, both numeric. + +## Cross-File References + +- `flights.yaml` flight `airline` values reference `airlines.yaml` `iata`. +- `flights.yaml` flight `from` and `to` values reference `airports.yaml` keys. +- `trains.yaml` journey and leg `from` and `to` values reference `stations.yaml` `name`. 
+- `ferries.yaml` `from` and `to` values reference `ferry_terminals.yaml` `name`. +- `buses.yaml` `from` and `to` values reference `bus_stops.yaml` `name`. +- `coaches.yaml` `from` and `to` values reference `coach_stations.yaml` `name`. +- Station, stop, and terminal `routes` values name GeoJSON files without the `.geojson` extension. + +## `accommodation.yaml` + +Top-level shape: list of accommodation stays. + +Used by: agenda events, trip pages, trip maps, busy/location logic. + +Required fields: + +- `type`: accommodation category such as `hotel`, `apartment`, `airbnb`. +- `name`: property name. +- `country`: lowercase country code. +- `location`: city or place name. +- `trip`: trip start date. +- `from`: check-in datetime. +- `to`: check-out datetime. + +Common optional fields: + +- Booking: `operator`, `booking_reference`, `confirmation_code`, `booking_url`, `url`, `email`, `phone`. +- Money: `price`, `currency`, `room_rate`, `estimated_taxes`, `estimated_additional_fees`. +- Room/stay: `address`, `room_type`, `room_name`, `room_number`, `number_of_adults`, `breakfast_included`, `breakfast`, `cancellation_policy`, `free_cancellation`, `refundable`. +- Coordinates/IDs: `latitude`, `longitude`, `timezone`, `osm_node`, `wikidata`. +- Loyalty: `rewards`, `radisson_rewards_number`. + +Example: + +```yaml +- type: hotel + operator: Example Hotels + name: Example Central Hotel + location: Brussels + country: be + trip: 2026-02-06 + from: 2026-02-06 15:00:00+01:00 + to: 2026-02-09 11:00:00+01:00 + address: 1 Example Street, Brussels + confirmation_code: ABC123 + price: '312.50' + currency: EUR + number_of_adults: 1 + room_type: Standard double + breakfast_included: true + latitude: 50.8466 + longitude: 4.3528 +``` + +## `airlines.yaml` + +Top-level shape: list of airlines. + +Used by: flight loading and display. + +Required fields: + +- `iata`: two-character IATA airline code. +- `icao`: three-character ICAO airline code. +- `name`: airline name. 
+ +Optional fields: + +- `flight_number_prefer_icao`: boolean. When true, display flight numbers with the ICAO code instead of the IATA code. + +Example: + +```yaml +- iata: BA + icao: BAW + name: British Airways +- iata: U2 + icao: EZY + name: easyJet + flight_number_prefer_icao: true +``` + +## `airports.yaml` + +Top-level shape: mapping keyed by IATA airport code. + +Used by: flight loading, distance calculation, maps, unbooked route hints. + +Required fields for each airport: + +- `iata`: IATA code. Should match the mapping key. +- `name`: airport name. +- `city`: city or main served place. +- `country`: lowercase country code. +- `latitude`, `longitude`: numeric coordinates. +- `qid`: Wikidata QID. + +Optional fields: + +- `alt_name`: display name override used in labels. +- `elevation`: metres. +- `website`, `url`. + +Example: + +```yaml +BRU: + iata: BRU + name: Brussels Airport + city: Brussels + country: be + qid: Q220613 + latitude: 50.9014 + longitude: 4.4844 + elevation: 56 + website: https://www.brusselsairport.be/ +``` + +## `bus_stops.yaml` + +Top-level shape: list of bus stops. + +Used by: bus trip loading, maps, route rendering. + +Required fields: + +- `name`: stop name referenced by `buses.yaml`. +- `city`: city or place. +- `country`: lowercase country code. +- `latitude`, `longitude`: numeric coordinates. +- `routes`: mapping from destination stop name to GeoJSON filename without `.geojson`. + +Optional fields: + +- `Atco`: UK ATCO stop code. +- `osm_node`. + +Example: + +```yaml +- name: West Street + city: Bristol + country: gb + Atco: '0100BRA10073' + osm_node: 485403178 + latitude: 51.4393854 + longitude: -2.6017977 + routes: + Bristol Airport: West_Street_to_Bristol_Airport +``` + +## `buses.yaml` + +Top-level shape: list of bus journeys. + +Used by: trip loading, maps, trip timeline. Bus journeys are not counted for Schengen tracking. + +Required fields: + +- `trip`: trip start date. +- `depart`: departure datetime. 
+- `arrive`: arrival datetime. `validate_yaml.py` requires arrival after departure and duration no more than 12 hours. +- `from`, `to`: names from `bus_stops.yaml`. + +Optional fields: + +- `operator`, `price`, `currency`. + +Example: + +```yaml +- trip: 2026-03-14 + depart: 2026-03-14 08:20:00+00:00 + arrive: 2026-03-14 08:55:00+00:00 + from: West Street + to: Bristol Airport + operator: First Bus + price: '2.00' + currency: GBP +``` + +## `coach_stations.yaml` + +Top-level shape: list of coach stations. + +Used by: coach trip loading, maps, route rendering. + +Fields are the same pattern as `bus_stops.yaml`, except entries describe coach stations. + +Example: + +```yaml +- name: Example Coach Station + city: Example City + country: gb + latitude: 51.4500 + longitude: -2.5800 + routes: + Other Coach Station: example_city_to_other_city +``` + +## `coaches.yaml` + +Top-level shape: list of coach journeys. + +Used by: trip loading, maps, trip timeline. Coach journeys are not counted for Schengen tracking. + +Required fields: + +- `trip`, `depart`, `arrive`, `from`, `to`. +- `from` and `to` must be names from `coach_stations.yaml`. + +Optional fields: + +- `operator`, `class`, `booking_reference`, `price`, `currency`, `price_details`. + +Example: + +```yaml +- booking_reference: ABC123 + trip: 2026-05-25 + price: '55.00' + currency: GBP + depart: 2026-05-26 14:45:00+01:00 + arrive: 2026-05-26 18:30:00+01:00 + from: Example Coach Station + to: Other Coach Station + operator: Example Coaches + class: Standard + price_details: + base_fare: '55.00' +``` + +## `conferences.yaml` + +Top-level shape: list of conferences and conference-like events. + +Used by: agenda events, trip pages, trip maps, conference list, CFP reminders. + +Required fields: + +- `name`: event name. +- `topic`: topic/category. +- `location`: city or location label. +- `start`: date or datetime. +- `end`: date or datetime. Must be no earlier than `start`, and duration must be under 20 days. 
+ +Common optional fields: + +- Trip/location: `trip`, `country`, `venue`, `address`, `latitude`, `longitude`. +- Attendance: `going`, `registered`, `speaking`, `online`, `accommodation_booked`, `transport_booked`. +- Partial attendance: `attend_start`, `attend_end`. These may be dates or timezone-aware datetimes and are used on trip pages instead of official dates. +- Web/CFP: `url`, `cfp_end`, `cfp_url`, `hashtag`, `description`. +- Money/tickets: `free`, `price`, `currency`, `ticket_type`. +- Other flags: `hackathon`, `attendees`. + +Example: + +```yaml +- name: FOSDEM + topic: FOSDEM + location: Brussels + country: be + trip: 2026-02-06 + start: 2026-02-07 + end: 2026-02-08 + attend_start: 2026-02-07 14:00:00+01:00 + attend_end: 2026-02-08 + going: true + registered: true + accommodation_booked: true + transport_booked: true + url: https://fosdem.org/2026/ + venue: Universite Libre de Bruxelles + address: Av. Franklin Roosevelt 50, 1050 Bruxelles, Belgium + latitude: 50.8132 + longitude: 4.3822 +``` + +## `entities.yaml` + +Top-level shape: list of people/entities. + +Used by: birthday events. + +Required fields for birthday support: + +- `name`: full name. +- `label`: display name. +- `type`: for example `human`. +- `birthday`: mapping with `day`, `month`, and optionally `year`. + +Optional fields: + +- `relation`, `email`. + +If `birthday.year` is omitted, age is shown as unknown. + +Example: + +```yaml +- name: Ada Example + label: Ada + type: human + relation: friend + birthday: + day: 10 + month: 12 + year: 1990 +``` + +## `events.yaml` + +Top-level shape: list of general events. + +Used by: agenda events and trip pages. + +Required fields: + +- `name`: event type. +- One date source: + - `date`: single event date/datetime, or + - `start_date`: used for events with a separate `end_date`, or + - `rrule`: recurrence rule string. + +Optional fields: + +- `title`: display title. +- `end_date`: explicit end date/datetime. 
+- `duration`: ISO 8601 duration such as `PT2H`, `P1D`. +- `url`. +- Trip/map fields: `trip`, `location`, `country`, `venue`, `address`, `latitude`, `longitude`. + +Special cases: + +- For `name: travel_insurance`, the event date field is `end_date`; no `end_date` is attached to the generated event. +- For recurring events, if the `rrule` has no `BYHOUR`, `BYMINUTE`, or `BYSECOND`, generated events are all-day dates. Otherwise generated datetimes are localized to UK time. +- `skip_trips=True` consumers ignore entries with `trip`. + +Examples: + +```yaml +- name: travel_insurance + start_date: 2026-05-04 + end_date: 2027-05-03 + +- name: meetup + title: Example Geo Meetup + date: 2026-06-18 18:30:00+01:00 + duration: PT2H + url: https://example.org/meetup + location: Bristol + country: gb + latitude: 51.4545 + longitude: -2.5879 + +- name: market + title: Monthly Example Market + rrule: FREQ=MONTHLY;BYDAY=1SA +``` + +## `ferries.yaml` + +Top-level shape: list of ferry journeys. + +Used by: trip loading, maps, trip timeline, Schengen tracking. + +Required fields: + +- `trip`: trip start date. +- `depart`, `arrive`: datetimes. Ferry `arrive` is required. +- `from`, `to`: names from `ferry_terminals.yaml`. + +Common optional fields: + +- `operator`, `ferry`, `direction`, `class`, `booking_reference`, `price`, `currency`. +- `price_details`: free-form mapping of fare components. +- `vehicle`: mapping with fields such as `type`, `registration`, `height`, `length`, `extras`. + +Example: + +```yaml +- booking_reference: ABC123 + trip: 2026-05-04 + price: '302.00' + currency: GBP + depart: 2026-05-04 23:00:00+01:00 + arrive: 2026-05-05 08:00:00+02:00 + from: Portsmouth + to: Cherbourg + operator: Brittany Ferries + class: Commodore cabin + price_details: + base_fare: '153.00' + cabin: '149.00' + vehicle: + type: Example car + registration: AB12CDE + height: 1.63m + length: 4.15m +``` + +## `ferry_terminals.yaml` + +Top-level shape: list of ferry terminals. 
+ +Used by: ferry loading and route rendering. + +Required fields: + +- `name`: terminal name referenced by `ferries.yaml`. +- `city`, `country`. +- `latitude`, `longitude`. +- `routes`: mapping from destination terminal name to GeoJSON filename without `.geojson`. Ferry route rendering expects a GeoJSON route. + +Optional fields: + +- `osm_node`, `osm_way`. + +Example: + +```yaml +- name: Portsmouth + city: Portsmouth + country: gb + osm_way: 123456 + latitude: 50.8120 + longitude: -1.0880 + routes: + Cherbourg: portsmouth_cherbourg +``` + +## `flight_destinations.yaml` + +Top-level shape: list of origin rules for unbooked conference flight route hints. + +Used by: trip maps when a trip has conferences but no booked travel. + +Required fields: + +- `origin`: origin airport IATA code. +- `airline`: airline IATA code. Currently loaded for validation/description but not used in origin selection. +- `destinations`: list of destination airport IATA codes. + +Example: + +```yaml +- origin: BRS + airline: U2 + destinations: + - AMS + - BCN + - CDG +``` + +## `flights.yaml` + +Top-level shape: list of flight bookings. Each booking contains one or more flight legs. + +Used by: agenda transport events, trip loading, maps, distance calculation. + +Required booking fields: + +- `trip`: trip start date. +- `flights`: list of flight leg mappings. + +Common optional booking fields: + +- `booking_reference`, `price`, `currency`. + +Required flight leg fields: + +- `depart`: departure datetime. +- `from`, `to`: airport IATA codes from `airports.yaml`. +- `flight_number`: numeric/string flight number without airline prefix. +- `airline`: airline IATA code from `airlines.yaml`. + +Common optional flight leg fields: + +- Time/location: `arrive`, `from_terminal`, `to_terminal`, `duration`. +- Seat/cabin: `seat`, `seat_type`, `class`, `cabin`. +- Aircraft: `plane`, `registration`. +- Tracking: `distance`, `co2_kg`, `openflights_trip`, `reason`. 
+- Ticket/passenger: `e_ticket_number`, `ticket_number`, `frequent_flyer_number`, `passenger_name`, `passengers`, `baggage`, `payment_details`. + +`validate_yaml.py` checks that every booking has `trip`, all flight airlines exist in `airlines.yaml`, bookings are sorted by first departure, and currencies are configured. It reports flights missing `co2_kg`. + +Example: + +```yaml +- booking_reference: ABC123 + trip: 2026-04-22 + price: '62.50' + currency: GBP + flights: + - depart: 2026-04-22 17:20:00+01:00 + arrive: 2026-04-22 19:20:00+02:00 + from: LHR + to: BRU + flight_number: '1234' + airline: BA + duration: 01:00 + seat: 5F + seat_type: W + class: C + cabin: business + plane: Airbus A320 + registration: G-ABCD + co2_kg: 154 +``` + +## `follow_launches.yaml` + +Top-level shape: list of SpaceDevs launch slugs. + +Used by: no current in-repo reader was found, but the file appears intended as a watch list for launch update tooling. + +Example: + +```yaml +- starship-integrated-flight-test-5 +- artemis-ii +``` + +## `stations.yaml` + +Top-level shape: list of railway stations. + +Used by: train loading, maps, route rendering. + +Required fields: + +- `name`: station name referenced by `trains.yaml`. +- `country`: lowercase country code. +- `latitude`, `longitude`. +- `routes`: mapping from destination station name to GeoJSON filename without `.geojson`. + +Common optional fields: + +- `uic`, `alpha3`, `wikidata`, `osm_node`. + +Note: the code reads `routes`, not `rotues`; `rotues` appears to be a typo in existing data and should not be used for new entries. + +Example: + +```yaml +- name: London St Pancras + uic: 7015400 + alpha3: STP + wikidata: Q720102 + latitude: 51.531921 + longitude: -0.126361 + country: gb + routes: + Brussels Midi: london_brussels_eurostar +``` + +## `subscriptions.yaml` + +Top-level shape: list of subscriptions. + +Used by: subscription renewal agenda events when `renewal_date` is present. + +Required fields: + +- `name`: subscription name.
+ +Common optional fields: + +- Dates: `start`, `start_date`, `renewal_date`. +- `price`: mapping with `amount` and `currency`. +- `term`: mapping with `duration` and `unit` or `term_unit`. +- Account: `email`, `account_url`, `account_number`. + +Only items with `renewal_date` create agenda events. + +Example: + +```yaml +- name: Example Magazine + start_date: 2026-01-01 + renewal_date: 2027-01-01 + price: + amount: 99 + currency: GBP + term: + duration: 1 + unit: year + email: me@example.com + account_url: https://example.com/account + account_number: '001234' +``` + +## `trains.yaml` + +Top-level shape: list of train journeys. Each journey contains one or more legs. + +Used by: agenda transport events, trip loading, trip timeline, maps, stats. + +Required journey fields: + +- `operator`: booking/operator label. +- `from`, `to`: station names from `stations.yaml`. +- `trip`: trip start date. +- `depart`, `arrive`: journey datetimes or dates. +- `legs`: list of leg mappings. + +Common optional journey fields: + +- `class`, `number`, `tickets`, `ticket_code`, `total_price`, `co2_kg`. + +Required leg fields: + +- `from`, `to`: station names from `stations.yaml`. +- `depart`, `arrive`. +- `operator`. + +Common optional leg fields: + +- `train`, `number`, `service`, `service_number`, `service_numbers`, `reporting_number`, `mode`. +- Seat/reservation: `coach`, `seat`, `seat_type`, `seat_features`, `reservation_number`, `platform`. +- `class`, `trip`, `url`. + +Ticket fields are free-form but commonly include `booking_reference`, `url`, `price`, `currency`, `booking_date`, `ticket`, `ticket_code`, `ticket_type`, `from`, `to`, `class`, `validity`, `route`, `fare`, `quantity`, `seat_reservation`. 
+ +Example: + +```yaml +- operator: eurostar + from: London St Pancras + to: Brussels Midi + trip: 2026-02-06 + depart: 2026-02-06 15:04:00+00:00 + arrive: 2026-02-06 18:12:00+01:00 + class: Standard Premier + tickets: + - booking_reference: ABCDEF + url: https://example.com/booking/ABCDEF + price: '89.00' + currency: GBP + legs: + - from: London St Pancras + to: Brussels Midi + depart: 2026-02-06 15:04:00+00:00 + arrive: 2026-02-06 18:12:00+01:00 + coach: 1 + seat: 41 + operator: Eurostar +``` + +## `travel_rewards.yaml` + +Top-level shape: list of travel loyalty accounts. + +Used by: no current in-repo reader was found. The file is structured as account metadata. + +Common fields: + +- `name`: programme name. +- `type`: category such as `hotel`, `airline`, `rail`. +- `member_number`: membership identifier. +- `balance`: current points/miles balance. +- `expiry`: expiry date or null. +- `url`: account URL. +- `person`: account holder key/name. +- `email`, `note`. + +Example: + +```yaml +- name: Example Rewards + type: hotel + member_number: '123456789' + balance: 3665 + expiry: 2027-08-15 + url: https://example.com/rewards + person: edward +``` + +## `trips.yaml` + +Top-level shape: list of trip metadata. + +Used by: trip grouping and trip titles. + +Required fields: + +- `trip`: trip start date. This is the grouping key used by travel, accommodation, conferences, and trip events. + +Optional fields: + +- `name`: explicit trip title. +- `private`: boolean. Private trips are hidden from unauthenticated users. 
+ +Example: + +```yaml +- trip: 2026-02-06 + name: Brussels for FOSDEM + private: false +``` diff --git a/validate_yaml.py b/validate_yaml.py index 0fc82ab..1e5b29e 100755 --- a/validate_yaml.py +++ b/validate_yaml.py @@ -36,6 +36,23 @@ def check_currency(item: agenda.types.StrDict) -> None: sys.exit(-1) +def check_country_code( + item: agenda.types.StrDict, source: str, required: bool = True +) -> None: + """Exit with an error if the country code is missing or invalid.""" + country = item.get("country") + if country is None: + if not required: + return + pprint(item) + print(f"{source} missing country") + sys.exit(-1) + if not isinstance(country, str) or not agenda.get_country(country): + pprint(item) + print(f"{source} has invalid country {country!r}") + sys.exit(-1) + + def get_coords(item: agenda.types.StrDict) -> LatLon | None: """Return latitude/longitude tuple when present.""" if "latitude" in item and "longitude" in item: @@ -267,6 +284,8 @@ def check_conferences() -> None: print(f"currency {conf.currency!r} not in {currencies!r}") sys.exit(-1) + check_country_code(conf_data, "conference", required=False) + current_start = normalize_datetime(conf_data["start"]) if prev_start and current_start < prev_start: assert prev_conf_data is not None @@ -290,6 +309,11 @@ def check_events() -> None: last_year = today - timedelta(days=365) next_year = today + timedelta(days=2 * 365) + with open(os.path.join(data_dir, "events.yaml")) as f: + events_data = yaml.safe_load(f) or [] # empty file loads as None + for event in events_data: + check_country_code(event, "event", required=False) + events = agenda.events_yaml.read(data_dir, last_year, next_year) print(len(events), "events") @@ -314,6 +338,7 @@ def check_accommodation() -> None: for stay in accommodation_list: try: assert all(field in stay for field in required_fields) + check_country_code(stay, "accommodation") check_coordinates(stay) except AssertionError: pprint(stay) @@ -347,8 +372,7 @@ def check_airports() -> None: ) print(len(airports),
"airports") for airport in airports.values(): - assert "country" in airport - assert agenda.get_country(airport["country"]) + check_country_code(airport, "airport") def check_stations() -> None: @@ -356,8 +380,31 @@ def check_stations() -> None: stations = agenda.travel.parse_yaml("stations", data_dir) print(len(stations), "stations") for station in stations: - assert "country" in station - assert agenda.get_country(station["country"]) + check_country_code(station, "station") + + +def check_ferry_terminals() -> None: + """Print the ferry terminal count and check each country code.""" + terminals = agenda.travel.parse_yaml("ferry_terminals", data_dir) + print(len(terminals), "ferry terminals") + for terminal in terminals: + check_country_code(terminal, "ferry terminal") + + +def check_bus_stops() -> None: + """Print the bus stop count and check each country code.""" + stops = agenda.travel.parse_yaml("bus_stops", data_dir) + print(len(stops), "bus stops") + for stop in stops: + check_country_code(stop, "bus stop") + + +def check_coach_stations() -> None: + """Print the coach station count and check each country code.""" + stations = agenda.travel.parse_yaml("coach_stations", data_dir) + print(len(stations), "coach stations") + for station in stations: + check_country_code(station, "coach station") def check_ferries() -> None: @@ -415,8 +462,8 @@ def check_buses() -> None: def check_airlines() -> list[agenda.types.StrDict]: """Check airlines.""" - airlines = typing.cast( - list[agenda.types.StrDict], agenda.travel.parse_yaml("airlines", data_dir) + airlines: list[agenda.types.StrDict] = agenda.travel.parse_yaml( + "airlines", data_dir ) print(len(airlines), "airlines") for airline in airlines: @@ -450,6 +497,9 @@ def check() -> None: check_accommodation() check_airports() check_stations() + check_ferry_terminals() + check_bus_stops() + check_coach_stations() if __name__ == "__main__":