#!/usr/bin/python3
"""Load a conference schedule XML file into the confarchive database."""

import datetime

import lxml.etree

from confarchive import database, model

DB_URL = "postgresql:///confarchive"

# Runs at import time, before any of the functions below are called.
database.init_db(DB_URL)

Element = lxml.etree._Element

# Titles are compared in lower case against both of these sets.
meals = {"lunch", "dinner", "breakfast"}

non_talk_titles = {"afternoon break", "cheese and wine party", "debcamp", "job fair"}


def not_a_talk(title: str) -> bool:
    """Return True when an event with this title is not a talk.

    A title is not a talk when it is a meal or appears in
    ``non_talk_titles``; the comparison ignores case.
    """
    return is_meal(title) or title.lower() in non_talk_titles


def is_meal(title: str) -> bool:
    """Return True when the event title represents a meal (case-insensitive)."""
    return title.lower() in meals


def read_field(root: Element, field: str) -> str | None:
    """Return the text of the first descendant element named *field*.

    Returns None when *root* has no such descendant.
    """
    value = root.findtext(".//" + field)
    if value is None:
        return None
    assert isinstance(value, str)
    return value


def read_date_field(root: Element, field: str) -> datetime.date | None:
    """Read *field* and parse it as an ISO date; None when the field is absent."""
    value = read_field(root, field)
    return parse_isodate(value) if value is not None else None


def read_required_field(root: Element, field: str) -> str:
    """Read a field that must be present and non-empty.

    Raises AssertionError when the field is missing or its text is empty.
    """
    value = read_field(root, field)
    assert value, f"required field {field!r} is missing or empty"
    return value


def parse_isodate(iso_date: str) -> datetime.date:
    """Parse an ISO format date or datetime string and return its date part."""
    return datetime.datetime.fromisoformat(iso_date).date()


def conference_obj(root: Element) -> model.Conference:
    """Build the conference object from the ``conference`` element under *root*.

    Raises AssertionError when there is no ``conference`` element or it has
    no title.
    """
    e = root.find(".//conference")
    assert e is not None, "schedule has no conference element"

    return model.Conference(
        title=read_required_field(e, "title"),
        start=read_date_field(e, "start"),
        end=read_date_field(e, "end"),
        timezone=read_field(e, "time_zone_name"),
    )


def build_event_object(
    e: Element, person_lookup: dict[int, model.Person]
) -> model.Event | None:
    """Build an event object from an ``event`` element.

    Returns None when the title marks the event as not a talk, or when the
    element has no ``persons`` child. Each child of ``persons`` must carry an
    ``id`` attribute that is a key of *person_lookup*.
    """
    title = read_required_field(e, "title")
    if not_a_talk(title):
        return None

    room = read_field(e, "room")
    slug = read_field(e, "slug")
    description = read_field(e, "description")
    event_type = read_field(e, "type")
    url = read_field(e, "url")

    persons_element = e.find(".//persons")
    if persons_element is None:
        return None

    people = []
    for p in persons_element:
        id_str = p.get("id")
        assert id_str is not None, f"person without an id in event {title!r}"
        people.append(person_lookup[int(id_str)])

    print(title, people)

    return model.Event(
        title=title,
        room=room,
        slug=slug,
        description=description,
        event_type=event_type,
        url=url,
        people=people,
    )


def get_all_people(root: Element) -> list[tuple[int, str]]:
    """Collect every distinct person mentioned anywhere under *root*.

    Returns ``(person id, name)`` pairs sorted by id. Every ``person``
    element must have text and an ``id`` attribute, and an id that appears
    more than once must carry the same name each time; otherwise an
    AssertionError is raised.
    """
    people: dict[int, str] = {}
    for person in root.findall(".//person"):
        assert person.text, "person element has no name"
        person_id_str = person.get("id")
        assert person_id_str, f"person {person.text!r} has no id"
        person_id = int(person_id_str)
        existing = people.get(person_id)
        if existing:
            assert (
                person.text == existing
            ), f"person id {person_id} has conflicting names"
            continue
        people[person_id] = person.text

    return sorted(people.items())


def load(filename: str, *, max_events: int | None = 11) -> None:
    """Load a conference schedule file into the database session.

    Adds the conference, any people not already found by name, and the talk
    events to ``database.session``. Committing is left to the caller.

    Args:
        filename: Path of the schedule XML file to parse.
        max_events: Stop once this many events have been added; None loads
            every event. The default of 11 matches the previous hard-coded
            cut-off (``event_count > 10``).
    """
    root = lxml.etree.parse(filename).getroot()

    conf = conference_obj(root)
    database.session.add(conf)

    # Map schedule person ids to Person rows, reusing rows that share a name.
    person_lookup: dict[int, model.Person] = {}
    for person_id, name in get_all_people(root):
        person = model.Person.query.filter_by(name=name).first()
        if not person:
            person = model.Person(name=name)
            database.session.add(person)
        person_lookup[person_id] = person

    # Visit each event exactly once. The previous nested day/room/event loops
    # all searched from the document root, so every event was revisited once
    # per (day, room) pair and could be added to the session repeatedly.
    # NOTE(review): the default cut-off looks like a debugging leftover;
    # pass max_events=None to load the full schedule.
    event_count = 0
    for event_element in root.findall(".//event"):
        if max_events is not None and event_count >= max_events:
            break
        event = build_event_object(event_element, person_lookup)
        if not event:
            continue
        event.conference = conf
        print()
        database.session.add(event)
        event_count += 1


load("/home/edward/src/2022/conference-gender-mix/schedules/debconf22")

database.session.commit()