conference-archive/load_conference.py
2023-09-13 16:19:08 +05:30

154 lines
4.1 KiB
Python
Executable file

#!/usr/bin/python3
import datetime
import lxml.etree
from confarchive import database, model
# Database URL handed to the project's database layer.
DB_URL = "postgresql:///confarchive"
# Runs at import time, before any of the functions below are called.
database.init_db(DB_URL)
# Short alias for lxml's element class, used in the type annotations below.
Element = lxml.etree._Element
# Lower-case titles; event titles are lower-cased before being compared
# against these sets (see is_meal / not_a_talk).
meals = {"lunch", "dinner", "breakfast"}
non_talk_titles = {"afternoon break", "cheese and wine party", "debcamp", "job fair"}
def not_a_talk(title: str) -> bool:
    """Return True when an event with this title should not count as a talk.

    A title is rejected when it names a meal or, compared case-insensitively,
    appears in ``non_talk_titles``.
    """
    if is_meal(title):
        return True
    return title.lower() in non_talk_titles
def is_meal(title: str) -> bool:
    """Return True when the title, lower-cased, is one of the known meals."""
    lowered = title.lower()
    return lowered in meals
def read_field(root: Element, field: str) -> str | None:
"""Get conference field."""
value = root.findtext(".//" + field)
if value is None:
return None
assert isinstance(value, str)
return value
def read_date_field(root: Element, field: str) -> datetime.date | None:
    """Return the named field parsed as a date, or None if the field is absent."""
    raw = read_field(root, field)
    if raw is None:
        return None
    return parse_isodate(raw)
def read_required_field(root: Element, field: str) -> str:
    """Return the text of a field that must be present and non-empty.

    Fails an assertion when the field is missing or its text is empty.
    """
    text = read_field(root, field)
    assert text
    return text
def parse_isodate(iso_date: str) -> datetime.date:
    """Parse an ISO 8601 date or date-time string and return its date part."""
    moment = datetime.datetime.fromisoformat(iso_date)
    return moment.date()
def conference_obj(root: Element) -> model.Conference:
    """Create a ``model.Conference`` from the first <conference> element.

    The title is required; start, end and time zone may be absent, in which
    case None is passed for them.
    """
    conference_element = root.find(".//conference")
    assert conference_element is not None

    fields = {
        "title": read_required_field(conference_element, "title"),
        "start": read_date_field(conference_element, "start"),
        "end": read_date_field(conference_element, "end"),
        "timezone": read_field(conference_element, "time_zone_name"),
    }
    return model.Conference(**fields)
def build_event_object(
    e: Element, person_lookup: dict[int, model.Person]
) -> model.Event | None:
    """Create a ``model.Event`` from an <event> element.

    Returns None when the title marks the event as not a talk, or when the
    element has no <persons> descendant. Each child of <persons> must carry
    an ``id`` attribute that is a key of *person_lookup*; an unknown id
    raises KeyError.
    """
    title = read_required_field(e, "title")
    if not_a_talk(title):
        return None

    attributes = {
        "room": read_field(e, "room"),
        "slug": read_field(e, "slug"),
        "description": read_field(e, "description"),
        "event_type": read_field(e, "type"),
        "url": read_field(e, "url"),
    }

    persons_element = e.find(".//persons")
    if persons_element is None:
        return None

    speakers = []
    for person_element in persons_element:
        raw_id = person_element.get("id")
        assert raw_id is not None
        speakers.append(person_lookup[int(raw_id)])

    print(title, speakers)

    return model.Event(title=title, people=speakers, **attributes)
def get_all_people(root: Element) -> list[tuple[int, str]]:
    """Collect every distinct person in the document.

    Returns ``(id, name)`` pairs sorted by id. Every <person> element must
    have non-empty text and a non-empty ``id`` attribute, and an id that
    occurs more than once must always carry the same name; violations fail
    an assertion.
    """
    names_by_id: dict[int, str] = {}
    for node in root.findall(".//person"):
        name = node.text
        assert name
        raw_id = node.get("id")
        assert raw_id
        # setdefault stores the name on first sight and otherwise hands back
        # the name already recorded, which must agree with this one.
        recorded = names_by_id.setdefault(int(raw_id), name)
        assert name == recorded
    return sorted(names_by_id.items())
def load(filename: str, max_events: int | None = 11) -> None:
    """Load a conference schedule into the database session.

    Parses the XML schedule in *filename* and adds to ``database.session``
    the conference, every person not already stored under the same name, and
    the talk events. Nothing is committed here; that is left to the caller.

    Every <event> element is visited exactly once. The earlier version
    nested three loops (day, room, event) that each searched from the
    document root, so each event was processed once per day/room
    combination and added repeatedly.

    Args:
        filename: Path of the schedule XML file.
        max_events: Stop once this many events have been added; None loads
            every event. The default of 11 reproduces the limit that used
            to be hard-coded.
            NOTE(review): the limit looks like a debugging cap - confirm
            whether full loads should be the default.
    """
    root = lxml.etree.parse(filename).getroot()

    conf = conference_obj(root)
    database.session.add(conf)

    # Map the schedule's person ids to Person rows, reusing a stored row
    # when one with the same name exists.
    person_lookup: dict[int, model.Person] = {}
    for person_id, name in get_all_people(root):
        person = model.Person.query.filter_by(name=name).first()
        if not person:
            person = model.Person(name=name)
            database.session.add(person)
        person_lookup[person_id] = person

    event_count = 0
    for event_element in root.findall(".//event"):
        if max_events is not None and event_count >= max_events:
            return None
        event = build_event_object(event_element, person_lookup)
        if not event:
            continue
        event.conference = conf
        print()
        database.session.add(event)
        event_count += 1
    return None
# Script body: runs whenever this module is executed or imported.
# Loads the schedule from a hard-coded local path, then commits everything
# that load() added to the session.
load("/home/edward/src/2022/conference-gender-mix/schedules/debconf22")
database.session.commit()