2023-09-13 11:49:08 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
import datetime
|
2023-09-15 19:04:41 +01:00
|
|
|
import os
|
|
|
|
import sys
|
2023-09-13 11:49:08 +01:00
|
|
|
|
|
|
|
import lxml.etree
|
|
|
|
|
|
|
|
from confarchive import database, model
|
|
|
|
|
|
|
|
# Database URL handed to database.init_db() below.
DB_URL = "postgresql:///confarchive"

# Directory that the loop at the bottom of this script scans for schedule files.
schedules_loc = "/home/edward/src/2022/conference-gender-mix/schedules"

# Runs at import time, before any of the model queries further down.
database.init_db(DB_URL)

# Short alias for lxml's element class, used in the annotations below.
Element = lxml.etree._Element

# Lower-case event titles that is_meal() treats as meals.
meals = {"lunch", "dinner", "breakfast"}

# Lower-case titles of other events that not_a_talk() treats as non-talks.
non_talk_titles = {"afternoon break", "cheese and wine party", "debcamp", "job fair"}
|
|
|
|
|
|
|
|
|
|
|
|
def not_a_talk(title: str) -> bool:
    """Return True when an event with this title is not a talk.

    Meals and the titles listed in ``non_talk_titles`` count as non-talks;
    the comparison ignores case.
    """
    if is_meal(title):
        return True
    return title.lower() in non_talk_titles
|
|
|
|
|
|
|
|
|
|
|
|
def is_meal(title: str) -> bool:
    """Return True when the event title, ignoring case, names a meal."""
    lowered = title.lower()
    return lowered in meals
|
|
|
|
|
|
|
|
|
|
|
|
def read_field(root: Element, field: str) -> str | None:
|
|
|
|
"""Get conference field."""
|
|
|
|
value = root.findtext(".//" + field)
|
|
|
|
if value is None:
|
|
|
|
return None
|
|
|
|
assert isinstance(value, str)
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
def read_date_field(root: Element, field: str) -> datetime.date | None:
    """Read ``field`` below ``root`` and parse it as an ISO date.

    Returns None when the field is missing or its text is empty.
    """
    text = read_field(root, field)
    if not text:
        return None
    return parse_isodate(text)
|
2023-09-13 11:49:08 +01:00
|
|
|
|
|
|
|
|
|
|
|
def read_required_field(root: Element, field: str) -> str:
    """Return the text of ``field`` below ``root``.

    The field must be present and non-empty; otherwise the assertion fails.
    """
    text = read_field(root, field)
    assert text
    return text
|
|
|
|
|
|
|
|
|
|
|
|
def parse_isodate(iso_date: str) -> datetime.date:
    """Return the date part of an ISO 8601 date or date-time string.

    Surrounding whitespace is ignored and a trailing ``Z`` (UTC designator)
    is accepted on every supported Python version.

    Raises:
        ValueError: if the text is not a valid ISO date or date-time.
    """
    text = iso_date.strip()
    # datetime.fromisoformat() only understands "Z" from Python 3.11 on;
    # spell it as an explicit offset so older interpreters accept it too.
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"
    return datetime.datetime.fromisoformat(text).date()
|
|
|
|
|
|
|
|
|
|
|
|
def conference_obj(root: Element) -> model.Conference:
    """Build a conference object from the ``<conference>`` element of a schedule.

    The start date is read from ``start`` or, failing that, ``start_date``.
    The end date is derived from ``days`` when that field holds a non-zero
    number, otherwise it is read from ``end`` or ``end_date``.

    When no start date can be found the ``<conference>`` element is printed
    and the script exits with status 1.
    """
    e = root.find(".//conference")
    assert e is not None

    start = read_date_field(e, "start") or read_date_field(e, "start_date")
    if not start:
        # Show the offending element before giving up. This check has to come
        # before anything that relies on ``start`` or it can never run.
        print(lxml.etree.tostring(e, encoding=str))
        sys.exit(1)

    days_str = read_field(e, "days")
    days = int(days_str) if days_str else None

    end: datetime.date | None
    if days:
        # A one-day conference starts and ends on the same date.
        end = start + datetime.timedelta(days=days - 1)
    else:
        end = read_date_field(e, "end") or read_date_field(e, "end_date")

    assert end and end >= start

    return model.Conference(
        title=read_required_field(e, "title"),
        start=start,
        end=end,
        timezone=read_field(e, "time_zone_name"),
    )
|
|
|
|
|
|
|
|
|
|
|
|
def build_event_object(
    e: Element, person_lookup: dict[str, model.Person]
) -> model.Event | None:
    """Build an event object from an ``<event>`` element.

    Args:
        e: The event element.
        person_lookup: Person objects keyed by normalised name.

    Returns:
        The new event, or None when the element has no title, no person
        elements, or no person with a usable name.
    """
    title = read_field(e, "title")
    guid = e.get("guid")
    room = read_field(e, "room")
    slug = read_field(e, "slug")
    description = read_field(e, "description")
    event_type = read_field(e, "type")
    url = read_field(e, "url")

    if title is None:
        print("no title")
        assert description is None and event_type is None
        return None

    # Prefer the children of a <persons> wrapper; fall back to any <person>
    # descendant when the wrapper is missing or empty.
    persons = e.find(".//persons")
    if persons is None or len(persons) == 0:
        persons = e.findall(".//person")
    if persons is None or len(persons) == 0:
        return None

    people = []
    seen_person = set()
    print("persons:", len(persons))
    for p in persons:
        raw_name = p.text
        print("peron:", raw_name)
        # person_lookup is keyed by normalised names, so normalise before
        # both the duplicate check and the lookup.
        name = normalize_name(raw_name) if raw_name else ""
        if not name:
            print("no name")
            continue
        if name in seen_person:
            print("seen already:", name)
            continue
        seen_person.add(name)
        people.append(model.EventPerson(person=person_lookup[name]))

    if not people:
        print("no people")
        return None

    return model.Event(
        guid=guid,
        title=title,
        room=room,
        slug=slug,
        description=description,
        event_type=event_type,
        url=url,
        people_detail=people,
    )
|
|
|
|
|
|
|
|
|
2023-09-15 19:04:41 +01:00
|
|
|
def schedule_has_person_ids(root: Element) -> bool:
    """Return True when the first ``<person>`` in the schedule has an ``id``.

    Only the first person element is inspected; the schedule must contain
    at least one.
    """
    first_person = root.find(".//person")
    assert first_person is not None
    return first_person.get("id") is not None
|
|
|
|
|
|
|
|
|
2023-09-13 11:49:08 +01:00
|
|
|
def get_all_people(root: Element) -> list[tuple[int, str]]:
    """Return ``(id, name)`` pairs for every person in the schedule, sorted.

    Every ``<person>`` must have text and an ``id`` attribute, and an id that
    appears more than once must always carry the same name; otherwise an
    assertion fails.
    """
    names_by_id: dict[int, str] = {}
    for element in root.findall(".//person"):
        name = element.text
        assert name
        raw_id = element.get("id")
        if not raw_id:
            # Show the offending element before the assertion below fires.
            print(lxml.etree.tostring(element, encoding=str))
        assert raw_id
        # Keeps the first name seen for an id and returns it on later hits.
        recorded = names_by_id.setdefault(int(raw_id), name)
        assert name == recorded
    return sorted(names_by_id.items())
|
|
|
|
|
|
|
|
|
2023-09-15 19:04:41 +01:00
|
|
|
def get_people_names(root: Element) -> set[str]:
    """Return the normalised names of all ``<person>`` elements below ``root``.

    Elements without text, or whose text is only whitespace, are left out so
    that no empty name ends up in the result.
    """
    names: set[str] = set()
    for person in root.findall(".//person"):
        if not person.text:
            continue
        name = normalize_name(person.text)
        # Whitespace-only text is truthy but normalises to "".
        if name:
            names.add(name)
    return names
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_name(n: str) -> str:
    """Collapse every run of whitespace to one space and trim both ends."""
    words = n.split()
    # Joining the split words leaves no leading or trailing whitespace.
    return " ".join(words)
|
|
|
|
|
|
|
|
|
|
|
|
def find_existing_person(name: str) -> model.Person | None:
    """Find a person already in the database by name.

    The name is matched case-insensitively, first against ``Person.name`` and
    then against ``ConferencePerson.named_as``.

    Returns:
        The matching person, or None when neither query finds a row.
    """
    # NOTE(review): ilike() treats "%" and "_" in the name as wildcards.
    person = model.Person.query.filter(model.Person.name.ilike(name)).one_or_none()
    assert person is None or isinstance(person, model.Person)
    if person:
        return person

    conf_person = model.ConferencePerson.query.filter(
        model.ConferencePerson.named_as.ilike(name)
    ).one_or_none()
    if conf_person is None:
        return None

    # The query yields the association row; callers need the person behind it.
    person = conf_person.person
    assert person is None or isinstance(person, model.Person)
    return person
|
|
|
|
|
|
|
|
|
|
|
|
def load(filename: str, short_name: str) -> None:
    """Load a conference schedule file into the database.

    Files that start like an iCalendar (``BEGIN:VCALENDAR``) or JSON (``{``)
    document are skipped; anything else is parsed as XML.

    Args:
        filename: Path of the schedule file.
        short_name: Short name used to look up an existing conference. A
            conference created from the file is stored under this name.
    """
    # Sniff the first bytes in binary mode so an undecodable file cannot raise
    # here, and close the handle straight away.
    with open(filename, "rb") as fh:
        start = fh.read(15)
    if start == b"BEGIN:VCALENDAR" or start.startswith(b"{"):
        return None

    root = lxml.etree.parse(filename).getroot()

    conf = model.Conference.query.filter_by(short_name=short_name).one_or_none()

    if conf:
        # Only fill in conferences that have no events yet.
        assert conf.events.count() == 0
    else:
        conf = conference_obj(root)
        assert model.Conference.query.filter_by(title=conf.title).count() == 0
        # Record the short name so the lookup above finds this conference on
        # a later run.
        conf.short_name = short_name
        database.session.add(conf)

    print((conf.short_name, conf.title))

    event_count = 0
    people_names = get_people_names(root)
    person_lookup = {}
    for name in people_names:
        cp = model.ConferencePerson.query.filter_by(
            conference=conf, named_as=name
        ).one_or_none()

        if cp and cp.person.events_association.count() == 0:
            # A person linked to this conference but to no event is removed,
            # together with all of their conference links, before the name is
            # looked up again below.
            person = cp.person
            for cp2 in person.conferences_association:
                database.session.delete(cp2)
            database.session.delete(cp.person)
            database.session.commit()

        person = find_existing_person(name)
        if not person:
            person = model.Person(name=name)
            database.session.add(person)
        person_lookup[name] = person

    for name, person in person_lookup.items():
        # Skip people who are already linked to this conference.
        if model.ConferencePerson.query.filter_by(
            conference=conf, person=person
        ).one_or_none():
            continue

        conf_person = model.ConferencePerson(
            conference=conf, person=person, named_as=name
        )
        database.session.add(conf_person)

    for day in root.findall(".//day"):
        # The day index is only reported for now; it is not stored on events.
        day_index_str = day.get("index")
        print("day", day_index_str)
        for event_element in day.findall(".//event"):
            title = read_field(event_element, "title")
            event = build_event_object(event_element, person_lookup)
            if not event:
                print(f"skip event: {title}")
                continue
            event.conference = conf
            database.session.add(event)
            event_count += 1

    # Commit as soon as at least one event was loaded; the earlier "> 1" test
    # left a schedule with exactly one event uncommitted.
    if event_count > 0:
        database.session.commit()
|
|
|
|
|
|
|
|
|
|
|
|
# Load every wanted schedule file found directly inside schedules_loc.
for f in os.scandir(schedules_loc):
    # Skip directories, the excluded names, and anything that is not a
    # "capitole_du_libre" schedule.
    if (
        f.is_dir()
        or f.name in {"datenspuren_2019"}
        or not f.name.startswith("capitole_du_libre")
    ):
        continue
    print(f.name)
    load(f.path, f.name)
|