conference-archive/add_talk_dates.py

93 lines
2.4 KiB
Python
Raw Normal View History

2023-09-22 14:52:55 +01:00
#!/usr/bin/python3
import collections
2023-09-22 14:52:55 +01:00
import datetime
import os
import lxml.etree
from confarchive import database, model
# Database URL passed to database.init_db() below.
DB_URL = "postgresql:///confarchive"
# Directory that is scanned for schedule files; a relative path, so it is
# resolved against the current working directory.
schedules_loc = "schedules"
# Runs at import time, before any model query is issued.
database.init_db(DB_URL)
# Short alias for lxml's element class, used in type annotations.
Element = lxml.etree._Element
def read_field(root: Element, field: str) -> str | None:
"""Get conference field."""
value = root.findtext(".//" + field)
if value is None:
return None
assert isinstance(value, str)
return value
def process_schedule(filename: str, short_name: str) -> None:
    """Copy talk dates and abstracts from an XML schedule onto stored events.

    Only titles that occur at least twice among the conference's stored
    events are handled.  The schedule entries for such a title are collected
    in the order they appear in the file and paired with the stored events
    ordered by id; a title is skipped when the two counts differ.

    Files that start like iCalendar (``BEGIN:VCALENDAR``) or JSON (``{``)
    are ignored, as is any schedule whose *short_name* matches no conference.
    Changes are made on the model objects only; nothing is committed here.

    Args:
        filename: Path of the schedule file.
        short_name: Short name used to look up the conference.

    Raises:
        ValueError: If an event has no start time or its day has no date.
    """
    # Sniff raw bytes: a text-mode read decodes a whole buffer with the locale
    # encoding and can fail on schedules that lxml itself parses fine.
    with open(filename, "rb") as fh:
        head = fh.read(15)
    if head == b"BEGIN:VCALENDAR" or head.startswith(b"{"):
        return

    conf = model.Conference.query.filter_by(short_name=short_name).one_or_none()
    if not conf:
        return
    print((conf.short_name, conf.title))

    # title -> [(start datetime, abstract), ...] in schedule order.
    by_title = collections.defaultdict(list)

    root = lxml.etree.parse(filename).getroot()
    for day in root.findall(".//day"):
        day_date = day.get("date")
        for event_element in day.findall(".//event"):
            title = read_field(event_element, "title")
            start = read_field(event_element, "start")
            # Validate before touching `start`, so missing data gives a clear
            # error instead of a TypeError from len(None).
            if not day_date or not start:
                raise ValueError(
                    f"{filename}: event {title!r} is missing its day date "
                    "or start time"
                )
            if len(start) == 4:
                # Zero-pad times such as "9:30" so fromisoformat accepts them.
                start = "0" + start
            dt = datetime.datetime.fromisoformat(f"{day_date} {start}")

            q = model.Event.query.filter_by(title=title, conference=conf)
            if q.count() < 2:
                continue
            by_title[title].append((dt, read_field(event_element, "abstract")))

    for title, scheduled in by_title.items():
        # Fetch once; the same rows are used for the count check and the update.
        stored = (
            model.Event.query.filter_by(title=title, conference=conf)
            .order_by(model.Event.id)
            .all()
        )
        if len(stored) != len(scheduled):
            continue
        print((short_name, title))
        for event, (dt, abstract) in zip(stored, scheduled):
            print(" ", event.id, dt)
            event.event_date = dt
            event.abstract = abstract
2023-09-22 14:52:55 +01:00
# Process every regular entry of the schedules directory; the entry's file
# name doubles as the conference short name.
for entry in os.scandir(schedules_loc):
    if not entry.is_dir():
        print(entry.name)
        process_schedule(entry.path, entry.name)

# One commit after all schedules have been processed.
database.session.commit()