#!/usr/bin/python3
"""Copy event dates and abstracts from schedule XML files into the database.

Every regular file in ``schedules_loc`` is passed to ``process_schedule``
with its file name used as the conference short name. All changes are
committed in a single transaction once every file has been processed.
"""

import collections
import datetime
import os

import lxml.etree

from confarchive import database, model

DB_URL = "postgresql:///confarchive"

schedules_loc = "schedules"

database.init_db(DB_URL)

Element = lxml.etree._Element


def read_field(root: Element, field: str) -> str | None:
    """Get conference field.

    Return the text of the first descendant of *root* named *field*,
    or None when no such element exists.
    """
    value = root.findtext(".//" + field)
    if value is None:
        return None
    # Narrow the type for static checkers.
    assert isinstance(value, str)
    return value


def process_schedule(filename: str, short_name: str) -> None:
    """Load conference schedule.

    Parse the XML schedule in *filename* and, for the conference whose
    ``short_name`` matches, set ``event_date`` and ``abstract`` on events
    whose title occurs more than once in that conference. Duplicate-titled
    events are paired with schedule entries by database id order versus
    document order, and only when both sides have the same number of entries.

    Files starting with ``BEGIN:VCALENDAR`` or ``{`` (presumably iCalendar
    and JSON schedules) are skipped, as are files whose short name matches
    no conference.

    Raises:
        ValueError: if an event lacks a start time or its day lacks a date.
    """
    # Sniff the first few characters to skip non-XML schedule formats.
    with open(filename) as fh:
        prefix = fh.read(15)
    if prefix == "BEGIN:VCALENDAR" or prefix.startswith("{"):
        return None

    conf = model.Conference.query.filter_by(short_name=short_name).one_or_none()
    if not conf:
        return

    print((conf.short_name, conf.title))

    # title -> [(start datetime, abstract), ...] in document order.
    by_title = collections.defaultdict(list)

    root = lxml.etree.parse(filename).getroot()
    for day in root.findall(".//day"):
        day_date = day.get("date")
        for event_element in day.findall(".//event"):
            title = read_field(event_element, "title")
            start_time = read_field(event_element, "start")
            # Validate before touching start_time: it is None when the
            # <start> element is missing.
            if not day_date or not start_time:
                raise ValueError(
                    f"{filename}: event {title!r} is missing a day date or start time"
                )
            if len(start_time) == 4:
                # Pad "H:MM" to "HH:MM" so fromisoformat accepts it.
                start_time = "0" + start_time
            dt = datetime.datetime.fromisoformat(day_date + " " + start_time)

            q = model.Event.query.filter_by(title=title, conference=conf)
            # Only titles shared by two or more events are handled here.
            if q.count() < 2:
                continue
            by_title[title].append((dt, read_field(event_element, "abstract")))

    for title, events in by_title.items():
        q = model.Event.query.filter_by(title=title, conference=conf).order_by(
            model.Event.id
        )
        # Pairing by position is only meaningful when the counts agree.
        if q.count() != len(events):
            continue
        print((short_name, title))
        for event, (dt, abstract) in zip(q, events):
            print(" ", event.id, dt)
            event.event_date = dt
            event.abstract = abstract


# The file name doubles as the conference short name.
with os.scandir(schedules_loc) as entries:
    for f in entries:
        if f.is_dir():
            continue
        print(f.name)
        process_schedule(f.path, f.name)

database.session.commit()