#!/usr/bin/python3
"""Convert from wiki page schedule to XML schedule."""
import hashlib
import re
import sys
import typing
import urllib.parse
from datetime import time
import lxml.etree
# Next id to hand out to an <event> element; build_event() uses the current
# value for the element's "id" attribute and then increments it.
event_id = 1
# NOTE(review): not referenced in the visible part of this file -- confirm
# whether this counter is still needed.
presenter_id = 1
def md5sum(s: str) -> str:
    """Return the hexadecimal MD5 digest of *s* encoded as UTF-8.

    The digest serves only as a stable identifier (build_event uses it as
    the ``id`` attribute of ``person`` elements), never for anything security
    related, so the hash is requested with ``usedforsecurity=False``.  That
    keeps the function usable on FIPS-restricted Python builds, where a plain
    ``hashlib.md5()`` call is refused.  The resulting digest is unchanged.
    """
    digest = hashlib.md5(s.encode("utf-8"), usedforsecurity=False)
    return digest.hexdigest()
re_day_heading = re.compile(
r'
'
+ r' .*(.*?) *
'
)
# Line that starts with "|" (optionally followed by one space) and an HH:MM
# time; captures the two hour digits and the two minute digits.
re_time = re.compile(r"^\| ?(\d{2}):(\d{2})")
# "| " followed by an optional colspan attribute and a mandatory rowspan
# attribute, then " |"; captures the rowspan value and the remaining text on
# the line.  Presumably a wikitext table cell holding a session -- confirm
# against the schedule page.
re_session = re.compile(r'\| (?:colspan="\d+" )?rowspan="(\d+)" \|(.*)')
# "| " followed by a colspan attribute; captures the colspan value.
re_colspan = re.compile(r'\| colspan="(\d+)')
# NOTE(review): as written this matches and captures any text; the pattern may
# have lost surrounding markup -- confirm against the wiki page format.
re_speaker = re.compile(r"(.*)")
class Session(typing.TypedDict):
    """One scheduled session, in the shape consumed by build_event."""

    # Session name.  build_event only emits an event when this starts with
    # "[" and does not start with "[[Event:".
    name: str
    # Start time; written to the XML "start" element via str().
    start: time
    # Length in minutes; formatted with minutes_to_duration().
    duration: int
    # Room name; copied into the event's "room" element.
    room: str
    # Speaker names; each one becomes a "person" element of the event.
    speakers: list[str]
# Sessions keyed first by day index and then by room name.
GroupedSlots = dict[int, dict[str, list[Session]]]

# Conference-level metadata as (tag, value) pairs.  Each pair becomes a child
# of the schedule's "conference" element, with the value converted via str().
meta = [
    ("title", "GLAM Wiki 2023"),
    ("subtitle", "Galleries, Libraries, Archives, Museums, etc."),
    ("venue", "University of the Republic of Uruguay"),
    ("city", "Montevideo, Uruguay"),
    ("start", "2023-11-16"),
    ("end", "2023-11-18"),
    ("days", 3),
    ("day_change", "08:00"),
    ("timeslot_duration", "00:15"),
    ("time_zone_name", "America/Montevideo"),
]

# Room names.  Every day gets one "room" element per entry, in this order, and
# the same names are the inner keys of a GroupedSlots mapping.
rooms = [
    "Auditorium",
    "Posgrado 1 (110)",
    "Posgrado 2 (111)",
    "401",
    "410",
    "411",
]
def content_to_url(s: str) -> str:
    """Convert a wikilink to the URL of the linked page on Meta-Wiki.

    Args:
        s: Wikilink markup such as ``[[GLAM Wiki 2023/Foo|Foo]]``.  The
            ``|label`` part is optional and is ignored.

    Returns:
        ``https://meta.wikimedia.org/wiki/`` followed by the percent-encoded
        page title, with spaces replaced by underscores.

    Raises:
        ValueError: If *s* does not start with ``[[GLAM``.
    """
    if not s.startswith("[[GLAM"):
        # An explicit raise, unlike ``assert``, still runs under ``python -O``.
        raise ValueError(f"not a GLAM wikilink: {s!r}")
    target = s.partition("|")[0][2:]
    # A link without a label keeps its closing brackets in the target part;
    # drop them so they do not end up percent-encoded in the URL.
    target = target.removesuffix("]]")
    page_title = urllib.parse.quote(target.replace(" ", "_"))
    return "https://meta.wikimedia.org/wiki/" + page_title
def minutes_to_duration(mins: int) -> str:
    """Format a number of minutes as ``HH:MM``, each part zero-padded to two digits."""
    hours, remainder = divmod(mins, 60)
    return f"{hours:02d}:{remainder:02d}"
def build_event(item: Session, room: lxml.etree._Element) -> None:
    """Append an ``event`` element describing *item* to *room*.

    Sessions whose name is not link markup (does not start with ``[``) or
    whose name starts with ``[[Event:`` are skipped and nothing is added.

    Args:
        item: The session to convert.
        room: The element that receives the new ``event`` child.

    Side effects:
        Uses the module-level ``event_id`` counter for the element's ``id``
        attribute and increments it for every event that is created.
    """
    global event_id

    link = item["name"]
    # ``startswith`` rather than ``link[0]`` so that an empty name is skipped
    # instead of raising IndexError.
    if not link.startswith("[") or link.startswith("[[Event:"):
        return

    target, has_label, label = link.partition("|")
    # Drop the closing "]]".  A link without a label displays its target, so
    # fall back to that instead of emitting an empty title.
    title = label[:-2] if has_label else target[2:-2]

    # Built before the element is created so that a failure here (for example
    # in content_to_url) does not leave a half-filled event in the tree.
    event_data = [
        ("start", str(item["start"])),
        ("duration", minutes_to_duration(item["duration"])),
        ("room", item["room"]),
        ("title", title),
        ("subtitle", ""),
        ("language", ""),
        ("description", ""),
        ("url", content_to_url(link)),
    ]

    event = lxml.etree.SubElement(room, "event", id=str(event_id))
    event_id += 1

    for tag, text in event_data:
        lxml.etree.SubElement(event, tag).text = text

    persons = lxml.etree.SubElement(event, "persons")
    for speaker in item["speakers"]:
        person = lxml.etree.SubElement(persons, "person", id=md5sum(speaker))
        person.text = speaker
def read_wikitext(filename: str) -> GroupedSlots:
"""Parse schedule wiki page."""
group_by_day_and_room: GroupedSlots = {
index: {room: [] for room in rooms} for index in range(1, 5)
}
expect = "h2" # state machine
start_time = None
day_index = 0
col: int = 0
current: Session | None = None
room: str | None = None
for line in open(filename):
if expect == "h2" and line.startswith(" lxml.etree._Element:
"""Generate the schedule XML."""
root = lxml.etree.Element("schedule")
conf = lxml.etree.SubElement(root, "conference")
for key, value in meta:
lxml.etree.SubElement(conf, key).text = str(value)
days = [(index, f"2023-11-{16 + index}") for index in range(1, 4)]
for index, d in days:
day = lxml.etree.SubElement(root, "day")
day.set("index", str(index))
day.set("date", d)
for room_name in rooms:
room = lxml.etree.SubElement(day, "room", name=room_name)
for slot in grouped[index][room_name]:
build_event(slot, room)
return root
def main(filename: str) -> None:
    """Convert the wiki schedule in *filename* to XML and print it.

    The file is parsed with read_wikitext, turned into an element tree by
    generate_schedule, serialised as pretty-printed UTF-8 XML with an XML
    declaration, and written to standard output without an extra trailing
    newline.
    """
    grouped = read_wikitext(filename)
    schedule = generate_schedule(grouped)
    serialised = lxml.etree.tostring(
        schedule,
        pretty_print=True,
        encoding="utf-8",
        xml_declaration=True,
    )
    print(serialised.decode("utf-8"), end="")
if __name__ == "__main__":
    # Exit with a usage message (written to stderr, exit status 1) instead of
    # an IndexError traceback when the input file argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} WIKITEXT_FILE")
    main(sys.argv[1])