From 9967fffcf470ef6f0490e42c29f4a904d3bb6d77 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 12 Nov 2023 18:19:48 +0100 Subject: [PATCH] Update --- parse.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/parse.py b/parse.py index 85c2700..2ebe369 100755 --- a/parse.py +++ b/parse.py @@ -1,4 +1,5 @@ #!/usr/bin/python3 +"""Convert from wiki page schedule to XML schedule.""" import hashlib import re @@ -24,9 +25,7 @@ re_day_heading = re.compile( ) re_time = re.compile(r"^\| ?(\d{2}):(\d{2})") -url = "https://meta.wikimedia.org/wiki/GLAM_Wiki_2023/Program/The_CC_Certificate_for_GLAM:_learn_about_it_by_becoming_part_of_a_human_sculpture_collection" -# re_session = re.compile(r'\| colspan="\d+" rowspan="\d+" \|\[\[(.*)\|(.*)\]\]') re_session = re.compile(r'\| (?:colspan="\d+" )?rowspan="(\d+)" \|(.*)') re_colspan = re.compile(r'\| colspan="(\d+)') @@ -46,16 +45,6 @@ class Session(typing.TypedDict): GroupedSlots = dict[int, dict[str, list[Session]]] -def content_to_url(s: str) -> str: - try: - assert s.startswith("[[GLAM") - except AssertionError: - print(s) - raise - page_title = urllib.parse.quote(s.partition("|")[0][2:].replace(" ", "_")) - return "https://meta.wikimedia.org/wiki/" + page_title - - meta = [ ("title", "GLAM Wiki 2023"), ("subtitle", "Galleries, Libraries, Archives, Museums, etc."), @@ -79,12 +68,20 @@ rooms = [ ] +def content_to_url(s: str) -> str: + """Convert wiki link to URL.""" + assert s.startswith("[[GLAM") + page_title = urllib.parse.quote(s.partition("|")[0][2:].replace(" ", "_")) + return "https://meta.wikimedia.org/wiki/" + page_title + + def minutes_to_duration(mins: int) -> str: """Convert minutes to duration string.""" return f"{mins // 60:02d}:{mins % 60:02d}" def build_event(item: Session, room: lxml.etree._Element) -> None: + """Build an event.""" global event_id """Build an event element.""" if item["name"][0] != "[" or item["name"].startswith("[[Event:"): @@ -115,6 +112,7 @@ def build_event(item: Session, room: lxml.etree._Element) -> None: def read_wikitext(filename: str) -> GroupedSlots: + """Parse schedule wiki page.""" group_by_day_and_room: GroupedSlots = { index: {room: [] for room in rooms} for index in range(1, 5) } @@ -163,7 +161,6 @@ def read_wikitext(filename: str) -> GroupedSlots: t = time(int(m.group(1)), int(m.group(2))) start_time = t col = 1 - # print("time: ", t) expect = "session" continue if expect == "session" and line == "|\n":