Update
This commit is contained in:
parent
f8454fa295
commit
9967fffcf4
23
parse.py
23
parse.py
|
@ -1,4 +1,5 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
"""Convert from wiki page schedule to XML schedule."""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import re
|
import re
|
||||||
|
@ -24,9 +25,7 @@ re_day_heading = re.compile(
|
||||||
)
|
)
|
||||||
|
|
||||||
re_time = re.compile(r"^\| ?(\d{2}):(\d{2})")
|
re_time = re.compile(r"^\| ?(\d{2}):(\d{2})")
|
||||||
url = "https://meta.wikimedia.org/wiki/GLAM_Wiki_2023/Program/The_CC_Certificate_for_GLAM:_learn_about_it_by_becoming_part_of_a_human_sculpture_collection"
|
|
||||||
|
|
||||||
# re_session = re.compile(r'\| colspan="\d+" rowspan="\d+" \|\[\[(.*)\|(.*)\]\]')
|
|
||||||
re_session = re.compile(r'\| (?:colspan="\d+" )?rowspan="(\d+)" \|(.*)')
|
re_session = re.compile(r'\| (?:colspan="\d+" )?rowspan="(\d+)" \|(.*)')
|
||||||
re_colspan = re.compile(r'\| colspan="(\d+)')
|
re_colspan = re.compile(r'\| colspan="(\d+)')
|
||||||
|
|
||||||
|
@ -46,16 +45,6 @@ class Session(typing.TypedDict):
|
||||||
GroupedSlots = dict[int, dict[str, list[Session]]]
|
GroupedSlots = dict[int, dict[str, list[Session]]]
|
||||||
|
|
||||||
|
|
||||||
def content_to_url(s: str) -> str:
|
|
||||||
try:
|
|
||||||
assert s.startswith("[[GLAM")
|
|
||||||
except AssertionError:
|
|
||||||
print(s)
|
|
||||||
raise
|
|
||||||
page_title = urllib.parse.quote(s.partition("|")[0][2:].replace(" ", "_"))
|
|
||||||
return "https://meta.wikimedia.org/wiki/" + page_title
|
|
||||||
|
|
||||||
|
|
||||||
meta = [
|
meta = [
|
||||||
("title", "GLAM Wiki 2023"),
|
("title", "GLAM Wiki 2023"),
|
||||||
("subtitle", "Galleries, Libraries, Archives, Museums, etc."),
|
("subtitle", "Galleries, Libraries, Archives, Museums, etc."),
|
||||||
|
@ -79,12 +68,20 @@ rooms = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def content_to_url(s: str) -> str:
|
||||||
|
"""Convert wiki link to URL."""
|
||||||
|
assert s.startswith("[[GLAM")
|
||||||
|
page_title = urllib.parse.quote(s.partition("|")[0][2:].replace(" ", "_"))
|
||||||
|
return "https://meta.wikimedia.org/wiki/" + page_title
|
||||||
|
|
||||||
|
|
||||||
def minutes_to_duration(mins: int) -> str:
|
def minutes_to_duration(mins: int) -> str:
|
||||||
"""Convert minutes to duration string."""
|
"""Convert minutes to duration string."""
|
||||||
return f"{mins // 60:02d}:{mins % 60:02d}"
|
return f"{mins // 60:02d}:{mins % 60:02d}"
|
||||||
|
|
||||||
|
|
||||||
def build_event(item: Session, room: lxml.etree._Element) -> None:
|
def build_event(item: Session, room: lxml.etree._Element) -> None:
|
||||||
|
"""Build an event."""
|
||||||
global event_id
|
global event_id
|
||||||
"""Build an event element."""
|
"""Build an event element."""
|
||||||
if item["name"][0] != "[" or item["name"].startswith("[[Event:"):
|
if item["name"][0] != "[" or item["name"].startswith("[[Event:"):
|
||||||
|
@ -115,6 +112,7 @@ def build_event(item: Session, room: lxml.etree._Element) -> None:
|
||||||
|
|
||||||
|
|
||||||
def read_wikitext(filename: str) -> GroupedSlots:
|
def read_wikitext(filename: str) -> GroupedSlots:
|
||||||
|
"""Parse schedule wiki page."""
|
||||||
group_by_day_and_room: GroupedSlots = {
|
group_by_day_and_room: GroupedSlots = {
|
||||||
index: {room: [] for room in rooms} for index in range(1, 5)
|
index: {room: [] for room in rooms} for index in range(1, 5)
|
||||||
}
|
}
|
||||||
|
@ -163,7 +161,6 @@ def read_wikitext(filename: str) -> GroupedSlots:
|
||||||
t = time(int(m.group(1)), int(m.group(2)))
|
t = time(int(m.group(1)), int(m.group(2)))
|
||||||
start_time = t
|
start_time = t
|
||||||
col = 1
|
col = 1
|
||||||
# print("time: ", t)
|
|
||||||
expect = "session"
|
expect = "session"
|
||||||
continue
|
continue
|
||||||
if expect == "session" and line == "|\n":
|
if expect == "session" and line == "|\n":
|
||||||
|
|
Loading…
Reference in a new issue