diff --git a/confarchive/model.py b/confarchive/model.py
index 70394cf..720037e 100644
--- a/confarchive/model.py
+++ b/confarchive/model.py
@@ -19,6 +19,12 @@ from .database import session
Base: sqlalchemy.orm.decl_api.DeclarativeMeta = declarative_base()
Base.query = session.query_property()
# Maps an image MIME type to the file extension used when building photo
# filenames.  Only the types listed here are recognised.
content_type_to_extension = {
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
}
+
class TimeStampedModel(Base):
"""Time stamped model."""
@@ -140,6 +146,7 @@ class ConferencePerson(Base):
url = Column(String)
affiliation = Column(String)
photo_url = Column(String)
+ photo_url_content_type = Column(String)
person = relationship("Person", back_populates="conferences_association")
conference = relationship("Conference", back_populates="people_detail")
@@ -181,7 +188,6 @@ class Event(TimeStampedModel):
people_detail = relationship(
"EventPerson",
order_by="EventPerson.position",
- lazy="dynamic",
back_populates="event",
collection_class=ordering_list("position"),
)
@@ -273,15 +279,21 @@ class Person(TimeStampedModel):
return typing.cast(ConferencePerson, best)
def photo_filename(self) -> str | None:
+ """Speaker photo filename."""
if self.wikidata_photo:
+ assert isinstance(self.wikidata_photo[0], str)
return os.path.join("wikidata_photo", "thumb", self.wikidata_photo[0])
- q = self.conferences_association.filter(ConferencePerson.photo_url.isnot(None))
+ q = self.conferences_association.filter(
+ ConferencePerson.photo_url.isnot(None),
+ ConferencePerson.photo_url_content_type.isnot(None),
+ )
if q.count() == 0:
return None
best = max(q, key=lambda cp: cp.conference.start)
- ext = best.photo_url.rpartition(".")[-1]
+ ext = content_type_to_extension[best.photo_url_content_type]
+
filename = f"{best.conference_id}_{self.id}.{ext}"
return os.path.join("conference_photo", filename)
diff --git a/confarchive/templates/import/event.html b/confarchive/templates/import/event.html
new file mode 100644
index 0000000..e251d70
--- /dev/null
+++ b/confarchive/templates/import/event.html
@@ -0,0 +1,83 @@
+{% extends "base.html" %}
+
+{% block title %}Import – Conference archive{% endblock %}
+
+{% block content %}
+
Import
+
+
+
+event
+
+{{ event.slug }} –
+{{ prefer_en_label(event.name) }}
+({{ event.date_from }} to {{ event.date_to }})
+
+
+
+rooms
+room count: {{ rooms.count }}
+
+{% for room in rooms["results"] %}
+ - {{ prefer_en_label(room.name) }}
+{% endfor %}
+
+
+speakers
+speaker count: {{ speakers | count }}
+{% for speaker in speakers %}
+
+
{{ speaker.name }}
+ {% if speaker.avatar %}
+
+ {% endif %}
+ {% if speaker.biography %}
+
+ {% for paragraph in speaker.biography.splitlines() %}
+ {{ paragraph }}
+ {% endfor %}
+
+ {% else %}
+
No biography
+ {% endif %}
+
{{ plural(person_candidates[speaker.code].count(), "candidate") }} found
+ {% for candidate in person_candidates[speaker.code] %}
+
+ {% endfor %}
+
+ {#{{ speaker | pprint }}
#}
+{% endfor %}
+
+talks
+talk count: {{ talks.count }}
+{% for talk in talks["results"] %}
+
+
{{ talk.title }}
+ {% if talk.speakers %}
+
speakers:
+ {% for speaker in talk.speakers %}
+
{{ speaker.name }} ({{ speaker.code }})
+ {% endfor %}
+
+ {% else %}
+
no speakers
+ {% endif %}
+
start: {{ talk.slot.start }}
+
duration: {{ talk.duration }}
+
room: {{ prefer_en_label(talk.slot.room) }}
+
track: {{ prefer_en_label(talk.track) }}
+
abstract
+
{{ talk.abstract }}
+ description
+
{{ talk.description }}
+
+ {# {{ talk | pprint }}
#}
+{% endfor %}
+
+
+
+{% endblock %}
diff --git a/confarchive/templates/import/start.html b/confarchive/templates/import/start.html
new file mode 100644
index 0000000..38135fc
--- /dev/null
+++ b/confarchive/templates/import/start.html
@@ -0,0 +1,40 @@
+{% extends "base.html" %}
+
+{% block title %}Import – Conference archive{% endblock %}
+
+{% block content %}
+Import
+
+
+
+{% if events %}
+
+ {% for event in events %}
+
+
+
+ {{event.slug}} ({{ event.date_from }} to {{ event.date_to }})
+
+ {% if event.existing %}
+ {% set conf = event.existing %}
+
+ {% endif %}
+ {% endfor %}
+
+{% endif %}
+
+{% endblock %}
diff --git a/confarchive/templates/person.html b/confarchive/templates/person.html
index 520b496..a26c1ff 100644
--- a/confarchive/templates/person.html
+++ b/confarchive/templates/person.html
@@ -154,7 +154,7 @@
{% endif %}
{% endif %}
- {% if event.people_detail.count() > 1 %}
+ {% if event.people_detail | count > 1 %}
Other people:
{% for p in event.people %}
diff --git a/confarchive/view.py b/confarchive/view.py
index ea8cdc4..faeac65 100644
--- a/confarchive/view.py
+++ b/confarchive/view.py
@@ -1,12 +1,17 @@
"""Flask views."""
+import json
import os
+import re
+import typing
+from datetime import datetime, timedelta
import flask
+import requests
from sqlalchemy import func, or_, update
from werkzeug.wrappers import Response
-from confarchive import database, model, wikidata, query, utils
+from confarchive import database, model, query, utils, wikidata
app = flask.Flask(__name__)
app.debug = True
@@ -14,6 +19,8 @@ app.debug = True
app.config.from_object("config.default")
database.init_app(app)
# User-Agent header value sent by the HTTP session that downloads speaker
# photos during an import.
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
+
@app.route("/person/", methods=["GET", "POST"])
def person(person_id: int) -> str | Response:
@@ -425,6 +432,251 @@ def github_wikidata() -> str:
return flask.render_template("github.html", items=items)
@app.route("/import")
def import_start() -> str | Response:
    """Show the import form, or redirect to the event list of a pretalx site.

    Without a ``url`` query argument the start page is rendered.  Otherwise
    the hostname is extracted from the URL and the client is redirected to
    the ``import_hostname`` view.  Responds with 403 when
    ``check_admin_mode()`` is falsy and with 400 when no hostname can be
    found in the URL.
    """
    # Checked explicitly rather than with ``assert``: assertions are removed
    # when Python runs with -O, which would leave this view unguarded.
    if not check_admin_mode():
        flask.abort(403)

    url = flask.request.args.get("url")
    if not url:
        return flask.render_template("import/start.html")

    # Anything after the hostname is optional, so a bare
    # "https://pretalx.example.org" is accepted as well as a full page URL.
    m = re.match(r"https?://([^/?#]+)", url.strip())
    if not m:
        flask.abort(400, description="url must start with http:// or https://")

    return flask.redirect(flask.url_for("import_hostname", hostname=m.group(1)))
+
+
def pretalx_api(hostname: str, path: str) -> typing.Any:
    """Call the pretalx API and cache the JSON response on disk.

    Responses are stored in the ``import`` directory below
    ``app.config["DATA_DIR"]`` as ``<hostname>_<path>_<timestamp>.json``.
    A cached response no more than one hour older than ``flask.g.now`` is
    returned without contacting the server.

    ``flask.g.now`` and ``flask.g.now_str`` must already be set (see
    ``set_now``).

    Args:
        hostname: host name of the pretalx site.
        path: API path below ``/api/``.

    Returns:
        The decoded JSON document.

    Raises:
        requests.RequestException: the request failed, timed out or returned
            an error status.
    """
    import_dir = os.path.join(app.config["DATA_DIR"], "import")
    os.makedirs(import_dir, exist_ok=True)
    api_url = f"https://{hostname}/api/{path}"

    cache_start = hostname + "_" + path.strip("/").replace("/", "_")
    # Accept exactly "<cache_start>_<timestamp>.json".  Matching the timestamp
    # shape (rather than a fixed year) keeps the cache working in any year and
    # stops a longer path that shares this prefix from being picked up.
    cache_file_re = re.compile(
        re.escape(cache_start) + r"_(\d{4}-\d\d-\d\d_\d\d:\d\d)\.json"
    )
    cached: list[tuple[datetime, str]] = []
    for name in os.listdir(import_dir):
        m = cache_file_re.fullmatch(name)
        if not m:
            continue
        try:
            when = datetime.strptime(m.group(1), "%Y-%m-%d_%H:%M")
        except ValueError:
            continue  # digits in the right shape but not a real date
        cached.append((when, name))

    if cached:
        recent, recent_filename = max(cached)
        if flask.g.now - recent <= timedelta(hours=1):
            cache_path = os.path.join(import_dir, recent_filename)
            with open(cache_path, encoding="utf-8") as cache_file:
                return typing.cast(typing.Any, json.load(cache_file))

    filename = os.path.join(import_dir, f"{cache_start}_{flask.g.now_str}.json")
    r = requests.get(api_url, params={"limit": 500}, timeout=30)
    r.raise_for_status()
    # Decode before writing so an unusable response is never cached.
    data = r.json()
    with open(filename, "w", encoding="utf-8") as out:
        out.write(r.text)

    return typing.cast(typing.Any, data)
+
+
def set_now() -> None:
    """Record the current UTC time in flask globals.

    Sets ``flask.g.now`` to a naive datetime holding the current UTC time and
    ``flask.g.now_str`` to the same instant formatted as ``YYYY-MM-DD_HH:MM``.
    """
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12.  The tzinfo is dropped
    # so the value stays naive, exactly as before, and can still be subtracted
    # from naive datetimes parsed with strptime.
    now = datetime.now(timezone.utc).replace(tzinfo=None)
    flask.g.now = now
    flask.g.now_str = now.strftime("%Y-%m-%d_%H:%M")
+
+
@app.route("/import/<hostname>")
def import_hostname(hostname: str) -> str:
    """List the events of a pretalx site, marking those already archived.

    Each event returned by the API whose slug equals the short name of an
    existing conference, or whose preferred label equals its title, gets that
    conference stored under the ``"existing"`` key before the template is
    rendered.  Responds with 403 when ``check_admin_mode()`` is falsy.
    """
    # Explicit check: an ``assert`` would be stripped under -O.
    if not check_admin_mode():
        flask.abort(403)
    set_now()
    events = pretalx_api(hostname, "events/")

    slug_lookup = {event["slug"]: event for event in events}
    name_lookup = {prefer_en_label(event["name"]): event for event in events}

    q = model.Conference.query.filter(
        or_(
            model.Conference.short_name.in_(list(slug_lookup)),
            model.Conference.title.in_(list(name_lookup)),
        )
    )

    for conf in q:
        if conf.short_name in slug_lookup:
            slug_lookup[conf.short_name]["existing"] = conf
        if conf.title in name_lookup:
            name_lookup[conf.title]["existing"] = conf

    return flask.render_template(
        "import/start.html",
        events=events,
        hostname=hostname,
        prefer_en_label=prefer_en_label,
    )
+
+
def run_import(
    event: dict[str, typing.Any],
    speakers: list[dict[str, typing.Any]],
    talks: list[dict[str, typing.Any]],
    speaker_lookup: dict[str, typing.Any],
) -> model.Conference:
    """Create a conference with its people, events and photos from pretalx data.

    Args:
        event: pretalx event record; ``slug``, ``name``, ``date_from`` and
            ``date_to`` are read.
        speakers: pretalx speaker records; ``code``, ``name`` and ``avatar``
            are read.
        talks: pretalx talk records.  Talks without speakers are skipped.
        speaker_lookup: maps a speaker code to a query of existing people
            that match the speaker.  A single match is reused, no match
            creates a new person, and several matches propagate the error
            raised by ``one_or_none()``.

    Returns:
        The newly created conference.

    Raises:
        KeyError: a talk names a speaker code missing from ``speakers``.
        ValueError: a talk slot start has no ``HH:MM`` time at the expected
            position.
    """
    # NOTE(review): carried over unchanged from the original code; presumably
    # the first code is a duplicate account of the second speaker in one
    # specific event -- confirm before importing other events.
    merged_codes = {"9NX3NE": "3BRWWP"}

    conf = model.Conference(
        short_name=event["slug"],
        title=prefer_en_label(event["name"]),
        start=event["date_from"],
        end=event["date_to"],
    )
    database.session.add(conf)

    code_to_speaker = {}
    for speaker in speakers:
        code, name, photo_url = speaker["code"], speaker["name"], speaker["avatar"]
        if code in merged_codes:
            continue

        # One query instead of count() followed by one().
        person = speaker_lookup[code].one_or_none()
        if person is None:
            person = model.Person(name=name)
            database.session.add(person)
        code_to_speaker[code] = person

        cp = model.ConferencePerson(
            person=person,
            conference=conf,
            named_as=name,
            photo_url=photo_url or None,
        )
        database.session.add(cp)

    for talk in talks:
        if not talk["speakers"]:
            continue

        # Resolve merged codes at lookup time; the caller's data is not changed.
        people = [
            code_to_speaker[merged_codes.get(s["code"], s["code"])]
            for s in talk["speakers"]
        ]

        start_time = talk["slot"]["start"][11:16]
        if not re.fullmatch(r"\d\d:\d\d", start_time):
            raise ValueError(f"unexpected slot start: {talk['slot']['start']!r}")

        # Named talk_event so the ``event`` argument is not shadowed.
        talk_event = model.Event(
            title=talk["title"],
            conference=conf,
            event_date=talk["slot"]["start"],
            duration=talk["duration"],
            room=prefer_en_label(talk["slot"]["room"]),
            track=prefer_en_label(talk["track"]),
            abstract=talk["abstract"],
            description=talk["description"],
            start=start_time,
            people=people,
        )
        database.session.add(talk_event)

    database.session.commit()

    q = model.ConferencePerson.query.filter(
        model.ConferencePerson.conference == conf,
        model.ConferencePerson.photo_url.isnot(None),
    )

    photo_dir = "confarchive/static/conference_photo"
    os.makedirs(photo_dir, exist_ok=True)

    with requests.Session() as web_session:
        web_session.headers.update({"User-Agent": user_agent})

        for cp in q:
            if not cp.photo_url:
                continue
            # The conference is already committed, so a photo that cannot be
            # fetched or has an unsupported type is skipped, not fatal.
            try:
                r = web_session.get(cp.photo_url, timeout=30)
                r.raise_for_status()
            except requests.RequestException:
                app.logger.warning(
                    "photo download failed: %s", cp.photo_url, exc_info=True
                )
                continue

            # Drop any parameters such as "; charset=..." before the lookup.
            header = r.headers.get("Content-Type", "")
            content_type = header.partition(";")[0].strip().lower()
            image_ext = model.content_type_to_extension.get(content_type)
            if image_ext is None:
                app.logger.warning(
                    "unsupported photo content type %r: %s", header, cp.photo_url
                )
                continue

            photo_filename = f"{conf.id}_{cp.person_id}.{image_ext}"
            full_photo = os.path.join(photo_dir, photo_filename)
            with open(full_photo, "wb") as out:
                out.write(r.content)
            cp.photo_url_content_type = content_type

    database.session.commit()

    return conf
+
+
+def prefer_en_label(labels: dict[str, str] | None) -> str | None:
+ if labels is None:
+ return None
+ if "en" in labels:
+ return labels["en"]
+ if len(labels) == 1:
+ return list(labels.values())[0]
+ else:
+ return " / ".join(f"{lang}: {label}" for lang, label in labels.items())
+
+
def find_matching_name(name: str):
    """Return a query of people whose name matches *name*.

    An exact match on the full name wins.  Otherwise the name is split on
    whitespace and matched case-insensitively against a pattern that allows
    any text before, between and after the parts.  A single-word name that
    matches more than one person this way is considered too ambiguous, and
    the (empty) exact-match query is returned instead.
    """
    exact = model.Person.query.filter(model.Person.name == name)
    name_parts = name.split()
    # With no parts the pattern would be "%%", which matches every person.
    if exact.count() or not name_parts:
        return exact

    def like_escape(part: str) -> str:
        """Escape LIKE wildcards so they only match themselves."""
        return part.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")

    name_pattern = "%" + "%".join(like_escape(part) for part in name_parts) + "%"
    fuzzy = model.Person.query.filter(
        model.Person.name.ilike(name_pattern, escape="\\")
    )
    if len(name_parts) == 1 and fuzzy.count() > 1:
        return exact
    return fuzzy
+
+
@app.route("/import/<hostname>/<slug>", methods=["GET", "POST"])
def import_event(hostname: str, slug: str) -> str | Response:
    """Preview (GET) or run (POST) the import of one pretalx event.

    GET renders the event with its rooms, talks, the speakers that appear in
    at least one talk, and the existing people found for every speaker.
    POST runs the import and redirects to the page of the new conference.
    Responds with 403 when ``check_admin_mode()`` is falsy.

    Raises:
        RuntimeError: the speaker or talk list returned by the API is
            incomplete, i.e. holds fewer results than its reported count.
    """
    # The sibling import views are guarded the same way; this one writes to
    # the database on POST, so it needs the check as well.
    if not check_admin_mode():
        flask.abort(403)
    set_now()

    event = pretalx_api(hostname, f"events/{slug}")
    rooms = pretalx_api(hostname, f"events/{slug}/rooms")
    speakers = pretalx_api(hostname, f"events/{slug}/speakers")
    talks = pretalx_api(hostname, f"events/{slug}/talks")

    # Raised explicitly rather than asserted so the check survives -O.
    for label, page in (("speakers", speakers), ("talks", talks)):
        if len(page["results"]) != page["count"]:
            raise RuntimeError(
                f"incomplete {label} list: got {len(page['results'])} "
                f"of {page['count']}"
            )

    all_talk_speakers: set[str] = set()
    for talk in talks["results"]:
        all_talk_speakers.update(speaker["code"] for speaker in talk["speakers"])

    person_candidates = {
        speaker["code"]: find_matching_name(speaker["name"])
        for speaker in speakers["results"]
    }

    if flask.request.method == "GET":
        return flask.render_template(
            "import/event.html",
            hostname=hostname,
            slug=slug,
            event=event,
            rooms=rooms,
            speakers=[s for s in speakers["results"] if s["code"] in all_talk_speakers],
            talks=talks,
            person_candidates=person_candidates,
            plural=utils.plural,
            prefer_en_label=prefer_en_label,
        )

    conf = run_import(event, speakers["results"], talks["results"], person_candidates)

    return flask.redirect(flask.url_for("conference_page", short_name=conf.short_name))
+
+
@app.route("/reports")
def reports_page() -> str:
"""Page showing statistics."""