diff --git a/confarchive/model.py b/confarchive/model.py index 70394cf..720037e 100644 --- a/confarchive/model.py +++ b/confarchive/model.py @@ -19,6 +19,12 @@ from .database import session Base: sqlalchemy.orm.decl_api.DeclarativeMeta = declarative_base() Base.query = session.query_property() +content_type_to_extension = { + "image/jpeg": "jpg", + "image/png": "png", + "image/gif": "gif", +} + class TimeStampedModel(Base): """Time stamped model.""" @@ -140,6 +146,7 @@ class ConferencePerson(Base): url = Column(String) affiliation = Column(String) photo_url = Column(String) + photo_url_content_type = Column(String) person = relationship("Person", back_populates="conferences_association") conference = relationship("Conference", back_populates="people_detail") @@ -181,7 +188,6 @@ class Event(TimeStampedModel): people_detail = relationship( "EventPerson", order_by="EventPerson.position", - lazy="dynamic", back_populates="event", collection_class=ordering_list("position"), ) @@ -273,15 +279,21 @@ class Person(TimeStampedModel): return typing.cast(ConferencePerson, best) def photo_filename(self) -> str | None: + """Speaker photo filename.""" if self.wikidata_photo: + assert isinstance(self.wikidata_photo[0], str) return os.path.join("wikidata_photo", "thumb", self.wikidata_photo[0]) - q = self.conferences_association.filter(ConferencePerson.photo_url.isnot(None)) + q = self.conferences_association.filter( + ConferencePerson.photo_url.isnot(None), + ConferencePerson.photo_url_content_type.isnot(None), + ) if q.count() == 0: return None best = max(q, key=lambda cp: cp.conference.start) - ext = best.photo_url.rpartition(".")[-1] + ext = content_type_to_extension[best.photo_url_content_type] + filename = f"{best.conference_id}_{self.id}.{ext}" return os.path.join("conference_photo", filename) diff --git a/confarchive/templates/import/event.html b/confarchive/templates/import/event.html new file mode 100644 index 0000000..e251d70 --- /dev/null +++ 
{# ===== confarchive/templates/import/event.html ===== #}
{% extends "base.html" %}
{#
  Preview of a single event before it is imported.
  Context used below: event, rooms, speakers, talks, person_candidates,
  plural, prefer_en_label.
#}

{% block title %}Import – Conference archive{% endblock %}

{% block content %}
<h1>Import</h1>

<form method="POST">
  <button>run import</button>
</form>

<h2>event</h2>
<div>
{{ event.slug }} –
{{ prefer_en_label(event.name) }}
({{ event.date_from }} to {{ event.date_to }})
</div>


<h2>rooms</h2>
<p>room count: {{ rooms.count }}</p>
<ul>
{% for room in rooms["results"] %}
  <li>{{ prefer_en_label(room.name) }}</li>
{% endfor %}
</ul>

<h2>speakers</h2>
<p>speaker count: {{ speakers | count }}</p>
{% for speaker in speakers %}
  <div>
    {# The id is the target of the per-talk speaker links further down. #}
    <h3 id="{{ speaker.code }}">{{ speaker.name }}</h3>
    {% if speaker.avatar %}
      <div><img src="{{ speaker.avatar }}" style="max-width:200px"></div>
    {% endif %}
    {% if speaker.biography %}
      <blockquote>
      {% for paragraph in speaker.biography.splitlines() %}
        <p>{{ paragraph }}</p>
      {% endfor %}
      </blockquote>
    {% else %}
      <p>No biography</p>
    {% endif %}
    <p>{{ plural(person_candidates[speaker.code].count(), "candidate") }} found</p>
    {% for candidate in person_candidates[speaker.code] %}
      <div><a href="{{ url_for("person", person_id=candidate.id) }}">{{ candidate.name }}</a></div>
    {% endfor %}
  </div>
  {#<pre>{{ speaker | pprint }}</pre> #}
{% endfor %}

<h2>talks</h2>
<p>talk count: {{ talks.count }}</p>
{% for talk in talks["results"] %}
  <div>
    <h3>{{ talk.title }}</h3>
    {% if talk.speakers %}
      <div>speakers:
      {% for speaker in talk.speakers %}
        <a href="#{{ speaker.code }}">{{ speaker.name }}</a> ({{ speaker.code }})
      {% endfor %}
      </div>
    {% else %}
      <div>no speakers</div>
    {% endif %}
    <div>start: {{ talk.slot.start }}</div>
    <div>duration: {{ talk.duration }}</div>
    <div>room: {{ prefer_en_label(talk.slot.room) }}</div>
    <div>track: {{ prefer_en_label(talk.track) }}</div>
    <h4>abstract</h4>
    <div>{{ talk.abstract }}</div>
    <h4>description</h4>
    <div>{{ talk.description }}</div>
  </div>
  {# <pre>{{ talk | pprint }}</pre> #}
{% endfor %}

<form method="POST">
  <button>run import</button>
</form>

{% endblock %}

{# ===== confarchive/templates/import/start.html ===== #}
{% extends "base.html" %}
{#
  Import start page: a URL form, followed by the events found on the chosen
  host when the view supplies them.
  Context used below: events, hostname, prefer_en_label.
#}

{% block title %}Import – Conference archive{% endblock %}

{% block content %}
<h1>Import</h1>

<form>
  <div>
    <label for="url">URL</label>
    <input name="url" id="url" size="60"/>
  </div>
  <button>next</button>
</form>

{% if events %}
<div style="margin-top: 1.5rem">
  {% for event in events %}
    <hr>
    <h3>
      {% set href = url_for("import_event", hostname=hostname, slug=event.slug) %}
      <a href="{{ href }}">
        {{ prefer_en_label(event.name) or "[ name missing ]" }}
      </a>
    </h3>
    <div>
      {{event.slug}} ({{ event.date_from }} to {{ event.date_to }})
    </div>
    {% if event.existing %}
      {% set conf = event.existing %}
      <div>
        Already loaded:
        <a href="{{ url_for("conference_page", short_name=conf.short_name) }}">{{ conf.title }}</a>
      </div>
    {% endif %}
  {% endfor %}
</div>
{% endif %}

{% endblock %}
# User-Agent sent when downloading speaker photos.
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"

# Speaker codes that duplicate another speaker record: duplicate -> canonical.
# NOTE(review): presumably specific to the event this importer was first run
# against; confirm before importing other events.
duplicate_speaker_codes = {"9NX3NE": "3BRWWP"}


@app.route("/import")
def import_start() -> str | Response:
    """Begin import.

    Without a ``url`` query argument the start form is rendered. With one, the
    hostname is extracted and the browser is redirected to the event list for
    that host.
    """
    # Explicit check instead of ``assert``: assert statements are removed when
    # Python runs with -O, which would silently drop the access check.
    if not check_admin_mode():
        flask.abort(403)

    url = flask.request.args.get("url")
    if not url:
        return flask.render_template("import/start.html")

    # The trailing slash is optional, so "https://example.org" is accepted too.
    m = re.match(r"https?://([^/?#]+)", url)
    if not m:
        flask.abort(400, "URL must start with http:// or https://")
    hostname = m.group(1)

    return flask.redirect(flask.url_for("import_hostname", hostname=hostname))


def pretalx_api(hostname: str, path: str) -> typing.Any:
    """Call pretalx API and cache results.

    Responses are stored in ``<DATA_DIR>/import`` in files named
    ``<hostname>_<path>_<timestamp>.json``. A cached response no more than one
    hour old is returned without contacting the server.

    ``set_now()`` must have been called earlier in the request: this function
    reads ``flask.g.now`` and ``flask.g.now_str``.

    Raises:
        requests.HTTPError: the API answered with an error status.
    """
    import_dir = os.path.join(app.config["DATA_DIR"], "import")
    os.makedirs(import_dir, exist_ok=True)
    api_url = f"https://{hostname}/api/{path}"

    cache_start = hostname + "_" + path.strip("/").replace("/", "_")
    # Match the complete timestamp rather than a hard-coded year prefix, so the
    # cache keeps working in any year and a slug that starts with digits
    # cannot be mistaken for a timestamp.
    re_cache_file = re.compile(
        re.escape(cache_start) + r"_(\d{4}-\d\d-\d\d_\d\d:\d\d)\.json"
    )
    cached = []
    for name in os.listdir(import_dir):
        m = re_cache_file.fullmatch(name)
        if m:
            cached.append((m.group(1), name))

    if cached:
        # Zero-padded timestamps sort chronologically as plain strings.
        timestamp, recent_filename = max(cached)
        recent = datetime.strptime(timestamp, "%Y-%m-%d_%H:%M")
        if flask.g.now - recent <= timedelta(hours=1):
            cache_path = os.path.join(import_dir, recent_filename)
            with open(cache_path, encoding="utf-8") as cache_file:
                return json.load(cache_file)

    r = requests.get(api_url, params={"limit": 500}, timeout=30)
    # Fail before writing the cache so an error page is never stored and then
    # served for the next hour.
    r.raise_for_status()
    data = r.json()

    filename = os.path.join(import_dir, f"{cache_start}_{flask.g.now_str}.json")
    with open(filename, "w", encoding="utf-8") as out:
        out.write(r.text)

    return data


def set_now() -> None:
    """Record current datetime in flask globals.

    ``flask.g.now`` is a naive UTC datetime and ``flask.g.now_str`` is the same
    moment formatted to the minute, as used in cache filenames.
    """
    flask.g.now = datetime.utcnow()
    flask.g.now_str = flask.g.now.strftime("%Y-%m-%d_%H:%M")


@app.route("/import/<hostname>")
def import_hostname(hostname: str) -> str:
    """Import from hostname.

    Lists the events offered by the pretalx instance at *hostname* and marks
    those that match an existing conference by short name or by title.
    """
    if not check_admin_mode():
        flask.abort(403)
    set_now()
    events = pretalx_api(hostname, "events/")

    slugs = [event["slug"] for event in events]
    titles = [prefer_en_label(event["name"]) for event in events]
    slug_lookup = {event["slug"]: event for event in events}
    name_lookup = {prefer_en_label(event["name"]): event for event in events}

    q = model.Conference.query.filter(
        or_(model.Conference.short_name.in_(slugs), model.Conference.title.in_(titles))
    )

    # Attach the matching conference to the event dict for the template.
    for conf in q:
        slug = conf.short_name
        if slug in slug_lookup:
            slug_lookup[slug]["existing"] = conf
        if conf.title in name_lookup:
            name_lookup[conf.title]["existing"] = conf

    return flask.render_template(
        "import/start.html",
        events=events,
        hostname=hostname,
        prefer_en_label=prefer_en_label,
    )


def run_import(
    event: dict[str, typing.Any],
    speakers: list[dict[str, typing.Any]],
    talks: list[dict[str, typing.Any]],
    speaker_lookup: dict[str, typing.Any],
) -> model.Conference:
    """Create a conference, its people and its events from pretalx data.

    Args:
        event: pretalx event record (``slug``, ``name``, ``date_from``,
            ``date_to`` are read).
        speakers: pretalx speaker records (``code``, ``name``, ``avatar``).
        talks: pretalx talk records; talks without speakers are skipped.
        speaker_lookup: speaker code -> query of existing ``Person`` rows that
            may be the same person.

    Returns:
        The newly created conference, after the session has been committed.
    """
    conf = model.Conference(
        short_name=event["slug"],
        title=prefer_en_label(event["name"]),
        start=event["date_from"],
        end=event["date_to"],
    )
    database.session.add(conf)

    code_to_speaker = {}
    for speaker in speakers:
        code, name, photo_url = speaker["code"], speaker["name"], speaker["avatar"]
        if code in duplicate_speaker_codes:
            continue

        # One query instead of count() followed by one(). As before, more than
        # one candidate raises MultipleResultsFound.
        # NOTE(review): ambiguous matches therefore abort the import - confirm
        # that this is the intended way to force manual disambiguation.
        person = speaker_lookup[code].one_or_none()
        if person is None:
            person = model.Person(name=name)
            database.session.add(person)
        code_to_speaker[code] = person

        cp = model.ConferencePerson(
            person=person,
            conference=conf,
            named_as=name,
            photo_url=photo_url or None,
        )
        database.session.add(cp)

    for talk in talks:
        if not talk["speakers"]:
            continue

        # Resolve duplicate speaker codes to their canonical record.
        speaker_codes = [
            duplicate_speaker_codes.get(s["code"], s["code"]) for s in talk["speakers"]
        ]

        # Characters 11-15 of the slot start are taken as the HH:MM time.
        start_time = talk["slot"]["start"][11:16]
        assert re.match(r"\d\d:\d\d", start_time)
        # Named talk_event so the ``event`` parameter is not shadowed.
        talk_event = model.Event(
            title=talk["title"],
            conference=conf,
            event_date=talk["slot"]["start"],
            duration=talk["duration"],
            room=prefer_en_label(talk["slot"]["room"]),
            track=prefer_en_label(talk["track"]),
            abstract=talk["abstract"],
            description=talk["description"],
            start=start_time,
            people=[code_to_speaker[code] for code in speaker_codes],
        )
        database.session.add(talk_event)

    database.session.commit()

    return conf


def _download_speaker_photos(conf: model.Conference) -> None:
    """Download the speaker photos of *conf* and record their content types.

    This step was previously a disabled ``if False:`` section inside
    ``run_import`` with a hard-coded conference. It is still not called
    automatically; invoke it explicitly once an import has been committed.
    """
    q = model.ConferencePerson.query.filter(
        model.ConferencePerson.conference == conf,
        model.ConferencePerson.photo_url.isnot(None),
    )

    photo_dir = "confarchive/static/conference_photo"
    web_session = requests.Session()
    web_session.headers.update({"User-Agent": user_agent})

    for cp in q:
        if not cp.photo_url:
            continue
        r = web_session.get(cp.photo_url)
        content_type = r.headers.get("Content-Type")
        assert content_type in model.content_type_to_extension
        image_ext = model.content_type_to_extension[content_type]
        photo_filename = f"{conf.id}_{cp.person_id}.{image_ext}"
        full_photo = os.path.join(photo_dir, photo_filename)

        with open(full_photo, "wb") as out:
            out.write(r.content)
        cp.photo_url_content_type = content_type

    database.session.commit()


def prefer_en_label(labels: dict[str, str] | None) -> str | None:
    """Pick a display string from a language -> label mapping.

    Returns ``None`` for ``None``, the ``"en"`` label when present, the only
    label when there is exactly one, and otherwise all labels joined as
    ``"lang: label / lang: label"``.
    """
    if labels is None:
        return None
    if "en" in labels:
        return labels["en"]
    if len(labels) == 1:
        return next(iter(labels.values()))
    return " / ".join(f"{lang}: {label}" for lang, label in labels.items())


def find_matching_name(name: str) -> typing.Any:
    """People with a matching name.

    Returns a ``Person`` query: exact matches when there are any, otherwise
    case-insensitive matches on the name parts in order. A name of at most one
    word that matches several people that way is treated as too ambiguous and
    the empty exact-match query is returned instead.
    """
    name_parts = name.split()
    q1 = model.Person.query.filter(model.Person.name == name)
    if q1.count():
        return q1
    name_pattern = "%" + "%".join(name_parts) + "%"
    q2 = model.Person.query.filter(model.Person.name.ilike(name_pattern))
    # "<= 1" also covers an empty name, whose pattern "%%" matches everybody.
    if len(name_parts) <= 1 and q2.count() > 1:
        return q1
    return q2


@app.route("/import/<hostname>/<slug>", methods=["GET", "POST"])
def import_event(hostname: str, slug: str) -> str | Response:
    """Import event.

    GET renders a preview of the event, its rooms, speakers and talks. POST
    runs the import and redirects to the new conference page.
    """
    # POST writes to the database, so guard this view like the other import
    # views.
    if not check_admin_mode():
        flask.abort(403)
    set_now()

    event = pretalx_api(hostname, f"events/{slug}")
    rooms = pretalx_api(hostname, f"events/{slug}/rooms")
    speakers = pretalx_api(hostname, f"events/{slug}/speakers")
    talks = pretalx_api(hostname, f"events/{slug}/talks")

    # Only a single page of results is fetched; refuse to continue with a
    # partial list. Raised explicitly because asserts disappear under -O.
    for label, page in (("speakers", speakers), ("talks", talks)):
        if len(page["results"]) != page["count"]:
            raise RuntimeError(
                f"pretalx returned {len(page['results'])} of {page['count']} {label}"
            )

    all_talk_speakers: set[str] = set()
    for talk in talks["results"]:
        all_talk_speakers.update(speaker["code"] for speaker in talk["speakers"])

    person_candidates = {
        speaker["code"]: find_matching_name(speaker["name"])
        for speaker in speakers["results"]
    }

    if flask.request.method == "GET":
        return flask.render_template(
            "import/event.html",
            hostname=hostname,
            slug=slug,
            event=event,
            rooms=rooms,
            speakers=[s for s in speakers["results"] if s["code"] in all_talk_speakers],
            talks=talks,
            person_candidates=person_candidates,
            plural=utils.plural,
            prefer_en_label=prefer_en_label,
        )

    conf = run_import(event, speakers["results"], talks["results"], person_candidates)

    return flask.redirect(flask.url_for("conference_page", short_name=conf.short_name))