Code for importing data from pretalx
This commit is contained in:
parent
1001c51bc1
commit
e48acab387
|
@ -19,6 +19,12 @@ from .database import session
|
||||||
Base: sqlalchemy.orm.decl_api.DeclarativeMeta = declarative_base()
|
Base: sqlalchemy.orm.decl_api.DeclarativeMeta = declarative_base()
|
||||||
Base.query = session.query_property()
|
Base.query = session.query_property()
|
||||||
|
|
||||||
|
# Image MIME type -> file extension used when building conference photo
# filenames.
content_type_to_extension = {
    "image/jpeg": "jpg",
    "image/png": "png",
    "image/gif": "gif",
}
|
||||||
|
|
||||||
|
|
||||||
class TimeStampedModel(Base):
|
class TimeStampedModel(Base):
|
||||||
"""Time stamped model."""
|
"""Time stamped model."""
|
||||||
|
@ -140,6 +146,7 @@ class ConferencePerson(Base):
|
||||||
url = Column(String)
|
url = Column(String)
|
||||||
affiliation = Column(String)
|
affiliation = Column(String)
|
||||||
photo_url = Column(String)
|
photo_url = Column(String)
|
||||||
|
photo_url_content_type = Column(String)
|
||||||
|
|
||||||
person = relationship("Person", back_populates="conferences_association")
|
person = relationship("Person", back_populates="conferences_association")
|
||||||
conference = relationship("Conference", back_populates="people_detail")
|
conference = relationship("Conference", back_populates="people_detail")
|
||||||
|
@ -181,7 +188,6 @@ class Event(TimeStampedModel):
|
||||||
people_detail = relationship(
|
people_detail = relationship(
|
||||||
"EventPerson",
|
"EventPerson",
|
||||||
order_by="EventPerson.position",
|
order_by="EventPerson.position",
|
||||||
lazy="dynamic",
|
|
||||||
back_populates="event",
|
back_populates="event",
|
||||||
collection_class=ordering_list("position"),
|
collection_class=ordering_list("position"),
|
||||||
)
|
)
|
||||||
|
@ -273,15 +279,21 @@ class Person(TimeStampedModel):
|
||||||
return typing.cast(ConferencePerson, best)
|
return typing.cast(ConferencePerson, best)
|
||||||
|
|
||||||
def photo_filename(self) -> str | None:
    """Return the relative path of the speaker photo, or None.

    A Wikidata photo takes priority. Otherwise the photo belonging to the
    conference with the latest start is used, considering only conference
    associations that have both a photo URL and a content type with a
    known file extension.
    """
    if self.wikidata_photo:
        assert isinstance(self.wikidata_photo[0], str)
        return os.path.join("wikidata_photo", "thumb", self.wikidata_photo[0])

    q = self.conferences_association.filter(
        ConferencePerson.photo_url.isnot(None),
        ConferencePerson.photo_url_content_type.isnot(None),
    )
    # Run the query once (instead of count() followed by iteration) and
    # ignore rows whose content type has no known extension, which would
    # otherwise raise KeyError below.
    candidates = [
        cp for cp in q if cp.photo_url_content_type in content_type_to_extension
    ]
    if not candidates:
        return None

    best = max(candidates, key=lambda cp: cp.conference.start)
    ext = content_type_to_extension[best.photo_url_content_type]

    filename = f"{best.conference_id}_{self.id}.{ext}"
    return os.path.join("conference_photo", filename)
|
||||||
|
|
||||||
|
|
83
confarchive/templates/import/event.html
Normal file
83
confarchive/templates/import/event.html
Normal file
|
@ -0,0 +1,83 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Import – Conference archive{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h1>Import</h1>
|
||||||
|
|
||||||
|
<form method="POST">
|
||||||
|
<button>run import</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<h2>event</h2>
|
||||||
|
<div>
|
||||||
|
{{ event.slug }} –
|
||||||
|
{{ prefer_en_label(event.name) }}
|
||||||
|
({{ event.date_from }} to {{ event.date_to }})
|
||||||
|
</div>
|
||||||
|
|
||||||
|
|
||||||
|
<h2>rooms</h2>
|
||||||
|
<p>room count: {{ rooms.count }}</p>
|
||||||
|
<ul>
|
||||||
|
{% for room in rooms["results"] %}
|
||||||
|
<li>{{ prefer_en_label(room.name) }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<h2>speakers</h2>
|
||||||
|
<p>speaker count: {{ speakers | count }}</p>
|
||||||
|
{% for speaker in speakers %}
|
||||||
|
<div>
|
||||||
|
<h3 id="{{ speaker.code }}">{{ speaker.name }}</h3>
|
||||||
|
{% if speaker.avatar %}
|
||||||
|
<div><img src="{{ speaker.avatar }}" style="max-width:200px"></div>
|
||||||
|
{% endif %}
|
||||||
|
{% if speaker.biography %}
|
||||||
|
<blockquote>
|
||||||
|
{% for paragraph in speaker.biography.splitlines() %}
|
||||||
|
<p>{{ paragraph }}</p>
|
||||||
|
{% endfor %}
|
||||||
|
</blockquote>
|
||||||
|
{% else %}
|
||||||
|
<p>No biography</p>
|
||||||
|
{% endif %}
|
||||||
|
<p>{{ plural(person_candidates[speaker.code].count(), "candidate") }} found</p>
|
||||||
|
{% for candidate in person_candidates[speaker.code] %}
|
||||||
|
<div><a href="{{ url_for("person", person_id=candidate.id) }}">{{ candidate.name }}</a></div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{#<pre>{{ speaker | pprint }}</pre> #}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<h2>talks</h2>
|
||||||
|
<p>talk count: {{ talks.count }}</p>
|
||||||
|
{% for talk in talks["results"] %}
|
||||||
|
<div>
|
||||||
|
<h3>{{ talk.title }}</h3>
|
||||||
|
{% if talk.speakers %}
|
||||||
|
<div>speakers:
|
||||||
|
{% for speaker in talk.speakers %}
|
||||||
|
<a href="#{{ speaker.code }}">{{ speaker.name }}</a> ({{ speaker.code }})
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div>no speakers</div>
|
||||||
|
{% endif %}
|
||||||
|
<div>start: {{ talk.slot.start }}</div>
|
||||||
|
<div>duration: {{ talk.duration }}</div>
|
||||||
|
<div>room: {{ prefer_en_label(talk.slot.room) }}</div>
|
||||||
|
<div>track: {{ prefer_en_label(talk.track) }}</div>
|
||||||
|
<h4>abstract</h4>
|
||||||
|
<div>{{ talk.abstract }}</div>
|
||||||
|
<h4>description</h4>
|
||||||
|
<div>{{ talk.description }}</div>
|
||||||
|
</div>
|
||||||
|
{# <pre>{{ talk | pprint }}</pre> #}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
<form method="POST">
|
||||||
|
<button>run import</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
{% endblock %}
|
40
confarchive/templates/import/start.html
Normal file
40
confarchive/templates/import/start.html
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Import – Conference archive{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h1>Import</h1>
|
||||||
|
|
||||||
|
<form>
|
||||||
|
<div>
|
||||||
|
<label for="url">URL</label>
|
||||||
|
<input name="url" id="url" size="60"/>
|
||||||
|
</div>
|
||||||
|
<button>next</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
{% if events %}
|
||||||
|
<div style="margin-top: 1.5rem">
|
||||||
|
{% for event in events %}
|
||||||
|
<hr>
|
||||||
|
<h3>
|
||||||
|
{% set href = url_for("import_event", hostname=hostname, slug=event.slug) %}
|
||||||
|
<a href="{{ href }}">
|
||||||
|
{{ prefer_en_label(event.name) or "[ name missing ]" }}
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<div>
|
||||||
|
{{event.slug}} ({{ event.date_from }} to {{ event.date_to }})
|
||||||
|
</div>
|
||||||
|
{% if event.existing %}
|
||||||
|
{% set conf = event.existing %}
|
||||||
|
<div>
|
||||||
|
Already loaded:
|
||||||
|
<a href="{{ url_for("conference_page", short_name=conf.short_name) }}">{{ conf.title }}</a>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% endblock %}
|
|
@ -154,7 +154,7 @@
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if event.people_detail.count() > 1 %}
|
{% if event.people_detail | count > 1 %}
|
||||||
<div>
|
<div>
|
||||||
Other people:
|
Other people:
|
||||||
{% for p in event.people %}
|
{% for p in event.people %}
|
||||||
|
|
|
@ -1,12 +1,17 @@
|
||||||
"""Flask views."""
|
"""Flask views."""
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
import typing
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
|
import requests
|
||||||
from sqlalchemy import func, or_, update
|
from sqlalchemy import func, or_, update
|
||||||
from werkzeug.wrappers import Response
|
from werkzeug.wrappers import Response
|
||||||
|
|
||||||
from confarchive import database, model, wikidata, query, utils
|
from confarchive import database, model, query, utils, wikidata
|
||||||
|
|
||||||
app = flask.Flask(__name__)
|
app = flask.Flask(__name__)
|
||||||
app.debug = True
|
app.debug = True
|
||||||
|
@ -14,6 +19,8 @@ app.debug = True
|
||||||
app.config.from_object("config.default")
|
app.config.from_object("config.default")
|
||||||
database.init_app(app)
|
database.init_app(app)
|
||||||
|
|
||||||
|
# Value sent as the User-Agent header on outgoing HTTP requests.
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
|
||||||
|
|
||||||
|
|
||||||
@app.route("/person/<int:person_id>", methods=["GET", "POST"])
|
@app.route("/person/<int:person_id>", methods=["GET", "POST"])
|
||||||
def person(person_id: int) -> str | Response:
|
def person(person_id: int) -> str | Response:
|
||||||
|
@ -425,6 +432,251 @@ def github_wikidata() -> str:
|
||||||
return flask.render_template("github.html", items=items)
|
return flask.render_template("github.html", items=items)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/import")
def import_start() -> str | Response:
    """Begin import.

    Without a ``url`` query argument the start form is rendered. Otherwise
    the hostname is extracted from the URL and the browser is redirected to
    the per-hostname import page. A URL that does not start with
    ``http://`` or ``https://`` results in a 400 response.
    """
    assert check_admin_mode()
    url = flask.request.args.get("url", "").strip()
    if not url:
        return flask.render_template("import/start.html")

    # The path is optional so "https://example.org" is accepted as well as
    # "https://example.org/".
    m = re.match(r"https?://([^/?#]+)", url)
    if not m:
        # Malformed user input is a client error rather than a server crash.
        flask.abort(400, "URL must start with http:// or https://")
    hostname = m.group(1)

    return flask.redirect(flask.url_for("import_hostname", hostname=hostname))
|
||||||
|
|
||||||
|
|
||||||
|
def pretalx_api(hostname: str, path: str) -> typing.Any:
    """Call pretalx API and cache results.

    Responses are stored as timestamped JSON files in the ``import``
    directory below ``DATA_DIR``. A cached response that is at most one hour
    old (relative to ``flask.g.now``) is returned instead of calling the API.

    ``flask.g.now`` and ``flask.g.now_str`` are read here, so they must be
    set before this function is called.

    Raises ``requests.HTTPError`` if the API answers with an error status.
    """
    import_dir = os.path.join(app.config["DATA_DIR"], "import")
    os.makedirs(import_dir, exist_ok=True)
    api_url = f"https://{hostname}/api/{path}"

    cache_start = hostname + "_" + path.strip("/").replace("/", "_")
    # Match the complete filename: a longer path that merely shares this
    # prefix must not be mistaken for this entry, and the cache has to keep
    # working regardless of the current year.
    cache_re = re.compile(
        re.escape(cache_start) + r"_(\d{4}-\d\d-\d\d_\d\d:\d\d)\.json"
    )

    cached: list[tuple[datetime, str]] = []
    for f in os.listdir(import_dir):
        m = cache_re.fullmatch(f)
        if m:
            cached.append((datetime.strptime(m.group(1), "%Y-%m-%d_%H:%M"), f))

    if cached:
        recent, recent_filename = max(cached)
        if flask.g.now - recent <= timedelta(hours=1):
            cache_path = os.path.join(import_dir, recent_filename)
            with open(cache_path, encoding="utf-8") as cache_file:
                return typing.cast(typing.Any, json.load(cache_file))

    r = requests.get(api_url, params={"limit": 500}, timeout=30)
    r.raise_for_status()
    # Parse before writing so an invalid response is never cached.
    data = r.json()

    filename = os.path.join(import_dir, f"{cache_start}_{flask.g.now_str}.json")
    with open(filename, "w", encoding="utf-8") as out:
        out.write(r.text)

    return typing.cast(typing.Any, data)
|
||||||
|
|
||||||
|
|
||||||
|
def set_now() -> None:
    """Store the current UTC datetime and its formatted form on ``flask.g``."""
    current = datetime.utcnow()
    flask.g.now = current
    # Minute-resolution timestamp string.
    flask.g.now_str = current.strftime("%Y-%m-%d_%H:%M")
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/import/<hostname>")
def import_hostname(hostname: str) -> str:
    """Import from hostname.

    Fetches the events offered by the pretalx instance at ``hostname`` and
    renders the start page listing them. An event whose slug or preferred
    name equals the short name or title of a stored conference gets that
    conference attached under the ``"existing"`` key.
    """
    assert check_admin_mode()
    set_now()
    events = pretalx_api(hostname, "events/")

    slug_lookup = {event["slug"]: event for event in events}
    name_lookup = {prefer_en_label(event["name"]): event for event in events}

    q = model.Conference.query.filter(
        or_(
            model.Conference.short_name.in_(list(slug_lookup)),
            model.Conference.title.in_(list(name_lookup)),
        )
    )

    for conf in q:
        slug = conf.short_name
        if slug in slug_lookup:
            slug_lookup[slug]["existing"] = conf
        if conf.title in name_lookup:
            name_lookup[conf.title]["existing"] = conf

    return flask.render_template(
        "import/start.html",
        events=events,
        hostname=hostname,
        prefer_en_label=prefer_en_label,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def run_import(
    event: dict[str, typing.Any],
    speakers: list[dict[str, typing.Any]],
    talks: list[dict[str, typing.Any]],
    speaker_lookup: dict[str, typing.Any],
) -> model.Conference:
    """Create a conference with its people and events, then commit.

    ``event``, ``speakers`` and ``talks`` are dicts read by key below;
    ``speaker_lookup`` maps a speaker code to an object offering ``count()``
    and ``one()``. Returns the newly created conference.
    """
    # NOTE(review): "if True" / "if False" look like manual switches for the
    # two phases (database import, photo download); the second is disabled.
    if True:
        conf = model.Conference(
            short_name=event["slug"],
            title=prefer_en_label(event["name"]),
            start=event["date_from"],
            end=event["date_to"],
        )
        database.session.add(conf)

        code_to_speaker = {}
        for speaker in speakers:
            code, name, photo_url = speaker["code"], speaker["name"], speaker["avatar"]
            # NOTE(review): hard-coded speaker code that is skipped here and
            # rewritten to "3BRWWP" in the talk loop below; presumably a
            # duplicate record of one particular import - confirm.
            if code == "9NX3NE":
                continue

            # Reuse the matched person when there is one, otherwise create
            # a new person.
            # NOTE(review): one() is expected to fail when more than one
            # candidate matched - confirm that this is intended.
            if speaker_lookup[code].count():
                person = speaker_lookup[code].one()
            else:
                person = model.Person(name=name)
                database.session.add(person)
            code_to_speaker[code] = person

            cp = model.ConferencePerson(
                person=person,
                conference=conf,
                named_as=name,
                photo_url=photo_url or None,  # store falsy avatar values as None
            )
            database.session.add(cp)

        for talk in talks:
            # Talks without speakers are not imported.
            if not talk["speakers"]:
                continue

            for s in talk["speakers"]:
                if s["code"] == "9NX3NE":
                    s["code"] = "3BRWWP"

            # Characters 11-15 of the slot start must look like "HH:MM".
            start_time = talk["slot"]["start"][11:16]
            assert re.match(r"\d\d:\d\d", start_time)
            # The "event" parameter is rebound here; it is not read again
            # after this point.
            event = model.Event(
                title=talk["title"],
                conference=conf,
                event_date=talk["slot"]["start"],
                duration=talk["duration"],
                room=prefer_en_label(talk["slot"]["room"]),
                track=prefer_en_label(talk["track"]),
                abstract=talk["abstract"],
                description=talk["description"],
                start=start_time,
                people=[code_to_speaker[s["code"]] for s in talk["speakers"]],
            )
            database.session.add(event)

        database.session.commit()

    # Disabled: download the photo of every conference person that has a
    # photo URL and record its content type. The conference short name is
    # hard-coded.
    if False:
        conf = model.Conference.query.filter_by(short_name="osfc2018").one()

        q = model.ConferencePerson.query.filter(
            model.ConferencePerson.conference == conf,
            model.ConferencePerson.photo_url.isnot(None),
        )

        photo_dir = "confarchive/static/conference_photo"
        web_session = requests.Session()
        web_session.headers.update({"User-Agent": user_agent})

        for cp in q:
            if not cp.photo_url:
                continue
            print(cp.photo_url)
            r = web_session.get(cp.photo_url)
            content_type = r.headers.get("Content-Type")
            print(content_type)
            # Only content types with a known file extension are accepted.
            assert content_type in model.content_type_to_extension
            image_ext = model.content_type_to_extension[content_type]
            photo_filename = f"{conf.id}_{cp.person_id}.{image_ext}"
            print((cp.person_id, cp.named_as, content_type, image_ext, photo_filename))
            full_photo = os.path.join(photo_dir, photo_filename)

            with open(full_photo, "wb") as out:
                out.write(r.content)
            cp.photo_url_content_type = content_type

        database.session.commit()

    return conf
|
||||||
|
|
||||||
|
|
||||||
|
def prefer_en_label(labels: dict[str, str] | None) -> str | None:
    """Reduce a language -> label mapping to a single display string.

    Returns None for None, the "en" entry when present, the only entry of a
    single-item mapping, and otherwise every entry formatted as
    "lang: label" and joined with " / ".
    """
    if labels is None:
        return None
    if "en" in labels:
        return labels["en"]
    if len(labels) != 1:
        return " / ".join(f"{code}: {text}" for code, text in labels.items())
    (only_label,) = labels.values()
    return only_label
|
||||||
|
|
||||||
|
|
||||||
|
def find_matching_name(name: str):
    """People with a matching name.

    Returns a ``model.Person`` query. The exact-match query is returned when
    it has results or when ``name`` contains no words. Otherwise a
    case-insensitive pattern query (the words of ``name`` in order, with
    anything in between) is returned, except that a single-word name with
    more than one pattern match falls back to the exact-match query.
    """
    name_parts = name.split()
    q1 = model.Person.query.filter(model.Person.name == name)
    # An empty or whitespace-only name would build the pattern "%%", which
    # matches every person, so it never reaches the pattern query.
    if not name_parts or q1.count():
        return q1
    name_pattern = "%" + "%".join(name_parts) + "%"
    q2 = model.Person.query.filter(model.Person.name.ilike(name_pattern))
    # A single word matching several people is too ambiguous to use.
    if len(name_parts) == 1 and q2.count() > 1:
        return q1
    return q2
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/import/<hostname>/<slug>", methods=["GET", "POST"])
def import_event(hostname: str, slug: str) -> str | Response:
    """Import event.

    GET renders a preview of the event, rooms, speakers and talks fetched
    from the pretalx instance. POST runs the import and redirects to the
    page of the new conference.
    """
    # Same guard as the other import routes; this is the route that writes
    # to the database.
    assert check_admin_mode()
    set_now()

    event = pretalx_api(hostname, f"events/{slug}")
    rooms = pretalx_api(hostname, f"events/{slug}/rooms")
    speakers = pretalx_api(hostname, f"events/{slug}/speakers")
    talks = pretalx_api(hostname, f"events/{slug}/talks")

    # Only a single page is requested from the API, so the import is
    # refused when that page does not hold every result.
    assert len(speakers["results"]) == speakers["count"]
    assert len(talks["results"]) == talks["count"]

    all_talk_speakers: set[str] = set()
    for talk in talks["results"]:
        all_talk_speakers.update(speaker["code"] for speaker in talk["speakers"])

    person_candidates = {
        speaker["code"]: find_matching_name(speaker["name"])
        for speaker in speakers["results"]
    }

    if flask.request.method == "GET":
        return flask.render_template(
            "import/event.html",
            hostname=hostname,
            slug=slug,
            event=event,
            rooms=rooms,
            # The preview lists only speakers that appear on a talk.
            speakers=[s for s in speakers["results"] if s["code"] in all_talk_speakers],
            talks=talks,
            person_candidates=person_candidates,
            plural=utils.plural,
            prefer_en_label=prefer_en_label,
        )

    conf = run_import(event, speakers["results"], talks["results"], person_candidates)

    return flask.redirect(flask.url_for("conference_page", short_name=conf.short_name))
|
||||||
|
|
||||||
|
|
||||||
@app.route("/reports")
|
@app.route("/reports")
|
||||||
def reports_page() -> str:
|
def reports_page() -> str:
|
||||||
"""Page showing statistics."""
|
"""Page showing statistics."""
|
||||||
|
|
Loading…
Reference in a new issue