various improvements

This commit is contained in:
Edward Betts 2023-09-22 21:00:56 +01:00
parent 6922b1bf11
commit d1a46dd422
11 changed files with 504 additions and 74 deletions

View file

@ -1,9 +1,13 @@
"""Database models."""
import os
import typing
import sqlalchemy
import sqlalchemy.orm.decl_api
from sqlalchemy import func
from sqlalchemy import Index, func, text
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import TSVECTOR
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.orderinglist import ordering_list
@ -131,6 +135,8 @@ class ConferencePerson(Base):
bio = Column(String)
slug = Column(String)
url = Column(String)
affiliation = Column(String)
photo_url = Column(String)
person = relationship("Person", back_populates="conferences_association")
conference = relationship("Conference", back_populates="people_detail")
@ -165,12 +171,30 @@ class Event(TimeStampedModel):
description = Column(String)
event_type = Column(String)
url = Column(String)
cancelled = Column(Boolean)
search_vector = Column(TSVECTOR)
trigger = text(
"""
CREATE TRIGGER event_vector_update BEFORE INSERT OR UPDATE
ON event FOR EACH ROW EXECUTE FUNCTION
tsvector_update_trigger(search_vector, 'pg_catalog.english', title, abstract, description);
"""
)
# GIN index over the ``search_vector`` tsvector column.
# No operator class is specified on purpose: ``postgresql_ops`` entries are
# matched against the *key of an indexed column*, so an entry keyed by any
# other name is silently dropped from the generated DDL, and ``gin_trgm_ops``
# (pg_trgm) is an operator class for text columns, not for tsvector.
# PostgreSQL's default GIN operator class for tsvector is what this index uses.
Index(
    "event_search_vector_idx",
    search_vector,
    postgresql_using="gin",
)
conference = relationship("Conference", back_populates="events")
people_detail = relationship(
"EventPerson",
order_by="EventPerson.position",
lazy="dynamic",
back_populates="event",
collection_class=ordering_list("position"),
)
@ -198,7 +222,9 @@ class Person(TimeStampedModel):
)
events = association_proxy("events_association", "event")
conferences_association = relationship("ConferencePerson", back_populates="person")
conferences_association = relationship(
"ConferencePerson", lazy="dynamic", back_populates="person"
)
conferences = association_proxy("conferences_association", "conference")
@property
@ -239,6 +265,39 @@ class Person(TimeStampedModel):
return q
def bio_source(self) -> ConferencePerson | None:
    """Choose the conference appearance whose biography should be shown.

    Only appearances with a non-empty ``bio`` are considered.  Returns
    ``None`` when there are none, the sole candidate when there is exactly
    one, and otherwise the most recent appearance (by conference start) --
    unless the longest biography is more than twice the length of the most
    recent one, in which case the longest wins.
    """
    candidates = [assoc for assoc in self.conferences_association if assoc.bio]
    if not candidates:
        return None
    if len(candidates) == 1:
        return typing.cast(ConferencePerson, candidates[0])

    newest = max(candidates, key=lambda assoc: assoc.conference.start)
    lengthiest = max(candidates, key=lambda assoc: len(assoc.bio))
    if newest == lengthiest:
        return typing.cast(ConferencePerson, newest)

    # Prefer the newest bio unless an older one is substantially richer.
    if len(lengthiest.bio) > len(newest.bio) * 2:
        chosen = lengthiest
    else:
        chosen = newest
    return typing.cast(ConferencePerson, chosen)
def photo_filename(self) -> str | None:
if self.wikidata_photo:
return os.path.join("wikidata_photo", "thumb", self.wikidata_photo[0])
q = self.conferences_association.filter(ConferencePerson.photo_url.isnot(None))
if q.count() == 0:
return None
best = max(q, key=lambda cp: cp.conference.start)
ext = best.photo_url.rpartition(".")[-1]
filename = f"{best.conference_id}_{self.id}.{ext}"
return os.path.join("conference_photo", filename)
# class PersonPhoto(TimeStampedModel):
# """Person photo."""

184
main.py
View file

@ -13,7 +13,7 @@ import sqlalchemy
from sqlalchemy import func, or_, update
from werkzeug.wrappers import Response
from confarchive import database, model
from confarchive import database, model, wikidata
app = flask.Flask(__name__)
app.debug = True
@ -53,33 +53,14 @@ def wikidata_search(q: str) -> list[dict[str, typing.Any]]:
return cast(list[dict[str, typing.Any]], data["query"]["search"])
def wikidata_get_item(qid: str) -> typing.Any:
    """Return the Wikidata entity for *qid*, caching it as JSON on disk.

    The cache lives at ``items/<qid>.json``.  On a cache hit the file is
    parsed and returned.  On a miss the entity is requested from the
    Wikidata API via ``wbgetentities``, written to the cache, and returned
    after a short pause.
    """
    cache_filename = os.path.join("items", qid + ".json")
    if os.path.exists(cache_filename):
        # Use a context manager so the cache file handle is always closed.
        with open(cache_filename) as f:
            return json.load(f)

    print(qid)
    params: dict[str, str | int] = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "formatversion": 2,
    }
    r = requests.get(wikidata_api, params=params)
    item = r.json()["entities"][qid]
    with open(cache_filename, "w") as f:
        json.dump(item, f, indent=2)
    # Brief pause after each uncached API request.
    time.sleep(0.1)
    return item
# Build (without executing) a query of (Person, count) rows: Person joined to
# ConferencePerson and grouped by person, so the count is the number of
# joined ConferencePerson rows per person.
def top_speakers() -> sqlalchemy.orm.query.Query:
q = (
database.session.query(model.Person, func.count())
.join(model.ConferencePerson)
.filter(model.Person.id != 1046) # FOSDEM Staff
.group_by(model.Person)
# NOTE(review): two order_by/having pairs follow back to back (count > 3,
# then count > 4; the second ordering adds Person.name as a tie-breaker).
# This looks like the before/after lines of one change -- confirm which
# pair is meant to remain.
.order_by(func.count().desc())
.having(func.count() > 3)
.order_by(func.count().desc(), model.Person.name)
.having(func.count() > 4)
)
return q
@ -107,20 +88,68 @@ def top_events() -> sqlalchemy.orm.query.Query:
return q
def drop_start(s: str, start: str) -> str:
    """Return *s* without one leading occurrence of *start*, if present."""
    return s.removeprefix(start)
# Person page.  GET renders "person.html" for the person; POST saves the
# submitted name and Wikidata QID, then redirects back to the same page.
@app.route("/person/<int:person_id>", methods=["GET", "POST"])
def person(person_id: int) -> str | Response:
item = model.Person.query.get(person_id)
if flask.request.method == "POST":
# NOTE(review): the next two lines both read the submitted QID.  If the first
# one (direct assignment) were kept, the "qid != item.wikidata_qid" test
# below could never be true -- they look like the before/after lines of one
# change; confirm only the "qid = ..." form remains.
item.wikidata_qid = flask.request.form["wikidata_qid"] or None
qid = flask.request.form["wikidata_qid"] or None
item.name = flask.request.form["name"]
# Only touch Wikidata when a QID was supplied and it differs from the stored one.
if qid and qid != item.wikidata_qid:
item.wikidata_qid = qid
wd_item = wikidata.get_item(qid)
# P18 is presumably Wikidata's "image" property -- TODO confirm.
if "P18" in wd_item["claims"]:
claim_p18 = wd_item["claims"]["P18"]
# Collect each claim's value, minus a single leading "-" if it has one.
wikidata_photo = [
drop_start(s["mainsnak"]["datavalue"]["value"], "-")
for s in claim_p18
]
for filename in wikidata_photo:
print(filename)
wikidata.get_photo(filename)
item.wikidata_photo = wikidata_photo
database.session.commit()
assert flask.request.endpoint
# Redirect to this same endpoint so the saved data is shown via a fresh GET.
return flask.redirect(
flask.url_for(flask.request.endpoint, person_id=person_id)
)
wikidata_hits: list[dict[str, str]] = []
# "False and ..." keeps this Wikidata name search switched off, so
# wikidata_hits is always passed to the template as an empty list.
if False and item.wikidata_qid is None:
search_hits = wikidata_search(item.name)
print(len(search_hits))
for search_hit in search_hits:
qid = search_hit["title"]
wd_item = wikidata.get_item(qid)
# Fall back to placeholder text when there is no English label/description.
if "en" in wd_item["labels"]:
label = wd_item["labels"]["en"]["value"]
else:
label = "[no english label]"
if "en" in wd_item["descriptions"]:
description = wd_item["descriptions"]["en"]["value"]
else:
description = "[no english description]"
wikidata_hits.append(
{
"qid": qid,
"label": label,
"description": description,
}
)
# NOTE(review): the template arguments appear twice below (a one-line form
# followed by a multi-line form that adds wikidata_hits); this looks like the
# before/after lines of one change -- confirm only one form remains.
return flask.render_template(
"person.html", item=item, Event=model.Event, plural=plural
"person.html",
item=item,
Event=model.Event,
plural=plural,
wikidata_hits=wikidata_hits,
)
@ -260,12 +289,33 @@ order by num
# Top speakers page: renders "top_speakers.html" with the top-speakers query
# plus the photos of those speakers split into two alternating columns.
@app.route("/speakers")
def top_speakers_page() -> str:
top = top_speakers()
# NOTE(review): this string follows an assignment, so it is a plain expression
# statement rather than the function's docstring -- confirm intended placement.
"""Top speakers page."""
# Keep only the speakers for whom photo_filename() returns a value.
photos = []
for person, count in top:
photo = person.photo_filename()
if photo:
photos.append((person, photo))
# Alternate entries between the two lists: even positions left, odd positions right.
left_photos = photos[::2]
right_photos = photos[1::2]
# The same alternating split, but as bare person ids.
photo_person_ids = [person.id for person, photo in photos]
left = photo_person_ids[::2]
right = photo_person_ids[1::2]
# NOTE(review): "top_speakers" is passed twice below (top_speakers() and top);
# this looks like the before/after lines of one change -- confirm only one remains.
return flask.render_template(
"top_speakers.html",
top_speakers=top_speakers(),
top_speakers=top,
speaker_counts=speaker_counts(),
plural=plural,
person_image_filename=person_image_filename,
# photo_person_ids=photo_person_ids,
left=left,
right=right,
left_photos=left_photos,
right_photos=right_photos,
)
@ -335,7 +385,7 @@ def link_to_wikidata() -> str:
@app.route("/search")
def search_everything() -> str:
search_for = flask.request.args["q"]
search_for = flask.request.args.get("q")
if not search_for:
return flask.render_template("search_everything.html")
@ -374,6 +424,7 @@ def delete_person(person_id: int) -> str | Response:
def person_image_filename(person_id):
person = model.Person.query.get(person_id)
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
for filename in person.wikidata_photo:
face_crop = "face_1_" + filename
full = os.path.join("static", "wikidata_photo", "face_cropped", face_crop)
@ -383,5 +434,84 @@ def person_image_filename(person_id):
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
@app.route("/github_wikidata")
def github_wikidata() -> str:
    """Render the candidate Wikidata matches stored in ``found_wikidata_github``.

    Each line of the file is evaluated to a 6-tuple
    ``(person_id, person_name, qid, wd_name, github, photo)``.  People who
    already have a ``wikidata_qid`` are left out, as are rows whose person
    can no longer be found.  The remaining ``(person, qid, wd_name, photo)``
    tuples are passed to ``github.html``.
    """
    items = []
    # Context manager so the file handle is closed once all lines are read.
    with open("found_wikidata_github") as f:
        for line in f:
            # SECURITY NOTE(review): eval() runs arbitrary Python, so this is
            # only acceptable while the file is trusted, locally generated
            # data.  If every line is a plain literal tuple,
            # ast.literal_eval would be a safer replacement -- confirm the
            # file format before switching.
            person_id, _person_name, qid, wd_name, _github, photo = eval(line)
            person = model.Person.query.get(person_id)
            # Skip rows for people that no longer exist or are already linked.
            if person is None or person.wikidata_qid:
                continue
            items.append((person, qid, wd_name, photo))
    return flask.render_template("github.html", items=items)
@app.route("/reports")
def reports_page() -> str:
    """Render ``reports.html``: headline counts plus two data-quality lists.

    The lists are the events without an ``event_date`` (ordered by title)
    and the people whose most recent biography is less than half the length
    of their longest earlier one.
    """
    has_bio = model.ConferencePerson.bio.isnot(None)
    missing_dates = model.Event.query.filter(model.Event.event_date.is_(None))

    def people_with_appearances():
        """Start a fresh Person query joined to ConferencePerson."""
        return model.Person.query.join(model.ConferencePerson)

    event_total = model.Event.query.count()
    missing_total = missing_dates.count()
    speaker_total = model.Person.query.count()

    # NOTE(review): this counts people with at least one appearance that
    # lacks a bio, which is not the same as people with no bio at all --
    # confirm that is the intended meaning of "no bio".
    without_bio_total = (
        people_with_appearances()
        .filter(model.ConferencePerson.bio.is_(None))
        .group_by(model.Person)
        .count()
    )
    single_bio_total = (
        people_with_appearances()
        .group_by(model.Person)
        .filter(has_bio)
        .having(func.count() == 1)
        .count()
    )
    multiple_bio = (
        people_with_appearances()
        .group_by(model.Person)
        .filter(has_bio)
        .having(func.count() > 1)
    )

    shorter_recent_bio = []
    for speaker in multiple_bio:
        # Newest first: tuples sort by conference start, then by bio text.
        dated_bios = sorted(
            [
                (assoc.conference.start, assoc.bio)
                for assoc in speaker.conferences_association
                if assoc.bio
            ],
            reverse=True,
        )
        if len(dated_bios) < 2:
            continue

        (_, newest_bio), *older = dated_bios
        newest_len = len(newest_bio)
        longest_older = max(len(bio) for _, bio in older)
        if longest_older > newest_len * 2:
            shorter_recent_bio.append((speaker, newest_len, longest_older))

    multiple_bio_total = multiple_bio.count()
    return flask.render_template(
        "reports.html",
        event_count=event_total,
        speaker_count=speaker_total,
        no_bio_count=without_bio_total,
        one_bio_count=single_bio_total,
        multiple_bio_count=multiple_bio_total,
        shorter_recent_bio=shorter_recent_bio,
        missing_event_date_count=missing_total,
        missing_event_date=missing_dates.order_by(model.Event.title),
    )
if __name__ == "__main__":
app.run(host="0.0.0.0", port=5002)

View file

@ -11,7 +11,7 @@
body {
font-family: monospace;
margin: 40px auto;
max-width: 1040px;
max-width: 900px;
line-height: 1.6;
font-size: 18px;
color: #444;

View file

@ -2,22 +2,22 @@
{% block style %}
<style>
.image-container {
width: 200px; /* Adjust this to your desired square size */
height: 240px; /* Same as width for a square */
display: inline-flex; /* Use inline-flex to display containers horizontally */
margin-right: 10px; /* Add some spacing between images (adjust as needed) */
justify-content: center; /* Horizontally center the content */
align-items: center; /* Vertically center the content */
overflow: hidden; /* Hide overflowing image parts */
}
.images {
position: absolute;
right: -200px;
top: 0;
width: 180px;
}
.image-container img {
.image {
max-width: 100%;
max-height: 100%;
object-fit: cover; /* Crop and scale the image to fit the container */
object-position: center; /* Center the cropping horizontally */
}
}
.container {
position: relative;
}
</style>
{% endblock %}
@ -26,17 +26,36 @@
{% block title %}{{ item.title }}{% endblock %}
{% block content %}
<div class="container">
{% if show_images %}
<div class="images">
{% for person in item.people %}
{% set photo = person.photo_filename() %}
{% if photo %}
<div class="image-container">
<a href="{{ url_for("person", person_id=person.id) }}">
{{ person.name }}<br>
<img class="image" src="{{ url_for("static", filename=photo) }}" alt="{{ person.name}}" title="{{ person.name}}">
</a>
</div>
{% endif %}
{% endfor %}
</div>
{% endif %}
<div class="row">
<h1>{{ item.title }}</h1>
<p><a href="{{ url_for("index") }}">home</a></p>
<div>
{% if item.series %}
<div>series: {{ item.series.name }}
{% if item.series.wikidata_qid %}
<a href="https://www.wikidata.org/wiki/{{ item.series.wikidata_qid }}">Wikidata</a>
{% endif %}
</div>
{% endif %}
<div>start: {{ item.start }}</div>
<div>end: {{ item.end }}</div>
{% if days %}
@ -64,20 +83,6 @@
{% endif %}
</div>
{% if show_images %}
<div>
{% for person in item.people %}
{% if person.wikidata_photo %}
<span class="image-container">
<a href="{{ url_for("person", person_id=person.id) }}">
<img src="{{ url_for("static", filename=person_image_filename(person.id)) }}" alt="{{ person.name}}" title="{{ person.name}}">
</a>
</span>
{% endif %}
{% endfor %}
</div>
{% endif %}
<h3>Talks</h3>
<p>{{ item.events.count() }} talks</p>

24
templates/github.html Normal file
View file

@ -0,0 +1,24 @@
{% extends "base.html" %}
{% block title %}Conference archive{% endblock %}
{% block content %}
<h1>Conference archive</h1>
<p>{{ items | count }} matches found</p>
{% for person, qid, wd_name, photo in items %}
<div>
<a href="{{ url_for("person", person_id=person.id) }}">{{ person.name }}</a>
##
<a href="https://www.wikidata.org/wiki/{{ qid }}">{{ wd_name }} ({{ qid }})</a>
{% if photo %}📷{% endif %}
</div>
{% endfor %}
{% endblock %}
{% block style %}
<style>
.searchmatch { background: lightgreen }
</style>
{% endblock %}

View file

@ -7,6 +7,7 @@
<div class="row">
<h1>Conference archive</h1>
{#
<form action="{{ url_for("search_people") }}">
<div class="mb-3">
<label for="q" class="form-label">speaker name</label>
@ -14,6 +15,7 @@
</div>
<button type="submit" class="btn btn-primary">Search</button>
</form>
#}
<div style="margin-bottom:1rem">
👥

View file

@ -1,5 +1,12 @@
<p>
<form action="{{ url_for("search_people") }}">
<a href="{{ url_for("index") }}">home</a>
| <a href="{{ url_for("events_page") }}">events</a>
| <a href="{{ url_for("top_speakers_page") }}">speakers</a>
<input type="text" class="form-control" placeholder="speaker name" name="q" id="q">
<button type="submit" class="btn btn-primary">search</button>
</form>
</p>

View file

@ -2,9 +2,36 @@
{% block title %}{{ item.name }}{% endblock %}
{% block style %}
<style>
.image-container {
float: right;
width: 300px;
}
.image {
max-width: 100%;
}
.text-nowrap {
white-space: nowrap;
}
</style>
{% endblock %}
{% block content %}
<div class="container">
<div class="row">
{% set photo = item.photo_filename() %}
{% if photo %}
<div class="image-container">
<img class="image" src="{{ url_for("static", filename=photo) }}">
</div>
{% endif %}
<h1>{{ item.name }}</h1>
<p>
@ -19,10 +46,6 @@
</p>
{% if item.wikidata_photo %}
<img src="{{ url_for("static", filename="wikidata_photo/thumb/" + item.wikidata_photo.0) }}">
{% endif %}
{% set search_for = item.name + ' ' + " haswbstatement:P31=Q5" %}
<p><a href="https://www.wikidata.org/w/index.php?search={{ search_for | urlencode }}&title=Special%3ASearch&ns0=1&ns120=1">Search for {{ item.name }} on Wikidata</a></p>
@ -42,25 +65,57 @@
<button type="submit" class="btn btn-primary">delete</button>
</form>
{% if show_wikidata_matches %}
{% if wikidata_hits %}
<p>Possible Wikidata matches</p>
<ul>
{% for hit in wikidata_hits %}
<li>
<a href="https://www.wikidata.org/wiki/{{ hit.qid }}">{{ hit.qid }}</a>
{{ hit.label }} &mdash; {{ hit.description }}
</li>
{% endfor %}
</ul>
{% elif not item.wikidata_qid %}
<p>No similar names found on Wikidata</p>
{% endif %}
{% endif %}
{% set bio_source = item.bio_source() %}
{% if bio_source %}
<h2>Biography</h2>
<blockquote>
<div>{{ bio_source.bio | safe }}</div>
<div>&mdash; biography from
<a href="{{ url_for("conference_page", short_name=bio_source.conference.short_name) }}">{{ bio_source.conference.title }}</a><br>
<a href="{{ bio_source.url }}">{{ bio_source.url }}</a>
</div>
</blockquote>
{% else %}
<p>No biography available.</p>
{% endif %}
<h2>Conferences</h2>
<p>{{ item.conferences_association.count() }} known conferences</p>
{% for apperance in item.conference_by_time() %}
{% set conf = apperance.conference %}
<div>
<h3>👥 {{ conf.title }}
<h3>👥
<a href="{{ url_for("conference_page", short_name=conf.short_name) }}">{{ conf.title }}</a>
<small>📅 {{ conf.start.strftime("%d %b %Y") }}</small>
</h3>
{% if apperance.bio %}<p>Biography: {{ apperance.bio | safe }}</p>{% endif %}
{% if 0 and apperance.bio %}<p>Biography: {{ apperance.bio | safe }}</p>{% endif %}
</div>
{% for event in apperance.events %}
<div>
<h4>
🎤
<a href="{{ url_for("event_page", event_id=event.id) }}">{{ event.title }}</a>
<small>
&nbsp;&nbsp;🎤
<a href="{{ url_for("event_page", event_id=event.id) }}">{{ event.title }}</a><br>
&nbsp;&nbsp;
<span class="text-nowrap">
{% if event.event_date %}
{{ event.event_date.strftime("%d %b %Y") }}
{% else %}
@ -68,8 +123,7 @@
{% endif %}
<a class="event-detail-toggle" href="#">show details</a>
</small>
</h4>
</span>
<div class="event-detail" id="event_{{event.id }}" style="display:none">
<p>
@ -97,13 +151,16 @@
{% endif %}
</div>
{% endif %}
{% if event.people_detail.count() > 1 %}
<div>
Other people:
{% for p in event.people %}
{% if p.id != item.id %}
<a href="{{ url_for(request.endpoint, person_id=p.id) }}">{{ p.name }}</a>
{% endif %}
{% endfor %}
</div>
{% endif %}
</div>
</div>

37
templates/reports.html Normal file
View file

@ -0,0 +1,37 @@
{% extends "base.html" %}
{% block title %}Conference archive{% endblock %}
{% block content %}
<h1>Conference archive</h1>
<h2>Reports</h2>
<div>Talks: {{ "{:,d}".format(event_count) }}</div>
<div>Talks with no event date: {{ "{:,d}".format(missing_event_date_count) }}</div>
<div>Speakers: {{ "{:,d}".format(speaker_count) }}</div>
<div>Speakers with no bio: {{ "{:,d}".format(no_bio_count) }}</div>
<div>Speakers with one bio: {{ "{:,d}".format(one_bio_count) }}</div>
<div>Speakers with more than one bio: {{ "{:,d}".format(multiple_bio_count) }}</div>
<h2>Talks with missing dates</h2>
{% for event in missing_event_date %}
<div>{{ event.title }}<br>
&nbsp;{{ event.conference.title }}<br>
&nbsp;GUID: {{ event.guid or "missing" }}
</div>
{% endfor %}
<h2>Speakers with a shorter recent biography</h2>
<p>{{ shorter_recent_bio | count }} found</p>
{% for person, recent_bio_length, longest in shorter_recent_bio %}
<div>
<a href="{{ url_for("person", person_id=person.id) }}">{{ person.name }}</a>
recent bio: {{ recent_bio_length }} vs {{ longest }}
</div>
{% endfor %}
{% endblock %}

View file

@ -17,9 +17,9 @@
<p>Found {{ q.count() }} events matching '{{ search_for }}'</p>
<ul>
{% for item in q %}
<li>
<div>
🎤
<a href="{{ url_for("event_page", event_id=item.id) }}">{{ item.title }}</a>
&mdash;
<a href="{{ url_for("conference_page", short_name=item.conference.short_name) }}">{{ item.conference.title }}</a>

View file

@ -1,8 +1,66 @@
{% extends "base.html" %}
{% block style %}
<script src="{{ url_for("static", filename="leader-line.min.js") }}"></script>
<style>
.right-images {
position: absolute;
right: -280px;
top: 0;
width: 120px;
}
.left-images {
position: absolute;
right: -140px;
top: 0;
width: 120px;
}
.image {
max-width: 100%;
}
.container {
position: relative;
}
</style>
{% endblock %}
{% set show_images = True %}
{% block title %}Conference archive{% endblock %}
{% block content %}
<div class="container">
{% if show_images %}
<div class="left-images">
{% for person, photo in left_photos %}
<div class="image-container" id="image-{{person.id}}">
<a href="{{ url_for("person", person_id=person.id) }}">
<img class="image" src="{{ url_for("static", filename=photo) }}" alt="{{ person.name}}" title="{{ person.name}}">
</a>
</div>
{% endfor %}
</div>
<div class="right-images">
{% for person, photo in right_photos %}
<div class="image-container">
<a href="{{ url_for("person", person_id=person.id) }}">
<img id="image-{{person.id}}" class="image" src="{{ url_for("static", filename=photo) }}" alt="{{ person.name}}" title="{{ person.name}}">
</a>
</div>
{% endfor %}
</div>
{% endif %}
<h1>Conference archive</h1>
<form action="{{ url_for("search_people") }}">
@ -27,17 +85,68 @@
<ul>
{% for person, count in top_speakers %}
{% if loop.first or loop.previtem[1] != count %}
<h4>{{ count }} conferences</h4>
{% endif %}
<div>
<span id="person-{{ person.id }}">
👤
<a href="{{ url_for("person", person_id=person.id) }}">{{ person.name }}</a>
({{ count }} conferences, {{ person.event_count }} talks)
{% if person.wikidata_photo %}📷{% endif %}
{% if person.photo_filename() %}📷{% endif %}
{% if person.wikidata_qid %}
<a href="https://www.wikidata.org/wiki/{{ person.wikidata_qid }}">Wikidata</a>
{% endif %}
</span>
</div>
{% endfor %}
</div>
{% endblock %}
{% block script %}
{% if show_images %}
<script>
{# var person_ids = {{ photo_person_ids | tojson }}; #}
var left = {{ left | tojson }};
var right = {{ right | tojson }};
var lines = {};
window.addEventListener('load', function() {
  // Join a speaker's list entry ("person-<id>") and their photo ("image-<id>")
  // with two leader lines, one anchored on each end via mouseHoverAnchor.
  function connect(id) {
    var person = document.getElementById('person-' + id);
    var image = document.getElementById('image-' + id);

    var toImage = new LeaderLine(LeaderLine.mouseHoverAnchor(person, 'draw'), image);
    toImage.setOptions({startSocket: 'right', endSocket: 'left', path: 'fluid'});

    var toPerson = new LeaderLine(LeaderLine.mouseHoverAnchor(image, 'draw'), person);
    toPerson.setOptions({startSocket: 'left', endSocket: 'right', path: 'fluid'});
  }

  // Same order as before: every id in "left" first, then every id in "right".
  left.forEach(function(id) { connect(id); });
  right.forEach(function(id) { connect(id); });
});
</script>
{% endif %}
{% endblock %}