diff --git a/confarchive/model.py b/confarchive/model.py index 2795491..9949275 100644 --- a/confarchive/model.py +++ b/confarchive/model.py @@ -1,9 +1,13 @@ """Database models.""" +import os +import typing + import sqlalchemy import sqlalchemy.orm.decl_api -from sqlalchemy import func +from sqlalchemy import Index, func, text from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import TSVECTOR from sqlalchemy.ext.associationproxy import association_proxy from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.orderinglist import ordering_list @@ -131,6 +135,8 @@ class ConferencePerson(Base): bio = Column(String) slug = Column(String) url = Column(String) + affiliation = Column(String) + photo_url = Column(String) person = relationship("Person", back_populates="conferences_association") conference = relationship("Conference", back_populates="people_detail") @@ -165,12 +171,30 @@ class Event(TimeStampedModel): description = Column(String) event_type = Column(String) url = Column(String) + cancelled = Column(Boolean) + search_vector = Column(TSVECTOR) + + trigger = text( + """ + CREATE TRIGGER event_vector_update BEFORE INSERT OR UPDATE + ON event FOR EACH ROW EXECUTE FUNCTION + tsvector_update_trigger(search_vector, 'pg_catalog.english', title, abstract, description); + """ + ) + + Index( + "event_search_vector_idx", + search_vector, + postgresql_using="gin", + postgresql_ops={"abstract_search_vector": "gin_trgm_ops"}, + ) conference = relationship("Conference", back_populates="events") people_detail = relationship( "EventPerson", order_by="EventPerson.position", + lazy="dynamic", back_populates="event", collection_class=ordering_list("position"), ) @@ -198,7 +222,9 @@ class Person(TimeStampedModel): ) events = association_proxy("events_association", "event") - conferences_association = relationship("ConferencePerson", back_populates="person") + conferences_association = relationship( + "ConferencePerson", 
lazy="dynamic", back_populates="person" + ) conferences = association_proxy("conferences_association", "conference") @property @@ -239,6 +265,39 @@ class Person(TimeStampedModel): return q + def bio_source(self) -> ConferencePerson | None: + bio_list = [cp for cp in self.conferences_association if cp.bio] + + if not bio_list: + return None + + if len(bio_list) == 1: + return typing.cast(ConferencePerson, bio_list[0]) + + recent = max(bio_list, key=lambda cp: cp.conference.start) + + len_recent_bio = len(recent.bio) + longest = max(bio_list, key=lambda cp: len(cp.bio)) + + if recent == longest: + return typing.cast(ConferencePerson, recent) + + best = longest if len(longest.bio) > len_recent_bio * 2 else recent + return typing.cast(ConferencePerson, best) + + def photo_filename(self) -> str | None: + if self.wikidata_photo: + return os.path.join("wikidata_photo", "thumb", self.wikidata_photo[0]) + + q = self.conferences_association.filter(ConferencePerson.photo_url.isnot(None)) + if q.count() == 0: + return None + + best = max(q, key=lambda cp: cp.conference.start) + ext = best.photo_url.rpartition(".")[-1] + filename = f"{best.conference_id}_{self.id}.{ext}" + return os.path.join("conference_photo", filename) + # class PersonPhoto(TimeStampedModel): # """Person photo.""" diff --git a/main.py b/main.py index 7037f7c..b099df6 100755 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ import sqlalchemy from sqlalchemy import func, or_, update from werkzeug.wrappers import Response -from confarchive import database, model +from confarchive import database, model, wikidata app = flask.Flask(__name__) app.debug = True @@ -53,33 +53,14 @@ def wikidata_search(q: str) -> list[dict[str, typing.Any]]: return cast(list[dict[str, typing.Any]], data["query"]["search"]) -def wikidata_get_item(qid: str) -> typing.Any: - cache_filename = os.path.join("items", qid + ".json") - if os.path.exists(cache_filename): - item = json.load(open(cache_filename)) - else: - print(qid) - params: 
dict[str, str | int] = { - "action": "wbgetentities", - "ids": qid, - "format": "json", - "formatversion": 2, - } - r = requests.get(wikidata_api, params=params) - item = r.json()["entities"][qid] - with open(cache_filename, "w") as f: - json.dump(item, f, indent=2) - time.sleep(0.1) - return item - - def top_speakers() -> sqlalchemy.orm.query.Query: q = ( database.session.query(model.Person, func.count()) .join(model.ConferencePerson) + .filter(model.Person.id != 1046) # FOSDEM Staff .group_by(model.Person) - .order_by(func.count().desc()) - .having(func.count() > 3) + .order_by(func.count().desc(), model.Person.name) + .having(func.count() > 4) ) return q @@ -107,20 +88,68 @@ def top_events() -> sqlalchemy.orm.query.Query: return q +def drop_start(s: str, start: str) -> str: + return s[len(start) :] if s.startswith(start) else s + + @app.route("/person/", methods=["GET", "POST"]) def person(person_id: int) -> str | Response: item = model.Person.query.get(person_id) if flask.request.method == "POST": - item.wikidata_qid = flask.request.form["wikidata_qid"] or None + qid = flask.request.form["wikidata_qid"] or None item.name = flask.request.form["name"] + + if qid and qid != item.wikidata_qid: + item.wikidata_qid = qid + wd_item = wikidata.get_item(qid) + if "P18" in wd_item["claims"]: + claim_p18 = wd_item["claims"]["P18"] + wikidata_photo = [ + drop_start(s["mainsnak"]["datavalue"]["value"], "-") + for s in claim_p18 + ] + for filename in wikidata_photo: + print(filename) + wikidata.get_photo(filename) + item.wikidata_photo = wikidata_photo + database.session.commit() assert flask.request.endpoint return flask.redirect( flask.url_for(flask.request.endpoint, person_id=person_id) ) + wikidata_hits: list[dict[str, str]] = [] + if False and item.wikidata_qid is None: + search_hits = wikidata_search(item.name) + print(len(search_hits)) + for search_hit in search_hits: + qid = search_hit["title"] + wd_item = wikidata.get_item(qid) + if "en" in wd_item["labels"]: + 
label = wd_item["labels"]["en"]["value"] + else: + label = "[no english label]" + + if "en" in wd_item["descriptions"]: + description = wd_item["descriptions"]["en"]["value"] + else: + description = "[no english description]" + + wikidata_hits.append( + { + "qid": qid, + "label": label, + "description": description, + } + ) + return flask.render_template( - "person.html", item=item, Event=model.Event, plural=plural + "person.html", + item=item, + Event=model.Event, + plural=plural, + wikidata_hits=wikidata_hits, ) @@ -260,12 +289,33 @@ order by num @app.route("/speakers") def top_speakers_page() -> str: + top = top_speakers() + """Top speakers page.""" + photos = [] + for person, count in top: + photo = person.photo_filename() + if photo: + photos.append((person, photo)) + + left_photos = photos[::2] + right_photos = photos[1::2] + + photo_person_ids = [person.id for person, photo in photos] + left = photo_person_ids[::2] + right = photo_person_ids[1::2] + return flask.render_template( "top_speakers.html", - top_speakers=top_speakers(), + top_speakers=top, speaker_counts=speaker_counts(), plural=plural, + person_image_filename=person_image_filename, + # photo_person_ids=photo_person_ids, + left=left, + right=right, + left_photos=left_photos, + right_photos=right_photos, ) @@ -335,7 +385,7 @@ def link_to_wikidata() -> str: @app.route("/search") def search_everything() -> str: - search_for = flask.request.args["q"] + search_for = flask.request.args.get("q") if not search_for: return flask.render_template("search_everything.html") @@ -374,6 +424,7 @@ def delete_person(person_id: int) -> str | Response: def person_image_filename(person_id): person = model.Person.query.get(person_id) return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0]) + for filename in person.wikidata_photo: face_crop = "face_1_" + filename full = os.path.join("static", "wikidata_photo", "face_cropped", face_crop) @@ -383,5 +434,84 @@ def person_image_filename(person_id): return 
os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0]) +@app.route("/github_wikidata") +def github_wikidata() -> str: + items = [] + for line in open("found_wikidata_github"): + person_id, person_name, qid, wd_name, github, photo = eval(line) + person = model.Person.query.get(person_id) + if person.wikidata_qid: + continue + items.append((person, qid, wd_name, photo)) + + return flask.render_template("github.html", items=items) + + +@app.route("/reports") +def reports_page() -> str: + event_count = model.Event.query.count() + + missing_event_date_count = model.Event.query.filter( + model.Event.event_date.is_(None) + ).count() + + speaker_count = model.Person.query.count() + no_bio_count = ( + model.Person.query.join(model.ConferencePerson) + .filter(model.ConferencePerson.bio.is_(None)) + .group_by(model.Person) + .count() + ) + one_bio_count = ( + model.Person.query.join(model.ConferencePerson) + .group_by(model.Person) + .filter(model.ConferencePerson.bio.isnot(None)) + .having(func.count() == 1) + .count() + ) + + multiple_bio = ( + model.Person.query.join(model.ConferencePerson) + .group_by(model.Person) + .filter(model.ConferencePerson.bio.isnot(None)) + .having(func.count() > 1) + ) + + shorter_recent_bio = [] + for person in multiple_bio: + bio_with_date = sorted( + [ + (cp.conference.start, cp.bio) + for cp in person.conferences_association + if cp.bio + ], + reverse=True, + ) + + if len(bio_with_date) < 2: + continue + + most_recent_bio = bio_with_date[0][1] + len_recent_bio = len(most_recent_bio) + longest = max(len(bio) for start, bio in bio_with_date[1:]) + + if longest > len_recent_bio * 2: + shorter_recent_bio.append((person, len_recent_bio, longest)) + + return flask.render_template( + "reports.html", + event_count=event_count, + speaker_count=speaker_count, + no_bio_count=no_bio_count, + one_bio_count=one_bio_count, + multiple_bio_count=multiple_bio.count(), + shorter_recent_bio=shorter_recent_bio, + 
missing_event_date_count=missing_event_date_count, + missing_event_date=model.Event.query.filter( + model.Event.event_date.is_(None) + ).order_by(model.Event.title), + ) + + if __name__ == "__main__": app.run(host="0.0.0.0", port=5002) diff --git a/templates/base.html b/templates/base.html index 0afe229..a332686 100644 --- a/templates/base.html +++ b/templates/base.html @@ -11,7 +11,7 @@ body { font-family: monospace; margin: 40px auto; - max-width: 1040px; + max-width: 900px; line-height: 1.6; font-size: 18px; color: #444; diff --git a/templates/conference.html b/templates/conference.html index fd5064c..f676234 100644 --- a/templates/conference.html +++ b/templates/conference.html @@ -2,22 +2,22 @@ {% block style %} {% endblock %} @@ -26,17 +26,36 @@ {% block title %}{{ item.title }}{% endblock %} {% block content %} +
+ {% if show_images %} +
+ {% for person in item.people %} + {% set photo = person.photo_filename() %} + {% if photo %} + + {% endif %} + {% endfor %} +
+ {% endif %} +

{{ item.title }}

home

+ {% if item.series %}
series: {{ item.series.name }} {% if item.series.wikidata_qid %} Wikidata {% endif %}
+ {% endif %}
start: {{ item.start }}
end: {{ item.end }}
{% if days %} @@ -64,20 +83,6 @@ {% endif %}
- {% if show_images %} -
- {% for person in item.people %} - {% if person.wikidata_photo %} - - - {{ person.name}} - - - {% endif %} - {% endfor %} -
- {% endif %} -

Talks

{{ item.events.count() }} talks

diff --git a/templates/github.html b/templates/github.html new file mode 100644 index 0000000..58a92e0 --- /dev/null +++ b/templates/github.html @@ -0,0 +1,24 @@ +{% extends "base.html" %} + +{% block title %}Conference archive{% endblock %} + + {% block content %} +

Conference archive

+ +

{{ items | count }} matches found

+ {% for person, qid, wd_name, photo in items %} +
+ {{ person.name }} + ## + {{ wd_name }} ({{ qid }}) + {% if photo %}📷{% endif %} +
+ {% endfor %} + +{% endblock %} + +{% block style %} + +{% endblock %} diff --git a/templates/index.html b/templates/index.html index 8e063c1..a02dbf0 100644 --- a/templates/index.html +++ b/templates/index.html @@ -7,6 +7,7 @@

Conference archive

+ {#
@@ -14,6 +15,7 @@
+ #}
👥 diff --git a/templates/navbar.html b/templates/navbar.html index 1c19061..c858ae0 100644 --- a/templates/navbar.html +++ b/templates/navbar.html @@ -1,5 +1,12 @@

+

home | events | speakers + + + +
+ +

diff --git a/templates/person.html b/templates/person.html index b23897e..191eaad 100644 --- a/templates/person.html +++ b/templates/person.html @@ -2,9 +2,36 @@ {% block title %}{{ item.name }}{% endblock %} + {% block style %} + + {% endblock %} + + + {% block content %}
+ + {% set photo = item.photo_filename() %} + {% if photo %} +
+ +
+ {% endif %} + +

{{ item.name }}

@@ -19,10 +46,6 @@

- {% if item.wikidata_photo %} - - {% endif %} - {% set search_for = item.name + ' ' + " haswbstatement:P31=Q5" %}

Search for {{ item.name }} on Wikidata

@@ -42,25 +65,57 @@ + {% if show_wikidata_matches %} + {% if wikidata_hits %} +

Possible Wikidata matches

+
    + {% for hit in wikidata_hits %} +
  • + {{ hit.qid }} + {{ hit.label }} — {{ hit.description }} +
  • + {% endfor %} +
+ {% elif not item.wikidata_qid %} +

No similar names found on Wikidata

+ {% endif %} + {% endif %} + + {% set bio_source = item.bio_source() %} + {% if bio_source %} +

Biography

+
+
{{ bio_source.bio | safe }}
+ +
+ {% else %} +

No biography available.

+ {% endif %} +

Conferences

+

{{ item.conferences_association.count() }} known conferences

{% for apperance in item.conference_by_time() %} {% set conf = apperance.conference %}
-

👥 {{ conf.title }} +

👥 + {{ conf.title }} 📅 {{ conf.start.strftime("%d %b %Y") }}

- {% if apperance.bio %}

Biography: {{ apperance.bio | safe }}

{% endif %} + {% if 0 and apperance.bio %}

Biography: {{ apperance.bio | safe }}

{% endif %}
{% for event in apperance.events %}
-

- 🎤 - {{ event.title }} - +   🎤 + {{ event.title }}
+    + {% if event.event_date %} {{ event.event_date.strftime("%d %b %Y") }} {% else %} @@ -68,8 +123,7 @@ {% endif %} show details -
-

+ {% endif %} + {% if event.people_detail.count() > 1 %}
+ Other people: {% for p in event.people %} {% if p.id != item.id %} {{ p.name }} {% endif %} {% endfor %}
+ {% endif %}
diff --git a/templates/reports.html b/templates/reports.html new file mode 100644 index 0000000..9a0f159 --- /dev/null +++ b/templates/reports.html @@ -0,0 +1,37 @@ +{% extends "base.html" %} + +{% block title %}Conference archive{% endblock %} + + {% block content %} +

Conference archive

+ +

Reports

+ +
Talks: {{ "{:,d}".format(event_count) }}
+
Talks with no event date: {{ "{:,d}".format(missing_event_date_count) }}
+
Speakers: {{ "{:,d}".format(speaker_count) }}
+
Speakers with no bio: {{ "{:,d}".format(no_bio_count) }}
+
Speakers with one bio: {{ "{:,d}".format(one_bio_count) }}
+
Speakers with more than one bio: {{ "{:,d}".format(multiple_bio_count) }}
+ +

Talks with missing dates

+ + {% for event in missing_event_date %} +
{{ event.title }}
+  {{ event.conference.title }}
+  GUID: {{ event.guid or "missing" }} +
+ {% endfor %} + +

Speakers with a shorter recent biography

+ +

{{ shorter_recent_bio | count }} found

+ + {% for person, recent_bio_length, longest in shorter_recent_bio %} +
+ {{ person.name }} + recent bio: {{ recent_bio_length }} vs {{ longest }} +
+ {% endfor %} + +{% endblock %} diff --git a/templates/search_events.html b/templates/search_events.html index c5c375a..f4c6905 100644 --- a/templates/search_events.html +++ b/templates/search_events.html @@ -17,9 +17,9 @@

Found {{ q.count() }} events matching '{{ search_for }}'

-
    {% for item in q %} -
  • +
    + 🎤 {{ item.title }}{{ item.conference.title }} diff --git a/templates/top_speakers.html b/templates/top_speakers.html index 8b7beb1..ff3b533 100644 --- a/templates/top_speakers.html +++ b/templates/top_speakers.html @@ -1,8 +1,66 @@ {% extends "base.html" %} +{% block style %} + + +{% endblock %} + +{% set show_images = True %} + {% block title %}Conference archive{% endblock %} {% block content %} +
    + + {% if show_images %} + +
    + {% for person, photo in left_photos %} +
    + + {{ person.name}} + +
    + {% endfor %} +
    + +
    + {% for person, photo in right_photos %} +
    + + {{ person.name}} + +
    + {% endfor %} + +
    + {% endif %} + +

    Conference archive

    @@ -27,17 +85,68 @@
      {% for person, count in top_speakers %} + {% if loop.first or loop.previtem[1] != count %} +

      {{ count }} conferences

      + {% endif %}
      + 👤 {{ person.name }} ({{ count }} conferences, {{ person.event_count }} talks) - {% if person.wikidata_photo %}📷{% endif %} + {% if person.photo_filename() %}📷{% endif %} {% if person.wikidata_qid %} Wikidata {% endif %} +
      {% endfor %} +
    {% endblock %} +{% block script %} +{% if show_images %} + +{% endif %} +{% endblock %}