various improvements

This commit is contained in:
Edward Betts 2023-09-22 21:00:56 +01:00
parent 6922b1bf11
commit d1a46dd422
11 changed files with 504 additions and 74 deletions

184
main.py
View file

@ -13,7 +13,7 @@ import sqlalchemy
from sqlalchemy import func, or_, update
from werkzeug.wrappers import Response
from confarchive import database, model
from confarchive import database, model, wikidata
app = flask.Flask(__name__)
app.debug = True
@ -53,33 +53,14 @@ def wikidata_search(q: str) -> list[dict[str, typing.Any]]:
return cast(list[dict[str, typing.Any]], data["query"]["search"])
def wikidata_get_item(qid: str) -> typing.Any:
    """Return the Wikidata entity for *qid*, using an on-disk JSON cache.

    The entity is read from ``items/<qid>.json`` when that file exists.
    Otherwise it is requested from the API with ``wbgetentities``, written to
    the cache file and then returned.
    """
    cache_filename = os.path.join("items", qid + ".json")
    if os.path.exists(cache_filename):
        # Read through a context manager so the file handle is closed
        # instead of being left for the garbage collector.
        with open(cache_filename) as f:
            return json.load(f)

    print(qid)
    params: dict[str, str | int] = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "formatversion": 2,
    }
    r = requests.get(wikidata_api, params=params)
    item = r.json()["entities"][qid]
    with open(cache_filename, "w") as f:
        json.dump(item, f, indent=2)
    # Short pause after a network fetch (presumably to rate-limit API calls).
    time.sleep(0.1)
    return item
def top_speakers() -> sqlalchemy.orm.query.Query:
    """Build the query that lists the most frequent speakers.

    Each row is ``(Person, count)``, where the count is the number of joined
    ``ConferencePerson`` rows for that person. Only people with a count above
    four are included, ordered by count (highest first) and then by name.
    """
    q = (
        database.session.query(model.Person, func.count())
        .join(model.ConferencePerson)
        .filter(model.Person.id != 1046)  # FOSDEM Staff
        .group_by(model.Person)
        # A single ordering and a single threshold: the superseded
        # ``order_by(count desc)`` / ``having(count > 3)`` pair was redundant.
        .order_by(func.count().desc(), model.Person.name)
        .having(func.count() > 4)
    )
    return q
@ -107,20 +88,68 @@ def top_events() -> sqlalchemy.orm.query.Query:
return q
def drop_start(s: str, start: str) -> str:
    """Return *s* with a leading *start* removed.

    Strings that do not begin with *start* are returned unchanged.
    """
    return s.removeprefix(start)
@app.route("/person/<int:person_id>", methods=["GET", "POST"])
def person(person_id: int) -> str | Response:
    """Show a person page and handle edits posted from it.

    GET renders ``person.html``. POST stores the submitted name and, when a
    new Wikidata QID is submitted, records it together with the filenames
    from the item's image (P18) claims, then redirects back to this page.
    """
    item = model.Person.query.get(person_id)
    if item is None:
        # Unknown id: answer 404 instead of failing on attribute access.
        flask.abort(404)

    if flask.request.method == "POST":
        qid = flask.request.form["wikidata_qid"] or None
        item.name = flask.request.form["name"]

        # Only look the item up when the QID actually changed.
        if qid and qid != item.wikidata_qid:
            item.wikidata_qid = qid
            wd_item = wikidata.get_item(qid)
            if "P18" in wd_item["claims"]:
                claim_p18 = wd_item["claims"]["P18"]
                wikidata_photo = [
                    drop_start(s["mainsnak"]["datavalue"]["value"], "-")
                    for s in claim_p18
                    # Snaks without a value carry no "datavalue" key.
                    if "datavalue" in s["mainsnak"]
                ]
                for filename in wikidata_photo:
                    print(filename)
                    wikidata.get_photo(filename)
                item.wikidata_photo = wikidata_photo

        database.session.commit()
        assert flask.request.endpoint
        return flask.redirect(
            flask.url_for(flask.request.endpoint, person_id=person_id)
        )

    wikidata_hits: list[dict[str, str]] = []
    # The ``False and`` guard keeps the Wikidata search suggestions
    # switched off; the branch is retained so it can be re-enabled.
    if False and item.wikidata_qid is None:
        search_hits = wikidata_search(item.name)
        print(len(search_hits))
        for search_hit in search_hits:
            qid = search_hit["title"]
            wd_item = wikidata.get_item(qid)
            if "en" in wd_item["labels"]:
                label = wd_item["labels"]["en"]["value"]
            else:
                label = "[no english label]"

            if "en" in wd_item["descriptions"]:
                description = wd_item["descriptions"]["en"]["value"]
            else:
                description = "[no english description]"

            wikidata_hits.append(
                {
                    "qid": qid,
                    "label": label,
                    "description": description,
                }
            )

    return flask.render_template(
        "person.html",
        item=item,
        Event=model.Event,
        plural=plural,
        wikidata_hits=wikidata_hits,
    )
@ -260,12 +289,33 @@ order by num
@app.route("/speakers")
def top_speakers_page() -> str:
    """Top speakers page.

    Renders ``top_speakers.html`` with the top-speaker query plus the
    speakers that have a photo, split alternately into left and right lists.
    """
    top = top_speakers()

    # Keep only speakers for whom photo_filename() returns something,
    # preserving the order of the query.
    photos = [
        (person, photo)
        for person, _count in top
        if (photo := person.photo_filename())
    ]
    photo_person_ids = [person.id for person, _photo in photos]

    return flask.render_template(
        "top_speakers.html",
        top_speakers=top,
        speaker_counts=speaker_counts(),
        plural=plural,
        person_image_filename=person_image_filename,
        # Even positions go to the left-hand values, odd to the right-hand.
        left=photo_person_ids[::2],
        right=photo_person_ids[1::2],
        left_photos=photos[::2],
        right_photos=photos[1::2],
    )
@ -335,7 +385,7 @@ def link_to_wikidata() -> str:
@app.route("/search")
def search_everything() -> str:
search_for = flask.request.args["q"]
search_for = flask.request.args.get("q")
if not search_for:
return flask.render_template("search_everything.html")
@ -374,6 +424,7 @@ def delete_person(person_id: int) -> str | Response:
def person_image_filename(person_id):
person = model.Person.query.get(person_id)
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
for filename in person.wikidata_photo:
face_crop = "face_1_" + filename
full = os.path.join("static", "wikidata_photo", "face_cropped", face_crop)
@ -383,5 +434,84 @@ def person_image_filename(person_id):
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
@app.route("/github_wikidata")
def github_wikidata() -> str:
    """List candidate Wikidata matches for people that have no QID yet.

    Each line of the ``found_wikidata_github`` file is evaluated to a
    six-item tuple. Entries whose person already has a ``wikidata_qid``, or
    whose person id is no longer in the database, are skipped. The rest are
    passed to ``github.html``.
    """
    items = []
    # Context manager so the file is closed once it has been read.
    with open("found_wikidata_github") as f:
        for line in f:
            # SECURITY NOTE(review): eval() executes whatever the line
            # contains. If the file only ever holds plain tuple literals,
            # ast.literal_eval would be a safer parser; confirm the file
            # format before switching.
            person_id, _person_name, qid, wd_name, _github, photo = eval(line)
            person = model.Person.query.get(person_id)
            if person is None or person.wikidata_qid:
                continue
            items.append((person, qid, wd_name, photo))
    return flask.render_template("github.html", items=items)
@app.route("/reports")
def reports_page() -> str:
    """Render ``reports.html`` with summary counts for events and speakers.

    Also lists people whose most recent bio is less than half the length of
    one of their earlier bios, and the events that have no date.
    """
    bio = model.ConferencePerson.bio

    event_count = model.Event.query.count()
    undated_events = model.Event.query.filter(model.Event.event_date.is_(None))
    missing_event_date_count = undated_events.count()

    speaker_count = model.Person.query.count()

    speakers = model.Person.query.join(model.ConferencePerson)
    no_bio_count = speakers.filter(bio.is_(None)).group_by(model.Person).count()

    # People grouped over their association rows that carry a bio.
    with_bio = speakers.group_by(model.Person).filter(bio.isnot(None))
    one_bio_count = with_bio.having(func.count() == 1).count()
    multiple_bio = with_bio.having(func.count() > 1)

    shorter_recent_bio = []
    for person in multiple_bio:
        dated_bios = [
            (assoc.conference.start, assoc.bio)
            for assoc in person.conferences_association
            if assoc.bio
        ]
        if len(dated_bios) < 2:
            continue

        # Newest conference first.
        dated_bios.sort(reverse=True)
        (_, newest_bio), *earlier = dated_bios
        len_recent_bio = len(newest_bio)
        longest = max(len(text) for _, text in earlier)

        if longest > len_recent_bio * 2:
            shorter_recent_bio.append((person, len_recent_bio, longest))

    return flask.render_template(
        "reports.html",
        event_count=event_count,
        speaker_count=speaker_count,
        no_bio_count=no_bio_count,
        one_bio_count=one_bio_count,
        multiple_bio_count=multiple_bio.count(),
        shorter_recent_bio=shorter_recent_bio,
        missing_event_date_count=missing_event_date_count,
        missing_event_date=undated_events.order_by(model.Event.title),
    )
if __name__ == "__main__":
    # Script entry point: serve the app on every interface, port 5002.
    # NOTE(review): this module also sets app.debug = True; confirm this
    # entry point is only used for local development.
    app.run(port=5002, host="0.0.0.0")