Improvements

This commit is contained in:
Edward Betts 2023-09-24 21:41:30 +01:00
parent 5c934e19bc
commit 0ccaebe418

51
main.py
View file

@ -1,14 +1,8 @@
#!/usr/bin/python3
import hashlib
import json
import os
import time
import typing
from typing import cast
import flask
import requests
import sqlalchemy
from sqlalchemy import func, or_, update
from werkzeug.wrappers import Response
@ -21,37 +15,6 @@ app.debug = True
app.config.from_object("config.default")
database.init_app(app)
wikidata_api = "https://www.wikidata.org/w/api.php"
def md5sum(s: str) -> str:
    """Return the hexadecimal MD5 digest of *s* encoded as UTF-8.

    The digest is not used for anything security-sensitive, so the hash is
    requested with ``usedforsecurity=False``; this keeps the call working on
    Python builds where MD5 is disabled for security purposes.
    """
    return hashlib.md5(s.encode("utf-8"), usedforsecurity=False).hexdigest()
def wikidata_search(q: str) -> list[dict[str, typing.Any]]:
    """Run a Wikidata search for *q* and return the list of search hits.

    The query is extended with ``haswbstatement:P31=Q5`` before it is sent.
    Raw API responses are cached on disk under ``cache/<md5 of query>.json``;
    a cached response is returned without contacting the API.

    Raises:
        requests.HTTPError: the API answered with an HTTP error status.
        KeyError: the response has no ``query``/``search`` section.
    """
    q += " haswbstatement:P31=Q5"
    cache_filename = os.path.join("cache", md5sum(q) + ".json")

    if os.path.exists(cache_filename):
        with open(cache_filename, encoding="utf-8") as cache_file:
            data = json.load(cache_file)
    else:
        params: dict[str, str | int] = {
            "action": "query",
            "list": "search",
            "format": "json",
            "formatversion": 2,
            "srsearch": q,
            "srlimit": "max",
        }
        # A timeout stops a stalled connection from hanging the request forever.
        r = requests.get(wikidata_api, params=params, timeout=30)
        r.raise_for_status()
        data = r.json()

        # Only cache well-formed results; caching an error body would make
        # every later call for this query fail without retrying the API.
        if "query" in data:
            os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
            with open(cache_filename, "w", encoding="utf-8") as cache_file:
                cache_file.write(r.text)

        # Pause after each live request so repeated calls do not hammer the API.
        time.sleep(1)

    return cast(list[dict[str, typing.Any]], data["query"]["search"])
def top_speakers() -> sqlalchemy.orm.query.Query:
q = (
@ -89,6 +52,7 @@ def top_events() -> sqlalchemy.orm.query.Query:
def drop_start(s: str, start: str) -> str:
    """Remove *start* from the beginning of *s*.

    If *s* does not begin with *start*, *s* is returned unchanged. Only a
    single leading occurrence is removed.
    """
    return s.removeprefix(start)
@ -121,7 +85,8 @@ def person(person_id: int) -> str | Response:
wikidata_hits: list[dict[str, str]] = []
if False and item.wikidata_qid is None:
search_hits = wikidata_search(item.name)
q = item.name + " haswbstatement:P31=Q5"
search_hits = wikidata.search(q)
print(len(search_hits))
for search_hit in search_hits:
qid = search_hit["title"]
@ -350,7 +315,8 @@ def link_to_wikidata() -> str:
for person, num in top_speakers2():
if person.wikidata_qid:
continue
search_hits = wikidata_search(person.name)
q = person.name + " haswbstatement:P31=Q5"
search_hits = wikidata.search(q)
if not search_hits:
continue
@ -361,7 +327,7 @@ def link_to_wikidata() -> str:
for search_hit in search_hits:
qid = search_hit["title"]
item = wikidata_get_item(qid)
item = wikidata.get_item(qid)
if "en" in item["labels"]:
label = item["labels"]["en"]["value"]
else:
@ -425,7 +391,8 @@ def delete_person(person_id: int) -> str | Response:
return flask.redirect(flask.url_for("index"))
def person_image_filename(person_id):
def person_image_filename(person_id: int) -> str:
    """Build the thumbnail path for a person's photo.

    The person is looked up by *person_id* and the first entry of its
    ``wikidata_photo`` is placed under ``wikidata_photo/thumb``.
    """
    speaker = model.Person.query.get(person_id)
    photo_name = speaker.wikidata_photo[0]
    return os.path.join("wikidata_photo", "thumb", photo_name)
@ -440,6 +407,7 @@ def person_image_filename(person_id):
@app.route("/github_wikidata")
def github_wikidata() -> str:
"""Look for speakers on Wikidata based on the GitHub property."""
items = []
for line in open("found_wikidata_github"):
person_id, person_name, qid, wd_name, github, photo = eval(line)
@ -453,6 +421,7 @@ def github_wikidata() -> str:
@app.route("/reports")
def reports_page() -> str:
"""Page showing statistics."""
event_count = model.Event.query.count()
missing_event_date_count = model.Event.query.filter(