Compare commits

..

2 commits

Author SHA1 Message Date
Edward Betts 0ccaebe418 Improvements 2023-09-24 21:41:30 +01:00
Edward Betts 5c934e19bc Types and docstrings. 2023-09-24 21:41:05 +01:00
2 changed files with 14 additions and 42 deletions

View file

@ -13,6 +13,8 @@ commons_url = "https://www.wikidata.org/w/api.php"
wikidata_api = "https://www.wikidata.org/w/api.php"
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
CallParams = dict[str, str | int]
s = requests.Session()
s.headers.update({"User-Agent": user_agent})
@ -49,7 +51,7 @@ def search(q: str) -> list[dict[str, typing.Any]]:
def api_image_detail_call(filename: str) -> requests.Response:
"""Call the Commons API."""
call_params = {
call_params: CallParams = {
"format": "json",
"formatversion": 2,
"action": "query",
@ -102,6 +104,7 @@ def download_photo(filename: str) -> None:
def get_photo(filename: str) -> None:
"""Download filename and resize."""
save_to = os.path.join("static", "wikidata_photo", filename)
thumb = os.path.join("static", "wikidata_photo", "thumb", filename)
if not os.path.exists(save_to):

51
main.py
View file

@ -1,14 +1,8 @@
#!/usr/bin/python3
import hashlib
import json
import os
import time
import typing
from typing import cast
import flask
import requests
import sqlalchemy
from sqlalchemy import func, or_, update
from werkzeug.wrappers import Response
@ -21,37 +15,6 @@ app.debug = True
app.config.from_object("config.default")
database.init_app(app)
wikidata_api = "https://www.wikidata.org/w/api.php"
def md5sum(s: str) -> str:
    """Return the hexadecimal MD5 digest of ``s`` encoded as UTF-8.

    The digest is only used as a stable, filesystem-safe identifier (for
    example a cache filename), not for any security purpose.
    """
    # usedforsecurity=False keeps this working on FIPS-restricted Python
    # builds, where MD5 is otherwise refused; the digest value is unchanged.
    return hashlib.md5(s.encode("utf-8"), usedforsecurity=False).hexdigest()
def wikidata_search(q: str) -> list[dict[str, typing.Any]]:
    """Search Wikidata for ``q`` and return the list of search hits.

    The query is extended with a ``haswbstatement:P31=Q5`` filter before it
    is sent. Responses are cached on disk under ``cache/`` using the MD5 of
    the full query string as the filename, so a repeated search does not hit
    the API again.

    Returns the ``query.search`` list from the API response.
    """
    # On Wikidata P31 is "instance of" and Q5 is "human", so this limits the
    # results to people.
    q += " haswbstatement:P31=Q5"
    cache_filename = os.path.join("cache", md5sum(q) + ".json")

    if os.path.exists(cache_filename):
        with open(cache_filename, encoding="utf-8") as cache_file:
            data = json.load(cache_file)
    else:
        params: dict[str, str | int] = {
            "action": "query",
            "list": "search",
            "format": "json",
            "formatversion": 2,
            "srsearch": q,
            "srlimit": "max",
        }
        r = requests.get(wikidata_api, params=params)
        # Parse before caching: if the reply is not valid JSON this raises
        # and nothing is written, so a bad response cannot poison the cache.
        data = r.json()
        with open(cache_filename, "w", encoding="utf-8") as cache_file:
            cache_file.write(r.text)
        # Pause after a live request (presumably to be polite to the API).
        time.sleep(1)

    return cast(list[dict[str, typing.Any]], data["query"]["search"])
def top_speakers() -> sqlalchemy.orm.query.Query:
q = (
@ -89,6 +52,7 @@ def top_events() -> sqlalchemy.orm.query.Query:
def drop_start(s: str, start: str) -> str:
    """Remove text from the start of a string.

    Returns ``s`` without the leading ``start`` when ``s`` begins with it;
    otherwise returns ``s`` unchanged. An empty ``start`` leaves ``s`` as is.
    """
    # str.removeprefix has exactly the semantics of the hand-written slice.
    return s.removeprefix(start)
@ -121,7 +85,8 @@ def person(person_id: int) -> str | Response:
wikidata_hits: list[dict[str, str]] = []
if False and item.wikidata_qid is None:
search_hits = wikidata_search(item.name)
q = item.name + " haswbstatement:P31=Q5"
search_hits = wikidata.search(q)
print(len(search_hits))
for search_hit in search_hits:
qid = search_hit["title"]
@ -350,7 +315,8 @@ def link_to_wikidata() -> str:
for person, num in top_speakers2():
if person.wikidata_qid:
continue
search_hits = wikidata_search(person.name)
q = person.name + " haswbstatement:P31=Q5"
search_hits = wikidata.search(q)
if not search_hits:
continue
@ -361,7 +327,7 @@ def link_to_wikidata() -> str:
for search_hit in search_hits:
qid = search_hit["title"]
item = wikidata_get_item(qid)
item = wikidata.get_item(qid)
if "en" in item["labels"]:
label = item["labels"]["en"]["value"]
else:
@ -425,7 +391,8 @@ def delete_person(person_id: int) -> str | Response:
return flask.redirect(flask.url_for("index"))
def person_image_filename(person_id):
def person_image_filename(person_id: int) -> str:
    """Return the thumbnail path for a speaker's photo.

    Fetches the ``model.Person`` for ``person_id`` and joins the first entry
    of its ``wikidata_photo`` onto ``wikidata_photo/thumb``.
    """
    speaker = model.Person.query.get(person_id)
    first_photo = speaker.wikidata_photo[0]
    return os.path.join("wikidata_photo", "thumb", first_photo)
@ -440,6 +407,7 @@ def person_image_filename(person_id):
@app.route("/github_wikidata")
def github_wikidata() -> str:
"""Look for speakers on Wikidata based on the GitHub property."""
items = []
for line in open("found_wikidata_github"):
person_id, person_name, qid, wd_name, github, photo = eval(line)
@ -453,6 +421,7 @@ def github_wikidata() -> str:
@app.route("/reports")
def reports_page() -> str:
"""Page showing statistics."""
event_count = model.Event.query.count()
missing_event_date_count = model.Event.query.filter(