Add type hints and docstrings

This commit is contained in:
Edward Betts 2023-10-25 07:54:05 +01:00
parent 63d0d198eb
commit fd281532e6
26 changed files with 1774 additions and 968 deletions

252
app.py
View file

@ -1,18 +1,18 @@
#!/usr/bin/python3
import hashlib
import inspect
import itertools
import json
import os
import re
import socket
import typing
from collections import defaultdict
from datetime import datetime
from typing import Iterable, Mapping
import requests.exceptions
import simplejson.errors
import werkzeug
from flask import (
Flask,
g,
@ -27,6 +27,7 @@ from requests_oauthlib import OAuth1Session
from sqlalchemy import distinct, func
from sqlalchemy.orm import aliased
from sqlalchemy.sql.expression import desc
from werkzeug.wrappers import Response
from depicts import (
artwork,
@ -54,6 +55,7 @@ from depicts.model import (
WikidataQuery,
)
from depicts.pager import Pagination, init_pager
from depicts.type import Entity
user_agent = "Mozilla/5.0 (X11; Linux i586; rv:32.0) Gecko/20160101 Firefox/32.0"
@ -109,30 +111,15 @@ re_pid = re.compile(r"^P(\d+)")
@app.teardown_appcontext
def shutdown_session(exception: Exception | None = None) -> None:
    """Remove the database session when the application context is torn down.

    The ``exception`` argument is accepted because this function is registered
    as a teardown callback; it is not used here.
    """
    database.session.remove()  # type:ignore


@app.errorhandler(werkzeug.exceptions.InternalServerError)
def exception_handler(e):
    """Render ``show_error.html`` for an internal server error, with status 500.

    The template receives the current traceback, the last frame in
    ``tb.frames`` that is not flagged as library code, and that frame's
    argument description as returned by ``inspect.getargs``.
    """
    # NOTE(review): get_current_traceback appears to have been removed from
    # werkzeug.debug.tbtools in Werkzeug 2.1 - confirm against the installed
    # version before relying on this handler.
    tb = werkzeug.debug.tbtools.get_current_traceback()
    last_frame = next(frame for frame in reversed(tb.frames) if not frame.is_library)
    last_frame_args = inspect.getargs(last_frame.code)
    return (
        render_template(
            "show_error.html",
            tb=tb,
            last_frame=last_frame,
            last_frame_args=last_frame_args,
        ),
        500,
    )
@app.template_global()
def set_url_args(endpoint=None, **new_args):
def set_url_args(endpoint: str | None = None, **new_args: str) -> str:
if endpoint is None:
endpoint = request.endpoint
assert endpoint and request.view_args
args = request.view_args.copy()
args.update(request.args)
args.update(new_args)
@ -141,23 +128,25 @@ def set_url_args(endpoint=None, **new_args):
@app.template_global()
def current_url() -> str:
    """Get current URL.

    Rebuild the URL of the request being handled from its endpoint, its
    matched view arguments and its query-string arguments. When a name occurs
    in both, the query-string value replaces the view argument.
    """
    # view_args is an empty dict for routes that have no URL variables, so it
    # must be compared with None: a truthiness test would raise AssertionError
    # on every such route.
    assert request and request.endpoint and request.view_args is not None
    args = request.view_args.copy()
    args.update(request.args)
    return url_for(request.endpoint, **args)
@app.before_request
def init_profile() -> None:
    """Reset ``g.profiling`` to a fresh empty list before each request."""
    g.profiling = list()
@app.before_request
def global_user() -> None:
    """Store the username reported by ``wikidata_oauth`` on ``g.user``."""
    username = wikidata_oauth.get_username()
    g.user = username
def check_for_blocks():
def check_for_blocks() -> None:
if hasattr(g, "server_ip"): # already done
return
hostname = app.config.get("HOSTNAME")
@ -172,43 +161,45 @@ def check_for_blocks():
@app.before_request
def get_blocks() -> None:
    """Run the block check before each request unless the config disables it.

    The check is skipped only when ``SHOW_BLOCK_ALERT`` is exactly ``False``;
    a missing setting leaves it enabled.
    """
    show_alert = app.config.get("SHOW_BLOCK_ALERT")
    if show_alert is False:
        return
    check_for_blocks()
@app.route("/find_more_setting")
def flip_find_more() -> str:
    """Toggle the ``no_find_more`` session flag and report the new state."""
    disabled = not session.get("no_find_more")
    session["no_find_more"] = disabled
    # The flag is stored negated: "find more" is on when the flag is False.
    state = "off" if disabled else "on"
    return "flipped. find more is " + state
def existing_edit(item_id: int, depicts_id: int) -> bool:
    """Report whether an ``Edit`` row exists for this artwork/depicts pair."""
    matching = Edit.query.filter_by(  # type: ignore
        artwork_id=item_id, depicts_id=depicts_id
    )
    return bool(matching.count())
@app.route("/save/Q<int:item_id>", methods=["POST"])
def save(item_id):
def save(item_id: int) -> str | Response:
depicts = request.form.getlist("depicts")
username = wikidata_oauth.get_username()
assert username
token = wikidata_oauth.get_token()
artwork_item = Item.query.get(item_id)
artwork_item = Item.query.get(item_id) # type: ignore
if artwork_item is None:
artwork_entity = mediawiki.get_entity_with_cache(f"Q{item_id}")
artwork_item = Item(item_id=item_id, entity=artwork_entity)
artwork_item = Item(
item_id=item_id, entity=typing.cast(dict[str, str], artwork_entity)
)
database.session.add(artwork_item)
database.session.commit()
for depicts_qid in depicts:
depicts_id = int(depicts_qid[1:])
depicts_item = DepictsItem.query.get(depicts_id)
depicts_item = DepictsItem.query.get(depicts_id) # type: ignore
if depicts_item is None:
depicts_item = wikidata_edit.create_depicts_item(depicts_id)
database.session.add(depicts_item)
@ -247,24 +238,24 @@ def save(item_id):
@app.route("/settings", methods=["GET", "POST"])
def user_settings() -> str:
    """Render the user settings page."""
    page = render_template("user_settings.html")
    return page
@app.route("/test/lookup")
def test_lookup_page() -> str:
    """Render the lookup test page."""
    page = render_template("test_lookup.html")
    return page
@app.route("/property/P<int:property_id>")
def property_query_page(property_id):
def property_query_page(property_id: int) -> str:
pid = f"P{property_id}"
g.title = find_more_props[pid]
sort = request.args.get("sort")
sort_by_name = sort and sort.lower().strip() == "name"
q = (
database.session.query(
database.session.query( # type: ignore
Triple.object_id, func.count(func.distinct(Triple.subject_id)).label("c")
)
.filter_by(predicate_id=property_id)
@ -302,15 +293,19 @@ def property_query_page(property_id):
@app.route("/")
def start() -> Response:
    """Handle the site root by delegating to ``random_artwork``."""
    response = random_artwork()
    return response
@app.route("/next")
def random_artwork():
def random_artwork() -> Response:
found = None
while True:
q = Item.query.filter_by(is_artwork=True).order_by(func.random()).limit(30)
q = (
Item.query.filter_by(is_artwork=True) # type: ignore
.order_by(func.random())
.limit(30)
)
for item in q:
has_depicts = "P180" in item.entity["claims"]
if has_depicts:
@ -325,7 +320,7 @@ def random_artwork():
@app.route("/oauth/start")
def start_oauth():
def start_oauth() -> Response:
next_page = request.args.get("next")
if next_page:
session["after_login"] = next_page
@ -349,7 +344,7 @@ def start_oauth():
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
def oauth_callback() -> Response:
base_url = "https://www.wikidata.org/w/index.php"
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
@ -381,17 +376,17 @@ def oauth_callback():
@app.route("/oauth/disconnect")
def oauth_disconnect() -> Response:
    """Drop the OAuth-related keys from the session and go to the browse page."""
    oauth_keys = ("owner_key", "owner_secret", "username", "after_login")
    for name in oauth_keys:
        if name not in session:
            continue
        del session[name]
    browse_url = url_for("browse_page")
    return redirect(browse_url)
def create_claim(artwork_id, depicts_id, token):
def create_claim(artwork_id: int, depicts_id: int, token: str) -> requests.Response:
artwork_qid = f"Q{artwork_id}"
value = json.dumps({"entity-type": "item", "numeric-id": depicts_id})
params = {
params: dict[str, str | int] = {
"action": "wbcreateclaim",
"entity": artwork_qid,
"property": "P180",
@ -401,10 +396,11 @@ def create_claim(artwork_id, depicts_id, token):
"format": "json",
"formatversion": 2,
}
return wikidata_oauth.api_post_request(params)
r: requests.Response = wikidata_oauth.api_post_request(params)
return r
def image_with_cache(qid, image_filename, width):
def image_with_cache(qid: str, image_filename: str, width: int) -> dict[str, str]:
filename = f"cache/{qid}_{width}_image.json"
detail = json.load(open(filename)) if os.path.exists(filename) else {}
@ -416,10 +412,11 @@ def image_with_cache(qid, image_filename, width):
detail = commons.image_detail([image_filename], thumbwidth=width)
json.dump(detail, open(filename, "w"), indent=2)
return detail.get(image_filename)
image: dict[str, str] = detail.get(image_filename)
return image
def existing_depicts_from_entity(entity):
def existing_depicts_from_entity(entity: Entity) -> list[dict[str, typing.Any]]:
if "P180" not in entity["claims"]:
return []
existing = []
@ -430,7 +427,7 @@ def existing_depicts_from_entity(entity):
item_id = claim["mainsnak"]["datavalue"]["value"]["numeric-id"]
item = DepictsItem.query.get(item_id)
item = DepictsItem.query.get(item_id) # type: ignore
if not item:
item = wikidata_edit.create_depicts_item(item_id)
database.session.add(item)
@ -448,27 +445,32 @@ def existing_depicts_from_entity(entity):
return existing
def get_institution(entity: Entity, other: Mapping[str, str | None]) -> str | None:
    """Look up the label of the institution that holds an artwork.

    The location claim (P276) is tried first, then the collection claim
    (P195). For the first of these properties whose first data value is a
    non-empty dict, that value's ``id`` is looked up in ``other``.

    Args:
        entity: Entity whose ``claims`` are inspected.
        other: Mapping from item id to label.

    Returns:
        The matching entry of ``other`` (which may itself be None), or None
        when neither property yields a usable value.
    """
    for pid in ("P276", "P195"):
        if pid not in entity["claims"]:
            continue
        value = wikibase.first_datavalue(entity, pid)
        # Skip a missing or non-dict value instead of asserting on it, so a
        # claim without an item value falls through to the next property
        # rather than raising AssertionError.
        if isinstance(value, dict) and value:
            return other.get(value["id"])
    return None
@app.route("/item/Q<int:item_id>")
def item_page(item_id):
def item_page(item_id: int) -> str | Response:
qid = f"Q{item_id}"
g.qid = qid
item = artwork.Artwork(qid)
from_redirect = qid in session and session.pop(qid) == "from redirect"
entity = mediawiki.get_entity_with_cache(qid, refresh=not from_redirect)
assert entity
if "redirects" in entity:
redirect_to_item_id = int(entity["redirects"]["to"][1:])
assert request.endpoint
return redirect(url_for(request.endpoint, item_id=redirect_to_item_id))
existing_depicts = existing_depicts_from_entity(entity)
@ -494,7 +496,7 @@ def item_page(item_id):
label_languages = label_and_language["languages"] if label_and_language else []
show_translation_links = all(lang.code != "en" for lang in label_languages)
artwork_item = Item.query.get(item_id)
artwork_item = Item.query.get(item_id) # type: ignore
if artwork_item is None:
if not wdqs.is_artificial_physical_object(qid):
return render_template(
@ -517,7 +519,7 @@ def item_page(item_id):
artwork_item = Item(
item_id=item_id,
entity=entity,
entity=typing.cast(dict[str, str], entity),
lastrevid=entity["lastrevid"],
modified=modified,
)
@ -525,7 +527,9 @@ def item_page(item_id):
catalog = wd_catalog.get_catalog_from_artwork(entity)
if not catalog.get("institution"):
catalog["institution"] = get_institution(entity, other)
institution = get_institution(entity, other)
assert institution
catalog["institution"] = institution
return render_template(
"item.html",
@ -548,11 +552,13 @@ def item_page(item_id):
)
def get_languages(codes: typing.Iterable[str]) -> typing.Any:
    """Build a ``Language`` query filtered to the given Wikimedia language codes."""
    code_matches = Language.wikimedia_language_code.in_(codes)
    return Language.query.filter(code_matches)  # type: ignore
def get_entity_label_and_language(entity):
def get_entity_label_and_language(entity: Entity) -> dict[str, typing.Any] | None:
"""
Look for a useful label and return it with a list of languages that have that label.
@ -573,8 +579,10 @@ def get_entity_label_and_language(entity):
label, languages = list(group_by_label.items())[0]
return {"label": label, "languages": get_languages(languages)}
return None
def get_labels(keys, name=None):
def get_labels(keys: typing.Iterable[str], name: str | None = None) -> dict[str, str]:
keys = sorted(keys, key=lambda i: int(i[1:]))
if name is None:
name = hashlib.md5("_".join(keys).encode("utf-8")).hexdigest()
@ -590,10 +598,13 @@ def get_labels(keys, name=None):
json.dump({"keys": keys, "labels": labels}, open(filename, "w"), indent=2)
return {entity["id"]: wikibase.get_entity_label(entity) for entity in labels}
return {
entity["id"]: wikibase.get_entity_label(entity) or "no English label"
for entity in labels
}
def get_labels_db(keys):
def get_labels_db(keys: Iterable[str]):
keys = set(keys)
labels = {}
missing = set()
@ -601,7 +612,7 @@ def get_labels_db(keys):
m = re_qid.match(qid)
if m:
item_id = int(m.group(1))
item = Item.query.get(item_id)
item = Item.query.get(item_id) # type: ignore
if item:
labels[qid] = item.label
continue
@ -619,8 +630,8 @@ def get_labels_db(keys):
modified = datetime.strptime(entity["modified"], "%Y-%m-%dT%H:%M:%SZ")
# FIXME: check if the item is an artwork and set is_artwork correctly
item = Item(
item_id=qid[1:],
item = Item( # type: ignore
item_id=int(qid[1:]),
entity=entity,
lastrevid=entity["lastrevid"],
modified=modified,
@ -635,7 +646,7 @@ def get_labels_db(keys):
return labels
def build_other_set(entity):
def build_other_set(entity: Entity) -> set[str]:
other_items = set()
for key in find_more_props.keys():
for claim in entity["claims"].get(key, []):
@ -645,20 +656,24 @@ def build_other_set(entity):
return other_items
def get_other(entity: Entity) -> Mapping[str, str | None]:
    """Return labels for the items that ``build_other_set`` collects from ``entity``."""
    return get_labels(build_other_set(entity))
@app.route("/edits")
def list_edits():
q = Edit.query.order_by(Edit.timestamp.desc())
def list_edits() -> str:
q = Edit.query.order_by(Edit.timestamp.desc()) # type: ignore
page = utils.get_int_arg("page") or 1
pager = Pagination(page, 100, q.count())
item_count = database.session.query(func.count(distinct(Edit.artwork_id))).scalar()
item_count = database.session.query(
func.count(distinct(Edit.artwork_id))
).scalar() # type: ignore
user_count = database.session.query(func.count(distinct(Edit.username))).scalar()
user_count = database.session.query(
func.count(distinct(Edit.username))
).scalar() # type: ignore
return render_template(
"list_edits.html",
@ -670,11 +685,13 @@ def list_edits():
@app.route("/user/<username>")
def user_page(username):
edit_list = Edit.query.filter_by(username=username).order_by(Edit.timestamp.desc())
def user_page(username: str) -> str:
edit_list = Edit.query.filter_by(username=username).order_by( # type: ignore
Edit.timestamp.desc()
)
item_count = (
database.session.query(func.count(distinct(Edit.artwork_id)))
database.session.query(func.count(distinct(Edit.artwork_id))) # type: ignore
.filter_by(username=username)
.scalar()
)
@ -682,20 +699,22 @@ def user_page(username):
return render_template(
"user_page.html",
username=username,
edits=Edit.query,
edits=Edit.query, # type: ignore
edit_list=edit_list,
item_count=item_count,
)
@app.route("/next/Q<int:item_id>")
def next_page(item_id):
def next_page(item_id: int) -> str:
qid = f"Q{item_id}"
entity = mediawiki.get_entity_with_cache(qid)
assert entity
width = 800
image_filename = wikibase.first_datavalue(entity, "P18")
assert isinstance(image_filename, str)
image = image_with_cache(qid, image_filename, width)
label = wikibase.get_entity_label(entity)
@ -756,13 +775,13 @@ def next_page(item_id):
@app.route("/P<int:property_id>/Q<int:item_id>")
def find_more_page(property_id: int, item_id: int) -> Response:
    """Redirect to the browse page with this property/item pair as its criterion."""
    criteria = {f"P{property_id}": f"Q{item_id}"}
    target = url_for("browse_page", **criteria)
    return redirect(target)  # type: ignore
@app.route("/toolinfo.json")
def tool_info():
def tool_info() -> Response:
info = {
"name": "wade",
"title": "Wikidata Art Depiction Explorer",
@ -775,14 +794,16 @@ def tool_info():
return jsonify(info)
def get_facets(params):
def get_facets(params) -> dict[str, typing.Any]:
properties = [pid for pid in find_more_props.keys() if pid not in request.args]
bindings = wdqs.run_from_template_with_cache(
"query/facet.sparql", params=params, isa_list=isa_list, properties=properties
)
facets = {key: [] for key in find_more_props.keys()}
facets: dict[str, list[dict[str, str | int]]] = {
key: [] for key in find_more_props.keys()
}
for row in bindings:
pid = row["property"]["value"].rpartition("/")[2]
qid = row["object"]["value"].rpartition("/")[2]
@ -800,7 +821,7 @@ def get_facets(params):
}
def get_artwork_params():
def get_artwork_params() -> list[tuple[str, str]]:
params = []
for pid, qid in request.args.items():
m = re_pid.match(pid)
@ -817,14 +838,14 @@ def get_artwork_params():
return params
def filter_artwork(params: list[tuple[str, str]]) -> list[wdqs.Row]:
    """Run the cached ``find_more`` SPARQL template for the given criteria."""
    template = "query/find_more.sparql"
    rows = wdqs.run_from_template_with_cache(
        template, params=params, isa_list=isa_list
    )
    return rows
@app.route("/catalog")
def catalog_page():
def catalog_page() -> str:
params = get_artwork_params()
bindings = filter_artwork(params)
page = utils.get_int_arg("page") or 1
@ -837,7 +858,7 @@ def catalog_page():
qids = [f"Q{item_id}" for item_id in sorted(item_ids)]
items = [Item.query.get(item_id) for item_id in item_ids]
items = [Item.query.get(item_id) for item_id in item_ids] # type: ignore
entities = mediawiki.get_entities_with_cache(qids)
@ -878,7 +899,9 @@ def catalog_page():
)
def get_image_detail_with_cache(items, cache_name, thumbwidth=None, refresh=False):
def get_image_detail_with_cache(
items, cache_name: str, thumbwidth: int | None = None, refresh: bool = False
):
filenames = [cur.image_filename() for cur in items]
if thumbwidth is None:
@ -902,9 +925,9 @@ def get_image_detail_with_cache(items, cache_name, thumbwidth=None, refresh=Fals
return detail
def browse_index():
def browse_index() -> str:
q = (
database.session.query(
database.session.query( # type: ignore
Triple.predicate_id, func.count(func.distinct(Triple.object_id))
)
.join(Item, Triple.subject_id == Item.item_id)
@ -918,13 +941,13 @@ def browse_index():
@app.route("/debug/show_user")
def debug_show_user() -> str:
    """Show the result of ``wikidata_oauth.userinfo_call`` as indented JSON.

    The JSON text is wrapped in a ``<pre>`` element and returned as the
    response body.
    """
    import html

    userinfo = wikidata_oauth.userinfo_call()
    # json.dumps leaves "<", ">" and "&" untouched; escape them so values in
    # the API response cannot be interpreted as markup by the browser.
    body = html.escape(json.dumps(userinfo, indent=2), quote=False)
    return "<pre>" + body + "</pre>"
@app.route("/browse/facets.json")
def browse_facets():
def browse_facets() -> Response:
params = get_artwork_params()
if not params:
return jsonify(notice="facet criteria missing")
@ -940,7 +963,7 @@ def browse_facets():
def get_db_items(params):
"""Get items for browse page based on criteria."""
q = Item.query.filter_by(is_artwork=True)
q = Item.query.filter_by(is_artwork=True) # type: ignore
for pid, qid in params:
q = q.join(Triple, Item.item_id == Triple.subject_id, aliased=True).filter(
Triple.predicate_id == pid[1:], Triple.object_id == qid[1:]
@ -955,7 +978,9 @@ def get_db_facets(params):
facet_limit = 18
for pid, qid in params:
q = q.join(Triple, t.subject_id == Triple.subject_id, aliased=True).filter(
q = q.join( # type: ignore
Triple, t.subject_id == Triple.subject_id, aliased=True
).filter(
Triple.predicate_id == pid[1:],
Triple.object_id == qid[1:],
t.predicate_id != pid[1:],
@ -967,9 +992,9 @@ def get_db_facets(params):
results = sorted(tuple(row) for row in q.all())
facet_list = {}
subject_qids = set()
subject_qids: set[str] = set()
for predicate_id, x in itertools.groupby(results, lambda row: row[0]):
hits = sorted(list(x), key=lambda row: row[1], reverse=True)
hits = sorted(x, key=lambda row: row[1], reverse=True)
values = [
{"count": count, "qid": f"Q{value}"}
for _, count, value in hits[:facet_limit]
@ -987,7 +1012,7 @@ def get_db_facets(params):
@app.route("/browse")
def browse_page():
def browse_page() -> str:
page_size = 45
params = get_artwork_params()
@ -1051,7 +1076,7 @@ def browse_page():
@app.route("/find_more.json")
def find_more_json():
def find_more_json() -> Response:
pid = request.args.get("pid")
qid_list = request.args.getlist("qid")
limit = 6
@ -1090,7 +1115,10 @@ def find_more_json():
return jsonify(items=items)
def wikibase_search(terms):
Hit = dict[str, str | int | None]
def wikibase_search(terms: str) -> list[Hit]:
hits = []
r = mediawiki.api_call(
{
@ -1114,10 +1142,10 @@ def wikibase_search(terms):
return hits
def add_images_to_depicts_lookup(hits):
def add_images_to_depicts_lookup(hits: list[dict[str, str]]) -> None:
qid_to_item = {hit["qid"]: hit for hit in hits}
all_qids = [hit["qid"] for hit in hits]
entities = mediawiki.get_entities_with_cache(all_qids)
entities: list[Entity] = mediawiki.get_entities_with_cache(all_qids)
for entity in entities:
qid = entity["id"]
@ -1144,7 +1172,7 @@ def add_images_to_depicts_lookup(hits):
@app.route("/lookup")
def depicts_lookup():
def depicts_lookup() -> Response:
terms = request.args.get("terms")
if not terms:
return jsonify(error="terms parameter is required")
@ -1159,7 +1187,7 @@ def depicts_lookup():
item_ids = []
hits = []
q1 = DepictsItem.query.filter(DepictsItem.label.ilike(terms + "%"))
q1 = DepictsItem.query.filter(DepictsItem.label.ilike(terms + "%")) # type: ignore
seen = set()
for item in q1:
hit = {
@ -1173,7 +1201,9 @@ def depicts_lookup():
seen.add(item.qid)
cls = DepictsItemAltLabel
q2 = cls.query.filter(cls.alt_label.ilike(terms + "%"), ~cls.item_id.in_(item_ids))
q2 = cls.query.filter( # type: ignore
cls.alt_label.ilike(terms + "%"), ~cls.item_id.in_(item_ids)
)
for alt in q2:
item = alt.item
@ -1208,12 +1238,12 @@ def depicts_lookup():
@app.route("/report/missing_image")
def missing_image_report():
def missing_image_report() -> str:
limit = utils.get_int_arg("limit") or 1000
q = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit)
q = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit) # type:ignore
qids = [item.qid for item in q]
entities = mediawiki.get_entities_dict_with_cache(qids)
entities: dict[str, Entity] = mediawiki.get_entities_dict_with_cache(qids)
item_list = []
@ -1229,19 +1259,19 @@ def missing_image_report():
@app.route("/report/wdqs")
def wikidata_query_list() -> str:
    """Render the ``WikidataQuery`` report, ordered by start time descending."""
    newest_first = WikidataQuery.start_time.desc()
    q = WikidataQuery.query.order_by(newest_first)  # type: ignore
    return render_template("query_list.html", q=q)
@app.route("/report/blocks")
def server_block_report() -> str:
    """Run the block check, then render the block report page."""
    check_for_blocks()
    report = render_template("block_report.html")
    return report
@app.route("/fixture/save_error")
def save_error_fixture() -> str:
    """Render the save-error page using the error from the ``save_error`` fixture."""
    fixture = fixtures.save_error()
    return render_template("save_error.html", error=fixture["error"])