depicts/app.py

1281 lines
37 KiB
Python
Raw Normal View History

2019-09-12 19:51:05 +01:00
#!/usr/bin/python3
2023-10-23 13:02:28 +01:00
import hashlib
import itertools
2019-09-12 19:51:05 +01:00
import json
2023-10-23 13:02:28 +01:00
import os
import re
2023-10-23 13:02:28 +01:00
import socket
2023-10-25 07:54:05 +01:00
import typing
2023-10-23 13:02:28 +01:00
from collections import defaultdict
from datetime import datetime
2023-10-25 07:54:05 +01:00
from typing import Iterable, Mapping
2023-10-23 13:02:28 +01:00
import requests.exceptions
import simplejson.errors
from flask import (
Flask,
g,
jsonify,
redirect,
render_template,
request,
session,
url_for,
)
2023-11-09 06:40:37 +00:00
from requests_oauthlib import OAuth1Session # type: ignore
2023-10-23 13:02:28 +01:00
from sqlalchemy import distinct, func
from sqlalchemy.orm import aliased
from sqlalchemy.sql.expression import desc
2023-10-25 07:54:05 +01:00
from werkzeug.wrappers import Response
2023-10-23 13:02:28 +01:00
from depicts import (
artwork,
commons,
database,
fixtures,
human,
mail,
mediawiki,
utils,
wd_catalog,
wdqs,
wikibase,
wikidata_edit,
wikidata_oauth,
)
from depicts.error_mail import setup_error_mail
from depicts.model import (
DepictsItem,
DepictsItemAltLabel,
Edit,
Item,
Language,
Triple,
WikidataQuery,
)
from depicts.pager import Pagination, init_pager
2023-10-25 07:54:05 +01:00
from depicts.type import Entity
2023-10-23 13:02:28 +01:00
# Browser-style User-Agent string, kept as a module-level constant.
user_agent = "Mozilla/5.0 (X11; Linux i586; rv:32.0) Gecko/20160101 Firefox/32.0"

# The Flask application. Configuration is loaded first because the database
# initialisation below reads DB_URL from it.
app = Flask(__name__)
app.config.from_object("config.default")
database.init_db(app.config["DB_URL"])

# Hand the app to the pager and error-mail modules so they can set themselves up.
init_pager(app)
setup_error_mail(app)
2019-09-12 19:51:05 +01:00
# Wikidata properties that can be used to find more artworks, mapped to the
# English label shown for each. Insertion order is the display/iteration order.
find_more_props = {
    "P135": "movement",
    "P136": "genre",
    "P170": "artist",
    "P195": "collection",
    "P276": "location",
    "P495": "country of origin",
    "P127": "owned by",
    "P179": "part of the series",
    "P921": "main subject",
    "P186": "material used",
    "P88": "commissioned by",
    "P1028": "donated by",
    "P1071": "location of final assembly",
    "P138": "named after",
    "P1433": "published in",
    "P144": "based on",
    "P2079": "fabrication method",
    "P2348": "time period",
    "P361": "part of",
    "P608": "exhibition history",
    "P180": "depicts",
    "P31": "instance of",
    # possible future props
    # 'P571': 'inception',
    # 'P166': 'award received', (only 2)
    # 'P1419': 'shape', (only 2)
    # 'P123': 'publisher', (only 1)
}
# Wikidata item types passed as ``isa_list`` to the SPARQL query templates.
isa_list = [
    "Q60520",  # sketchbook
    "Q93184",  # drawing
    "Q3305213",  # painting
    "Q15123870",  # lithograph
    "Q18761202",  # watercolor painting
    "Q79218",  # triptych
    "Q2647254",  # study
    "Q46686",  # reredos
]
2023-10-23 13:02:28 +01:00
# Match a leading item id ("Q" + digits) or property id ("P" + digits).
# Group 1 is the numeric part; trailing characters after the digits are allowed.
re_qid = re.compile(r"^Q(\d+)")
re_pid = re.compile(r"^P(\d+)")
@app.teardown_appcontext
def shutdown_session(exception: Exception | None = None) -> None:
    """Remove the database session when the application context is torn down.

    ``exception`` is the argument Flask passes to teardown handlers; it is
    not used here.
    """
    database.session.remove()  # type:ignore
2023-10-23 13:02:28 +01:00
2019-09-29 08:27:35 +01:00
@app.template_global()
def set_url_args(endpoint: str | None = None, **new_args: str) -> str:
    """Build a URL from the current request's arguments plus overrides.

    The URL variables and query arguments of the current request are merged,
    then ``new_args`` is applied on top. Arguments whose final value is
    ``None`` are left out. ``endpoint`` defaults to the current endpoint.
    """
    target = request.endpoint if endpoint is None else endpoint
    assert target and request.view_args is not None

    merged = dict(request.view_args)
    for source in (request.args, new_args):
        merged.update(source)

    kept = {name: value for name, value in merged.items() if value is not None}
    return url_for(target, **kept)
2019-09-12 19:51:05 +01:00
2023-10-23 13:02:28 +01:00
2019-09-27 20:23:01 +01:00
@app.template_global()
def current_url() -> str:
    """Get current URL.

    The URL is rebuilt with ``url_for`` from the current endpoint, its URL
    variables and the query arguments of the request.
    """
    # view_args is an empty dict (falsy) for routes without URL variables, so
    # it must be compared against None rather than tested for truthiness;
    # otherwise this function fails on every such page.
    assert request and request.endpoint and request.view_args is not None

    args = dict(request.view_args)
    args.update(request.args)
    return url_for(request.endpoint, **args)
2023-10-23 13:02:28 +01:00
@app.before_request
def init_profile() -> None:
    """Start each request with an empty ``g.profiling`` list."""
    g.profiling = list()
2019-09-12 19:51:05 +01:00
2023-10-23 13:02:28 +01:00
2019-10-15 12:21:05 +01:00
@app.before_request
def global_user() -> None:
    """Store the username reported by ``wikidata_oauth`` on ``g.user``."""
    username = wikidata_oauth.get_username()
    g.user = username
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def check_for_blocks() -> None:
    """Look up local and global blocks for this server's IP address.

    The configured ``HOSTNAME`` is resolved to an address that is stored on
    ``g.server_ip``; the "blocks" and "globalblocks" lists for that address
    are stored on ``g.local_blocks`` and ``g.global_blocks``. Nothing happens
    when ``g.server_ip`` is already set or no hostname is configured.
    """
    if hasattr(g, "server_ip"):  # already done
        return

    hostname = app.config.get("HOSTNAME")
    if hostname:
        g.server_ip = socket.gethostbyname(hostname)
        try:
            g.local_blocks = mediawiki.get_list("blocks", bkip=g.server_ip)
            g.global_blocks = mediawiki.get_list("globalblocks", bgip=g.server_ip)
        except Exception:
            # Best effort: a failed lookup leaves the block attributes unset
            # rather than failing the request.
            pass
2023-10-23 13:02:28 +01:00
2020-04-22 16:30:44 +01:00
@app.before_request
def get_blocks() -> None:
    """Run the block check unless ``SHOW_BLOCK_ALERT`` is explicitly ``False``."""
    if app.config.get("SHOW_BLOCK_ALERT") is False:
        return
    check_for_blocks()
2023-10-23 13:02:28 +01:00
@app.route("/find_more_setting")
def flip_find_more() -> str:
    """Toggle the ``no_find_more`` session flag and report the new state."""
    disabled = not session.get("no_find_more")
    session["no_find_more"] = disabled
    state = "off" if disabled else "on"
    return "flipped. find more is " + state
2019-10-04 12:16:16 +01:00
2023-10-25 07:54:05 +01:00
def existing_edit(item_id: int, depicts_id: int) -> bool:
    """Tell whether an Edit linking this artwork to this subject is recorded."""
    matching = Edit.query.filter_by(  # type: ignore
        artwork_id=item_id, depicts_id=depicts_id
    )
    return bool(matching.count() > 0)
2019-10-04 16:56:06 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/save/Q<int:item_id>", methods=["POST"])
def save(item_id: int) -> str | Response:
    """Add the submitted "depicts" statements to an artwork on Wikidata.

    The form field ``depicts`` holds the QIDs to add. Local ``Item`` and
    ``DepictsItem`` rows are created when missing, then one claim is created
    for each QID that has no recorded edit yet, and an ``Edit`` row is stored
    for it. An API reply containing an error is mailed and shown on the
    save-error page. On success the user is redirected to the "next" page of
    the artwork.
    """
    depicts = request.form.getlist("depicts")
    username = wikidata_oauth.get_username()
    assert username

    token = wikidata_oauth.get_token()

    # Make sure the artwork itself is in the local database.
    if Item.query.get(item_id) is None:  # type: ignore
        artwork_entity = mediawiki.get_entity_with_cache(f"Q{item_id}")
        database.session.add(
            Item(item_id=item_id, entity=typing.cast(dict[str, str], artwork_entity))
        )
        database.session.commit()

    # Make sure every selected subject is in the local database.
    for depicts_qid in depicts:
        depicts_id = int(depicts_qid[1:])
        if DepictsItem.query.get(depicts_id) is None:  # type: ignore
            database.session.add(wikidata_edit.create_depicts_item(depicts_id))
            database.session.commit()

    for depicts_qid in depicts:
        depicts_id = int(depicts_qid[1:])
        if existing_edit(item_id, depicts_id):
            continue

        r = create_claim(item_id, depicts_id, token)
        try:
            reply = r.json()
        except simplejson.errors.JSONDecodeError:
            # Mail the raw reply so the failure can be investigated.
            mail.send_mail("depicts save error", r.text)
            raise

        if save_error := reply.get("error"):
            mail.send_mail("depicts save error", r.text)
            return render_template("save_error.html", error=save_error)

        saved = r.json()
        lastrevid = saved["pageinfo"]["lastrevid"]
        assert saved["success"] == 1

        database.session.add(
            Edit(
                username=username,
                artwork_id=item_id,
                depicts_id=depicts_id,
                lastrevid=lastrevid,
            )
        )
        database.session.commit()

    return redirect(url_for("next_page", item_id=item_id))
2019-09-27 16:07:37 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/settings", methods=["GET", "POST"])
def user_settings() -> str:
    """Render the user settings page."""
    page = render_template("user_settings.html")
    return page
2019-12-10 15:45:14 +00:00
2023-10-23 13:02:28 +01:00
@app.route("/test/lookup")
def test_lookup_page() -> str:
    """Render the lookup test page."""
    page = render_template("test_lookup.html")
    return page
2019-12-10 15:45:14 +00:00
2019-09-13 17:16:16 +01:00
@app.route("/property/P<int:property_id>")
def property_query_page(property_id: int) -> str:
    """List the values of one property, with the number of artworks using each.

    Values are counted over triples whose subject is an artwork and are
    ordered by descending count, 100 per page. A property that is not in
    ``find_more_props`` gives a 404 response.
    """
    # Local import: ``abort`` is not among the names this module imports from flask.
    from flask import abort

    pid = f"P{property_id}"
    if pid not in find_more_props:
        # Previously an unknown property raised KeyError (a 500 error).
        abort(404)
    g.title = find_more_props[pid]

    sort = request.args.get("sort")
    sort_by_name = sort and sort.lower().strip() == "name"

    q = (
        database.session.query(  # type: ignore
            Triple.object_id, func.count(func.distinct(Triple.subject_id)).label("c")
        )
        .filter_by(predicate_id=property_id)
        .join(Item, Item.item_id == Triple.subject_id)
        .filter_by(is_artwork=True)
        .group_by(Triple.object_id)
        .order_by(desc("c"))
    )

    page = utils.get_int_arg("page") or 1
    total = q.count()
    page_size = 100
    pager = Pagination(page, page_size, total)

    # Materialise the page once; it is walked twice below.
    page_hits = list(pager.slice(q))

    labels = get_labels_db({f"Q{object_id}" for object_id, _ in page_hits})

    hits = []
    for object_id, count in page_hits:
        qid = f"Q{object_id}"
        hits.append(
            {"qid": qid, "label": labels.get(qid) or "[item missing]", "count": count}
        )

    return render_template(
        "property.html",
        label=g.title,
        order=("name" if sort_by_name else "count"),
        pid=pid,
        page=page,
        pager=pager,
        hits=hits,
    )
@app.route("/")
def start() -> Response:
    """Front page: behave exactly like the random artwork endpoint."""
    response = random_artwork()
    return response
2019-09-27 14:13:28 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/next")
def random_artwork() -> Response:
    """Redirect to a randomly chosen artwork that has no "depicts" claim.

    Artworks are drawn 30 at a time in random order until one without a P180
    claim is found. Its QID is flagged in the session as "from redirect"
    before redirecting to its item page.
    """
    # NOTE(review): this keeps querying for as long as no batch contains an
    # artwork without P180 (including when there are no artworks at all).
    chosen = None
    while not chosen:
        batch = (
            Item.query.filter_by(is_artwork=True)  # type: ignore
            .order_by(func.random())
            .limit(30)
        )
        chosen = next(
            (item for item in batch if "P180" not in item.entity["claims"]),
            None,
        )

    session[chosen.qid] = "from redirect"
    return redirect(url_for("item_page", item_id=chosen.item_id))
2023-10-23 13:02:28 +01:00
@app.route("/oauth/start")
def start_oauth() -> Response:
    """Begin the OAuth 1 login with Wikidata.

    An optional ``next`` query argument is stored in the session as
    ``after_login``. A request token is fetched and stored in the session as
    ``owner_key`` / ``owner_secret``, then the browser is redirected to the
    Wikidata authorization page.
    """
    if after_login := request.args.get("next"):
        session["after_login"] = after_login

    client_key = app.config["CLIENT_KEY"]
    oauth = OAuth1Session(
        client_key, client_secret=app.config["CLIENT_SECRET"], callback_uri="oob"
    )

    request_token_url = (
        "https://www.wikidata.org/w/index.php" + "?title=Special%3aOAuth%2finitiate"
    )
    fetch_response = oauth.fetch_request_token(request_token_url)
    session["owner_key"] = fetch_response.get("oauth_token")
    session["owner_secret"] = fetch_response.get("oauth_token_secret")

    authorization_url = oauth.authorization_url(
        "https://www.wikidata.org/wiki/Special:OAuth/authorize",
        oauth_consumer_key=client_key,
    )
    return redirect(authorization_url)
2023-10-23 13:02:28 +01:00
2019-09-27 11:02:24 +01:00
def _is_local_redirect_target(target: str) -> bool:
    """Tell whether ``target`` is a path on this site rather than another host."""
    # "//host" and "/\host" are treated by browsers as scheme-relative URLs,
    # so they are rejected along with anything that is not a plain path.
    return target.startswith("/") and not target.startswith(("//", "/\\"))


@app.route("/oauth/callback", methods=["GET"])
def oauth_callback() -> Response:
    """Finish the OAuth 1 login and send the user on to where they were going.

    The verifier is read from the callback URL and exchanged for an access
    token, which replaces ``owner_key`` / ``owner_secret`` in the session.
    The user is then redirected to the ``after_login`` URL stored in the
    session when it is a local path, and to a random artwork otherwise.
    """
    base_url = "https://www.wikidata.org/w/index.php"
    client_key = app.config["CLIENT_KEY"]
    client_secret = app.config["CLIENT_SECRET"]

    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )

    oauth_response = oauth.parse_authorization_response(request.url)
    verifier = oauth_response.get("oauth_verifier")
    access_token_url = base_url + "?title=Special%3aOAuth%2ftoken"
    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
        verifier=verifier,
    )

    oauth_tokens = oauth.fetch_access_token(access_token_url)
    session["owner_key"] = oauth_tokens.get("oauth_token")
    session["owner_secret"] = oauth_tokens.get("oauth_token_secret")

    # ``after_login`` comes from the ``next`` query argument of /oauth/start,
    # which anyone can put in a link; following it blindly would be an open
    # redirect, so only local paths are honoured.
    next_page = session.get("after_login")
    if next_page and _is_local_redirect_target(next_page):
        return redirect(next_page)
    return random_artwork()
2019-09-27 11:02:24 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/oauth/disconnect")
def oauth_disconnect() -> Response:
    """Drop the OAuth-related session keys and go to the browse page."""
    for key in ("owner_key", "owner_secret", "username", "after_login"):
        session.pop(key, None)
    return redirect(url_for("browse_page"))
2019-09-27 20:19:29 +01:00
2023-10-25 07:54:05 +01:00
def create_claim(artwork_id: int, depicts_id: int, token: str) -> requests.Response:
    """Post a ``wbcreateclaim`` request adding a P180 claim to an artwork.

    ``artwork_id`` and ``depicts_id`` are numeric item ids; ``token`` is sent
    as the ``token`` parameter. The response of the API call is returned.
    """
    params: dict[str, str | int] = {
        "action": "wbcreateclaim",
        "entity": f"Q{artwork_id}",
        "property": "P180",
        "snaktype": "value",
        "value": json.dumps({"entity-type": "item", "numeric-id": depicts_id}),
        "token": token,
        "format": "json",
        "formatversion": 2,
    }
    response: requests.Response = wikidata_oauth.api_post_request(params)
    return response
2019-09-27 11:02:24 +01:00
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def image_with_cache(qid: str, image_filename: str, width: int) -> dict[str, str]:
    """Return image detail for a file, using a per-item JSON cache on disk.

    The cache file is ``cache/{qid}_{width}_image.json``. Underscores in
    ``image_filename`` are replaced by spaces before the lookup. When the
    cache is missing, empty, or has no entry for the file, the detail is
    fetched with ``commons.image_detail`` and the cache file is rewritten.
    The entry for the file is returned (``None`` if there is none).
    """
    filename = f"cache/{qid}_{width}_image.json"

    detail = {}
    if os.path.exists(filename):
        # Context managers close the file; bare open() calls leaked handles.
        with open(filename) as cache_file:
            detail = json.load(cache_file)

    image_filename = image_filename.replace("_", " ")

    # The image associated with an item can change.
    # If that happens the detail in the cache will be for the wrong file.
    if not detail or image_filename not in detail:
        detail = commons.image_detail([image_filename], thumbwidth=width)
        with open(filename, "w") as cache_file:
            json.dump(detail, cache_file, indent=2)

    image: dict[str, str] = detail.get(image_filename)
    return image
2019-09-25 13:40:15 +01:00
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def existing_depicts_from_entity(entity: Entity) -> list[dict[str, typing.Any]]:
    """Describe the subjects an entity already has "depicts" (P180) claims for.

    Claims without a datavalue are skipped. A ``DepictsItem`` that is not in
    the database yet is created and added; the session is committed once at
    the end if anything was added. Each result has the keys ``label``,
    ``description``, ``qid``, ``count`` and ``existing`` (always ``True``).
    """
    found = []
    created_any = False

    for claim in entity["claims"].get("P180", []):
        mainsnak = claim["mainsnak"]
        if "datavalue" not in mainsnak:
            continue
        depicts_id = mainsnak["datavalue"]["value"]["numeric-id"]

        depicts_item = DepictsItem.query.get(depicts_id)  # type: ignore
        if not depicts_item:
            depicts_item = wikidata_edit.create_depicts_item(depicts_id)
            database.session.add(depicts_item)
            created_any = True

        found.append(
            {
                "label": depicts_item.label,
                "description": depicts_item.description,
                "qid": f"Q{depicts_item.item_id}",
                "count": depicts_item.count,
                "existing": True,
            }
        )

    if created_any:
        database.session.commit()
    return found
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def get_institution(entity: Entity, other: Mapping[str, str | None]) -> str | None:
    """Return the label of the artwork's location or, failing that, collection.

    The first datavalue of P276 (location) is tried, then P195 (collection).
    For the first of these that has a usable value, its ``id`` is looked up
    in ``other`` and the result returned (possibly ``None``). ``None`` is
    returned when neither property provides a value.
    """
    for pid in ("P276", "P195"):
        if pid not in entity["claims"]:
            continue
        value = wikibase.first_datavalue(entity, pid)
        # A claim may carry no datavalue. That used to trip an assert before
        # the emptiness check was reached; now the next property is tried.
        if isinstance(value, dict) and value:
            return other.get(value["id"])
    return None
2023-10-23 13:02:28 +01:00
2019-10-10 10:58:42 +01:00
2019-09-12 19:51:05 +01:00
@app.route("/item/Q<int:item_id>")
def item_page(item_id: int) -> str | Response:
    """Show the page for one Wikidata item.

    A redirected entity leads to a redirect to its target. An item that is
    not in the database and is not an artificial physical object according
    to ``wdqs`` is shown with the "not artwork" template. Otherwise the item
    template is rendered with the catalog details, the existing depicts
    statements and people matching the label.
    """
    qid = f"Q{item_id}"
    g.qid = qid
    item = artwork.Artwork(qid)

    # random_artwork() flags the QID in the session; when the flag is present
    # the entity is fetched without a refresh, otherwise a refresh is requested.
    from_redirect = qid in session and session.pop(qid) == "from redirect"
    entity = mediawiki.get_entity_with_cache(qid, refresh=not from_redirect)
    assert entity

    if "redirects" in entity:
        target_item_id = int(entity["redirects"]["to"][1:])
        assert request.endpoint
        return redirect(url_for(request.endpoint, item_id=target_item_id))

    existing_depicts = existing_depicts_from_entity(entity)

    width = 800
    image_filename = item.image_filename
    image = image_with_cache(qid, image_filename, width) if image_filename else None

    label_and_language = get_entity_label_and_language(entity)
    label = label_and_language["label"] if label_and_language else None
    g.label = label

    other = get_other(item.entity)
    people = human.from_name(label) if label else None

    label_languages = label_and_language["languages"] if label_and_language else []
    show_translation_links = all(lang.code != "en" for lang in label_languages)

    def shared_context() -> dict[str, typing.Any]:
        """Template arguments common to both pages rendered below."""
        return {
            "qid": qid,
            "item_id": item_id,
            "item": item,
            "labels": find_more_props,
            "entity": item.entity,
            "username": g.user,
            "label": label,
            "label_languages": label_languages,
            "show_translation_links": show_translation_links,
            "image": image,
            "other": other,
            "title": item.display_title,
        }

    artwork_item = Item.query.get(item_id)
    if artwork_item is None:
        if not wdqs.is_artificial_physical_object(qid):
            return render_template("not_artwork.html", **shared_context())

        modified = datetime.strptime(entity["modified"], "%Y-%m-%dT%H:%M:%SZ")
        artwork_item = Item(
            item_id=item_id,
            entity=typing.cast(dict[str, str], entity),
            lastrevid=entity["lastrevid"],
            modified=modified,
        )
        # NOTE(review): the new row is added to the session but no commit
        # follows in this function -- confirm whether that is intended.
        database.session.add(artwork_item)

    catalog = wd_catalog.get_catalog_from_artwork(entity)
    if not catalog.get("institution"):
        catalog["institution"] = get_institution(entity, other)

    return render_template(
        "item.html",
        catalog=catalog,
        existing_depicts=existing_depicts,
        people=people,
        **shared_context(),
    )
2019-09-12 19:51:05 +01:00
2023-10-25 07:54:05 +01:00
def get_languages(codes: typing.Iterable[str]) -> typing.Any:
    """Return a query for the languages whose Wikimedia code is in ``codes``."""
    code_matches = Language.wikimedia_language_code.in_(codes)
    return Language.query.filter(code_matches)  # type: ignore
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def get_entity_label_and_language(entity: Entity) -> dict[str, typing.Any] | None:
    """
    Pick a useful label and report which languages share that label.

    The English label wins when there is one. Without an English label, a
    label is only returned when every language uses the same text. The result
    has the keys "label" and "languages"; None means no label was chosen.
    """
    languages_by_label = defaultdict(set)
    for language_code, label_entry in entity["labels"].items():
        languages_by_label[label_entry["value"]].add(language_code)

    if "en" in entity["labels"]:
        english_label = entity["labels"]["en"]["value"]
        return {
            "label": english_label,
            "languages": get_languages(languages_by_label[english_label]),
        }

    if len(languages_by_label) != 1:
        return None

    only_label, language_codes = next(iter(languages_by_label.items()))
    return {"label": only_label, "languages": get_languages(language_codes)}
2023-10-25 07:54:05 +01:00
def get_labels(keys: typing.Iterable[str], name: str | None = None) -> dict[str, str]:
2019-09-12 19:51:05 +01:00
keys = sorted(keys, key=lambda i: int(i[1:]))
if name is None:
2023-10-23 13:02:28 +01:00
name = hashlib.md5("_".join(keys).encode("utf-8")).hexdigest()
filename = f"cache/{name}_labels.json"
labels = []
2019-09-12 19:51:05 +01:00
if os.path.exists(filename):
from_cache = json.load(open(filename))
2023-10-23 13:02:28 +01:00
if isinstance(from_cache, dict) and from_cache.get("keys") == keys:
labels = from_cache["labels"]
if not labels:
2020-03-04 09:05:56 +00:00
for cur in utils.chunk(keys, 50):
2023-10-23 13:02:28 +01:00
labels += mediawiki.get_entities(cur, props="labels")
json.dump({"keys": keys, "labels": labels}, open(filename, "w"), indent=2)
2023-10-25 07:54:05 +01:00
return {
entity["id"]: wikibase.get_entity_label(entity) or "no English label"
for entity in labels
}
2019-09-12 19:51:05 +01:00
2023-10-25 07:54:05 +01:00
def get_labels_db(keys: Iterable[str]):
    """Return labels for the given QIDs, preferring the local database.

    Ids with an ``Item`` row take the label from it. The remaining ids are
    fetched from MediaWiki 50 at a time; each entity that is not a redirect
    is stored as a new ``Item`` (with ``is_artwork=False``) and its label is
    used. A read timeout stops the fetching, and the labels collected so far
    are returned.
    """
    labels = {}
    missing = set()

    for qid in set(keys):
        match = re_qid.match(qid)
        known_item = Item.query.get(int(match.group(1))) if match else None  # type: ignore
        if known_item:
            labels[qid] = known_item.label
        else:
            missing.add(qid)

    page_size = 50
    try:
        for cur in utils.chunk(missing, page_size):
            for entity in mediawiki.get_entities(cur):
                if "redirects" in entity:
                    continue

                qid = entity["id"]
                modified = datetime.strptime(entity["modified"], "%Y-%m-%dT%H:%M:%SZ")
                # FIXME: check if the item is an artwork and set is_artwork correctly
                new_item = Item(  # type: ignore
                    item_id=int(qid[1:]),
                    entity=entity,
                    lastrevid=entity["lastrevid"],
                    modified=modified,
                    is_artwork=False,
                )
                database.session.add(new_item)
                labels[qid] = new_item.label
            database.session.commit()
    except requests.exceptions.ReadTimeout:
        # Keep whatever was collected before the timeout.
        pass

    return labels
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def build_other_set(entity: Entity) -> set[str]:
    """Collect the ids of all values of the ``find_more_props`` properties.

    Claims without a datavalue are ignored.
    """
    claims = entity["claims"]
    return {
        claim["mainsnak"]["datavalue"]["value"]["id"]
        for pid in find_more_props
        for claim in claims.get(pid, [])
        if "datavalue" in claim["mainsnak"]
    }
2019-09-12 19:51:05 +01:00
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def get_other(entity: Entity) -> Mapping[str, str | None]:
    """Return labels for every item the entity refers to via ``find_more_props``."""
    return get_labels(build_other_set(entity))
2019-09-12 19:51:05 +01:00
2023-10-23 13:02:28 +01:00
2019-10-15 12:21:05 +01:00
@app.route("/edits")
def list_edits() -> str:
    """Show all recorded edits, newest first, 100 per page.

    The page also gets the number of distinct artworks edited and the number
    of distinct usernames.
    """
    edits = Edit.query.order_by(Edit.timestamp.desc())  # type: ignore
    page = utils.get_int_arg("page") or 1
    pager = Pagination(page, 100, edits.count())

    def count_distinct(column: typing.Any) -> typing.Any:
        """Count the distinct values of one Edit column."""
        return database.session.query(func.count(distinct(column))).scalar()  # type: ignore

    item_count = count_distinct(Edit.artwork_id)
    user_count = count_distinct(Edit.username)

    return render_template(
        "list_edits.html",
        pager=pager,
        edit_list=pager.slice(edits),
        item_count=item_count,
        user_count=user_count,
    )
2019-09-29 11:23:07 +01:00
2019-09-29 09:47:55 +01:00
2019-09-29 11:52:12 +01:00
@app.route("/user/<username>")
def user_page(username: str) -> str:
    """Show one user's edits, newest first, with a count of artworks edited."""
    user_edits = Edit.query.filter_by(username=username)  # type: ignore
    edit_list = user_edits.order_by(Edit.timestamp.desc())

    distinct_artworks = database.session.query(  # type: ignore
        func.count(distinct(Edit.artwork_id))
    )
    item_count = distinct_artworks.filter_by(username=username).scalar()

    return render_template(
        "user_page.html",
        username=username,
        edits=Edit.query,  # type: ignore
        edit_list=edit_list,
        item_count=item_count,
    )
2019-09-29 11:52:12 +01:00
2019-09-25 13:40:15 +01:00
@app.route("/next/Q<int:item_id>")
def next_page(item_id: int) -> str:
    """Show the page that follows a save, with links to related artworks.

    For each property in ``find_more_props`` (except "material used") that the
    entity has values for, the values are listed with a link to the matching
    find-more page and a URL for looking up images. The anonymous-artist item
    is left out.
    """
    qid = f"Q{item_id}"
    entity = mediawiki.get_entity_with_cache(qid)
    assert entity

    width = 800
    image_filename = wikibase.first_datavalue(entity, "P18")
    # Not every item has an image; the item page copes with that, so do the
    # same here instead of asserting (which turned such items into a 500).
    image = None
    if isinstance(image_filename, str) and image_filename:
        image = image_with_cache(qid, image_filename, width)

    label = wikibase.get_entity_label(entity)
    other = get_other(entity)

    other_list = []
    for key, prop_label in find_more_props.items():
        if key == "P186":  # skip material used
            continue  # too generic

        claims = entity["claims"].get(key)
        if not claims:
            continue

        values = []
        for claim in claims:
            if "datavalue" not in claim["mainsnak"]:
                continue

            value = claim["mainsnak"]["datavalue"]["value"]
            claim_qid = value["id"]
            if claim_qid == "Q4233718":
                continue  # anonymous artist

            numeric_id = value["numeric-id"]
            href = url_for("find_more_page", property_id=key[1:], item_id=numeric_id)
            values.append(
                {
                    "href": href,
                    "qid": claim_qid,
                    "label": other.get(claim_qid),
                }
            )

        if not values:
            continue

        qid_list = [v["qid"] for v in values]
        other_list.append(
            {
                "label": prop_label,
                "image_lookup": url_for("find_more_json", pid=key, qid=qid_list),
                "pid": key,
                "values": values,
                "images": [],
            }
        )

    return render_template(
        "next.html",
        qid=qid,
        label=label,
        image=image,
        labels=find_more_props,
        other=other,
        entity=entity,
        other_props=other_list,
    )
@app.route("/P<int:property_id>/Q<int:item_id>")
def find_more_page(property_id: int, item_id: int) -> Response:
    """Redirect to the browse page filtered by this property/item pair."""
    browse_args = {f"P{property_id}": f"Q{item_id}"}
    return redirect(url_for("browse_page", **browse_args))  # type: ignore
2023-10-23 13:02:28 +01:00
2019-09-13 17:16:16 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/toolinfo.json")
def tool_info() -> Response:
    """Serve the tool's description as JSON."""
    return jsonify(
        {
            "name": "wade",
            "title": "Wikidata Art Depiction Explorer",
            "description": "Add depicts statements to works of art.",
            "url": "https://art.wikidata.link/",
            "keywords": "art, depicts, paintings, depiction",
            "author": "Edward Betts",
            "repository": "https://github.com/edwardbetts/depicts.git",
        }
    )
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def get_facets(params) -> dict[str, typing.Any]:
    """Return the top facet values per property for the given filter.

    The facet query is run for every ``find_more_props`` property that is not
    already a request argument. For each property with results, up to 15
    values are kept, ordered by descending count. Each value has the keys
    ``qid``, ``label`` and ``count``.
    """
    properties = [pid for pid in find_more_props if pid not in request.args]

    bindings = wdqs.run_from_template_with_cache(
        "query/facet.sparql", params=params, isa_list=isa_list, properties=properties
    )

    facets: dict[str, list[dict[str, str | int]]] = {
        key: [] for key in find_more_props
    }
    for row in bindings:
        # The ids are the last path segment of the returned URIs.
        pid = row["property"]["value"].rpartition("/")[2]
        qid = row["object"]["value"].rpartition("/")[2]
        label = row["objectLabel"]["value"]
        count = int(row["count"]["value"])

        if pid in find_more_props:
            facets[pid].append({"qid": qid, "label": label, "count": count})

    top_facets = {}
    for key, values in facets.items():
        if values:
            by_count = sorted(values, key=lambda entry: entry["count"], reverse=True)
            top_facets[key] = by_count[:15]
    return top_facets
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def get_artwork_params() -> list[tuple[str, str]]:
    """Extract the (property id, item id) filters from the request arguments.

    Only arguments whose name starts with a property id and whose value
    starts with an item id are kept, and only the matched id prefixes are
    returned.
    """
    params = []
    for arg_name, arg_value in request.args.items():
        if (pid_match := re_pid.match(arg_name)) and (
            qid_match := re_qid.match(arg_value)
        ):
            params.append((pid_match.group(0), qid_match.group(0)))
    return params
2019-10-10 17:44:21 +01:00
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def filter_artwork(params: list[tuple[str, str]]) -> list[wdqs.Row]:
    """Run the find-more SPARQL template for the given filters."""
    rows = wdqs.run_from_template_with_cache(
        "query/find_more.sparql", params=params, isa_list=isa_list
    )
    return rows
2019-10-10 17:44:21 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/catalog")
def catalog_page() -> str:
    """Render the catalog page for the filters given in the request arguments.

    The artworks matching the filters are looked up, the labels of the items
    they refer to are collected, and the page title is built from the filter
    properties and the labels of the filter values.
    """
    params = get_artwork_params()
    bindings = filter_artwork(params)
    page = utils.get_int_arg("page") or 1
    page_size = 45

    item_ids = {wdqs.row_id(row) for row in bindings}
    qids = [f"Q{item_id}" for item_id in sorted(item_ids)]

    # A per-item database lookup used to happen here; its result was
    # overwritten straight away, so it has been removed.
    entities = mediawiki.get_entities_with_cache(qids)

    items = []
    other_items = set()
    for entity in entities:
        other_items.update(build_other_set(entity))
        # NOTE(review): this ``continue`` makes the rest of the loop body
        # unreachable, so ``items`` always stays empty. Kept as found because
        # it is unclear whether that is deliberate; note the disabled code
        # builds dicts, while get_image_detail_with_cache() calls
        # ``image_filename()`` on each item.
        continue
        item = {
            "label": wikibase.get_entity_label(entity),
            "qid": entity["id"],
            "item_id": int(entity["id"][1:]),
            "image_filename": wikibase.first_datavalue(entity, "P18"),
            "entity": entity,
        }
        items.append(item)

    other = get_labels(other_items)

    flat = "_".join(f"{pid}={qid}" for pid, qid in params)
    thumbwidth = 400
    # FIXME cache_name can be too long for filesystem
    cache_name = f"{flat}_{page}_{page_size}_{thumbwidth}"
    detail = get_image_detail_with_cache(items, cache_name, thumbwidth=thumbwidth)

    for item in items:
        item["url"] = url_for("item_page", item_id=item["item_id"])
        item["image"] = detail[item["image_filename"]]

    item_labels = get_labels(qid for pid, qid in params)
    title = " / ".join(
        find_more_props[pid] + ": " + item_labels[qid] for pid, qid in params
    )

    return render_template(
        "catalog.html", labels=find_more_props, items=items, other=other, title=title
    )
2019-10-10 17:44:21 +01:00
2023-10-25 07:54:05 +01:00
def get_image_detail_with_cache(
items, cache_name: str, thumbwidth: int | None = None, refresh: bool = False
):
filenames = [cur.image_filename() for cur in items]
2019-10-10 17:44:21 +01:00
if thumbwidth is None:
2023-10-23 13:02:28 +01:00
thumbwidth = app.config["THUMBWIDTH"]
2019-10-10 17:44:21 +01:00
2023-10-23 13:02:28 +01:00
filename = f"cache/{cache_name}_images.json"
cache_exists = os.path.exists(filename)
2020-06-30 16:26:01 +01:00
detail = None
if not refresh and cache_exists:
2020-06-30 16:26:01 +01:00
try:
detail = json.load(open(filename))
except json.decoder.JSONDecodeError:
pass
if not detail:
try:
detail = commons.image_detail(filenames, thumbwidth=thumbwidth)
2023-10-23 13:02:28 +01:00
json.dump(detail, open(filename, "w"), indent=2)
except requests.exceptions.ReadTimeout:
detail = json.load(open(filename)) if cache_exists else {}
2019-10-10 17:44:21 +01:00
return detail
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def browse_index() -> str:
    """Render the browse index with a per-property count of distinct values.

    For every predicate used on an artwork item, counts the distinct objects
    and passes the result to ``browse_index.html`` keyed by ``P<id>``.
    """
    distinct_objects = func.count(func.distinct(Triple.object_id))
    q = (
        database.session.query(Triple.predicate_id, distinct_objects)  # type: ignore
        .join(Item, Triple.subject_id == Item.item_id)
        .filter_by(is_artwork=True)
        .group_by(Triple.predicate_id)
    )

    counts = {}
    for predicate_id, count in q:
        counts[f"P{predicate_id}"] = count

    return render_template("browse_index.html", props=find_more_props, counts=counts)
2019-10-15 12:21:05 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/debug/show_user")
def debug_show_user() -> str:
    """Return the OAuth userinfo response as indented JSON inside ``<pre>``."""
    userinfo = wikidata_oauth.userinfo_call()
    dumped = json.dumps(userinfo, indent=2)
    # NOTE(review): the JSON is not HTML-escaped before being returned.
    return "<pre>" + dumped + "</pre>"
2019-10-15 16:32:13 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/browse/facets.json")
def browse_facets() -> Response:
    """Return the facets for the current artwork criteria as JSON.

    Each facet value gets an ``href`` built with ``set_url_args`` that points
    at ``browse_page`` with that value applied. When the request carries no
    criteria a JSON notice is returned instead.
    """
    params = get_artwork_params()
    if not params:
        return jsonify(notice="facet criteria missing")

    facets = get_facets(params)
    for pid, facet_values in facets.items():
        for value in facet_values:
            link_args = {pid: value["qid"]}
            value["href"] = set_url_args(endpoint="browse_page", **link_args)

    return jsonify(params=params, facets=facets, prop_labels=find_more_props)
2019-12-10 15:45:14 +00:00
def get_db_items(params):
    """Build the query for artwork items matching every ``(pid, qid)`` pair.

    Each pair adds one aliased join on ``Triple`` plus a filter on its
    predicate and object, so an item must satisfy all of the criteria.
    The leading ``P``/``Q`` letter is stripped before comparing.
    """
    query = Item.query.filter_by(is_artwork=True)  # type: ignore
    for pid, qid in params:
        predicate_id, object_id = pid[1:], qid[1:]
        query = query.join(Triple, Item.item_id == Triple.subject_id, aliased=True)
        query = query.filter(
            Triple.predicate_id == predicate_id, Triple.object_id == object_id
        )
    return query
2019-09-27 15:35:26 +01:00
2023-10-23 13:02:28 +01:00
def get_db_facets(params):
    """Return facet values, with counts and labels, for the given criteria.

    The result maps ``P<id>`` to a list of ``{"count", "qid", "label"}`` dicts,
    holding at most ``facet_limit`` values per property, highest count first.
    Labels come from ``get_labels_db`` and are ``None`` when not found.
    """
    facet_limit = 18
    t = aliased(Triple)
    q = database.session.query(t.predicate_id, func.count().label("count"), t.object_id)

    for pid, qid in params:
        criteria_predicate, criteria_object = pid[1:], qid[1:]
        q = q.join(  # type: ignore
            Triple, t.subject_id == Triple.subject_id, aliased=True
        ).filter(
            Triple.predicate_id == criteria_predicate,
            Triple.object_id == criteria_object,
            t.predicate_id != criteria_predicate,
            t.object_id != criteria_object,
        )
    q = q.group_by(t.predicate_id, t.object_id)

    # groupby() needs its input ordered by the grouping key (the predicate).
    rows = sorted(tuple(row) for row in q.all())

    facet_list = {}
    object_qids: set[str] = set()
    for predicate_id, group in itertools.groupby(rows, key=lambda row: row[0]):
        ranked = sorted(group, key=lambda row: row[1], reverse=True)
        values = []
        for _, count, object_id in ranked[:facet_limit]:
            value_qid = f"Q{object_id}"
            values.append({"count": count, "qid": value_qid})
            object_qids.add(value_qid)
        facet_list[f"P{predicate_id}"] = values

    labels = get_labels_db(object_qids)
    for values in facet_list.values():
        for value in values:
            value["label"] = labels.get(value["qid"])

    return facet_list
2019-09-13 17:16:16 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/browse")
def browse_page() -> str:
    """Render one page of artworks matching the criteria in the request.

    Without criteria the browse index is rendered instead. Otherwise the
    matching items are paginated, items lacking an image filename are dropped
    from the page, and image detail is attached to each remaining item.

    Returns:
        The rendered ``find_more.html`` template (or the browse index).
    """
    page_size = 45
    params = get_artwork_params()

    if not params:
        return browse_index()

    flat = "_".join(f"{pid}={qid}" for pid, qid in params)

    item_labels = get_labels_db(qid for pid, qid in params)
    # Fall back to the bare QID when no label is available.
    g.title = " / ".join(
        find_more_props[pid] + ": " + (item_labels.get(qid) or qid)
        for pid, qid in params
    )

    q_items = get_db_items(params)
    facets = get_db_facets(params)
    all_items = q_items.all()

    page = utils.get_int_arg("page") or 1
    total = q_items.count()
    pager = Pagination(page, page_size, total)

    items = [item for item in pager.slice(all_items) if item.image_filename()]

    cache_name = f"{flat}_{page}_{page_size}"
    detail = get_image_detail_with_cache(items, cache_name)
    cache_refreshed = False

    linked_qids = {qid for pid, qid in params}
    for item in items:
        artist_qid = item.artist
        if artist_qid:
            linked_qids.add(artist_qid)
        for prop in "P31", "P180":
            linked_qids.update(item.linked_qids(prop))

    linked_labels = get_labels_db(linked_qids)

    for item in items:
        image_filename = item.image_filename()
        # Refresh at most once per request when the cached detail lacks an
        # image that is on this page.
        if not cache_refreshed and image_filename not in detail:
            detail = get_image_detail_with_cache(items, cache_name, refresh=True)
            cache_refreshed = True
        item.image = detail.get(image_filename)

    # The unreachable ``return jsonify(...)`` that used to follow this return
    # was removed; it could never run.
    return render_template(
        "find_more.html",
        page=page,
        label=g.title,
        pager=pager,
        prop_labels=find_more_props,
        labels=find_more_props,
        linked_labels=linked_labels,
        items=items,
        total=total,
        params=params,
        facets=facets,
    )
@app.route("/find_more.json")
def find_more_json() -> Response:
    """Return up to six artworks sharing a property value, as JSON.

    Reads ``pid`` and one or more ``qid`` arguments from the query string,
    runs the ``find_more_basic`` SPARQL template and attaches thumbnail detail
    from Commons to each hit.

    Returns:
        JSON with an ``items`` list. When ``pid`` or any ``qid`` is missing or
        malformed the list is empty and an ``error`` message is included.
    """
    pid = request.args.get("pid")
    qid_list = request.args.getlist("qid")
    limit = 6

    # These values are interpolated into a SPARQL template and a cache name,
    # so only plain property / item identifiers are accepted.
    pid_ok = bool(pid) and re.fullmatch(r"P\d+", pid) is not None
    qids_ok = bool(qid_list) and all(
        re.fullmatch(r"Q\d+", qid) is not None for qid in qid_list
    )
    if not (pid_ok and qids_ok):
        return jsonify(items=[], error="valid pid and qid parameters are required")

    cache_name = f'{pid}={",".join(qid_list)}_{limit}'
    bindings = wdqs.run_from_template_with_cache(
        "query/find_more_basic.sparql",
        cache_name=cache_name,
        qid_list=qid_list,
        pid=pid,
        limit=limit,
    )

    filenames = []
    items = []
    for row in bindings:
        item_id = wdqs.row_id(row)
        row_qid = f"Q{item_id}"
        image_filename = wdqs.commons_uri_to_filename(row["image"]["value"])
        filenames.append(image_filename)
        items.append(
            {
                "qid": row_qid,
                "item_id": item_id,
                "href": url_for("item_page", item_id=item_id),
                "filename": image_filename,
            }
        )

    thumbheight = 120
    # Nothing to look up when the query returned no rows.
    detail = (
        commons.image_detail(filenames, thumbheight=thumbheight) if filenames else {}
    )

    for item in items:
        # A file missing from the Commons response yields ``None`` rather
        # than a KeyError for the whole request.
        item["image"] = detail.get(item["filename"])

    return jsonify(items=items)
2019-09-29 19:00:59 +01:00
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
Hit = dict[str, str | int | None]


def wikibase_search(terms: str) -> list[Hit]:
    """Search Wikidata entities for *terms* with ``wbsearchentities``.

    Returns:
        One hit per search result with ``label``, ``description``, ``qid`` and
        a ``count`` of 0. When the match was on an alias, ``alt_label`` holds
        the matching text.
    """
    hits: list[Hit] = []
    r = mediawiki.api_call(
        {
            "action": "wbsearchentities",
            "search": terms,
            "limit": "max",
            "language": "en",
        }
    )
    for result in r.json()["search"]:
        hit: Hit = {
            # A result can lack a label in the requested language, so read it
            # the same tolerant way as the description.
            "label": result.get("label"),
            "description": result.get("description") or None,
            "qid": result["id"],
            "count": 0,
        }
        if result["match"]["type"] == "alias":
            hit["alt_label"] = result["match"]["text"]
        hits.append(hit)

    return hits
2023-10-23 13:02:28 +01:00
2023-10-25 07:54:05 +01:00
def add_images_to_depicts_lookup(hits: list[dict[str, str]]) -> None:
    """Add image information to lookup hits, modifying them in place.

    Every hit whose entity could be loaded gets an ``image_filename`` key (the
    entity's first ``P18`` value, possibly ``None``). Hits whose file appears
    in the Commons response also get an ``image`` key. Only the first 50
    filenames are sent to Commons.
    """
    all_qids = [hit["qid"] for hit in hits]
    entities: list[Entity] = mediawiki.get_entities_with_cache(all_qids)

    # Hits are plain dicts, so keep the entities in their own mapping instead
    # of assigning them as attributes on the hits.
    entity_by_qid = {entity["id"]: entity for entity in entities}

    for hit in hits:
        entity = entity_by_qid.get(hit["qid"])
        if entity:
            hit["image_filename"] = wikibase.first_datavalue(entity, "P18")

    filenames = [hit["image_filename"] for hit in hits if hit.get("image_filename")]
    filenames = filenames[:50]
    if not filenames:
        return

    thumbwidth = 200
    detail = commons.image_detail(filenames, thumbwidth=thumbwidth)

    for hit in hits:
        filename = hit.get("image_filename")
        if not filename or filename not in detail:
            continue
        hit["image"] = detail[filename]
2019-10-14 13:09:42 +01:00
2023-10-23 13:02:28 +01:00
@app.route("/lookup")
def depicts_lookup() -> Response:
    """Look up depicts items whose label or alt label starts with ``terms``.

    Label matches are collected first, then alt-label matches for items not
    already found. Hits are sorted by ``count``, highest first. Depending on
    configuration, images are added (``LOOKUP_INCLUDES_IMAGES``) and Wikidata
    search results not already seen are appended (``SEARCH_WIKIDATA``).

    Returns:
        JSON with ``count``, ``hits`` and ``terms``; an ``error`` when
        ``terms`` is missing, or a ``notice`` when it is under 3 characters.
    """
    terms = request.args.get("terms")
    if not terms:
        return jsonify(error="terms parameter is required")

    terms = terms.strip()
    if len(terms) < 3:
        return jsonify(
            count=0,
            hits=[],
            notice="terms too short for lookup",
        )

    item_ids = []
    hits = []
    q1 = DepictsItem.query.filter(DepictsItem.label.ilike(terms + "%"))  # type: ignore
    seen = set()
    for item in q1:
        hit = {
            "label": item.label,
            "description": item.description,
            "qid": item.qid,
            "count": item.count,
        }
        item_ids.append(item.item_id)
        hits.append(hit)
        seen.add(item.qid)

    cls = DepictsItemAltLabel
    q2 = cls.query.filter(  # type: ignore
        cls.alt_label.ilike(terms + "%"), ~cls.item_id.in_(item_ids)
    )

    for alt in q2:
        item = alt.item
        if item.count is None:
            continue
        hit = {
            "label": item.label,
            "description": item.description,
            "qid": item.qid,
            "count": item.count,
            "alt_label": alt.alt_label,
        }
        hits.append(hit)
        seen.add(item.qid)

    # Label matches are not filtered on ``count``, so it may be None here;
    # treat that as 0 rather than failing on a None/int comparison.
    hits.sort(key=lambda hit: hit["count"] or 0, reverse=True)

    if app.config.get("LOOKUP_INCLUDES_IMAGES"):
        add_images_to_depicts_lookup(hits)

    if app.config.get("SEARCH_WIKIDATA"):
        search_hits = wikibase_search(terms)
        hits += [hit for hit in search_hits if hit["qid"] not in seen]

    ret = {
        "count": q1.count() + q2.count(),
        "hits": hits,
        "terms": terms,
    }

    return jsonify(ret)
2023-10-23 13:02:28 +01:00
@app.route("/report/missing_image")
def missing_image_report() -> str:
    """List the most-used depicts items that have no image on their entity.

    Takes the top ``limit`` items by count (``limit`` request argument,
    default 1000) and keeps those whose entity has neither a ``P18`` nor a
    ``P2716`` value.
    """
    limit = utils.get_int_arg("limit") or 1000
    q = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit)  # type:ignore

    # Materialize once: iterating the query object twice would run it twice.
    depicts_items = q.all()

    qids = [item.qid for item in depicts_items]
    entities: dict[str, Entity] = mediawiki.get_entities_dict_with_cache(qids)

    item_list = []
    for depicts in depicts_items:
        entity = entities[depicts.qid]
        if any(wikibase.first_datavalue(entity, prop) for prop in ("P18", "P2716")):
            continue
        item_list.append(depicts)

    # TODO: call wikidata search to find images that depict item

    return render_template("missing_image.html", item_list=item_list)
2019-11-07 10:20:23 +00:00
2023-10-23 13:02:28 +01:00
@app.route("/report/wdqs")
def wikidata_query_list() -> str:
    """Render the list of recorded Wikidata queries, latest start time first."""
    newest_first = WikidataQuery.start_time.desc()
    q = WikidataQuery.query.order_by(newest_first)  # type: ignore
    return render_template("query_list.html", q=q)
2019-11-26 14:01:14 +00:00
2023-10-23 13:02:28 +01:00
@app.route("/report/blocks")
def server_block_report() -> str:
    """Run the block check, then render the block report template."""
    # The check runs before rendering; the template receives no arguments.
    check_for_blocks()
    template_name = "block_report.html"
    return render_template(template_name)
2023-10-23 13:02:28 +01:00
@app.route("/fixture/save_error")
def save_error_fixture() -> str:
    """Render the save-error template using the ``save_error`` fixture."""
    fixture = fixtures.save_error()
    return render_template("save_error.html", error=fixture["error"])
2020-04-22 17:11:23 +01:00
2019-09-12 19:51:05 +01:00
if __name__ == "__main__":
    # Development entry point only.
    # NOTE(review): debug mode combined with host 0.0.0.0 serves the
    # interactive debugger on every network interface - confirm this is only
    # ever run on a trusted machine.
    app.debug = True
    app.run(host="0.0.0.0", debug=True)