depicts/app.py

903 lines
29 KiB
Python
Raw Normal View History

2019-09-12 19:51:05 +01:00
#!/usr/bin/python3
2019-09-27 11:02:24 +01:00
from flask import Flask, render_template, url_for, redirect, request, g, jsonify, session
from depicts import (utils, wdqs, commons, mediawiki, artwork, database,
2019-10-10 10:58:42 +01:00
wd_catalog, human, wikibase, wikidata_oauth, wikidata_edit)
2019-09-29 20:19:40 +01:00
from depicts.pager import Pagination, init_pager
from depicts.model import (DepictsItem, DepictsItemAltLabel, Edit, ArtworkItem,
Language)
2019-09-29 21:14:41 +01:00
from depicts.error_mail import setup_error_mail
2019-09-27 11:02:24 +01:00
from requests_oauthlib import OAuth1Session
2019-09-29 08:27:35 +01:00
from werkzeug.exceptions import InternalServerError
from werkzeug.debug.tbtools import get_current_traceback
2019-09-29 11:23:07 +01:00
from sqlalchemy import func, distinct
from collections import defaultdict
2019-09-12 19:51:05 +01:00
import json
import os
import locale
import random
# Select the en_US.UTF-8 locale; locale.strxfrm() is used later in this
# module to sort labels by name.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

user_agent = 'Mozilla/5.0 (X11; Linux i586; rv:32.0) Gecko/20160101 Firefox/32.0'

# Create the Flask application, load its configuration, then initialise the
# project services that depend on it (database, pager, error mail).
app = Flask(__name__)
app.config.from_object('config.default')
db_url = app.config['DB_URL']
database.init_db(db_url)
init_pager(app)
setup_error_mail(app)
2019-09-12 19:51:05 +01:00
# Wikidata properties offered as "find more" facets, mapped to the English
# label shown in the UI.  Insertion order is preserved and is the order in
# which the properties are iterated elsewhere in this module.
find_more_props = {
    'P135': 'movement',
    'P136': 'genre',
    'P170': 'artist',
    'P195': 'collection',
    'P276': 'location',
    'P495': 'country of origin',
    'P127': 'owned by',
    'P179': 'part of the series',
    'P921': 'main subject',
    'P186': 'material used',
    'P88': 'commissioned by',
    'P1028': 'donated by',
    'P1071': 'location of final assembly',
    'P138': 'named after',
    'P1433': 'published in',
    'P144': 'based on',
    'P2079': 'fabrication method',
    'P2348': 'time period',
    'P361': 'part of',
    'P608': 'exhibition history',
    'P180': 'depicts',
    'P31': 'instance of',

    # possible future props
    # 'P571': 'inception',
    # 'P166': 'award received', (only 2)
    # 'P1419': 'shape', (only 2)
    # 'P123': 'publisher', (only 1)
}
# QIDs of the artwork classes passed to the facet and find-more SPARQL
# templates as ``isa_list``.
isa_list = [
    'Q60520',     # sketchbook
    'Q93184',     # drawing
    'Q3305213',   # painting
    'Q15123870',  # lithograph
    'Q18761202',  # watercolor painting
    'Q79218',     # triptych
    'Q2647254',   # study
    'Q46686',     # reredos
]
@app.teardown_appcontext
def shutdown_session(exception=None):
    """Discard the scoped database session when the app context is torn down.

    ``exception`` is supplied by Flask and is not used here.
    """
    db_session = database.session
    db_session.remove()
2019-09-29 08:27:35 +01:00
@app.errorhandler(InternalServerError)
def exception_handler(e):
    """Render ``show_error.html`` with the current traceback, status 500."""
    current_tb = get_current_traceback()
    body = render_template('show_error.html', tb=current_tb)
    return body, 500
@app.template_global()
def set_url_args(**new_args):
    """Return the current endpoint's URL with some arguments overridden.

    Starts from the view arguments, layers the query-string arguments and
    then ``new_args`` on top.  Any argument whose final value is ``None`` is
    left out of the generated URL.
    """
    merged = request.view_args.copy()
    for overrides in (request.args, new_args):
        merged.update(overrides)
    kept = {name: value for name, value in merged.items() if value is not None}
    return url_for(request.endpoint, **kept)
2019-09-12 19:51:05 +01:00
2019-09-27 20:23:01 +01:00
@app.template_global()
def current_url():
    """Rebuild the URL of the current request from its view and query args."""
    url_args = request.view_args.copy()
    url_args.update(request.args)
    endpoint = request.endpoint
    return url_for(endpoint, **url_args)
@app.before_request
def init_profile():
    """Start every request with an empty ``g.profiling`` list."""
    g.profiling = list()
2019-09-12 19:51:05 +01:00
2019-10-15 12:21:05 +01:00
@app.before_request
def global_user():
    """Store the result of ``wikidata_oauth.get_username()`` on ``g.user``."""
    current_username = wikidata_oauth.get_username()
    g.user = current_username
@app.route('/settings')
def user_settings():
    """Flip the ``no_find_more`` session flag and report the new state."""
    disabled = not session.get('no_find_more')
    session['no_find_more'] = disabled
    # The message describes "find more" itself, i.e. the inverse of the flag.
    if disabled:
        display = 'off'
    else:
        display = 'on'
    return 'flipped. find more is ' + display
def existing_edit(item_id, depicts_id):
    """Return True if an ``Edit`` row exists for this artwork/depicts pair."""
    matching = Edit.query.filter_by(artwork_id=item_id, depicts_id=depicts_id)
    found = matching.count()
    return found != 0
2019-10-04 16:56:06 +01:00
2019-09-27 16:07:37 +01:00
@app.route('/save/Q<int:item_id>', methods=['POST'])
def save(item_id):
    """Save the submitted "depicts" statements for an artwork to Wikidata.

    Reads the ``depicts`` values (QIDs such as ``Q123``) from the POSTed
    form, makes sure the artwork and every depicted item have a local
    database row, then creates one P180 claim for each item that has no
    ``Edit`` row yet and records an ``Edit`` for every claim that succeeds.

    Returns a redirect to ``next_page`` on success.  Returns a plain-text
    ``error:...`` body when the user is not logged in or when the API reply
    reports an error or lacks ``success == 1``.
    """
    depicts = request.form.getlist('depicts')
    username = wikidata_oauth.get_username()
    if not username:
        # Explicit check rather than ``assert``: asserts are removed when
        # Python runs with -O, which would let anonymous saves through.
        return 'error: not logged in', 401

    token = wikidata_oauth.get_token()

    artwork_item = ArtworkItem.query.get(item_id)
    if artwork_item is None:
        artwork_entity = mediawiki.get_entity_with_cache(f'Q{item_id}')
        label = wikibase.get_entity_label(artwork_entity)
        artwork_item = ArtworkItem(item_id=item_id,
                                   label=label,
                                   entity=artwork_entity)
        database.session.add(artwork_item)
        database.session.commit()

    # First pass: make sure every depicted item exists locally.
    for depicts_qid in depicts:
        depicts_id = int(depicts_qid[1:])
        depicts_item = DepictsItem.query.get(depicts_id)
        if depicts_item is None:
            depicts_item = wikidata_edit.create_depicts_item(depicts_id)
            database.session.add(depicts_item)
            database.session.commit()

    # Second pass: create the claims that have not been saved before.
    for depicts_qid in depicts:
        depicts_id = int(depicts_qid[1:])
        if existing_edit(item_id, depicts_id):
            continue

        r = create_claim(item_id, depicts_id, token)
        reply = r.json()
        # Validate the reply before reading ``pageinfo`` from it.
        if 'error' in reply or reply.get('success') != 1:
            return 'error:' + r.text
        print(r.text)
        lastrevid = reply['pageinfo']['lastrevid']

        edit = Edit(username=username,
                    artwork_id=item_id,
                    depicts_id=depicts_id,
                    lastrevid=lastrevid)
        database.session.add(edit)
        # Commit per claim: if a later API call fails we return early, and
        # the record of claims already created on Wikidata must not be lost
        # (otherwise a retry would create them a second time).
        database.session.commit()

    return redirect(url_for('next_page', item_id=item_id))
2019-09-27 16:07:37 +01:00
2019-09-13 17:16:16 +01:00
@app.route("/property/P<int:property_id>")
def property_query_page(property_id):
    """List the values of one property, ordered by count or by name.

    Runs the ``query/property.sparql`` template for the property, fills in
    labels for rows that came back without ``objectLabel`` and, when
    ``?sort=name`` is given, sorts labelled rows by locale collation with the
    unlabelled rows last.
    """
    pid = f'P{property_id}'
    sort = request.args.get('sort')
    sort_by_name = bool(sort) and sort.lower().strip() == 'name'

    q = render_template('query/property.sparql', pid=pid)
    rows = wdqs.run_query_with_cache(q, name=pid)

    def lacks_label(row):
        # Only entity URIs (which contain '/') can be looked up by QID.
        return 'objectLabel' not in row and '/' in row['object']['value']

    def row_qid(row):
        return row['object']['value'].rpartition('/')[2]

    no_label_qid = [row_qid(row) for row in rows if lacks_label(row)]

    if no_label_qid:
        extra_label = get_labels(no_label_qid, name=f'{pid}_extra_labels')
        if extra_label:
            for row in rows:
                if not lacks_label(row):
                    continue
                qid = row_qid(row)
                if extra_label.get(qid):
                    row['objectLabel'] = {'value': extra_label[qid]}

    if sort_by_name:
        # put rows with no English label at the end
        has_label = [row for row in rows if 'objectLabel' in row]
        no_label = [row for row in rows if 'objectLabel' not in row]
        has_label.sort(key=lambda row: locale.strxfrm(row['objectLabel']['value']))
        rows = has_label + no_label

    label = find_more_props[pid]
    order = 'name' if sort_by_name else 'count'

    return render_template('property.html',
                           label=label,
                           order=order,
                           pid=pid,
                           rows=rows)
2019-09-12 19:51:05 +01:00
2019-09-27 14:13:28 +01:00
@app.route('/')
def start():
    """Front page: hand straight off to ``random_artwork()``.

    The statements that used to follow the ``return`` (looking up the
    username and rendering ``start.html``) could never execute and have been
    removed.
    """
    return random_artwork()
2019-09-27 14:13:28 +01:00
@app.route('/next')
def random_artwork():
    """Redirect to a randomly chosen artwork that has no depicts statement.

    Candidates come from the ``query/artwork_no_depicts.sparql`` query.  A
    candidate is rejected when it already has an ``ArtworkItem`` row, when
    its English label starts with 'Page from ', or when the freshly fetched
    entity has a P180 claim.
    """
    q = render_template('query/artwork_no_depicts.sparql')
    rows = wdqs.run_query_with_cache(q)

    # NOTE(review): the loop only ends once an acceptable candidate is drawn;
    # nothing here bounds the number of attempts.
    while True:
        item_id = wdqs.row_id(random.choice(rows))
        if ArtworkItem.query.get(item_id):
            continue

        entity = mediawiki.get_entity_with_cache(f'Q{item_id}', refresh=True)
        en_label = wikibase.get_en_label(entity)
        if en_label and en_label.startswith('Page from '):
            # example: Q60467422
            # title: Page from Tales of a Parrot (Tuti-nama): text page
            # this is not a painting
            continue

        if 'P180' not in entity['claims']:
            break

    # item_page() pops this marker to decide whether to refresh the entity.
    session[f'Q{item_id}'] = 'from redirect'
    return redirect(url_for('item_page', item_id=item_id))
2019-09-27 11:02:24 +01:00
@app.route('/oauth/start')
def start_oauth():
    """Begin the OAuth 1 handshake with Wikidata.

    Remembers the optional ``next`` query argument in the session, fetches a
    request token, stores its key and secret in the session and redirects
    the browser to the Wikidata authorization page.
    """
    next_page = request.args.get('next')
    if next_page:
        # NOTE(review): this value is later passed to redirect() unchanged in
        # oauth_callback(); confirm it is restricted to local URLs.
        session['after_login'] = next_page

    client_key = app.config['CLIENT_KEY']
    client_secret = app.config['CLIENT_SECRET']

    index_php = 'https://www.wikidata.org/w/index.php'
    request_token_url = index_php + '?title=Special%3aOAuth%2finitiate'

    oauth = OAuth1Session(client_key,
                          client_secret=client_secret,
                          callback_uri='oob')
    request_token = oauth.fetch_request_token(request_token_url)

    session['owner_key'] = request_token.get('oauth_token')
    session['owner_secret'] = request_token.get('oauth_token_secret')

    authorize_page = 'https://www.wikidata.org/wiki/Special:OAuth/authorize'
    authorization_url = oauth.authorization_url(authorize_page,
                                                oauth_consumer_key=client_key)
    return redirect(authorization_url)
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
    """Finish the OAuth 1 handshake and store the access token.

    Extracts the verifier from the callback URL, exchanges the request token
    held in the session for an access token, and overwrites ``owner_key`` /
    ``owner_secret`` in the session with the access token values.  Then
    redirects to the ``after_login`` URL if one was stored, otherwise
    continues with ``random_artwork()``.
    """
    base_url = 'https://www.wikidata.org/w/index.php'
    client_key = app.config['CLIENT_KEY']

    # Both sessions below are built from the same request-token credentials.
    owner = {
        'client_secret': app.config['CLIENT_SECRET'],
        'resource_owner_key': session['owner_key'],
        'resource_owner_secret': session['owner_secret'],
    }

    parser = OAuth1Session(client_key, **owner)
    oauth_response = parser.parse_authorization_response(request.url)
    verifier = oauth_response.get('oauth_verifier')

    access_token_url = base_url + '?title=Special%3aOAuth%2ftoken'
    oauth = OAuth1Session(client_key, verifier=verifier, **owner)
    oauth_tokens = oauth.fetch_access_token(access_token_url)

    session['owner_key'] = oauth_tokens.get('oauth_token')
    session['owner_secret'] = oauth_tokens.get('oauth_token_secret')

    next_page = session.get('after_login')
    if next_page:
        return redirect(next_page)
    return random_artwork()
2019-09-27 11:02:24 +01:00
2019-09-27 20:19:29 +01:00
@app.route('/oauth/disconnect')
def oauth_disconnect():
    """Remove the OAuth-related keys from the session and go to /browse."""
    oauth_keys = ('owner_key', 'owner_secret', 'username', 'after_login')
    present = [key for key in oauth_keys if key in session]
    for key in present:
        del session[key]
    return redirect(url_for('browse_page'))
2019-09-27 20:19:29 +01:00
def create_claim(artwork_id, depicts_id, token):
    """POST a ``wbcreateclaim`` request adding a P180 value to an artwork.

    ``artwork_id`` and ``depicts_id`` are numeric item ids; ``token`` is sent
    as the request's ``token`` parameter.  Returns whatever
    ``wikidata_oauth.api_post_request`` returns.
    """
    claim_value = {'entity-type': 'item', 'numeric-id': depicts_id}
    params = dict(
        action='wbcreateclaim',
        entity=f'Q{artwork_id}',
        property='P180',
        snaktype='value',
        value=json.dumps(claim_value),
        token=token,
        format='json',
        formatversion=2,
    )
    return wikidata_oauth.api_post_request(params)
2019-09-27 11:02:24 +01:00
2019-09-25 13:40:15 +01:00
def image_with_cache(qid, image_filename, width):
    """Return the Commons image detail for one file, cached on disk.

    The cache file is ``cache/{qid}_{width}_image.json`` and holds a mapping
    of filename to detail.  If the file is missing, or does not contain
    ``image_filename`` (the cache key does not include the filename, so the
    item's image may have changed since it was written), the detail is
    fetched with ``commons.image_detail`` and the cache file is rewritten.

    Raises ``KeyError`` if the fetched detail does not include
    ``image_filename``.
    """
    filename = f'cache/{qid}_{width}_image.json'

    detail = None
    if os.path.exists(filename):
        # Context managers make sure the handles are closed promptly.
        with open(filename) as cache_file:
            detail = json.load(cache_file)

    if detail is None or image_filename not in detail:
        detail = commons.image_detail([image_filename], thumbwidth=width)
        with open(filename, 'w') as cache_file:
            json.dump(detail, cache_file, indent=2)

    return detail[image_filename]
2019-10-07 14:12:30 +01:00
def existing_depicts_from_entity(entity):
    """Describe the P180 (depicts) values already present on an entity.

    Returns a list with one dict per P180 claim that carries a value, holding
    ``label``, ``description``, ``qid``, ``count`` and ``existing=True``.
    Depicted items with no ``DepictsItem`` row are created through
    ``wikidata_edit.create_depicts_item`` and committed in a single batch.

    Claims whose main snak has no ``datavalue`` are skipped instead of
    raising ``KeyError``; the other claim loops in this module apply the same
    guard.
    """
    claims = entity['claims'].get('P180')
    if not claims:
        return []

    existing = []
    new_depicts = False
    for claim in claims:
        mainsnak = claim['mainsnak']
        if 'datavalue' not in mainsnak:
            continue  # no item id to look up for this claim

        item_id = mainsnak['datavalue']['value']['numeric-id']

        item = DepictsItem.query.get(item_id)
        if not item:
            item = wikidata_edit.create_depicts_item(item_id)
            database.session.add(item)
            new_depicts = True

        existing.append({
            'label': item.label,
            'description': item.description,
            'qid': f'Q{item.item_id}',
            'count': item.count,
            'existing': True,
        })

    if new_depicts:
        database.session.commit()

    return existing
2019-10-10 10:58:42 +01:00
def get_institution(entity, other):
    """Look up the label of the artwork's location or, failing that, collection.

    Tries P276 first and then P195.  For the first of those properties that
    is present in the entity's claims and yields a truthy first data value,
    returns ``other[value['id']]``.  Returns ``None`` when neither does.
    """
    for prop in ('P276', 'P195'):
        if prop not in entity['claims']:
            continue
        value = wikibase.first_datavalue(entity, prop)
        if value:
            return other[value['id']]
    return None
2019-10-10 10:58:42 +01:00
2019-09-12 19:51:05 +01:00
@app.route("/item/Q<int:item_id>")
def item_page(item_id):
    """Render the page for a single artwork.

    Gathers the entity, its existing depicts values, image detail, label and
    label languages, labels of related items, people matching the label and
    catalog information, then renders ``item.html``.
    """
    qid = f'Q{item_id}'
    item = artwork.Artwork(qid)

    # random_artwork() leaves a marker in the session after it has just
    # fetched the entity with refresh=True; only refresh when it is absent.
    from_redirect = False
    if qid in session:
        from_redirect = session.pop(qid) == 'from redirect'
    entity = mediawiki.get_entity_with_cache(qid, refresh=not from_redirect)

    existing_depicts = existing_depicts_from_entity(entity)

    width = 800
    image_filename = item.image_filename
    image = image_with_cache(qid, image_filename, width) if image_filename else None

    # hits = item.run_query()
    label_and_language = get_entity_label_and_language(entity)
    label = label_and_language['label'] if label_and_language else None

    other = get_other(item.entity)

    people = human.from_name(label) if label else None

    artwork_item = ArtworkItem.query.get(item_id)
    if artwork_item is None:
        # NOTE(review): the new row is added to the session but this function
        # does not commit it itself.
        artwork_item = ArtworkItem(item_id=item_id, label=label, entity=entity)
        database.session.add(artwork_item)

    catalog = wd_catalog.get_catalog_from_artwork(entity)
    if not catalog.get('institution'):
        catalog['institution'] = get_institution(entity, other)

    if label_and_language:
        label_languages = label_and_language['languages']
    else:
        label_languages = []
    # True when none of the languages carrying the label has code 'en'.
    show_translation_links = all(lang.code != 'en' for lang in label_languages)

    template_args = dict(
        qid=qid,
        item_id=item_id,
        item=item,
        catalog=catalog,
        labels=find_more_props,
        entity=item.entity,
        username=g.user,
        label=label,
        label_languages=label_languages,
        show_translation_links=show_translation_links,
        existing_depicts=existing_depicts,
        image=image,
        people=people,
        other=other,
        # hits=hits,
        title=item.display_title,
    )
    return render_template('item.html', **template_args)
2019-09-12 19:51:05 +01:00
def get_languages(codes):
    """Return a ``Language`` query filtered to the given Wikimedia codes."""
    code_column = Language.wikimedia_language_code
    return Language.query.filter(code_column.in_(codes))
def get_entity_label_and_language(entity):
    '''
    Pick a usable label and report which languages share it.

    The English label wins if the entity has one.  Otherwise a label is
    returned only when every language uses the same text.  The result is a
    dict with 'label' and 'languages' keys, or None when no label qualifies.
    '''
    labels = entity['labels']

    # label text -> set of language codes that use it
    group_by_label = defaultdict(set)
    for language, l in labels.items():
        group_by_label[l['value']].add(language)

    if 'en' in labels:
        label = labels['en']['value']
        languages = group_by_label[label]
    elif len(group_by_label) == 1:
        (label, languages), = group_by_label.items()
    else:
        return None

    return {'label': label, 'languages': get_languages(languages)}
2019-09-12 19:51:05 +01:00
def get_labels(keys, name=None):
    """Return a ``{qid: label}`` mapping for the given QIDs, cached on disk.

    ``keys`` may be any iterable of QIDs; it is sorted by numeric id.  The
    cache file is ``cache/{name}_labels.json``, where ``name`` defaults to
    the sorted QIDs joined with '_'.  The cached labels are reused only when
    the file holds a dict whose ``keys`` entry equals the sorted QIDs;
    otherwise the entities are fetched in chunks of 50 and the file is
    rewritten.

    NOTE(review): with ``name=None`` the filename grows with the number of
    keys; confirm this stays within filesystem limits for large key sets.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = '_'.join(keys)
    filename = f'cache/{name}_labels.json'
    labels = []

    if os.path.exists(filename):
        # Context managers make sure the handles are closed promptly.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']

    if not labels:
        for cur in utils.chunk(keys, 50):
            labels += mediawiki.get_entities(cur, props='labels')

        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {entity['id']: wikibase.get_entity_label(entity) for entity in labels}
2019-09-12 19:51:05 +01:00
2019-10-10 20:52:11 +01:00
def build_other_set(entity):
    """Collect the ids referenced by the entity's ``find_more_props`` claims.

    Claims whose main snak has no ``datavalue`` are ignored.  Returns a set.
    """
    claims = entity['claims']
    found = set()
    for pid in find_more_props:
        for claim in claims.get(pid, ()):
            mainsnak = claim['mainsnak']
            if 'datavalue' in mainsnak:
                found.add(mainsnak['datavalue']['value']['id'])
    return found
2019-09-12 19:51:05 +01:00
2019-10-10 20:52:11 +01:00
def get_other(entity):
    """Return labels for every item the entity's find-more claims point to."""
    return get_labels(build_other_set(entity))
2019-09-12 19:51:05 +01:00
2019-10-15 12:21:05 +01:00
@app.route("/edits")
def list_edits():
    """Show all edits, newest first, with distinct artwork and user counts."""
    newest_first = Edit.timestamp.desc()
    edit_list = Edit.query.order_by(newest_first)

    def count_distinct(column):
        # Number of distinct values of ``column`` across all edits.
        return database.session.query(func.count(distinct(column))).scalar()

    item_count = count_distinct(Edit.artwork_id)
    user_count = count_distinct(Edit.username)

    return render_template('list_edits.html',
                           edits=Edit.query,
                           edit_list=edit_list,
                           item_count=item_count,
                           user_count=user_count)
2019-09-29 09:47:55 +01:00
2019-09-29 11:52:12 +01:00
@app.route("/user/<username>")
def user_page(username):
    """Show one user's edits, newest first, and how many artworks they cover."""
    by_user = Edit.query.filter_by(username=username)
    edit_list = by_user.order_by(Edit.timestamp.desc())

    distinct_artworks = func.count(distinct(Edit.artwork_id))
    count_query = database.session.query(distinct_artworks)
    item_count = count_query.filter_by(username=username).scalar()

    return render_template('user_page.html',
                           username=username,
                           edits=Edit.query,
                           edit_list=edit_list,
                           item_count=item_count)
2019-09-29 11:52:12 +01:00
2019-09-25 13:40:15 +01:00
@app.route("/next/Q<int:item_id>")
def next_page(item_id):
    """Render the page shown after saving, offering "find more" links.

    For every property in ``find_more_props`` except P186 (material used),
    collects the entity's item values (skipping claims with no data value
    and Q4233718, the anonymous artist) and builds links to
    ``find_more_page`` plus a ``find_more_json`` URL for image lookup.

    ``image`` is ``None`` when the entity has no P18 value, matching the
    guard in ``item_page``; previously the image lookup was attempted with a
    ``None`` filename.
    """
    qid = f'Q{item_id}'

    entity = mediawiki.get_entity_with_cache(qid)

    width = 800
    image_filename = wikibase.first_datavalue(entity, 'P18')
    if image_filename:
        image = image_with_cache(qid, image_filename, width)
    else:
        image = None

    label = wikibase.get_entity_label(entity)
    other = get_other(entity)

    other_list = []
    for key, prop_label in find_more_props.items():
        if key == 'P186':  # skip material used
            continue       # too generic
        claims = entity['claims'].get(key)
        if not claims:
            continue

        values = []
        for claim in claims:
            if 'datavalue' not in claim['mainsnak']:
                continue
            value = claim['mainsnak']['datavalue']['value']
            claim_qid = value['id']
            if claim_qid == 'Q4233718':
                continue  # anonymous artist
            numeric_id = value['numeric-id']
            href = url_for('find_more_page',
                           property_id=key[1:],
                           item_id=numeric_id)
            values.append({
                'href': href,
                'qid': claim_qid,
                'label': other.get(claim_qid),
            })

        if not values:
            continue

        qid_list = [v['qid'] for v in values]

        other_list.append({
            'label': prop_label,
            'image_lookup': url_for('find_more_json', pid=key, qid=qid_list),
            'pid': key,
            'values': values,
            'images': [],
        })

    return render_template('next.html',
                           qid=qid,
                           label=label,
                           image=image,
                           labels=find_more_props,
                           other=other,
                           entity=entity,
                           other_props=other_list)
2019-09-12 19:51:05 +01:00
@app.route('/P<int:property_id>/Q<int:item_id>')
def find_more_page(property_id, item_id):
    """Redirect ``/P…/Q…`` to the browse page with ``P…=Q…`` as its filter."""
    browse_args = {f'P{property_id}': f'Q{item_id}'}
    return redirect(url_for('browse_page', **browse_args))
2019-09-13 17:16:16 +01:00
2019-10-09 14:38:10 +01:00
def get_facets(params):
    """Return up to 15 facet values per property for the current filter.

    ``params`` is a list of ``(pid, qid)`` pairs.  Properties already present
    in the request's query string are left out of the facet query.  The
    result maps each property that has values to a list of
    ``{'qid', 'label', 'count'}`` dicts, highest count first.
    """
    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)

    properties = [pid for pid in find_more_props if pid not in request.args]

    q = render_template('query/facet.sparql',
                        params=params,
                        isa_list=isa_list,
                        properties=properties)

    bindings = wdqs.run_query_with_cache(q, flat + '_facets')

    def last_segment(uri):
        # Entity and property URIs end in the id after the final '/'.
        return uri.rpartition('/')[2]

    facets = {key: [] for key in find_more_props}
    for row in bindings:
        pid = last_segment(row['property']['value'])
        facets[pid].append({
            'qid': last_segment(row['object']['value']),
            'label': row['objectLabel']['value'],
            'count': int(row['count']['value']),
        })

    top_facets = {}
    for key, values in facets.items():
        if not values:
            continue
        ranked = sorted(values, key=lambda i: i['count'], reverse=True)
        top_facets[key] = ranked[:15]
    return top_facets
def get_artwork_params():
    """Return the ``(pid, qid)`` pairs from the query string.

    A pair is kept when the argument name starts with 'P' and its value
    starts with 'Q'.
    """
    params = []
    for pid, qid in request.args.items():
        if pid.startswith('P') and qid.startswith('Q'):
            params.append((pid, qid))
    return params
def filter_artwork(params):
    """Run the find-more SPARQL query for the given ``(pid, qid)`` pairs.

    The joined ``pid=qid`` pairs are passed to the query cache as its name.
    """
    cache_name = '_'.join(f'{pid}={qid}' for pid, qid in params)
    q = render_template('query/find_more.sparql',
                        params=params,
                        isa_list=isa_list)
    return wdqs.run_query_with_cache(q, cache_name)
@app.route('/catalog')
def catalog_page():
    """Render a catalog view of the artworks matching the current filter.

    Loads the entity of every matching artwork, attaches image detail and an
    item URL to each, and looks up labels for the items their find-more
    claims reference.
    """
    params = get_artwork_params()
    bindings = filter_artwork(params)
    page = utils.get_int_arg('page') or 1
    page_size = 45

    item_ids = {wdqs.row_id(row) for row in bindings}
    qids = [f'Q{item_id}' for item_id in sorted(item_ids)]

    entities = mediawiki.get_entities_with_cache(qids)

    items = []
    other_items = set()
    for entity in entities:
        other_items.update(build_other_set(entity))
        entity_qid = entity['id']
        items.append({
            'label': wikibase.get_entity_label(entity),
            'qid': entity_qid,
            'item_id': int(entity_qid[1:]),
            'image_filename': wikibase.first_datavalue(entity, 'P18'),
            'entity': entity,
        })

    other = get_labels(other_items)

    # page and page_size only contribute to the cache file name here.
    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
    thumbwidth = 400
    cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}'
    detail = get_image_detail_with_cache(items, cache_name, thumbwidth=thumbwidth)

    for item in items:
        item['url'] = url_for('item_page', item_id=item['item_id'])
        item['image'] = detail[item['image_filename']]

    item_labels = get_labels(qid for pid, qid in params)
    title_parts = (find_more_props[pid] + ': ' + item_labels[qid]
                   for pid, qid in params)
    title = ' / '.join(title_parts)

    return render_template('catalog.html',
                           labels=find_more_props,
                           items=items,
                           other=other,
                           title=title)
2019-10-10 17:44:21 +01:00
def get_image_detail_with_cache(items, cache_name, thumbwidth=None, refresh=False):
    """Return Commons image detail for the items' images, cached on disk.

    ``items`` is a sequence of dicts that each have an ``image_filename``
    key.  The cache file is ``cache/{cache_name}_images.json``.  It is read
    when it exists and ``refresh`` is false; otherwise the detail is fetched
    with ``commons.image_detail`` and the file is (re)written.  ``thumbwidth``
    defaults to the ``THUMBWIDTH`` config value.
    """
    filenames = [cur['image_filename'] for cur in items]

    if thumbwidth is None:
        thumbwidth = app.config['THUMBWIDTH']

    filename = f'cache/{cache_name}_images.json'

    if not refresh and os.path.exists(filename):
        # Context managers make sure the handles are closed promptly.
        with open(filename) as cache_file:
            detail = json.load(cache_file)
    else:
        detail = commons.image_detail(filenames, thumbwidth=thumbwidth)
        with open(filename, 'w') as cache_file:
            json.dump(detail, cache_file, indent=2)

    return detail
2019-10-15 12:21:05 +01:00
def browse_index():
    """Render the browse landing page listing the find-more properties."""
    props = find_more_props
    return render_template('browse_index.html', props=props)
2019-10-15 16:32:13 +01:00
@app.route('/debug/show_user')
def debug_show_user():
    """Return the ``userinfo_call()`` result as indented JSON inside <pre>."""
    userinfo = wikidata_oauth.userinfo_call()
    dumped = json.dumps(userinfo, indent=2)
    # NOTE(review): the JSON text is placed in the HTML without escaping.
    return f'<pre>{dumped}</pre>'
2019-10-15 16:32:13 +01:00
2019-09-13 17:16:16 +01:00
@app.route('/browse')
def browse_page():
    """Render one page of artworks matching the ``P…=Q…`` query arguments.

    Without any such argument the browse index is shown instead.  Only
    artworks with exactly one image filename are listed.  Image detail is
    cached per page and fetched again once if a listed image is missing from
    the cached detail.
    """
    params = get_artwork_params()
    if not params:
        return browse_index()

    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)

    item_labels = get_labels(qid for pid, qid in params)
    bindings = filter_artwork(params)
    facets = get_facets(params)

    page_size = 45
    item_map = wdqs.build_browse_item_map(bindings)

    all_items = []
    for item in item_map.values():
        candidates = item['image_filename']
        if len(candidates) == 1:
            # Replace the one-element list with the filename itself.
            item['image_filename'] = candidates[0]
            all_items.append(item)

    page = utils.get_int_arg('page') or 1
    pager = Pagination(page, page_size, len(all_items))
    items = pager.slice(all_items)

    cache_name = f'{flat}_{page}_{page_size}'
    detail = get_image_detail_with_cache(items, cache_name)
    cache_refreshed = False

    for item in items:
        item['url'] = url_for('item_page', item_id=item['item_id'])
        image_filename = item['image_filename']
        stale = image_filename not in detail
        if stale and not cache_refreshed:
            detail = get_image_detail_with_cache(items, cache_name, refresh=True)
            cache_refreshed = True
        item['image'] = detail[image_filename]

    title_parts = (find_more_props[pid] + ': ' + item_labels[qid]
                   for pid, qid in params)
    title = ' / '.join(title_parts)

    catalog_url = url_for('catalog_page', **dict(params))

    template_args = dict(
        facets=facets,
        prop_labels=find_more_props,
        label=title,
        pager=pager,
        params=params,
        item_map=item_map,
        catalog_url=catalog_url,
        page=page,
        labels=find_more_props,
        bindings=bindings,
        total=len(item_map),
        items=items,
    )
    return render_template('find_more.html', **template_args)
2019-09-12 19:51:05 +01:00
2019-09-29 19:00:59 +01:00
@app.route('/find_more.json')
def find_more_json():
    """Return, as JSON, up to six artworks sharing a property value.

    Reads ``pid`` and one or more ``qid`` query arguments, runs the
    ``query/find_more_basic.sparql`` template and attaches Commons image
    detail (thumb height 120) to every result.  The rendered query is
    included in the response as ``q``.
    """
    pid = request.args.get('pid')
    qid_list = request.args.getlist('qid')
    limit = 6

    q = render_template('query/find_more_basic.sparql',
                        qid_list=qid_list,
                        pid=pid,
                        limit=limit)

    cache_name = f'{pid}={",".join(qid_list)}_{limit}'
    bindings = wdqs.run_query_with_cache(q, cache_name)

    items = []
    for row in bindings:
        item_id = wdqs.row_id(row)
        items.append({
            'qid': f'Q{item_id}',
            'item_id': item_id,
            'href': url_for('item_page', item_id=item_id),
            'filename': wdqs.commons_uri_to_filename(row['image']['value']),
        })

    filenames = [item['filename'] for item in items]
    thumbheight = 120
    detail = commons.image_detail(filenames, thumbheight=thumbheight)

    for item in items:
        item['image'] = detail[item['filename']]

    return jsonify(items=items, q=q)
2019-10-14 13:09:42 +01:00
def wikibase_search(terms):
    """Search entities with ``wbsearchentities`` and return them as hits.

    Each hit has ``label``, ``description`` (``None`` when absent or empty),
    ``qid`` and ``count`` (always 0).  When the match was on an alias the
    matched text is added as ``alt_label``.
    """
    api_params = {
        'action': 'wbsearchentities',
        'search': terms,
        'limit': 'max',
        'language': 'en'
    }
    r = mediawiki.api_call(api_params)

    def to_hit(result):
        hit = {
            'label': result['label'],
            'description': result.get('description') or None,
            'qid': result['id'],
            'count': 0,
        }
        match = result['match']
        if match['type'] == 'alias':
            hit['alt_label'] = match['text']
        return hit

    return [to_hit(result) for result in r.json()['search']]
def add_images_to_depicts_lookup(hits):
    """Attach image information to lookup hits, in place.

    ``hits`` is a list of dicts with a ``qid`` key.  For every hit whose
    entity could be loaded, ``image_filename`` is set to the entity's first
    P18 value.  Image detail is then fetched for at most 50 of those
    filenames (thumb width 200) and stored under ``image`` on the hits it
    covers.  Returns ``None``.

    The hits are plain dicts, so the loaded entities are kept in a separate
    mapping; the earlier code assigned ``item.entity`` on a dict, which
    raises ``AttributeError``.
    """
    all_qids = [hit['qid'] for hit in hits]
    entities = mediawiki.get_entities_with_cache(all_qids)
    entity_by_qid = {entity['id']: entity for entity in entities}

    for hit in hits:
        entity = entity_by_qid.get(hit['qid'])
        if entity:
            hit['image_filename'] = wikibase.first_datavalue(entity, 'P18')

    filenames = [hit['image_filename']
                 for hit in hits
                 if hit.get('image_filename')]
    filenames = filenames[:50]
    if not filenames:
        return  # nothing to look up

    thumbwidth = 200
    detail = commons.image_detail(filenames, thumbwidth=thumbwidth)

    for hit in hits:
        filename = hit.get('image_filename')
        if not filename or filename not in detail:
            continue
        hit['image'] = detail[filename]
2019-09-25 13:40:15 +01:00
@app.route('/lookup')
def depicts_lookup():
    """Return, as JSON, depicts items whose label or alias starts with ``terms``.

    Local matches (labels first, then alternative labels of other items) are
    sorted by ``count``, highest first.  Depending on configuration, images
    are attached and Wikidata search results not already present are
    appended afterwards.
    """
    terms = request.args.get('terms')
    if not terms:
        return jsonify(error='terms parameter is required')

    terms = terms.strip()
    if len(terms) < 3:
        return jsonify(
            count=0,
            hits=[],
            notice='terms too short for lookup',
        )

    def as_hit(item, **extra):
        hit = {
            'label': item.label,
            'description': item.description,
            'qid': item.qid,
            'count': item.count,
        }
        hit.update(extra)
        return hit

    prefix = terms + '%'
    item_ids = []
    hits = []
    seen = set()

    q1 = DepictsItem.query.filter(DepictsItem.label.ilike(prefix))
    for item in q1:
        item_ids.append(item.item_id)
        hits.append(as_hit(item))
        seen.add(item.qid)

    # Alias matches, excluding items already found through their label.
    cls = DepictsItemAltLabel
    q2 = cls.query.filter(cls.alt_label.ilike(prefix),
                          ~cls.item_id.in_(item_ids))
    for alt in q2:
        item = alt.item
        hits.append(as_hit(item, alt_label=alt.alt_label))
        seen.add(item.qid)

    hits.sort(key=lambda hit: hit['count'], reverse=True)

    if app.config.get('LOOKUP_INCLUDES_IMAGES'):
        add_images_to_depicts_lookup(hits)

    if app.config.get('SEARCH_WIKIDATA'):
        for hit in wikibase_search(terms):
            if hit['qid'] not in seen:
                hits.append(hit)

    ret = {
        'count': q1.count() + q2.count(),
        'hits': hits,
        'terms': terms,
    }
    return jsonify(ret)
2019-11-07 10:20:23 +00:00
@app.route('/report/missing_image')
def missing_image_report():
    """List the highest-count depicts items that have no image.

    Considers the top ``limit`` items by ``count`` (default 1000) and keeps
    those whose entity has no first data value for either P18 or P2716.
    """
    limit = utils.get_int_arg('limit') or 1000
    q = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit)

    qids = [item.qid for item in q]
    entities = mediawiki.get_entities_dict_with_cache(qids)

    image_props = ('P18', 'P2716')

    def has_image(depicts):
        entity = entities[depicts.qid]
        return any(wikibase.first_datavalue(entity, prop) for prop in image_props)

    item_list = [depicts for depicts in q if not has_image(depicts)]

    # TODO: call wikidata search to find images that depict item

    return render_template('missing_image.html', item_list=item_list)
2019-09-12 19:51:05 +01:00
if __name__ == "__main__":
    # Development entry point.  app.run(debug=True) also sets app.debug.
    # NOTE(review): debug mode is combined with host 0.0.0.0 here; confirm
    # this is only ever run on a trusted network.
    app.run(host='0.0.0.0', debug=True)