2019-09-12 19:51:05 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
2019-09-27 11:02:24 +01:00
|
|
|
from flask import Flask, render_template, url_for, redirect, request, g, jsonify, session
|
|
|
|
from depicts import (utils, wdqs, commons, mediawiki, painting, saam, database,
|
|
|
|
dia, rijksmuseum, npg, museodelprado, barnesfoundation,
|
|
|
|
wd_catalog)
|
2019-09-27 16:53:17 +01:00
|
|
|
from depicts.model import DepictsItem, DepictsItemAltLabel, Edit
|
2019-09-27 11:02:24 +01:00
|
|
|
from requests_oauthlib import OAuth1Session
|
|
|
|
from urllib.parse import urlencode
|
|
|
|
import requests.exceptions
|
|
|
|
import requests
|
|
|
|
import lxml.html
|
2019-09-12 19:51:05 +01:00
|
|
|
import json
|
|
|
|
import os
|
2019-09-14 13:26:16 +01:00
|
|
|
import locale
|
2019-09-16 08:59:53 +01:00
|
|
|
import random
|
2019-09-14 13:26:16 +01:00
|
|
|
|
|
|
|
# Use a UTF-8 English locale; locale.strxfrm() is used later in this module
# to sort labels by name.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

# Browser-like User-Agent header sent when fetching external catalog pages.
user_agent = 'Mozilla/5.0 (X11; Linux i586; rv:32.0) Gecko/20160101 Firefox/32.0'

app = Flask(__name__)
# Settings read elsewhere in this module: DB_URL, CLIENT_KEY, CLIENT_SECRET
# and THUMBWIDTH.
app.config.from_object('config.default')
database.init_db(app.config['DB_URL'])
|
2019-09-12 19:51:05 +01:00
|
|
|
|
|
|
|
# Wikidata properties offered for browsing and faceting: maps each property
# ID to the human-readable label shown for it.
find_more_props = {
    'P135': 'movement',
    'P136': 'genre',
    'P170': 'artist',
    'P195': 'collection',
    'P276': 'location',
    'P495': 'country of origin',
    'P127': 'owned by',
    'P179': 'part of the series',
    'P921': 'main subject',
    'P186': 'material used',
    'P88': 'commissioned by',
    'P1028': 'donated by',
    'P1071': 'location of final assembly',
    'P138': 'named after',
    'P1433': 'published in',
    'P144': 'based on',
    'P2079': 'fabrication method',
    'P2348': 'time period',
    'P361': 'part of',
    'P608': 'exhibition history',

    # possible future props
    # 'P571': 'inception',
    # 'P166': 'award received', (only 2)
    # 'P1419': 'shape', (only 2)
    # 'P123': 'publisher', (only 1)
}
|
|
|
|
|
|
|
|
find_more_query = '''
|
|
|
|
select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision {
|
|
|
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
|
|
|
?item wdt:P31 wd:Q3305213 .
|
2019-09-13 17:16:16 +01:00
|
|
|
PARAMS
|
2019-09-12 19:51:05 +01:00
|
|
|
?item wdt:P18 ?image .
|
|
|
|
OPTIONAL {
|
|
|
|
?item p:P571/psv:P571 ?timenode .
|
|
|
|
?timenode wikibase:timeValue ?time.
|
|
|
|
?timenode wikibase:timePrecision ?timeprecision.
|
|
|
|
}
|
|
|
|
OPTIONAL { ?item wdt:P1476 ?title }
|
|
|
|
OPTIONAL { ?item wdt:P170 ?artist }
|
|
|
|
FILTER NOT EXISTS { ?item wdt:P180 ?depicts }
|
|
|
|
}
|
|
|
|
'''
|
|
|
|
|
2019-09-13 17:16:16 +01:00
|
|
|
facet_query = '''
|
|
|
|
select ?property ?object ?objectLabel (count(*) as ?count) {
|
|
|
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
|
|
|
?item wdt:P31 wd:Q3305213 .
|
|
|
|
?item wdt:P18 ?image .
|
|
|
|
PARAMS
|
|
|
|
values ?property { PROPERTY_LIST }
|
|
|
|
?item ?property ?object .
|
|
|
|
FILTER NOT EXISTS { ?item wdt:P180 ?depicts }
|
|
|
|
} group by ?property ?propertyLabel ?object ?objectLabel
|
|
|
|
'''
|
|
|
|
|
|
|
|
property_query = '''
|
|
|
|
select ?object ?objectLabel ?objectDescription (count(*) as ?count) {
|
|
|
|
?item wdt:P31 wd:Q3305213 .
|
|
|
|
?item wdt:P18 ?image .
|
|
|
|
?item wdt:PID ?object .
|
|
|
|
filter not exists { ?item wdt:P180 ?depicts }
|
|
|
|
optional {
|
|
|
|
?object rdfs:label ?objectLabel.
|
|
|
|
FILTER(LANG(?objectLabel) = "en").
|
|
|
|
}
|
|
|
|
optional {
|
|
|
|
?object schema:description ?objectDescription .
|
|
|
|
filter(lang(?objectDescription) = "en")
|
|
|
|
}
|
|
|
|
|
|
|
|
} group by ?object ?objectLabel ?objectDescription
|
|
|
|
order by desc(?count)
|
|
|
|
'''
|
|
|
|
|
2019-09-16 08:59:53 +01:00
|
|
|
painting_no_depicts_query = '''
|
|
|
|
select distinct ?item where {
|
|
|
|
?item wdt:P31 wd:Q3305213 .
|
|
|
|
?item wdt:P18 ?image .
|
|
|
|
filter not exists { ?item wdt:P180 ?depicts }
|
|
|
|
}
|
|
|
|
'''
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-16 08:59:53 +01:00
|
|
|
@app.template_global()
def set_url_args(**new_args):
    """Build a URL for the current endpoint with some arguments overridden.

    Starts from the current request's view arguments, layers the query-string
    arguments and then ``new_args`` on top, and drops every argument whose
    final value is None, so passing ``name=None`` removes ``name`` from the
    generated URL.
    """
    merged = request.view_args.copy()
    for source in (request.args, new_args):
        merged.update(source)

    kept = {name: value for name, value in merged.items() if value is not None}
    return url_for(request.endpoint, **kept)
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-16 08:59:53 +01:00
|
|
|
@app.before_request
def init_profile():
    """Start every request with an empty ``g.profiling`` list."""
    g.profiling = list()
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-27 16:07:37 +01:00
|
|
|
@app.route('/save/Q<int:item_id>', methods=['POST'])
def save(item_id):
    """Add the submitted "depicts" (P180) statements to an item.

    Reads the ``depicts`` form values (QIDs such as ``Q123``), creates one
    claim per value through the OAuth-authenticated API and records every
    successful edit in the local database.

    Returns an ``error:...`` string containing the API reply as soon as one
    claim fails; otherwise redirects to the ``next_page`` view for the item.
    Responds with HTTP 401 when no user is logged in.
    """
    # Local import: the module-level flask import does not include abort.
    from flask import abort

    depicts = request.form.getlist('depicts')
    username = get_username()
    if not username:
        # An assert would be stripped under ``python -O`` and otherwise
        # surface as a 500; report the missing login explicitly instead.
        abort(401)

    token = get_token()

    for depicts_qid in depicts:
        depicts_id = int(depicts_qid[1:])  # drop the leading "Q"
        r = create_claim(item_id, depicts_id, token)
        reply = r.json()
        if 'error' in reply:
            return 'error:' + r.text
        print(r.text)
        edit = Edit(username=username,
                    painting_id=item_id,
                    depicts_id=depicts_id)
        database.session.add(edit)
        # Commit per claim so edits made before a later failure stay recorded.
        database.session.commit()

    return redirect(url_for('next_page', item_id=item_id))
|
2019-09-27 16:07:37 +01:00
|
|
|
|
2019-09-13 17:16:16 +01:00
|
|
|
@app.route("/property/P<int:property_id>")
def property_query_page(property_id):
    """List the values of one browsable property with their item counts.

    Rows come from ``property_query`` (ordered by count). Rows lacking an
    English label get a fallback label via ``get_labels`` where one exists.
    With ``?sort=name`` the labelled rows are sorted by label using the
    current locale and unlabelled rows are placed last.

    Responds with HTTP 404 for a property that is not in ``find_more_props``.
    """
    # Local import: the module-level flask import does not include abort.
    from flask import abort

    pid = f'P{property_id}'
    if pid not in find_more_props:
        # Reject unknown properties before running the query; previously
        # this surfaced as a KeyError (HTTP 500) after the query had run.
        abort(404)

    sort = request.args.get('sort')
    sort_by_name = sort and sort.lower().strip() == 'name'

    q = property_query.replace('PID', pid)
    rows = wdqs.run_query_with_cache(q, name=pid)

    # Values without an English label; only URI-like values carry a QID.
    no_label_qid = [row['object']['value'].rpartition('/')[2]
                    for row in rows
                    if 'objectLabel' not in row and '/' in row['object']['value']]

    if no_label_qid:
        extra_label = get_labels(no_label_qid, name=f'{pid}_extra_labels')
        if extra_label:
            for row in rows:
                item = row['object']['value']
                if 'objectLabel' in row or '/' not in item:
                    continue
                qid = item.rpartition('/')[2]
                if extra_label.get(qid):
                    row['objectLabel'] = {'value': extra_label[qid]}

    if sort_by_name:
        # put rows with no English label at the end
        no_label = [row for row in rows if 'objectLabel' not in row]
        has_label = sorted((row for row in rows if 'objectLabel' in row),
                           key=lambda row: locale.strxfrm(row['objectLabel']['value']))
        rows = has_label + no_label

    label = find_more_props[pid]

    return render_template('property.html',
                           label=label,
                           order=('name' if sort_by_name else 'count'),
                           pid=pid,
                           rows=rows)
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-27 14:13:28 +01:00
|
|
|
@app.route('/')
def start():
    """Serve the site root with the same response as ``random_painting``."""
    response = random_painting()
    return response
|
|
|
|
|
|
|
|
@app.route('/next')
def random_painting():
    """Redirect to the item page of a randomly chosen query result.

    Candidates are the rows returned (possibly from cache) for
    ``painting_no_depicts_query``.
    """
    candidates = wdqs.run_query_with_cache(painting_no_depicts_query)
    chosen = random.choice(candidates)
    target = url_for('item_page', item_id=wdqs.row_id(chosen))
    return redirect(target)
|
|
|
|
|
2019-09-27 11:02:24 +01:00
|
|
|
@app.route('/oauth/start')
def start_oauth():
    """Begin the OAuth 1 handshake and redirect to the authorization page.

    An optional ``next`` query argument is remembered in the session under
    ``after_login``. The request token and secret returned by the initiate
    endpoint are stored in the session as ``owner_key`` / ``owner_secret``.
    """
    after_login = request.args.get('next')
    if after_login:
        session['after_login'] = after_login

    consumer_key = app.config['CLIENT_KEY']
    consumer_secret = app.config['CLIENT_SECRET']
    request_token_url = ('https://www.wikidata.org/w/index.php'
                         '?title=Special%3aOAuth%2finitiate')

    oauth = OAuth1Session(consumer_key,
                          client_secret=consumer_secret,
                          callback_uri='oob')
    request_token = oauth.fetch_request_token(request_token_url)

    session['owner_key'] = request_token.get('oauth_token')
    session['owner_secret'] = request_token.get('oauth_token_secret')

    authorization_url = oauth.authorization_url(
        'https://www.wikidata.org/wiki/Special:OAuth/authorize',
        oauth_consumer_key=consumer_key)
    return redirect(authorization_url)
|
|
|
|
|
|
|
|
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
    """Finish the OAuth 1 handshake and send the user onwards.

    Exchanges the request token held in the session, plus the verifier from
    the callback URL, for an access token, and stores that token and its
    secret in the session as ``owner_key`` / ``owner_secret``.

    Redirects to the page remembered by ``start_oauth`` when there is one,
    otherwise behaves like ``random_painting``.
    """
    base_url = 'https://www.wikidata.org/w/index.php'
    client_key = app.config['CLIENT_KEY']
    client_secret = app.config['CLIENT_SECRET']

    # This first session is only used to parse the verifier out of the URL.
    oauth = OAuth1Session(client_key,
                          client_secret=client_secret,
                          resource_owner_key=session['owner_key'],
                          resource_owner_secret=session['owner_secret'])

    oauth_response = oauth.parse_authorization_response(request.url)
    verifier = oauth_response.get('oauth_verifier')
    access_token_url = base_url + '?title=Special%3aOAuth%2ftoken'
    oauth = OAuth1Session(client_key,
                          client_secret=client_secret,
                          resource_owner_key=session['owner_key'],
                          resource_owner_secret=session['owner_secret'],
                          verifier=verifier)

    oauth_tokens = oauth.fetch_access_token(access_token_url)
    session['owner_key'] = oauth_tokens.get('oauth_token')
    session['owner_secret'] = oauth_tokens.get('oauth_token_secret')

    # start_oauth stores the target under 'after_login'; the old lookup of
    # 'next_page' never matched, so the redirect back was always skipped.
    # pop() also stops a stale target from being reused on a later login.
    next_page = session.pop('after_login', None)
    return redirect(next_page) if next_page else random_painting()
|
2019-09-27 11:02:24 +01:00
|
|
|
|
|
|
|
def get_username():
    """Return the logged-in user's name, or None when it cannot be determined.

    Returns None when the session holds no OAuth token (``owner_key``) or when
    the userinfo API reply has no ``query`` section. The name is cached in the
    session under ``username`` after the first successful lookup.
    """
    if 'owner_key' not in session:
        return None  # not authorized

    try:
        return session['username']
    except KeyError:
        pass  # not cached yet, ask the API below

    reply = oauth_api_request({
        'action': 'query',
        'meta': 'userinfo',
        'format': 'json',
    })
    if 'query' in reply:
        name = reply['query']['userinfo']['name']
        session['username'] = name
        return name
    return None
|
|
|
|
|
|
|
|
@app.route("/show_user")
def show_user():
    """Return the ``repr`` of the userinfo section of an authenticated API call."""
    userinfo_params = {'action': 'query', 'meta': 'userinfo', 'format': 'json'}
    api_reply = oauth_api_request(userinfo_params)
    return repr(api_reply['query'])
|
|
|
|
|
|
|
|
def oauth_api_request(params):
    """Send an OAuth-signed GET request to the Wikidata API.

    ``params`` is URL-encoded into the query string. The request is signed
    with the consumer credentials from the app config and the token stored in
    the session. Returns the decoded JSON reply.
    """
    query_string = urlencode(params)
    oauth = OAuth1Session(
        app.config['CLIENT_KEY'],
        client_secret=app.config['CLIENT_SECRET'],
        resource_owner_key=session['owner_key'],
        resource_owner_secret=session['owner_secret'],
    )
    response = oauth.get('https://www.wikidata.org/w/api.php?' + query_string)
    return response.json()
|
|
|
|
|
2019-09-27 16:53:17 +01:00
|
|
|
def create_claim(painting_id, depicts_id, token):
    """POST a ``wbcreateclaim`` request adding a P180 item value to an item.

    ``painting_id`` and ``depicts_id`` are the numeric parts of the two QIDs;
    ``token`` is passed as the request's ``token`` parameter. Returns whatever
    ``oauth_api_post_request`` returns.
    """
    item_value = {'entity-type': 'item', 'numeric-id': depicts_id}
    return oauth_api_post_request({
        'action': 'wbcreateclaim',
        'entity': f'Q{painting_id}',
        'property': 'P180',
        'snaktype': 'value',
        'value': json.dumps(item_value),
        'token': token,
        'format': 'json',
        'formatversion': 2,
    })
|
|
|
|
|
|
|
|
def get_token():
    """Fetch the ``csrftoken`` for the current OAuth session from the API."""
    token_request = dict(action='query',
                         meta='tokens',
                         format='json',
                         formatversion=2)
    api_reply = oauth_api_request(token_request)
    return api_reply['query']['tokens']['csrftoken']
|
|
|
|
|
|
|
|
def oauth_api_post_request(params):
    """Send an OAuth-signed POST request to the Wikidata API.

    ``params`` is sent as the form body. Unlike ``oauth_api_request`` this
    returns the response object itself, not the decoded JSON.
    """
    oauth = OAuth1Session(
        app.config['CLIENT_KEY'],
        client_secret=app.config['CLIENT_SECRET'],
        resource_owner_key=session['owner_key'],
        resource_owner_secret=session['owner_secret'],
    )
    return oauth.post('https://www.wikidata.org/w/api.php', data=params)
|
2019-09-27 11:02:24 +01:00
|
|
|
|
2019-09-25 13:40:15 +01:00
|
|
|
def image_with_cache(qid, image_filename, width):
    """Return the image detail for one file, cached on disk.

    The detail mapping is stored in ``cache/{qid}_{width}_image.json``. On a
    cache miss it is fetched with ``commons.image_detail`` and written there.

    Returns the entry for ``image_filename`` from that mapping.
    """
    filename = f'cache/{qid}_{width}_image.json'
    if os.path.exists(filename):
        # Context managers close the cache file promptly instead of leaving
        # the handle to the garbage collector.
        with open(filename) as cache_file:
            detail = json.load(cache_file)
    else:
        detail = commons.image_detail([image_filename], thumbwidth=width)
        with open(filename, 'w') as cache_file:
            json.dump(detail, cache_file, indent=2)

    return detail[image_filename]
|
|
|
|
|
|
|
|
def first_datavalue(entity, pid):
    """Return the value of the first claim for ``pid`` that carries one.

    Claims whose main snak has no ``datavalue`` ("unknown value" / "no value"
    statements) are skipped rather than raising KeyError.

    Returns None when the entity has no claim for ``pid`` or none of its
    claims carries a value.
    """
    for claim in entity['claims'].get(pid, ()):
        datavalue = claim['mainsnak'].get('datavalue')
        if datavalue is not None:
            return datavalue['value']
    return None
|
|
|
|
|
|
|
|
def get_catalog_page(property_id, value):
    """Return the HTML of the catalog page for one identifier, cached on disk.

    The URL comes from ``wd_catalog.lookup``. The page is cached in
    ``cache/{property_id}_{value}.html`` with ``/`` in the value replaced by
    ``_``. On a cache miss the page is downloaded with the module's
    ``user_agent`` header and written to the cache.
    """
    detail = wd_catalog.lookup(property_id, value)
    url = detail['url']
    catalog_id = value.replace('/', '_')

    filename = f'cache/{property_id}_{catalog_id}.html'

    if os.path.exists(filename):
        # Context managers make sure the cache file is closed promptly.
        with open(filename) as cache_file:
            return cache_file.read()

    r = requests.get(url, headers={'User-Agent': user_agent})
    html = r.text
    with open(filename, 'w') as cache_file:
        cache_file.write(html)

    return html
|
|
|
|
|
|
|
|
def get_description_from_page(html):
    """Return the text of the first ``<div itemprop="description">`` in ``html``.

    Returns None when the page has no such element.
    """
    document = lxml.html.fromstring(html)
    description_div = document.find('.//div[@itemprop="description"]')
    return None if description_div is None else description_div.text
|
2019-09-25 13:40:15 +01:00
|
|
|
|
2019-09-12 19:51:05 +01:00
|
|
|
@app.route("/item/Q<int:item_id>")
def item_page(item_id):
    """Render the page for a single item.

    Gathers the image detail (800px wide), the entity label, labels of related
    items, and catalog information. Catalog information is taken from a
    source-specific module when the entity has a matching identifier or
    "described at URL" (P973); otherwise it is scraped from the generic
    catalog pages, where the last page yielding a description wins.
    """
    qid = f'Q{item_id}'
    item = painting.Painting(qid)
    entity = mediawiki.get_entity_with_cache(qid)

    image = image_with_cache(qid, item.image_filename, 800)

    label = get_entity_label(entity)
    other = get_other(item.entity)

    catalog_ids = wd_catalog.find_catalog_id(entity)
    catalog_detail = [
        wd_catalog.lookup(prop, first_datavalue(entity, prop))
        for prop in sorted(catalog_ids)
    ]

    catalog_url = first_datavalue(entity, 'P973')
    claims = entity['claims']

    # Prefer a dedicated scraper when the entity points at a known source.
    catalog = None
    if 'P4704' in claims:
        catalog = saam.get_catalog(first_datavalue(entity, 'P4704'))
    elif 'P4709' in claims:
        catalog = barnesfoundation.get_catalog(first_datavalue(entity, 'P4709'))
    elif catalog_url and 'www.dia.org' in catalog_url:
        catalog = dia.get_catalog(catalog_url)
    elif catalog_url and 'www.rijksmuseum.nl' in catalog_url:
        catalog = rijksmuseum.get_catalog(catalog_url)
    elif catalog_url and 'www.npg.org.uk' in catalog_url:
        catalog = npg.get_catalog(catalog_url)
    elif catalog_url and 'www.museodelprado.es' in catalog_url:
        catalog = museodelprado.get_catalog(catalog_url)

    # Fall back to the description embedded in the generic catalog pages.
    if not catalog and catalog_ids:
        for prop in sorted(catalog_ids):
            if prop == 'P350':
                continue  # RKDimages ID
            value = first_datavalue(entity, prop)
            source = wd_catalog.lookup(prop, value)
            try:
                page_html = get_catalog_page(prop, value)
            except requests.exceptions.SSLError:
                continue  # ignore this error
            description = get_description_from_page(page_html)
            if description:
                catalog = {
                    'institution': source['label'],
                    'description': description,
                }

    return render_template('item.html',
                           qid=qid,
                           item_id=item_id,
                           item=item,
                           catalog=catalog,
                           catalog_url=catalog_url,
                           catalog_detail=catalog_detail,
                           labels=find_more_props,
                           entity=item.entity,
                           username=get_username(),
                           label=label,
                           image=image,
                           other=other,
                           title=item.display_title)
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-14 13:26:16 +01:00
|
|
|
def get_entity_label(entity):
    """Pick a display label for an entity.

    Returns the English label when there is one. Otherwise returns the label
    shared by every language when all labels are identical, and None when the
    labels differ or there are none.
    """
    labels = entity['labels']
    if 'en' in labels:
        return labels['en']['value']

    distinct = {entry['value'] for entry in labels.values()}
    if len(distinct) != 1:
        return None
    (only_label,) = distinct
    return only_label
|
|
|
|
|
2019-09-12 19:51:05 +01:00
|
|
|
def get_labels(keys, name=None):
    """Return a mapping of entity ID to display label for ``keys``.

    ``keys`` is any iterable of IDs made of one letter followed by digits;
    they are sorted numerically. Results are cached in
    ``cache/{name}_labels.json``, where ``name`` defaults to the sorted keys
    joined by ``_``. The cache is only used when it was written for exactly
    the same keys and is non-empty; otherwise the labels are fetched in
    chunks of 50 and the cache file is rewritten.

    Label values are whatever ``get_entity_label`` returns, so may be None.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = '_'.join(keys)
    filename = f'cache/{name}_labels.json'

    labels = []
    if os.path.exists(filename):
        # Context managers close the cache file promptly instead of leaving
        # the handle to the garbage collector.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']

    if not labels:
        for cur in utils.chunk(keys, 50):
            labels += mediawiki.get_entities(cur, props='labels')

        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {entity['id']: get_entity_label(entity) for entity in labels}
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-25 13:40:15 +01:00
|
|
|
def get_other(entity):
    """Return labels for every item referenced by the entity's browsable claims.

    Collects the item IDs found under the properties listed in
    ``find_more_props`` and resolves them with ``get_labels``.
    """
    other_items = set()
    for key in find_more_props:
        for claim in entity['claims'].get(key, ()):
            datavalue = claim['mainsnak'].get('datavalue')
            if datavalue is None:
                # "unknown value" / "no value" statements carry no datavalue;
                # skip them instead of failing with a KeyError.
                continue
            other_items.add(datavalue['value']['id'])

    return get_labels(other_items)
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-25 13:40:15 +01:00
|
|
|
@app.route("/next/Q<int:item_id>")
def next_page(item_id):
    """Render the page shown after saving edits for an item.

    Shows the item's image (P18, 800px wide), its label and the labels of
    items referenced by its browsable properties.
    """
    qid = f'Q{item_id}'
    entity = mediawiki.get_entity_with_cache(qid)

    image = image_with_cache(qid, first_datavalue(entity, 'P18'), 800)

    context = dict(
        qid=qid,
        label=get_entity_label(entity),
        image=image,
        labels=find_more_props,
        other=get_other(entity),
        entity=entity,
    )
    return render_template('next.html', **context)
|
|
|
|
|
|
|
|
@app.route('/P<int:property_id>/Q<int:item_id>')
def find_more_page(property_id, item_id):
    """Redirect ``/P<n>/Q<m>`` to the browse page filtered by that pair."""
    browse_args = {f'P{property_id}': f'Q{item_id}'}
    return redirect(url_for('browse_page', **browse_args))
|
2019-09-13 17:16:16 +01:00
|
|
|
|
|
|
|
def get_facets(sparql_params, params):
    """Return the top facet values for the current browse filter.

    ``sparql_params`` is the SPARQL fragment for the active filter and
    ``params`` the matching ``(pid, qid)`` pairs (used for the cache name).
    Properties already present in the request's query string are not faceted.

    Returns a dict mapping property ID to at most 15 entries of the form
    ``{'qid', 'label', 'count'}``, highest count first; properties without
    any value are omitted.
    """
    cache_name = '_'.join(f'{pid}={qid}' for pid, qid in params) + '_facets'

    remaining = [f'wdt:{pid}' for pid in find_more_props.keys()
                 if pid not in request.args]

    query = facet_query.replace('PARAMS', sparql_params)
    query = query.replace('PROPERTY_LIST', ' '.join(remaining))

    bindings = wdqs.run_query_with_cache(query, cache_name)

    grouped = {key: [] for key in find_more_props.keys()}
    for row in bindings:
        prop = row['property']['value'].rpartition('/')[2]
        grouped[prop].append({
            'qid': row['object']['value'].rpartition('/')[2],
            'label': row['objectLabel']['value'],
            'count': int(row['count']['value']),
        })

    top = {}
    for key, values in grouped.items():
        if not values:
            continue
        values.sort(key=lambda i: i['count'], reverse=True)
        top[key] = values[:15]
    return top
|
|
|
|
|
|
|
|
@app.route('/browse')
def browse_page():
    """Browse items filtered by ``Pn=Qm`` query-string pairs.

    Without any valid filter the browse index is rendered. Otherwise the
    matching items are queried, the first 45 items that have exactly one
    image are shown with thumbnails, and facets for further filtering are
    computed with ``get_facets``.
    """
    # Local import: only needed to validate the query-string IDs.
    import re

    # The IDs are interpolated into SPARQL and into a cache file name, so
    # accept only strict "P<digits>" / "Q<digits>" pairs. A prefix check
    # alone would let arbitrary text through.
    params = [(pid, qid) for pid, qid in request.args.items()
              if re.fullmatch(r'P[0-9]+', pid) and re.fullmatch(r'Q[0-9]+', qid)]

    if not params:
        return render_template('browse_index.html',
                               props=find_more_props,
                               username=get_username())

    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)

    item_labels = get_labels(qid for pid, qid in params)

    sparql_params = ''.join(
        f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params)

    q = find_more_query.replace('PARAMS', sparql_params)

    bindings = wdqs.run_query_with_cache(q, flat)
    facets = get_facets(sparql_params, params)

    page_size = 45

    item_map = wdqs.build_browse_item_map(bindings)
    items = []
    for item in item_map.values():
        if len(item['image_filename']) != 1:
            continue  # only show items with exactly one image
        item['image_filename'] = item['image_filename'][0]
        items.append(item)
        if len(items) >= page_size:
            break

    filenames = [cur['image_filename'] for cur in items]

    thumbwidth = app.config['THUMBWIDTH']

    filename = f'cache/{flat}_{page_size}_images.json'
    if os.path.exists(filename):
        # Context managers make sure the cache file is closed promptly.
        with open(filename) as cache_file:
            detail = json.load(cache_file)
    else:
        detail = commons.image_detail(filenames, thumbwidth=thumbwidth)
        with open(filename, 'w') as cache_file:
            json.dump(detail, cache_file, indent=2)

    for item in items:
        item['url'] = url_for('item_page', item_id=item['item_id'])
        item['image'] = detail[item['image_filename']]

    # Fall back to the bare QID when no label is available; a missing or
    # None label previously broke the join.
    title = ' / '.join(item_labels.get(qid) or qid for pid, qid in params)

    return render_template('find_more.html',
                           facets=facets,
                           prop_labels=find_more_props,
                           label=title,
                           labels=find_more_props,
                           bindings=bindings,
                           total=len(bindings),
                           items=items)
|
2019-09-12 19:51:05 +01:00
|
|
|
|
2019-09-25 13:40:15 +01:00
|
|
|
@app.route('/lookup')
def depicts_lookup():
    """Return JSON autocomplete hits for the ``terms`` query argument.

    Matches items whose label starts with the terms (case-insensitive), plus
    alternative labels starting with the terms for items not already matched
    by label. Hits are sorted by their ``count`` field, highest first.

    Replies with an ``error`` field when ``terms`` is missing and with an
    empty hit list plus a ``notice`` when the stripped terms are shorter than
    three characters.
    """
    terms = request.args.get('terms')
    if not terms:
        return jsonify(error='terms parameter is required')

    terms = terms.strip()
    if len(terms) < 3:
        return jsonify(
            count=0,
            hits=[],
            notice='terms too short for lookup',
        )

    item_ids = []
    hits = []
    q1 = DepictsItem.query.filter(DepictsItem.label.ilike(terms + '%'))
    for item in q1:
        hit = {
            'label': item.label,
            'description': item.description,
            'qid': item.qid,
            'count': item.count,
        }
        item_ids.append(item.item_id)
        hits.append(hit)

    cls = DepictsItemAltLabel
    # Exclude items already found through their main label.
    q2 = cls.query.filter(cls.alt_label.ilike(terms + '%'),
                          ~cls.item_id.in_(item_ids))

    for alt in q2:
        item = alt.item
        hit = {
            'label': item.label,
            'description': item.description,
            'qid': item.qid,
            'count': item.count,
            'alt_label': alt.alt_label,
        }
        hits.append(hit)

    hits.sort(key=lambda hit: hit['count'], reverse=True)

    ret = {
        # Every row of both queries produced exactly one hit, so len(hits)
        # equals q1.count() + q2.count() without two extra COUNT queries.
        'count': len(hits),
        'hits': hits,
        'terms': terms,
    }

    return jsonify(ret)
|
|
|
|
|
2019-09-12 19:51:05 +01:00
|
|
|
|
|
|
|
# Development entry point: start Flask's built-in server when run directly.
# NOTE(review): debug mode enables the interactive debugger and
# host='0.0.0.0' listens on every network interface; confirm this is never
# used on an untrusted network.
if __name__ == "__main__":
    app.debug = True
    app.run(host='0.0.0.0', debug=True)
|