More features.
This commit is contained in:
parent
1a7b1dab4c
commit
0a6ea8b416
155
app.py
155
app.py
|
@ -1,6 +1,6 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
from flask import Flask, render_template, url_for
|
from flask import Flask, render_template, url_for, redirect, request
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
import dateutil.parser
|
import dateutil.parser
|
||||||
|
@ -31,16 +31,31 @@ find_more_props = {
|
||||||
'P495': 'country of origin',
|
'P495': 'country of origin',
|
||||||
'P127': 'owned by',
|
'P127': 'owned by',
|
||||||
'P179': 'part of the series',
|
'P179': 'part of the series',
|
||||||
|
'P921': 'main subject',
|
||||||
|
'P186': 'material used',
|
||||||
|
'P88': 'commissioned by',
|
||||||
|
'P1028': 'donated by',
|
||||||
|
'P1071': 'location of final assembly',
|
||||||
|
'P138': 'named after',
|
||||||
|
'P1433': 'published in',
|
||||||
|
'P144': 'based on',
|
||||||
|
'P2079': 'fabrication method',
|
||||||
|
'P2348': 'time period',
|
||||||
|
'P361': 'part of',
|
||||||
|
'P608': 'exhibition history',
|
||||||
|
|
||||||
# possible future props
|
# possible future props
|
||||||
# 'P571': 'inception',
|
# 'P571': 'inception',
|
||||||
# 'P921': 'main subject',
|
# 'P166': 'award received', (only 2)
|
||||||
|
# 'P1419': 'shape', (only 2)
|
||||||
|
# 'P123': 'publisher', (only 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
find_more_query = '''
|
find_more_query = '''
|
||||||
select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision {
|
select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision {
|
||||||
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
||||||
?item wdt:P31 wd:Q3305213 .
|
?item wdt:P31 wd:Q3305213 .
|
||||||
?item wdt:PID wd:QID .
|
PARAMS
|
||||||
?item wdt:P18 ?image .
|
?item wdt:P18 ?image .
|
||||||
OPTIONAL {
|
OPTIONAL {
|
||||||
?item p:P571/psv:P571 ?timenode .
|
?item p:P571/psv:P571 ?timenode .
|
||||||
|
@ -53,6 +68,37 @@ select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision
|
||||||
}
|
}
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
facet_query = '''
|
||||||
|
select ?property ?object ?objectLabel (count(*) as ?count) {
|
||||||
|
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
|
||||||
|
?item wdt:P31 wd:Q3305213 .
|
||||||
|
?item wdt:P18 ?image .
|
||||||
|
PARAMS
|
||||||
|
values ?property { PROPERTY_LIST }
|
||||||
|
?item ?property ?object .
|
||||||
|
FILTER NOT EXISTS { ?item wdt:P180 ?depicts }
|
||||||
|
} group by ?property ?propertyLabel ?object ?objectLabel
|
||||||
|
'''
|
||||||
|
|
||||||
|
property_query = '''
|
||||||
|
select ?object ?objectLabel ?objectDescription (count(*) as ?count) {
|
||||||
|
?item wdt:P31 wd:Q3305213 .
|
||||||
|
?item wdt:P18 ?image .
|
||||||
|
?item wdt:PID ?object .
|
||||||
|
filter not exists { ?item wdt:P180 ?depicts }
|
||||||
|
optional {
|
||||||
|
?object rdfs:label ?objectLabel.
|
||||||
|
FILTER(LANG(?objectLabel) = "en").
|
||||||
|
}
|
||||||
|
optional {
|
||||||
|
?object schema:description ?objectDescription .
|
||||||
|
filter(lang(?objectDescription) = "en")
|
||||||
|
}
|
||||||
|
|
||||||
|
} group by ?object ?objectLabel ?objectDescription
|
||||||
|
order by desc(?count)
|
||||||
|
'''
|
||||||
|
|
||||||
def ordinal(n):
    """Return integer *n* followed by its English ordinal suffix.

    Examples: ``1 -> '1st'``, ``2 -> '2nd'``, ``3 -> '3rd'``, ``4 -> '4th'``,
    ``11 -> '11th'``, ``21 -> '21st'``, ``112 -> '112th'``.
    """
    # Integer division is essential here: under Python 3 the previous
    # ``n / 10 % 10`` produced a float (1.1 for 11), so the "teens" test
    # never matched and 11, 12, 13 were rendered as 11st, 12nd, 13rd.
    in_teens = n // 10 % 10 == 1
    last_digit = n % 10

    if in_teens or last_digit > 3:
        suffix = 'th'
    else:
        suffix = ('th', 'st', 'nd', 'rd')[last_digit]

    return "%d%s" % (n, suffix)
|
||||||
|
|
||||||
|
@ -107,13 +153,40 @@ def get_entities(ids, **params):
|
||||||
|
|
||||||
@app.route("/")
def index():
    """Render the landing page, handing the ``find_more_props`` mapping to the template."""
    template_context = {'props': find_more_props}
    return render_template('index.html', **template_context)
|
||||||
|
|
||||||
|
def run_query_with_cache(q, name):
    """Return the result bindings of SPARQL query *q*, using an on-disk cache.

    The cache file is ``cache/{name}.json``. Note that the cache is keyed by
    *name* only: if the file exists its contents are returned and *q* is not
    looked at.

    :param q: SPARQL query text passed to ``run_wikidata_query`` on a miss.
    :param name: cache key, used verbatim in the cache file name.
    :return: the ``results.bindings`` list from the query response.
    """
    filename = f'cache/{name}.json'

    if os.path.exists(filename):
        # Context manager so the handle is closed promptly instead of being
        # left for the garbage collector.
        with open(filename) as cache_file:
            return json.load(cache_file)

    r = run_wikidata_query(q)
    bindings = r.json()['results']['bindings']

    with open(filename, 'w') as cache_file:
        json.dump(bindings, cache_file, indent=2)

    return bindings
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/property/P<int:property_id>")
def property_query_page(property_id):
    """Show every value used for one property, with usage counts.

    :param property_id: numeric part of the Wikidata property ID (from the URL).
    :return: the rendered ``property.html`` page; responds with 404 when the
        property is not one of ``find_more_props``.
    """
    pid = f'P{property_id}'

    # Look the label up first: an unknown property used to run the query,
    # write cache files and only then fail with KeyError (HTTP 500).
    label = find_more_props.get(pid)
    if label is None:
        from flask import abort
        abort(404)

    q = property_query.replace('PID', pid)

    # Keep a copy of the generated query next to the cached results.
    with open(f'cache/{pid}_query.sparql', 'w') as query_file:
        query_file.write(q)

    rows = run_query_with_cache(q, name=pid)

    return render_template('property.html', label=label, pid=pid, rows=rows)
|
||||||
|
|
||||||
@app.route("/item/Q<int:item_id>")
def item_page(item_id):
    """Render ``item.html`` for the item whose numeric ID came from the URL."""
    return render_template('item.html', qid=f'Q{item_id}')
|
||||||
|
|
||||||
|
def get_en_label(entity):
    """Return the English label of *entity*, or ``None`` when it has none.

    *entity* must have a ``'labels'`` entry; an English label is read from
    ``entity['labels']['en']['value']``.
    """
    labels = entity['labels']
    if 'en' not in labels:
        return None
    return labels['en']['value']
|
||||||
|
|
||||||
def get_labels(keys, name=None):
|
def get_labels(keys, name=None):
|
||||||
keys = sorted(keys, key=lambda i: int(i[1:]))
|
keys = sorted(keys, key=lambda i: int(i[1:]))
|
||||||
if name is None:
|
if name is None:
|
||||||
|
@ -128,7 +201,7 @@ def get_labels(keys, name=None):
|
||||||
json.dump(labels, open(filename, 'w'), indent=2)
|
json.dump(labels, open(filename, 'w'), indent=2)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return {prop['id']: prop['labels']['en']['value'] for prop in labels}
|
return {prop['id']: get_en_label(prop) or '[ label missing ]' for prop in labels}
|
||||||
except TypeError:
|
except TypeError:
|
||||||
pprint(labels)
|
pprint(labels)
|
||||||
raise
|
raise
|
||||||
|
@ -190,8 +263,6 @@ def image_detail_old(filenames, thumbwidth=None):
|
||||||
|
|
||||||
xml = xml.replace(f'name="{f}"', f'name="{esc}"')
|
xml = xml.replace(f'name="{f}"', f'name="{esc}"')
|
||||||
|
|
||||||
# print(xml)
|
|
||||||
|
|
||||||
root = lxml.etree.fromstring(xml.encode('utf-8'))
|
root = lxml.etree.fromstring(xml.encode('utf-8'))
|
||||||
|
|
||||||
images = []
|
images = []
|
||||||
|
@ -280,14 +351,56 @@ def next_page(item_id):
|
||||||
def find_more_page(property_id, item_id):
    """Redirect to the browse page filtered by a single property/item pair."""
    pid = f'P{property_id}'
    qid = f'Q{item_id}'

    target = url_for('browse_page') + f'?{pid}={qid}'
    return redirect(target)
|
||||||
|
|
||||||
property_keys = item_entity['claims'].keys()
|
def get_facets(sparql_params, params):
    """Return the most frequent values of each facet property not yet selected.

    :param sparql_params: SPARQL triple patterns substituted for ``PARAMS``
        in ``facet_query`` to restrict the set of items.
    :param params: iterable of ``(pid, qid)`` pairs; used here only to build
        the cache file names.
    :return: dict mapping property ID to a list of at most 15 dicts with the
        keys ``'qid'``, ``'label'`` and ``'count'``, sorted by descending
        count. Properties without any value are left out.
    """
    # NOTE(review): ``flat`` ends up inside file paths; presumably the caller
    # has validated the pairs - confirm they cannot contain path separators.
    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)

    # Only offer facets for properties that are not already part of the
    # current request's filters.
    property_list = ' '.join(f'wdt:{pid}' for pid in find_more_props
                             if pid not in request.args)

    q = (facet_query.replace('PARAMS', sparql_params)
                    .replace('PROPERTY_LIST', property_list))

    # Keep a copy of the generated query next to the cached results; the
    # context manager makes sure the handle is flushed and closed.
    with open(f'cache/{flat}_facets_query.sparql', 'w') as query_file:
        query_file.write(q)

    bindings = run_query_with_cache(q, flat + '_facets')

    facets = {key: [] for key in find_more_props}
    for row in bindings:
        # Values are URIs; the ID is whatever follows the last slash.
        facet_pid = row['property']['value'].rpartition('/')[2]
        value_qid = row['object']['value'].rpartition('/')[2]
        label = row['objectLabel']['value']
        count = int(row['count']['value'])

        facets[facet_pid].append({'qid': value_qid, 'label': label, 'count': count})

    return {
        key: sorted(values, key=lambda i: i['count'], reverse=True)[:15]
        for key, values in facets.items()
        if values
    }
|
||||||
|
|
||||||
|
@app.route('/browse')
|
||||||
|
def browse_page():
|
||||||
|
params = [(pid, qid) for pid, qid in request.args.items()
|
||||||
|
if pid.startswith('P') and qid.startswith('Q')]
|
||||||
|
|
||||||
|
flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
|
||||||
|
|
||||||
|
# item_entity = get_entity_with_cache(qid)
|
||||||
|
|
||||||
|
item_labels = get_labels(qid for pid, qid in params)
|
||||||
|
|
||||||
|
# property_keys = item_entity['claims'].keys()
|
||||||
|
# property_labels = get_labels(property_keys, name=f'{flat}_property_labels')
|
||||||
|
|
||||||
|
sparql_params = ''.join(
|
||||||
|
f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params)
|
||||||
|
|
||||||
|
query = find_more_query.replace('PARAMS', sparql_params)
|
||||||
|
|
||||||
|
filename = f'cache/{flat}.json'
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
bindings = json.load(open(filename))
|
bindings = json.load(open(filename))
|
||||||
else:
|
else:
|
||||||
|
@ -295,6 +408,8 @@ def find_more_page(property_id, item_id):
|
||||||
bindings = r.json()['results']['bindings']
|
bindings = r.json()['results']['bindings']
|
||||||
json.dump(bindings, open(filename, 'w'), indent=2)
|
json.dump(bindings, open(filename, 'w'), indent=2)
|
||||||
|
|
||||||
|
facets = get_facets(sparql_params, params)
|
||||||
|
|
||||||
page_size = 45
|
page_size = 45
|
||||||
|
|
||||||
item_map = {}
|
item_map = {}
|
||||||
|
@ -321,7 +436,6 @@ def find_more_page(property_id, item_id):
|
||||||
if 'time' in row:
|
if 'time' in row:
|
||||||
t = dateutil.parser.parse(row['time']['value'])
|
t = dateutil.parser.parse(row['time']['value'])
|
||||||
precision = int(row['timeprecision']['value'])
|
precision = int(row['timeprecision']['value'])
|
||||||
print((row['time']['value'], precision))
|
|
||||||
|
|
||||||
if precision == 9:
|
if precision == 9:
|
||||||
d = t.year
|
d = t.year
|
||||||
|
@ -358,7 +472,7 @@ def find_more_page(property_id, item_id):
|
||||||
|
|
||||||
filenames = [cur['image_filename'] for cur in items]
|
filenames = [cur['image_filename'] for cur in items]
|
||||||
|
|
||||||
filename = f'cache/{pid}_{qid}_{page_size}_images.json'
|
filename = f'cache/{flat}_{page_size}_images.json'
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
detail = json.load(open(filename))
|
detail = json.load(open(filename))
|
||||||
else:
|
else:
|
||||||
|
@ -370,11 +484,16 @@ def find_more_page(property_id, item_id):
|
||||||
|
|
||||||
total = len(bindings)
|
total = len(bindings)
|
||||||
|
|
||||||
|
title = ' / '.join(item_labels[qid] for pid, qid in params)
|
||||||
|
|
||||||
return render_template('find_more.html',
|
return render_template('find_more.html',
|
||||||
qid=qid,
|
# qid=qid,
|
||||||
pid=pid,
|
# pid=pid,
|
||||||
item_entity=item_entity,
|
# item_entity=item_entity,
|
||||||
property_labels=property_labels,
|
# property_labels=property_labels,
|
||||||
|
facets=facets,
|
||||||
|
prop_labels=find_more_props,
|
||||||
|
label=title,
|
||||||
labels=find_more_props,
|
labels=find_more_props,
|
||||||
bindings=bindings,
|
bindings=bindings,
|
||||||
items=items,
|
items=items,
|
||||||
|
|
Loading…
Reference in a new issue