Improve property page
Allow sorting property values by name. Use a label from a different language if the English label is missing.
This commit is contained in:
parent
3215333caf
commit
a20666f9d4
73
app.py
73
app.py
|
@ -9,6 +9,9 @@ import lxml.etree
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import locale
|
||||||
|
|
||||||
|
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
|
||||||
|
|
||||||
url_start = 'http://www.wikidata.org/entity/Q'
|
url_start = 'http://www.wikidata.org/entity/Q'
|
||||||
wikidata_url = 'https://www.wikidata.org/w/api.php'
|
wikidata_url = 'https://www.wikidata.org/w/api.php'
|
||||||
|
@ -158,50 +161,90 @@ def index():
|
||||||
def run_query_with_cache(q, name):
|
def run_query_with_cache(q, name):
|
||||||
filename = f'cache/{name}.json'
|
filename = f'cache/{name}.json'
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
bindings = json.load(open(filename))
|
from_cache = json.load(open(filename))
|
||||||
else:
|
if isinstance(from_cache, dict) and from_cache.get('query') == q:
|
||||||
r = run_wikidata_query(q)
|
return from_cache['bindings']
|
||||||
bindings = r.json()['results']['bindings']
|
|
||||||
json.dump(bindings, open(filename, 'w'), indent=2)
|
r = run_wikidata_query(q)
|
||||||
|
bindings = r.json()['results']['bindings']
|
||||||
|
json.dump({'query': q, 'bindings': bindings},
|
||||||
|
open(filename, 'w'), indent=2)
|
||||||
|
|
||||||
return bindings
|
return bindings
|
||||||
|
|
||||||
|
def get_row_value(row, field):
|
||||||
|
return row[field]['value'] if field in row else None
|
||||||
|
|
||||||
@app.route("/property/P<int:property_id>")
|
@app.route("/property/P<int:property_id>")
|
||||||
def property_query_page(property_id):
|
def property_query_page(property_id):
|
||||||
pid = f'P{property_id}'
|
pid = f'P{property_id}'
|
||||||
q = property_query.replace('PID', pid)
|
sort = request.args.get('sort')
|
||||||
|
sort_by_name = sort and sort.lower().strip() == 'name'
|
||||||
|
|
||||||
open(f'cache/{pid}_query.sparql', 'w').write(q)
|
q = property_query.replace('PID', pid)
|
||||||
rows = run_query_with_cache(q, name=pid)
|
rows = run_query_with_cache(q, name=pid)
|
||||||
|
|
||||||
|
no_label_qid = [row['object']['value'].rpartition('/')[2]
|
||||||
|
for row in rows
|
||||||
|
if 'objectLabel' not in row and '/' in row['object']['value']]
|
||||||
|
|
||||||
|
if no_label_qid:
|
||||||
|
extra_label = get_labels(no_label_qid, name=f'{pid}_extra_labels')
|
||||||
|
if extra_label:
|
||||||
|
for row in rows:
|
||||||
|
item = row['object']['value']
|
||||||
|
if 'objectLabel' in row or '/' not in item:
|
||||||
|
continue
|
||||||
|
qid = item.rpartition('/')[2]
|
||||||
|
if extra_label.get(qid):
|
||||||
|
row['objectLabel'] = {'value': extra_label[qid]}
|
||||||
|
|
||||||
|
if sort_by_name:
|
||||||
|
# put rows with no English label at the end
|
||||||
|
no_label = [row for row in rows if 'objectLabel' not in row]
|
||||||
|
has_label = sorted((row for row in rows if 'objectLabel' in row),
|
||||||
|
key=lambda row: locale.strxfrm(row['objectLabel']['value']))
|
||||||
|
rows = has_label + no_label
|
||||||
label = find_more_props[pid]
|
label = find_more_props[pid]
|
||||||
|
|
||||||
return render_template('property.html', label=label, pid=pid, rows=rows)
|
return render_template('property.html',
|
||||||
|
label=label,
|
||||||
|
order=('name' if sort_by_name else 'count'),
|
||||||
|
pid=pid,
|
||||||
|
rows=rows)
|
||||||
|
|
||||||
@app.route("/item/Q<int:item_id>")
|
@app.route("/item/Q<int:item_id>")
|
||||||
def item_page(item_id):
|
def item_page(item_id):
|
||||||
qid = f'Q{item_id}'
|
qid = f'Q{item_id}'
|
||||||
return render_template('item.html', qid=qid)
|
return render_template('item.html', qid=qid)
|
||||||
|
|
||||||
def get_en_label(entity):
|
def get_entity_label(entity):
|
||||||
if 'en' in entity['labels']:
|
if 'en' in entity['labels']:
|
||||||
return entity['labels']['en']['value']
|
return entity['labels']['en']['value']
|
||||||
|
|
||||||
|
label_values = {l['value'] for l in entity['labels'].values()}
|
||||||
|
if len(label_values) == 1:
|
||||||
|
return list(label_values)[0]
|
||||||
|
|
||||||
def get_labels(keys, name=None):
|
def get_labels(keys, name=None):
|
||||||
keys = sorted(keys, key=lambda i: int(i[1:]))
|
keys = sorted(keys, key=lambda i: int(i[1:]))
|
||||||
if name is None:
|
if name is None:
|
||||||
name = '_'.join(keys)
|
name = '_'.join(keys)
|
||||||
filename = f'cache/{name}_labels.json'
|
filename = f'cache/{name}_labels.json'
|
||||||
|
labels = []
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
labels = json.load(open(filename))
|
from_cache = json.load(open(filename))
|
||||||
else:
|
if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
|
||||||
labels = []
|
labels = from_cache['labels']
|
||||||
|
if not labels:
|
||||||
for cur in chunk(keys, 50):
|
for cur in chunk(keys, 50):
|
||||||
labels += get_entities(cur, props='labels')
|
labels += get_entities(cur, props='labels')
|
||||||
json.dump(labels, open(filename, 'w'), indent=2)
|
|
||||||
|
json.dump({'keys': keys, 'labels': labels},
|
||||||
|
open(filename, 'w'), indent=2)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return {prop['id']: get_en_label(prop) or '[ label missing ]' for prop in labels}
|
return {entity['id']: get_entity_label(entity) for entity in labels}
|
||||||
except TypeError:
|
except TypeError:
|
||||||
pprint(labels)
|
pprint(labels)
|
||||||
raise
|
raise
|
||||||
|
@ -362,7 +405,7 @@ def get_facets(sparql_params, params):
|
||||||
q = (facet_query.replace('PARAMS', sparql_params)
|
q = (facet_query.replace('PARAMS', sparql_params)
|
||||||
.replace('PROPERTY_LIST', property_list))
|
.replace('PROPERTY_LIST', property_list))
|
||||||
|
|
||||||
open(f'cache/{flat}_facets_query.sparql', 'w').write(q)
|
# open(f'cache/{flat}_facets_query.sparql', 'w').write(q)
|
||||||
|
|
||||||
bindings = run_query_with_cache(q, flat + '_facets')
|
bindings = run_query_with_cache(q, flat + '_facets')
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,7 @@
|
||||||
<h3>{{ prop_label }} ({{ key }})</h3>
|
<h3>{{ prop_label }} ({{ key }})</h3>
|
||||||
{% for claim in claims %}
|
{% for claim in claims %}
|
||||||
{% set claim_qid = claim.mainsnak.datavalue.value.id %}
|
{% set claim_qid = claim.mainsnak.datavalue.value.id %}
|
||||||
<a href="{{ url_for('find_more_page', property_id=key[1:], item_id=claim_qid[1:]) }}">{{ other[claim_qid] }}</a> ({{ claim_qid }})
|
<a href="{{ url_for('find_more_page', property_id=key[1:], item_id=claim_qid[1:]) }}">{{ other[claim_qid] or '[ label missing ]' }}</a> ({{ claim_qid }})
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
|
@ -8,6 +8,14 @@
|
||||||
|
|
||||||
<p><a href="{{ url_for('index') }}">back to index</a></p>
|
<p><a href="{{ url_for('index') }}">back to index</a></p>
|
||||||
|
|
||||||
|
<p>Sort order:
|
||||||
|
{% if order == 'name' %}
|
||||||
|
<b>name</b> or <a href="?sort=count">count</a>
|
||||||
|
{% else %}
|
||||||
|
<a href="?sort=name">name</a> or <b>count</b>
|
||||||
|
{% endif %}
|
||||||
|
</p>
|
||||||
|
|
||||||
<ul>
|
<ul>
|
||||||
{% for row in rows if '/' in row.object.value %}
|
{% for row in rows if '/' in row.object.value %}
|
||||||
{% set qid = row.object.value.rpartition('/')[2] %}
|
{% set qid = row.object.value.rpartition('/')[2] %}
|
||||||
|
|
Loading…
Reference in a new issue