Improvements to browse page

This commit is contained in:
Edward Betts 2019-09-30 14:23:19 +01:00
parent 47f31811f9
commit af4bdb956b
2 changed files with 58 additions and 24 deletions

20
app.py
View file

@ -62,7 +62,7 @@ find_more_props = {
} }
find_more_query = ''' find_more_query = '''
select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision { select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?titleLang ?time ?timeprecision {
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
?item wdt:P31 wd:Q3305213 . ?item wdt:P31 wd:Q3305213 .
PARAMS PARAMS
@ -72,7 +72,10 @@ select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision
?timenode wikibase:timeValue ?time. ?timenode wikibase:timeValue ?time.
?timenode wikibase:timePrecision ?timeprecision. ?timenode wikibase:timePrecision ?timeprecision.
} }
OPTIONAL { ?item wdt:P1476 ?title } OPTIONAL {
?item wdt:P1476 ?title .
BIND(LANG(?title) as ?titleLang)
}
OPTIONAL { ?item wdt:P170 ?artist } OPTIONAL { ?item wdt:P170 ?artist }
FILTER NOT EXISTS { ?item wdt:P180 ?depicts } FILTER NOT EXISTS { ?item wdt:P180 ?depicts }
} }
@ -456,7 +459,10 @@ def item_page(item_id):
# hits = item.run_query() # hits = item.run_query()
label_and_language = get_entity_label_and_language(entity) label_and_language = get_entity_label_and_language(entity)
if label_and_language:
label = label_and_language['label'] label = label_and_language['label']
else:
label = None
other = get_other(item.entity) other = get_other(item.entity)
if 'P276' in entity['claims']: if 'P276' in entity['claims']:
@ -528,7 +534,7 @@ def item_page(item_id):
except requests.exceptions.ReadTimeout: except requests.exceptions.ReadTimeout:
pass pass
label_languages = label_and_language['languages'] label_languages = label_and_language['languages'] if label_and_language else []
show_translation_links = all(lang.code != 'en' for lang in label_languages) show_translation_links = all(lang.code != 'en' for lang in label_languages)
return render_template('item.html', return render_template('item.html',
qid=qid, qid=qid,
@ -748,13 +754,8 @@ def browse_page():
flat = '_'.join(f'{pid}={qid}' for pid, qid in params) flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
# item_entity = get_entity_with_cache(qid)
item_labels = get_labels(qid for pid, qid in params) item_labels = get_labels(qid for pid, qid in params)
# property_keys = item_entity['claims'].keys()
# property_labels = get_labels(property_keys, name=f'{flat}_property_labels')
sparql_params = ''.join( sparql_params = ''.join(
f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params) f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params)
@ -801,10 +802,11 @@ def browse_page():
prop_labels=find_more_props, prop_labels=find_more_props,
label=title, label=title,
pager=pager, pager=pager,
item_map=item_map,
page=page, page=page,
labels=find_more_props, labels=find_more_props,
bindings=bindings, bindings=bindings,
total=len(bindings), total=len(item_map),
items=items) items=items)
@app.route('/find_more.json') @app.route('/find_more.json')

View file

@ -4,6 +4,7 @@ import urllib.parse
import os import os
import dateutil.parser import dateutil.parser
import hashlib import hashlib
from collections import defaultdict
from . import utils from . import utils
query_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql' query_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
@ -65,32 +66,63 @@ def format_time(row_time, row_timeprecision):
return row_time['value'] return row_time['value']
def build_browse_item_map(bindings):
    """Collapse SPARQL result rows into one browse entry per item.

    The query can return several rows for the same item (one per image,
    artist or title language), so rows are first grouped by item id and
    then merged into a single dict per item.

    Args:
        bindings: iterable of SPARQL result rows. Each row must provide
            ``itemLabel`` and ``image`` bindings; ``artistLabel``,
            ``title``, ``titleLang``, ``time`` and ``timeprecision`` are
            optional.

    Returns:
        dict mapping item id to a dict with the keys ``qid``, ``item_id``,
        ``image_filename`` (list of distinct filenames, first-seen order),
        ``artist_name`` (distinct artist names joined with ``', '``),
        ``date`` (first formatted date found, or ``None``) and ``label``.
    """
    row_map = defaultdict(list)

    for row in bindings:
        item_id = row_id(row)
        label = row['itemLabel']['value']
        image_filename = commons_uri_to_filename(row['image']['value'])

        # NOTE(review): get_row_value presumably returns None when the
        # binding is absent -- confirm against its definition.
        artist_name = get_row_value(row, 'artistLabel')

        d = format_time(row['time'], row['timeprecision']) if 'time' in row else None
        row_qid = f'Q{item_id}'

        item = {
            'image_filename': image_filename,
            'date': d,
            'artist_name': artist_name,
        }
        # A label equal to the QID is treated as "no label": such rows get
        # no 'label' key, so the title fallback below can apply.
        if label != row_qid:
            item['label'] = label

        title = get_row_value(row, 'title')
        if title:
            lang = get_row_value(row, 'titleLang')
            item['title'] = (lang, title)

        row_map[item_id].append(item)

    item_map = {}
    for item_id, items in row_map.items():
        titles = {}
        # Lists rather than sets keep first-seen order, so the output is
        # deterministic from one run to the next.
        filenames = []
        artist_names = []
        labels = []
        date = None
        for item in items:
            if 'title' in item:
                lang, title = item['title']
                titles[lang] = title
            if item['image_filename'] not in filenames:
                filenames.append(item['image_filename'])
            # Skip missing artists: joining a None entry would raise
            # TypeError in str.join.
            name = item['artist_name']
            if name and name not in artist_names:
                artist_names.append(name)
            if 'label' in item and item['label'] not in labels:
                labels.append(item['label'])
            # Carry the per-row date into the merged entry; without this
            # the value computed above is discarded.
            if date is None:
                date = item['date']

        merged = {
            'qid': f'Q{item_id}',
            'item_id': item_id,
            'image_filename': filenames,
            'artist_name': ', '.join(artist_names),
            'date': date,
        }

        if labels:
            # Rows of one item are expected to share a single label; if
            # they ever differ, use the first one seen instead of failing.
            merged['label'] = labels[0]
        elif 'en' in titles:
            merged['label'] = titles['en']
        else:
            merged['label'] = '[ label missing ]'
        item_map[item_id] = merged

    return item_map