From af4bdb956b0e0d5fcc72a2bc3736310b180fdfb9 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 30 Sep 2019 14:23:19 +0100 Subject: [PATCH] Improvements to browse page --- app.py | 22 +++++++++--------- depicts/wdqs.py | 60 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/app.py b/app.py index 02427e5..b36aaef 100755 --- a/app.py +++ b/app.py @@ -62,7 +62,7 @@ find_more_props = { } find_more_query = ''' -select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision { +select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?titleLang ?time ?timeprecision { SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } ?item wdt:P31 wd:Q3305213 . PARAMS @@ -72,7 +72,10 @@ select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision ?timenode wikibase:timeValue ?time. ?timenode wikibase:timePrecision ?timeprecision. } - OPTIONAL { ?item wdt:P1476 ?title } + OPTIONAL { + ?item wdt:P1476 ?title . + BIND(LANG(?title) as ?titleLang) + } OPTIONAL { ?item wdt:P170 ?artist } FILTER NOT EXISTS { ?item wdt:P180 ?depicts } } @@ -456,7 +459,10 @@ def item_page(item_id): # hits = item.run_query() label_and_language = get_entity_label_and_language(entity) - label = label_and_language['label'] + if label_and_language: + label = label_and_language['label'] + else: + label = None other = get_other(item.entity) if 'P276' in entity['claims']: @@ -528,7 +534,7 @@ def item_page(item_id): except requests.exceptions.ReadTimeout: pass - label_languages = label_and_language['languages'] + label_languages = label_and_language['languages'] if label_and_language else [] show_translation_links = all(lang.code != 'en' for lang in label_languages) return render_template('item.html', qid=qid, @@ -748,13 +754,8 @@ def browse_page(): flat = '_'.join(f'{pid}={qid}' for pid, qid in params) - # item_entity = get_entity_with_cache(qid) - item_labels = get_labels(qid for pid, qid in params) - # property_keys = item_entity['claims'].keys() - # property_labels = get_labels(property_keys, name=f'{flat}_property_labels') - sparql_params = ''.join( f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params) @@ -801,10 +802,11 @@ def browse_page(): prop_labels=find_more_props, label=title, pager=pager, + item_map=item_map, page=page, labels=find_more_props, bindings=bindings, - total=len(bindings), + total=len(item_map), items=items) @app.route('/find_more.json') diff --git a/depicts/wdqs.py b/depicts/wdqs.py index ee4224f..cbacf79 100644 --- a/depicts/wdqs.py +++ b/depicts/wdqs.py @@ -4,6 +4,7 @@ import urllib.parse import os import dateutil.parser import hashlib +from collections import defaultdict from . import utils query_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql' @@ -65,32 +66,63 @@ def format_time(row_time, row_timeprecision): return row_time['value'] def build_browse_item_map(bindings): - item_map = {} + row_map = defaultdict(list) + for row in bindings: item_id = row_id(row) - row_qid = f'Q{item_id}' label = row['itemLabel']['value'] image_filename = commons_uri_to_filename(row['image']['value']) - if item_id in item_map: - item = item_map[item_id] - item['image_filename'].append(image_filename) - continue - if label == row_qid: - label = get_row_value(row, 'title') or 'name missing' - - artist_name = get_row_value(row, 'artistLabel') or '[artist unknown]' + artist_name = get_row_value(row, 'artistLabel') d = format_time(row['time'], row['timeprecision']) if 'time' in row else None + row_qid = f'Q{item_id}' item = { - 'image_filename': [image_filename], - 'item_id': item_id, - 'qid': row_qid, - 'label': label, + 'image_filename': image_filename, 'date': d, 'artist_name': artist_name, } + if label != row_qid: + item['label'] = label + + title = get_row_value(row, 'title') + if title: + lang = get_row_value(row, 'titleLang') + item['title'] = (lang, title) + + row_map[item_id].append(item) + + item_map = {} + for item_id, items in row_map.items(): + titles = {} + filenames = set() + artist_names = [] + labels = set() + for item in items: + if 'title' in item: + lang, title = item['title'] + titles[lang] = title + filenames.add(item['image_filename']) + if item['artist_name'] not in artist_names: + artist_names.append(item['artist_name']) + if 'label' in item: + labels.add(item['label']) + + item = { + 'qid': f'Q{item_id}', + 'item_id': item_id, + 'image_filename': list(filenames), + 'artist_name': ', '.join(artist_names), + } + if labels: + assert len(labels) == 1 + item['label'] = list(labels)[0] + elif 'en' in titles: + item['label'] = titles['en'] + else: + item['label'] = '[ label missing ]' + item_map[item_id] = item return item_map