From af4bdb956b0e0d5fcc72a2bc3736310b180fdfb9 Mon Sep 17 00:00:00 2001
From: Edward Betts <edward@4angle.com>
Date: Mon, 30 Sep 2019 14:23:19 +0100
Subject: [PATCH] Improvements to browse page

---
 app.py          | 22 +++++++++---------
 depicts/wdqs.py | 60 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/app.py b/app.py
index 02427e5..b36aaef 100755
--- a/app.py
+++ b/app.py
@@ -62,7 +62,7 @@ find_more_props = {
 }
 
 find_more_query = '''
-select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision {
+select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?titleLang ?time ?timeprecision {
   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
   ?item wdt:P31 wd:Q3305213 .
   PARAMS
@@ -72,7 +72,10 @@ select ?item ?itemLabel ?image ?artist ?artistLabel ?title ?time ?timeprecision
     ?timenode wikibase:timeValue         ?time.
     ?timenode wikibase:timePrecision     ?timeprecision.
   }
-  OPTIONAL { ?item wdt:P1476 ?title }
+  OPTIONAL {
+    ?item wdt:P1476 ?title .
+    BIND(LANG(?title) as ?titleLang)
+  }
   OPTIONAL { ?item wdt:P170 ?artist }
   FILTER NOT EXISTS { ?item wdt:P180 ?depicts }
 }
@@ -456,7 +459,10 @@ def item_page(item_id):
 
     # hits = item.run_query()
     label_and_language = get_entity_label_and_language(entity)
-    label = label_and_language['label']
+    if label_and_language:
+        label = label_and_language['label']
+    else:
+        label = None
     other = get_other(item.entity)
 
     if 'P276' in entity['claims']:
@@ -528,7 +534,7 @@ def item_page(item_id):
     except requests.exceptions.ReadTimeout:
         pass
 
-    label_languages = label_and_language['languages']
+    label_languages = label_and_language['languages'] if label_and_language else []
     show_translation_links = all(lang.code != 'en' for lang in label_languages)
     return render_template('item.html',
                            qid=qid,
@@ -748,13 +754,8 @@ def browse_page():
 
     flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
 
-    # item_entity = get_entity_with_cache(qid)
-
     item_labels = get_labels(qid for pid, qid in params)
 
-    # property_keys = item_entity['claims'].keys()
-    # property_labels = get_labels(property_keys, name=f'{flat}_property_labels')
-
     sparql_params = ''.join(
         f'?item wdt:{pid} wd:{qid} .\n' for pid, qid in params)
 
@@ -801,10 +802,11 @@ def browse_page():
                            prop_labels=find_more_props,
                            label=title,
                            pager=pager,
+                           item_map=item_map,
                            page=page,
                            labels=find_more_props,
                            bindings=bindings,
-                           total=len(bindings),
+                           total=len(item_map),
                            items=items)
 
 @app.route('/find_more.json')
diff --git a/depicts/wdqs.py b/depicts/wdqs.py
index ee4224f..cbacf79 100644
--- a/depicts/wdqs.py
+++ b/depicts/wdqs.py
@@ -4,6 +4,7 @@ import urllib.parse
 import os
 import dateutil.parser
 import hashlib
+from collections import defaultdict
 from . import utils
 
 query_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
@@ -65,32 +66,85 @@ def format_time(row_time, row_timeprecision):
     return row_time['value']
 
 def build_browse_item_map(bindings):
-    item_map = {}
+    """Collapse query result rows into one display record per item.
+
+    The same item can appear in several rows, so rows are grouped by
+    item id first and each group is then merged into a single dict.
+
+    Returns a dict mapping item id to a dict with the keys 'qid',
+    'item_id', 'image_filename' (list of distinct filenames),
+    'artist_name' (distinct names joined with ', '), 'date' (taken
+    from the item's first row) and 'label'.
+    """
+    row_map = defaultdict(list)
+
     for row in bindings:
         item_id = row_id(row)
-        row_qid = f'Q{item_id}'
         label = row['itemLabel']['value']
         image_filename = commons_uri_to_filename(row['image']['value'])
-        if item_id in item_map:
-            item = item_map[item_id]
-            item['image_filename'].append(image_filename)
-            continue
 
-        if label == row_qid:
-            label = get_row_value(row, 'title') or 'name missing'
-
-        artist_name = get_row_value(row, 'artistLabel') or '[artist unknown]'
+        artist_name = get_row_value(row, 'artistLabel')
 
         d = format_time(row['time'], row['timeprecision']) if 'time' in row else None
+        row_qid = f'Q{item_id}'
 
         item = {
-            'image_filename': [image_filename],
-            'item_id': item_id,
-            'qid': row_qid,
-            'label': label,
+            'image_filename': image_filename,
             'date': d,
             'artist_name': artist_name,
         }
+        # A label identical to the QID is not a real label; leave it out
+        # so that a title can be used instead.
+        if label != row_qid:
+            item['label'] = label
+
+        title = get_row_value(row, 'title')
+        if title:
+            lang = get_row_value(row, 'titleLang')
+            item['title'] = (lang, title)
+
+        row_map[item_id].append(item)
+
+    item_map = {}
+    for item_id, items in row_map.items():
+        titles = {}
+        filenames = []
+        artist_names = []
+        labels = set()
+        for item in items:
+            if 'title' in item:
+                lang, title = item['title']
+                titles[lang] = title
+            # De-duplicate in row order so the image list is
+            # deterministic (set iteration order is not).
+            if item['image_filename'] not in filenames:
+                filenames.append(item['image_filename'])
+            # Rows may carry no artist name; skip those so that
+            # str.join() below only ever sees strings.
+            name = item['artist_name']
+            if name and name not in artist_names:
+                artist_names.append(name)
+            if 'label' in item:
+                labels.add(item['label'])
+
+        item = {
+            'qid': f'Q{item_id}',
+            'item_id': item_id,
+            'image_filename': filenames,
+            'artist_name': ', '.join(artist_names),
+            # Keep exposing a date, as before: the first row's value.
+            'date': items[0]['date'],
+        }
+        if labels:
+            # Pick deterministically instead of asserting there is
+            # exactly one: assert is stripped under -O and would
+            # otherwise abort the whole page.
+            item['label'] = min(labels)
+        elif 'en' in titles:
+            item['label'] = titles['en']
+        else:
+            item['label'] = '[ label missing ]'
+
         item_map[item_id] = item
 
     return item_map