Switch to using database for browse pages.
This commit is contained in:
parent
7313df54f7
commit
b952213b23
210
app.py
210
app.py
|
@ -5,13 +5,17 @@ from depicts import (utils, wdqs, commons, mediawiki, artwork, database,
|
|||
wd_catalog, human, wikibase, wikidata_oauth, wikidata_edit)
|
||||
from depicts.pager import Pagination, init_pager
|
||||
from depicts.model import (DepictsItem, DepictsItemAltLabel, Edit, Item,
|
||||
Language, WikidataQuery)
|
||||
Language, WikidataQuery, Triple)
|
||||
from depicts.error_mail import setup_error_mail
|
||||
from requests_oauthlib import OAuth1Session
|
||||
from werkzeug.exceptions import InternalServerError
|
||||
from werkzeug.debug.tbtools import get_current_traceback
|
||||
from sqlalchemy import func, distinct
|
||||
from sqlalchemy.orm import aliased
|
||||
from sqlalchemy.sql.expression import desc
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
import itertools
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
|
@ -175,38 +179,28 @@ def property_query_page(property_id):
|
|||
sort = request.args.get('sort')
|
||||
sort_by_name = sort and sort.lower().strip() == 'name'
|
||||
|
||||
rows = wdqs.run_from_template_with_cache('query/property.sparql',
|
||||
cache_name=pid,
|
||||
pid=pid,
|
||||
isa_list=isa_list)
|
||||
q = (database.session.query(Triple.object_id,
|
||||
func.count(func.distinct(Triple.subject_id)).label('c'))
|
||||
.filter_by(predicate_id=property_id)
|
||||
.join(Item, Item.item_id == Triple.subject_id)
|
||||
.filter_by(is_artwork=True)
|
||||
.group_by(Triple.object_id)
|
||||
.order_by(desc('c')))
|
||||
|
||||
no_label_qid = [row['object']['value'].rpartition('/')[2]
|
||||
for row in rows
|
||||
if 'objectLabel' not in row and '/' in row['object']['value']]
|
||||
labels = get_labels_db({f'Q{object_id}' for object_id, c in q})
|
||||
|
||||
if no_label_qid:
|
||||
extra_label = get_labels(no_label_qid, name=f'{pid}_extra_labels')
|
||||
if extra_label:
|
||||
for row in rows:
|
||||
item = row['object']['value']
|
||||
if 'objectLabel' in row or '/' not in item:
|
||||
continue
|
||||
qid = item.rpartition('/')[2]
|
||||
if extra_label.get(qid):
|
||||
row['objectLabel'] = {'value': extra_label[qid]}
|
||||
|
||||
if sort_by_name:
|
||||
# put rows with no English label at the end
|
||||
no_label = [row for row in rows if 'objectLabel' not in row]
|
||||
has_label = sorted((row for row in rows if 'objectLabel' in row),
|
||||
key=lambda row: locale.strxfrm(row['objectLabel']['value']))
|
||||
rows = has_label + no_label
|
||||
hits = []
|
||||
for object_id, count in q:
|
||||
qid = f'Q{object_id}'
|
||||
hits.append({'qid': qid,
|
||||
'label': labels.get(qid) or '[item missing]',
|
||||
'count': count})
|
||||
|
||||
return render_template('property.html',
|
||||
label=g.title,
|
||||
order=('name' if sort_by_name else 'count'),
|
||||
pid=pid,
|
||||
rows=rows)
|
||||
hits=hits)
|
||||
|
||||
@app.route('/')
|
||||
def start():
|
||||
|
@ -446,7 +440,9 @@ def get_labels(keys, name=None):
|
|||
if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
|
||||
labels = from_cache['labels']
|
||||
if not labels:
|
||||
for cur in utils.chunk(keys, 50):
|
||||
print(len(keys))
|
||||
for num, cur in enumerate(utils.chunk(keys, 50)):
|
||||
print(f'{num * 50} / {len(keys)}')
|
||||
labels += mediawiki.get_entities(cur, props='labels')
|
||||
|
||||
json.dump({'keys': keys, 'labels': labels},
|
||||
|
@ -454,6 +450,40 @@ def get_labels(keys, name=None):
|
|||
|
||||
return {entity['id']: wikibase.get_entity_label(entity) for entity in labels}
|
||||
|
||||
def get_labels_db(keys):
    ''' Return a dict mapping each QID in keys to its label.

    Each key is first looked up in the item table by its numeric part (the
    key without its first character). Keys with no row are fetched with
    mediawiki.get_entities in batches of 50, stored as new Item rows with
    is_artwork=False, and committed one batch at a time.

    Entities that carry a 'redirects' key are skipped, so the returned dict
    may lack an entry for some of the requested keys.
    '''
    labels = {}
    missing = set()
    for qid in set(keys):
        found = Item.query.get(qid[1:])
        if not found:
            missing.add(qid)
            continue
        labels[qid] = found.label

    print(len(missing))
    page_size = 50
    for num, cur in enumerate(utils.chunk(missing, page_size)):
        print(f'{num * page_size} / {len(missing)}')
        for entity in mediawiki.get_entities(cur):
            if 'redirects' in entity:
                continue

            qid = entity['id']
            # FIXME: check if the item is an artwork and set is_artwork correctly
            new_item = Item(
                item_id=qid[1:],
                entity=entity,
                lastrevid=entity['lastrevid'],
                modified=datetime.strptime(entity['modified'],
                                           "%Y-%m-%dT%H:%M:%SZ"),
                is_artwork=False,
            )
            database.session.add(new_item)
            labels[qid] = new_item.label
        # persist each batch before requesting the next one
        database.session.commit()

    return labels
|
||||
|
||||
def build_other_set(entity):
|
||||
other_items = set()
|
||||
for key in find_more_props.keys():
|
||||
|
@ -667,7 +697,7 @@ def catalog_page():
|
|||
title=title)
|
||||
|
||||
def get_image_detail_with_cache(items, cache_name, thumbwidth=None, refresh=False):
|
||||
filenames = [cur['image_filename'] for cur in items]
|
||||
filenames = [cur.image_filename() for cur in items]
|
||||
|
||||
if thumbwidth is None:
|
||||
thumbwidth = app.config['THUMBWIDTH']
|
||||
|
@ -682,7 +712,17 @@ def get_image_detail_with_cache(items, cache_name, thumbwidth=None, refresh=Fals
|
|||
return detail
|
||||
|
||||
def browse_index():
    ''' Render the browse index page with a value count for each property.

    For every predicate, counts the distinct object values across triples
    whose subject is an item flagged as an artwork, and passes the result
    to the template as counts, keyed by 'P<predicate_id>'.
    '''
    q = (database.session.query(Triple.predicate_id,
                                func.count(func.distinct(Triple.object_id)))
                         .join(Item, Triple.subject_id == Item.item_id)
                         .filter_by(is_artwork=True)
                         .group_by(Triple.predicate_id))

    # The template formats counts[pid] for every entry of find_more_props,
    # so properties that have no triples yet must still be present.
    counts = dict.fromkeys(find_more_props, 0)
    counts.update((f'P{predicate_id}', count) for predicate_id, count in q)

    return render_template('browse_index.html',
                           props=find_more_props,
                           counts=counts)
|
||||
|
||||
@app.route('/debug/show_user')
|
||||
def debug_show_user():
|
||||
|
@ -705,41 +745,70 @@ def browse_facets():
|
|||
facets=facets,
|
||||
prop_labels=find_more_props)
|
||||
|
||||
def get_db_items(params):
    ''' Build the Item query for the browse page.

    params is an iterable of (pid, qid) pairs. For each pair the query gains
    one more aliased join to Triple, filtered to that predicate and object,
    so the result only contains items matching every pair. The query is
    returned unexecuted.
    '''
    query = Item.query
    for pid, qid in params:
        predicate_id, object_id = pid[1:], qid[1:]  # drop the 'P' / 'Q' prefix
        joined = query.join(Triple,
                            Item.item_id == Triple.subject_id,
                            aliased=True)
        query = joined.filter(Triple.predicate_id == predicate_id,
                              Triple.object_id == object_id)

    return query
|
||||
|
||||
def get_db_facets(params):
    ''' Return facet values for narrowing the current browse filter.

    params is an iterable of (pid, qid) pairs. Triples are grouped by
    (predicate, object) and counted; for each pair in params the query is
    restricted to subjects that have that predicate/object triple, and
    triples sharing the pair's predicate or object are excluded.

    Returns a dict keyed by 'P<predicate_id>'. Each value is a list of at
    most 15 dicts with 'count', 'qid' and 'label' keys, ordered by
    descending count.
    '''
    t = aliased(Triple)
    q = database.session.query(t.predicate_id, func.count().label('count'), t.object_id)
    facet_limit = 15

    for pid, qid in params:
        q = (q.join(Triple, t.subject_id == Triple.subject_id, aliased=True)
              .filter(Triple.predicate_id == pid[1:],
                      Triple.object_id == qid[1:],
                      t.predicate_id != pid[1:],
                      t.object_id != qid[1:]))

    q = q.group_by(t.predicate_id, t.object_id)

    # groupby only merges adjacent rows, so sort by predicate first
    results = sorted(tuple(row) for row in q.all())

    facet_list = {}
    object_qids = set()
    for predicate_id, rows in itertools.groupby(results, lambda row: row[0]):
        hits = sorted(rows, key=lambda row: row[1], reverse=True)
        values = [{'count': count, 'qid': f'Q{value}'}
                  for _, count, value in hits[:facet_limit]]
        facet_list[f'P{predicate_id}'] = values
        object_qids.update(v['qid'] for v in values)

    labels = get_labels_db(object_qids)

    for values in facet_list.values():
        for v in values:
            # get_labels_db has no entry for entities it skips (redirects),
            # so fall back to the QID instead of raising KeyError.
            v['label'] = labels.get(v['qid']) or v['qid']

    return facet_list
|
||||
|
||||
@app.route('/browse')
|
||||
def browse_page():
|
||||
page_size = 45
|
||||
params = get_artwork_params()
|
||||
|
||||
if not params:
|
||||
return browse_index()
|
||||
|
||||
flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
|
||||
|
||||
item_labels = get_labels(qid for pid, qid in params)
|
||||
|
||||
g.title = ' / '.join(find_more_props[pid] + ': ' + item_labels[qid]
|
||||
item_labels = get_labels_db(qid for pid, qid in params)
|
||||
g.title = ' / '.join(find_more_props[pid] + ': ' + (item_labels.get(qid) or qid)
|
||||
for pid, qid in params)
|
||||
|
||||
bindings = filter_artwork(params)
|
||||
q_items = get_db_items(params)
|
||||
facets = get_db_facets(params)
|
||||
|
||||
try:
|
||||
facets = get_facets(params)
|
||||
except wdqs.QueryError:
|
||||
facets = {}
|
||||
|
||||
|
||||
page_size = 45
|
||||
|
||||
item_map = wdqs.build_browse_item_map(bindings)
|
||||
|
||||
all_items = []
|
||||
for item in item_map.values():
|
||||
if len(item['image_filename']) != 1:
|
||||
continue
|
||||
item['image_filename'] = item['image_filename'][0]
|
||||
all_items.append(item)
|
||||
all_items = q_items.all()
|
||||
|
||||
page = utils.get_int_arg('page') or 1
|
||||
pager = Pagination(page, page_size, len(all_items))
|
||||
total = q_items.count()
|
||||
pager = Pagination(page, page_size, total)
|
||||
|
||||
items = pager.slice(all_items)
|
||||
|
||||
|
@ -747,29 +816,38 @@ def browse_page():
|
|||
detail = get_image_detail_with_cache(items, cache_name)
|
||||
cache_refreshed = False
|
||||
|
||||
linked_qids = {qid for pid, qid in params}
|
||||
for item in items:
|
||||
item['url'] = url_for('item_page', item_id=item['item_id'])
|
||||
image_filename = item['image_filename']
|
||||
artist_qid = item.artist
|
||||
if artist_qid:
|
||||
linked_qids.add(artist_qid)
|
||||
for prop in 'P31', 'P180':
|
||||
linked_qids.update(item.linked_qids(prop))
|
||||
|
||||
linked_labels = get_labels_db(linked_qids)
|
||||
|
||||
for item in items:
|
||||
image_filename = item.image_filename()
|
||||
if not cache_refreshed and image_filename not in detail:
|
||||
detail = get_image_detail_with_cache(items, cache_name, refresh=True)
|
||||
cache_refreshed = True
|
||||
item['image'] = detail[image_filename]
|
||||
item.image = detail[image_filename]
|
||||
|
||||
catalog_url = url_for('catalog_page', **dict(params))
|
||||
|
||||
return render_template('find_more.html',
|
||||
facets=facets,
|
||||
prop_labels=find_more_props,
|
||||
return render_template('new_find_more.html',
|
||||
page=page,
|
||||
label=g.title,
|
||||
pager=pager,
|
||||
params=params,
|
||||
item_map=item_map,
|
||||
catalog_url=catalog_url,
|
||||
page=page,
|
||||
prop_labels=find_more_props,
|
||||
labels=find_more_props,
|
||||
bindings=bindings,
|
||||
total=len(item_map),
|
||||
items=items)
|
||||
linked_labels=linked_labels,
|
||||
items=items,
|
||||
total=total,
|
||||
params=params,
|
||||
facets=facets)
|
||||
|
||||
return jsonify(params=params,
|
||||
items=items.count(),
|
||||
facets=facets)
|
||||
|
||||
@app.route('/find_more.json')
|
||||
def find_more_json():
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from .database import session, now_utc
|
||||
from . import wikibase, utils
|
||||
from sqlalchemy.schema import Column, ForeignKey
|
||||
from sqlalchemy.types import Integer, String, DateTime, Boolean
|
||||
from sqlalchemy.orm import column_property, relationship, synonym
|
||||
|
@ -49,13 +50,59 @@ class DepictsItemAltLabel(Base):
|
|||
class Item(Base):
    ''' Row of the item table: an entity stored as JSON.

    The label, artist, date and linked items are read from the entity JSON
    on access rather than from dedicated columns.
    '''
    __tablename__ = 'item'
    item_id = Column(Integer, primary_key=True, autoincrement=False)
    # There is no label column: it was removed on 2019-12-18 and label is
    # now the read-only property defined further down.
    entity = Column(postgresql.JSON)
    lastrevid = Column(Integer, nullable=True, unique=True)
    modified = Column(DateTime, nullable=True)
    is_artwork = Column(Boolean, nullable=False, default=False)
    qid = column_property('Q' + cast(item_id, String))

    def _claims(self, prop):
        ''' Return the list of claims for property prop, or an empty list. '''
        return self.entity['claims'].get(prop) or []

    def image_count(self):
        ''' Return the number of P18 claims on the entity. '''
        return len(self._claims('P18'))

    def image_filename(self):
        ''' Return the value of the first P18 claim, or None.

        None is returned when there are no P18 claims or when the first
        claim has no mainsnak/datavalue/value entry.
        '''
        p18 = self._claims('P18')
        if not p18:
            return None

        try:
            return p18[0]['mainsnak']['datavalue']['value']
        except KeyError:
            return None

    @property
    def label(self):
        ''' Label computed from the entity by wikibase.get_entity_label. '''
        return wikibase.get_entity_label(self.entity)

    @property
    def artist(self):
        ''' The 'id' of the first P170 datavalue, or None if there is none. '''
        v = wikibase.first_datavalue(self.entity, 'P170')
        return v['id'] if v else None

    @property
    def depicts(self):
        ''' List of QIDs linked through P180. '''
        return self.linked_qids('P180')

    @property
    def instance_of(self):
        ''' List of QIDs linked through P31. '''
        return self.linked_qids('P31')

    def linked_qids(self, prop):
        ''' Return the item ids that the claims for prop point at.

        Claims whose mainsnak has no 'datavalue' are left out.
        '''
        return [v['mainsnak']['datavalue']['value']['id']
                for v in self._claims(prop)
                if 'datavalue' in v['mainsnak']]

    @property
    def date(self):
        ''' The first P571 datavalue formatted by utils.format_time, or None. '''
        v = wikibase.first_datavalue(self.entity, 'P571')
        if v:
            return utils.format_time(v['time'], v['precision'])
        return None
|
||||
|
||||
class Triple(Base):
|
||||
__tablename__ = 'triple'
|
||||
subject_id = Column(Integer, primary_key=True)
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from flask import request
|
||||
from itertools import islice
|
||||
from datetime import datetime
|
||||
import urllib.parse
|
||||
import inflect
|
||||
|
||||
|
@ -74,3 +75,25 @@ def wiki_url(title, site, ns=None):
|
|||
def get_int_arg(name):
    ''' Return query-string argument name as an int.

    Returns None when the argument is absent or is not made up entirely of
    decimal digits (so signs, spaces and the empty string are rejected).
    '''
    value = request.args.get(name)
    # isdecimal rather than isdigit: isdigit also accepts characters such
    # as superscript digits, which int() rejects with ValueError.
    if value is not None and value.isdecimal():
        return int(value)
    return None
|
||||
|
||||
def format_time(time_value, precision):
    ''' Format a time string for display according to precision.

    time_value is a string such as '+1965-04-12T00:00:00Z'; the year is
    taken to be everything before the first '-' that follows the leading
    character, so a leading sign is kept.

    precision 9 gives the year, 8 gives '<year>s', 7 a century and 6 a
    millennium. Any other precision returns time_value unchanged.

    Raises ValueError when no year can be parsed from time_value.
    '''
    # FIXME handle dates like '1965-04-00T00:00:00Z'
    # FIXME handle BC dates properly, "120 B.C." instead of "-120"

    # Parse the year directly instead of via datetime: values with a zero
    # month or day cannot be represented as a datetime, and dropping the
    # first character before parsing loses the sign of BC dates.
    year_end = time_value.find('-', 1)
    if year_end == -1:
        raise ValueError(f'no year found in time value: {time_value!r}')
    year = int(time_value[:year_end])

    if precision == 9:
        return str(year)
    if precision == 8:
        return f'{year}s'
    if precision == 7:
        return f'{ordinal((year // 100) + 1)} century'
    if precision == 6:
        return f'{ordinal((year // 1000) + 1)} millennium'

    return time_value
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
<ul>
|
||||
{% for pid, label in props.items() %}
|
||||
<li><a href="{{ url_for('property_query_page', property_id=pid[1:]) }}">{{ label }}</a>
|
||||
({{ pid }})
|
||||
({{ pid }}) – {{ '{:,d}'.format(counts[pid]) }}
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
|
|
|
@ -20,6 +20,14 @@
|
|||
{% endfor %}
|
||||
#}
|
||||
|
||||
<p>
|
||||
Current filter –
|
||||
{% for pid, qid in params %}
|
||||
<span>{{ prop_labels[pid] }}: {{ linked_labels[qid] }}
|
||||
<a href="{{ set_url_args(page=None, **{pid: None}) }}">[remove filter]</a></span>
|
||||
{% endfor %}
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<a href="{{ url_for('browse_page') }}">browse index</a>
|
||||
{% for pid, qid in params %}
|
||||
|
@ -31,34 +39,48 @@
|
|||
|
||||
<p class="mb-3">
|
||||
<a href="#" id="toggle-filters" class="btn btn-primary">toggle filters</a>
|
||||
{#
|
||||
<a href="{{ catalog_url }}" class="btn btn-primary">catalog artwork</a>
|
||||
#}
|
||||
</p>
|
||||
|
||||
<div id="filters">
|
||||
{% for key, values in facets.items() %}
|
||||
<p>{{ prop_labels[key] }}:
|
||||
{% for v in values %}
|
||||
<a href="{{ set_url_args(**{key: v.qid}) }}">{{ v.label }}</a> ({{ v.count }})
|
||||
<a href="{{ set_url_args(page=None, **{key: v.qid}) }}">{{ v.label }}</a>
|
||||
({{ '{:,d}'.format(v.count) }})
|
||||
{% if not loop.last %}|{% endif %}
|
||||
{% endfor %}
|
||||
</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{{ render_pagination(pager) }}
|
||||
|
||||
<div class="card-columns">
|
||||
{% for item in items %}
|
||||
{% set image = item.image %}
|
||||
<div class="card">
|
||||
<a href="{{ item.url }}">
|
||||
<a href="{{ url_for('item_page', item_id=item.item_id) }}">
|
||||
{# <img src="{{ image.thumburl }}" height="{{ image.thumbheight }}" width="{{ image.thumbwidth }}" class="card-img-top"></a> #}
|
||||
<img src="{{ image.thumburl }}" class="card-img-top"></a>
|
||||
<div class="card-body">
|
||||
<h5 class="card-title">{{ item.label }}</h5>
|
||||
<p class="card-text">by {{ item.artist_name }}
|
||||
<p class="card-text">
|
||||
<div>
|
||||
{% for qid in item.instance_of %}
|
||||
{% if not loop.first %} / {% endif %}
|
||||
<span>{{ linked_labels[qid] }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if item.artist %}
|
||||
by {{ linked_labels[item.artist] }}
|
||||
{% endif %}
|
||||
{% if item.date %}({{ item.date }}){% endif %}
|
||||
<div>
|
||||
{% for depicts_label in item.depicts %}
|
||||
<span class="badge badge-primary">{{ depicts_label }}</span>
|
||||
{% for depicts_qid in item.depicts %}
|
||||
<span class="badge badge-primary">{{ linked_labels[depicts_qid] }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</p>
|
||||
|
|
|
@ -16,19 +16,17 @@
|
|||
{% endif %}
|
||||
</p>
|
||||
|
||||
<ul>
|
||||
{% for row in rows if '/' in row.object.value %}
|
||||
{% set qid = row.object.value.rpartition('/')[2] %}
|
||||
{% set row_label = row.objectLabel.value if 'objectLabel' in row else '[ label missing ]' %}
|
||||
<li>
|
||||
<a href="{{ url_for('browse_page', **{pid: qid}) }}">{{ row_label }}</a>
|
||||
{% if 'objectDescription' in row %}
|
||||
— {{ row.objectDescription.value }}
|
||||
{% endif %}
|
||||
<p>Total: {{ hits | length }}</p>
|
||||
|
||||
({{ '{:,d}'.format(row.count.value | int) }} artworks)
|
||||
{% if 'objectLabel' not in row %}
|
||||
<a href="https://wikidata.org/wiki/{{ qid }}">view in Wikidata</a>
|
||||
<ul>
|
||||
{% for hit in hits %}
|
||||
<li>
|
||||
<a href="{{ url_for('browse_page', **{pid: hit.qid}) }}">{{ hit.label }}</a>
|
||||
|
||||
({{ '{:,d}'.format(hit.count) }} artworks)
|
||||
|
||||
{% if not hit.label %}
|
||||
<a href="https://wikidata.org/wiki/{{ hit.qid }}">view in Wikidata</a>
|
||||
{% endif %}
|
||||
</li>
|
||||
{% endfor %}
|
||||
|
|
Loading…
Reference in a new issue