Save query template name

This commit is contained in:
Edward Betts 2019-11-26 17:21:58 +00:00
parent 69b4faad25
commit d94d1bb170
4 changed files with 65 additions and 44 deletions

50
app.py
View file

@ -163,11 +163,10 @@ def property_query_page(property_id):
sort = request.args.get('sort') sort = request.args.get('sort')
sort_by_name = sort and sort.lower().strip() == 'name' sort_by_name = sort and sort.lower().strip() == 'name'
q = render_template('query/property.sparql', rows = wdqs.run_from_template_with_cache('query/property.sparql',
pid=pid, cache_name=pid,
isa_list=isa_list) pid=pid,
isa_list=isa_list)
rows = wdqs.run_query_with_cache(q, name=pid)
no_label_qid = [row['object']['value'].rpartition('/')[2] no_label_qid = [row['object']['value'].rpartition('/')[2]
for row in rows for row in rows
@ -207,8 +206,7 @@ def start():
@app.route('/next') @app.route('/next')
def random_artwork(): def random_artwork():
q = render_template('query/artwork_no_depicts.sparql') rows = wdqs.run_from_template_with_cache('query/artwork_no_depicts.sparql')
rows = wdqs.run_query_with_cache(q)
has_depicts = True has_depicts = True
while has_depicts: while has_depicts:
item_id = wdqs.row_id(random.choice(rows)) item_id = wdqs.row_id(random.choice(rows))
@ -559,17 +557,13 @@ def find_more_page(property_id, item_id):
return redirect(url_for('browse_page', **{pid: qid})) return redirect(url_for('browse_page', **{pid: qid}))
def get_facets(params): def get_facets(params):
flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
properties = [pid for pid in find_more_props.keys() properties = [pid for pid in find_more_props.keys()
if pid not in request.args] if pid not in request.args]
q = render_template('query/facet.sparql', bindings = wdqs.run_from_template_with_cache('query/facet.sparql',
params=params, params=params,
isa_list=isa_list, isa_list=isa_list,
properties=properties) properties=properties)
bindings = wdqs.run_query_with_cache(q, flat + '_facets')
facets = {key: [] for key in find_more_props.keys()} facets = {key: [] for key in find_more_props.keys()}
for row in bindings: for row in bindings:
@ -591,13 +585,9 @@ def get_artwork_params():
if pid.startswith('P') and qid.startswith('Q')] if pid.startswith('P') and qid.startswith('Q')]
def filter_artwork(params): def filter_artwork(params):
flat = '_'.join(f'{pid}={qid}' for pid, qid in params) return wdqs.run_from_template_with_cache('query/find_more.sparql',
q = render_template('query/find_more.sparql', params=params,
params=params, isa_list=isa_list)
isa_list=isa_list)
bindings = wdqs.run_query_with_cache(q, flat)
return bindings
@app.route('/catalog') @app.route('/catalog')
def catalog_page(): def catalog_page():
@ -632,6 +622,7 @@ def catalog_page():
flat = '_'.join(f'{pid}={qid}' for pid, qid in params) flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
thumbwidth = 400 thumbwidth = 400
# FIXME cache_name can be too long for filesystem
cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}' cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}'
detail = get_image_detail_with_cache(items, cache_name, thumbwidth=thumbwidth) detail = get_image_detail_with_cache(items, cache_name, thumbwidth=thumbwidth)
@ -740,13 +731,14 @@ def find_more_json():
qid_list = request.args.getlist('qid') qid_list = request.args.getlist('qid')
limit = 6 limit = 6
q = render_template('query/find_more_basic.sparql',
qid_list=qid_list,
pid=pid,
limit=limit)
filenames = [] filenames = []
bindings = wdqs.run_query_with_cache(q, f'{pid}={",".join(qid_list)}_{limit}') cache_name = f'{pid}={",".join(qid_list)}_{limit}'
bindings = wdqs.run_from_template_with_cache('query/find_more_basic.sparql',
cache_name=cache_name,
qid_list=qid_list,
pid=pid,
limit=limit)
items = [] items = []
for row in bindings: for row in bindings:
item_id = wdqs.row_id(row) item_id = wdqs.row_id(row)
@ -764,7 +756,7 @@ def find_more_json():
for item in items: for item in items:
item['image'] = detail[item['filename']] item['image'] = detail[item['filename']]
return jsonify(items=items, q=q) return jsonify(items=items)
def wikibase_search(terms): def wikibase_search(terms):
hits = [] hits = []

View file

@ -109,6 +109,7 @@ class WikidataQuery(Base):
path = Column(String) path = Column(String)
status_code = Column(Integer) status_code = Column(Integer)
error_text = Column(String) error_text = Column(String)
query_template = Column(String)
@hybrid_property @hybrid_property
def duration(self): def duration(self):
@ -117,3 +118,16 @@ class WikidataQuery(Base):
@property @property
def display_seconds(self): def display_seconds(self):
return f'{self.duration.total_seconds():.1f}' return f'{self.duration.total_seconds():.1f}'
@property
def template(self):
    """Short display name of the query template recorded for this row.

    Strips a leading ``query/`` and a trailing ``.sparql`` from
    ``query_template``, so ``query/property.sparql`` becomes ``property``.
    A name without either affix is returned unchanged.

    Returns ``None`` when ``query_template`` is unset or empty.
    """
    prefix, suffix = 'query/', '.sparql'

    name = self.query_template
    if not name:
        return None
    # Slice by the affix lengths instead of hard-coded offsets so the
    # literals and the slice bounds cannot drift apart.
    if name.startswith(prefix):
        name = name[len(prefix):]
    if name.endswith(suffix):
        name = name[:-len(suffix)]
    return name

View file

@ -4,7 +4,7 @@ import urllib.parse
import os import os
import dateutil.parser import dateutil.parser
import hashlib import hashlib
from flask import request from flask import request, render_template
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from .model import WikidataQuery from .model import WikidataQuery
@ -27,24 +27,34 @@ def get_row_text(row, field):
def commons_uri_to_filename(uri): def commons_uri_to_filename(uri):
return urllib.parse.unquote(utils.drop_start(uri, commons_start)) return urllib.parse.unquote(utils.drop_start(uri, commons_start))
def run_query(query): def run_from_template(template_name, **context):
query = render_template(template_name, **context)
return run_query(query, query_template=template_name)
def run_from_template_with_cache(template_name, cache_name=None, **context):
    """Render a query template and run the result via run_query_with_cache.

    ``context`` is passed through to ``render_template``. ``cache_name`` is
    forwarded as the ``name`` argument and ``template_name`` as the
    ``query_template`` argument of ``run_query_with_cache``.
    """
    return run_query_with_cache(render_template(template_name, **context),
                                name=cache_name,
                                query_template=template_name)
def run_query(query, query_template=None):
params = {'query': query, 'format': 'json'} params = {'query': query, 'format': 'json'}
start = datetime.utcnow() start = datetime.utcnow()
r = requests.post(query_url, data=params, stream=True)
end = datetime.utcnow()
db_query = WikidataQuery( db_query = WikidataQuery(
start_time=start, start_time=start,
end_time=end,
sparql_query=query, sparql_query=query,
path=request.full_path.rstrip('?'), path=request.full_path.rstrip('?'),
status_code=r.status_code) query_template=query_template)
database.session.add(db_query)
database.session.commit()
r = requests.post(query_url, data=params, stream=True)
db_query.end_time = datetime.utcnow()
db_query.status_code = r.status_code
if r.status_code != 200: if r.status_code != 200:
print(r.text) print(r.text)
db_query.error_text = r.text db_query.error_text = r.text
database.session.add(db_query)
database.session.commit() database.session.commit()
assert r.status_code == 200 assert r.status_code == 200
@ -54,7 +64,7 @@ def md5_query(query):
''' generate the md5 hexdigest of a SPARQL query ''' ''' generate the md5 hexdigest of a SPARQL query '''
return hashlib.md5(query.encode('utf-8')).hexdigest() return hashlib.md5(query.encode('utf-8')).hexdigest()
def run_query_with_cache(q, name=None): def run_query_with_cache(q, name=None, query_template=None):
if name is None: if name is None:
name = md5_query(q) name = md5_query(q)
filename = f'cache/{name}.json' filename = f'cache/{name}.json'
@ -63,7 +73,7 @@ def run_query_with_cache(q, name=None):
if isinstance(from_cache, dict) and from_cache.get('query') == q: if isinstance(from_cache, dict) and from_cache.get('query') == q:
return from_cache['bindings'] return from_cache['bindings']
r = run_query(q) r = run_query(q, query_template=query_template)
bindings = r.json()['results']['bindings'] bindings = r.json()['results']['bindings']
json.dump({'query': q, 'bindings': bindings}, json.dump({'query': q, 'bindings': bindings},
open(filename, 'w'), indent=2) open(filename, 'w'), indent=2)

View file

@ -16,8 +16,9 @@
<div class="row"> <div class="row">
<div class="col-2">when</div> <div class="col-2">when</div>
<div class="col-3">page</div> <div class="col-4">page</div>
<div class="col-2">query time</div> <div class="col-1">time</div>
<div class="col-2">template</div>
<div class="col">options</div> <div class="col">options</div>
</div> </div>
{% for obj in q %} {% for obj in q %}
@ -26,15 +27,19 @@
<div class="col-2{{ class }}"> <div class="col-2{{ class }}">
{{ obj.start_time.strftime('%Y %b %d %H:%M') }} {{ obj.start_time.strftime('%Y %b %d %H:%M') }}
</div> </div>
<div class="col-3"><a href="{{ obj.path }}">{{ obj.path }}</a></div> <div class="col-4"><a href="{{ obj.path }}">{{ obj.path }}</a></div>
<div class="col-1{{ class }}">
{{ obj.display_seconds }}&nbsp;secs
</div>
<div class="col-2{{ class }}"> <div class="col-2{{ class }}">
{{ obj.display_seconds }}&nbsp;seconds {{ obj.template or '' }}
</div> </div>
<div class="col"> <div class="col">
[<a href="#" class="toggle" data-id="{{ obj.id }}">show/hide query</a>] <a href="#" class="toggle" data-id="{{ obj.id }}">show/hide</a> |
[<a href="https://query.wikidata.org/#{{ obj.sparql_query | urlencode }}">Wikidata Query Service</a>] <a href="https://query.wikidata.org/#{{ obj.sparql_query | urlencode }}">run</a>
</div> </div>
</div> </div>