From d94d1bb1700b05b41c47df38428833af68fe8e35 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Tue, 26 Nov 2019 17:21:58 +0000 Subject: [PATCH] Save query template name --- app.py | 50 ++++++++++++++++----------------------- depicts/model.py | 14 +++++++++++ depicts/wdqs.py | 28 +++++++++++++++------- templates/query_list.html | 17 ++++++++----- 4 files changed, 65 insertions(+), 44 deletions(-) diff --git a/app.py b/app.py index c813111..57d56e0 100755 --- a/app.py +++ b/app.py @@ -163,11 +163,10 @@ def property_query_page(property_id): sort = request.args.get('sort') sort_by_name = sort and sort.lower().strip() == 'name' - q = render_template('query/property.sparql', - pid=pid, - isa_list=isa_list) - - rows = wdqs.run_query_with_cache(q, name=pid) + rows = wdqs.run_from_template_with_cache('query/property.sparql', + cache_name=pid, + pid=pid, + isa_list=isa_list) no_label_qid = [row['object']['value'].rpartition('/')[2] for row in rows @@ -207,8 +206,7 @@ def start(): @app.route('/next') def random_artwork(): - q = render_template('query/artwork_no_depicts.sparql') - rows = wdqs.run_query_with_cache(q) + rows = wdqs.run_from_template_with_cache('query/artwork_no_depicts.sparql') has_depicts = True while has_depicts: item_id = wdqs.row_id(random.choice(rows)) @@ -559,17 +557,13 @@ def find_more_page(property_id, item_id): return redirect(url_for('browse_page', **{pid: qid})) def get_facets(params): - flat = '_'.join(f'{pid}={qid}' for pid, qid in params) - properties = [pid for pid in find_more_props.keys() if pid not in request.args] - q = render_template('query/facet.sparql', - params=params, - isa_list=isa_list, - properties=properties) - - bindings = wdqs.run_query_with_cache(q, flat + '_facets') + bindings = wdqs.run_from_template_with_cache('query/facet.sparql', + params=params, + isa_list=isa_list, + properties=properties) facets = {key: [] for key in find_more_props.keys()} for row in bindings: @@ -591,13 +585,9 @@ def get_artwork_params(): if pid.startswith('P') and qid.startswith('Q')] def filter_artwork(params): - flat = '_'.join(f'{pid}={qid}' for pid, qid in params) - q = render_template('query/find_more.sparql', - params=params, - isa_list=isa_list) - bindings = wdqs.run_query_with_cache(q, flat) - - return bindings + return wdqs.run_from_template_with_cache('query/find_more.sparql', + params=params, + isa_list=isa_list) @app.route('/catalog') def catalog_page(): @@ -632,6 +622,7 @@ def catalog_page(): flat = '_'.join(f'{pid}={qid}' for pid, qid in params) thumbwidth = 400 + # FIXME cache_name can be too long for filesystem cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}' detail = get_image_detail_with_cache(items, cache_name, thumbwidth=thumbwidth) @@ -740,13 +731,14 @@ def find_more_json(): qid_list = request.args.getlist('qid') limit = 6 - q = render_template('query/find_more_basic.sparql', - qid_list=qid_list, - pid=pid, - limit=limit) - filenames = [] - bindings = wdqs.run_query_with_cache(q, f'{pid}={",".join(qid_list)}_{limit}') + cache_name = f'{pid}={",".join(qid_list)}_{limit}' + bindings = wdqs.run_from_template_with_cache('query/find_more_basic.sparql', + cache_name=cache_name, + qid_list=qid_list, + pid=pid, + limit=limit) + items = [] for row in bindings: item_id = wdqs.row_id(row) @@ -764,7 +756,7 @@ def find_more_json(): for item in items: item['image'] = detail[item['filename']] - return jsonify(items=items, q=q) + return jsonify(items=items) def wikibase_search(terms): hits = [] diff --git a/depicts/model.py b/depicts/model.py index a075335..3e18169 100644 --- a/depicts/model.py +++ b/depicts/model.py @@ -109,6 +109,7 @@ class WikidataQuery(Base): path = Column(String) status_code = Column(Integer) error_text = Column(String) + query_template = Column(String) @hybrid_property def duration(self): @@ -117,3 +118,16 @@ class WikidataQuery(Base): @property def display_seconds(self): return f'{self.duration.total_seconds():.1f}' + + @property + def template(self): + if not self.query_template: + return + + t = self.query_template + if t.startswith('query/'): + t = t[6:] + if t.endswith('.sparql'): + t = t[:-7] + + return t diff --git a/depicts/wdqs.py b/depicts/wdqs.py index febdfd6..f5d5167 100644 --- a/depicts/wdqs.py +++ b/depicts/wdqs.py @@ -4,7 +4,7 @@ import urllib.parse import os import dateutil.parser import hashlib -from flask import request +from flask import request, render_template from collections import defaultdict from datetime import datetime from .model import WikidataQuery @@ -27,24 +27,34 @@ def get_row_text(row, field): def commons_uri_to_filename(uri): return urllib.parse.unquote(utils.drop_start(uri, commons_start)) -def run_query(query): +def run_from_template(template_name, **context): + query = render_template(template_name, **context) + return run_query(query, query_template=template_name) + +def run_from_template_with_cache(template_name, cache_name=None, **context): + query = render_template(template_name, **context) + return run_query_with_cache(query, name=cache_name, query_template=template_name) + +def run_query(query, query_template=None): params = {'query': query, 'format': 'json'} start = datetime.utcnow() - r = requests.post(query_url, data=params, stream=True) - end = datetime.utcnow() db_query = WikidataQuery( start_time=start, - end_time=end, sparql_query=query, path=request.full_path.rstrip('?'), - status_code=r.status_code) + query_template=query_template) + database.session.add(db_query) + database.session.commit() + + r = requests.post(query_url, data=params, stream=True) + db_query.end_time = datetime.utcnow() + db_query.status_code = r.status_code if r.status_code != 200: print(r.text) db_query.error_text = r.text - database.session.add(db_query) database.session.commit() assert r.status_code == 200 @@ -54,7 +64,7 @@ def md5_query(query): ''' generate the md5 hexdigest of a SPARQL query ''' return hashlib.md5(query.encode('utf-8')).hexdigest() -def run_query_with_cache(q, name=None): +def run_query_with_cache(q, name=None, query_template=None): if name is None: name = md5_query(q) filename = f'cache/{name}.json' @@ -63,7 +73,7 @@ def run_query_with_cache(q, name=None): if isinstance(from_cache, dict) and from_cache.get('query') == q: return from_cache['bindings'] - r = run_query(q) + r = run_query(q, query_template=query_template) bindings = r.json()['results']['bindings'] json.dump({'query': q, 'bindings': bindings}, open(filename, 'w'), indent=2) diff --git a/templates/query_list.html b/templates/query_list.html index 6d7f96a..ad173d9 100644 --- a/templates/query_list.html +++ b/templates/query_list.html @@ -16,8 +16,9 @@
when
-
page
-
query time
+
page
+
time
+
template
options
{% for obj in q %} @@ -26,15 +27,19 @@
{{ obj.start_time.strftime('%Y %b %d %H:%M') }}
-
{{ obj.path }}
+
{{ obj.path }}
+ +
+ {{ obj.display_seconds }} secs +
- {{ obj.display_seconds }} seconds + {{ obj.template or '' }}
- [show/hide query] - [Wikidata Query Service] + show/hide | + run