more detail from catalogs

This commit is contained in:
Edward Betts 2019-09-27 11:02:24 +01:00
parent 921824833f
commit 14b93fb7de
3 changed files with 204 additions and 17 deletions

170
app.py
View file

@ -1,17 +1,26 @@
#!/usr/bin/python3 #!/usr/bin/python3
from flask import Flask, render_template, url_for, redirect, request, g, jsonify from flask import Flask, render_template, url_for, redirect, request, g, jsonify, session
from depicts import utils, wdqs, commons, mediawiki, painting, saam, database from depicts import (utils, wdqs, commons, mediawiki, painting, saam, database,
dia, rijksmuseum, npg, museodelprado, barnesfoundation,
wd_catalog)
from depicts.model import DepictsItem, DepictsItemAltLabel from depicts.model import DepictsItem, DepictsItemAltLabel
from requests_oauthlib import OAuth1Session
from urllib.parse import urlencode
import requests.exceptions
import requests
import lxml.html
import json import json
import os import os
import locale import locale
import random import random
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8') locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')
user_agent = 'Mozilla/5.0 (X11; Linux i586; rv:32.0) Gecko/20160101 Firefox/32.0'
app = Flask(__name__) app = Flask(__name__)
app.config.from_object('config.default') app.config.from_object('config.default')
app.config['SECRET_KEY'] = '4e65d7cf665eb275b91b7b9a5d5dd3a9894a33dbd2ff4472'
database.init_db(app.config['DB_URL']) database.init_db(app.config['DB_URL'])
find_more_props = { find_more_props = {
@ -113,7 +122,9 @@ def init_profile():
@app.route("/")
def index():
    """Render the front page.

    The template receives the ``find_more_props`` mapping as ``props`` and
    the result of ``get_username()`` as ``username``.
    """
    template_args = {
        'props': find_more_props,
        'username': get_username(),
    }
    return render_template('index.html', **template_args)
@app.route("/property/P<int:property_id>") @app.route("/property/P<int:property_id>")
def property_query_page(property_id): def property_query_page(property_id):
@ -160,6 +171,86 @@ def random_painting():
item_id = wdqs.row_id(row) item_id = wdqs.row_id(row)
return redirect(url_for('item_page', item_id=item_id)) return redirect(url_for('item_page', item_id=item_id))
@app.route('/oauth/start')
def start_oauth():
    """Start the OAuth 1 handshake with Wikidata.

    Fetches a request token from Special:OAuth/initiate, stores its key and
    secret in the Flask session under ``owner_key`` / ``owner_secret``, and
    redirects the browser to the Special:OAuth/authorize page.
    """
    consumer_key = app.config['CLIENT_KEY']
    consumer_secret = app.config['CLIENT_SECRET']

    initiate_url = ('https://www.wikidata.org/w/index.php'
                    '?title=Special%3aOAuth%2finitiate')
    authorize_url = 'https://www.wikidata.org/wiki/Special:OAuth/authorize'

    oauth = OAuth1Session(consumer_key,
                          client_secret=consumer_secret,
                          callback_uri='oob')
    request_token = oauth.fetch_request_token(initiate_url)

    # Kept in the session so the callback view can sign the token exchange.
    session['owner_key'] = request_token.get('oauth_token')
    session['owner_secret'] = request_token.get('oauth_token_secret')

    return redirect(oauth.authorization_url(authorize_url,
                                            oauth_consumer_key=consumer_key))
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
    """Complete the OAuth 1 handshake started by ``start_oauth``.

    Reads the verifier from the callback URL, exchanges the request token
    stored in the session for an access token at Special:OAuth/token, and
    overwrites ``owner_key`` / ``owner_secret`` in the session with the
    access token credentials before redirecting to ``show_user``.

    Returns a 400 response when the session holds no request token (for
    example when this URL is opened directly) instead of failing with a
    KeyError.
    """
    if 'owner_key' not in session or 'owner_secret' not in session:
        message = ('No OAuth request token in session; start again at '
                   + url_for('start_oauth'))
        return message, 400

    base_url = 'https://www.wikidata.org/w/index.php'
    access_token_url = base_url + '?title=Special%3aOAuth%2ftoken'

    client_key = app.config['CLIENT_KEY']
    # Keyword arguments shared by both OAuth1Session instances below.
    credentials = {
        'client_secret': app.config['CLIENT_SECRET'],
        'resource_owner_key': session['owner_key'],
        'resource_owner_secret': session['owner_secret'],
    }

    oauth = OAuth1Session(client_key, **credentials)
    oauth_response = oauth.parse_authorization_response(request.url)
    verifier = oauth_response.get('oauth_verifier')

    oauth = OAuth1Session(client_key, verifier=verifier, **credentials)
    oauth_tokens = oauth.fetch_access_token(access_token_url)

    session['owner_key'] = oauth_tokens.get('oauth_token')
    session['owner_secret'] = oauth_tokens.get('oauth_token_secret')
    # get_username() caches the name in the session; drop it so a new
    # authorization is not shown under the previously cached username.
    session.pop('username', None)

    return redirect(url_for('show_user'))
def get_username():
    """Return the Wikidata username for the current session, or None.

    None is returned when the session has no ``owner_key`` (not authorized).
    Otherwise the name is taken from ``session['username']``, which is filled
    on first use from a ``meta=userinfo`` API query.
    """
    if 'owner_key' not in session:
        return None  # not authorized

    if 'username' not in session:
        userinfo_query = {
            'action': 'query',
            'meta': 'userinfo',
            'format': 'json',
        }
        api_reply = oauth_api_request(userinfo_query)
        session['username'] = api_reply['query']['userinfo']['name']

    return session['username']
@app.route("/show_user")
def show_user():
    """Return the repr of the ``query`` part of a ``meta=userinfo`` API reply."""
    # Make authenticated calls to the API
    userinfo_query = {'action': 'query', 'meta': 'userinfo', 'format': 'json'}
    api_reply = oauth_api_request(userinfo_query)
    return repr(api_reply['query'])
def oauth_api_request(params):
    """Send an OAuth-signed GET request to the Wikidata API.

    ``params`` is URL-encoded into the query string of ``api.php``. The
    request is signed with the consumer credentials from the app config and
    the ``owner_key`` / ``owner_secret`` stored in the session. Returns the
    decoded JSON body of the response.
    """
    api_url = 'https://www.wikidata.org/w/api.php?' + urlencode(params)

    signed_session = OAuth1Session(
        app.config['CLIENT_KEY'],
        client_secret=app.config['CLIENT_SECRET'],
        resource_owner_key=session['owner_key'],
        resource_owner_secret=session['owner_secret'],
    )
    return signed_session.get(api_url).json()
def image_with_cache(qid, image_filename, width): def image_with_cache(qid, image_filename, width):
filename = f'cache/{qid}_{width}_image.json' filename = f'cache/{qid}_{width}_image.json'
if os.path.exists(filename): if os.path.exists(filename):
@ -171,8 +262,30 @@ def image_with_cache(qid, image_filename, width):
return detail[image_filename] return detail[image_filename]
def first_datavalue(entity, pid):
    """Return the value of the first statement for property ``pid``.

    Looks at ``entity['claims'][pid][0]['mainsnak']['datavalue']['value']``.
    Returns None when the entity has no statements for ``pid``, or when the
    first statement's main snak carries no ``datavalue`` (rather than raising
    KeyError / IndexError).
    """
    statements = entity['claims'].get(pid)
    if not statements:
        return None

    datavalue = statements[0]['mainsnak'].get('datavalue')
    if datavalue is None:
        # Snak without a concrete value: nothing to return.
        return None
    return datavalue['value']
def get_catalog_page(property_id, value):
    """Return the HTML of the catalog page for ``value``, using a file cache.

    The page URL comes from ``wd_catalog.lookup(property_id, value)['url']``.
    The HTML is cached in ``cache/{property_id}_{catalog_id}.html`` where
    ``catalog_id`` is ``value`` with ``/`` replaced by ``_``; on a cache miss
    the page is downloaded with the module's ``user_agent`` header and saved.

    Exceptions raised by ``requests.get`` propagate to the caller.
    """
    detail = wd_catalog.lookup(property_id, value)
    url = detail['url']
    # '/' in an identifier would otherwise be read as a directory separator.
    catalog_id = value.replace('/', '_')
    filename = f'cache/{property_id}_{catalog_id}.html'

    if os.path.exists(filename):
        # Context manager closes the handle; explicit encoding keeps reads
        # and writes consistent regardless of the platform default.
        with open(filename, encoding='utf-8') as cached:
            return cached.read()

    r = requests.get(url, headers={'User-Agent': user_agent})
    html = r.text
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(html)
    return html
def get_description_from_page(html):
    """Return the text of the first ``<div itemprop="description">`` in ``html``.

    Returns None when ``html`` is empty or whitespace-only, or when the page
    has no such div. Only the div's leading text (``.text``) is returned.
    """
    if not html or not html.strip():
        # lxml refuses to parse an empty document; treat it as "no description".
        return None

    root = lxml.html.fromstring(html)
    div = root.find('.//div[@itemprop="description"]')
    if div is None:
        return None
    return div.text
@app.route("/item/Q<int:item_id>") @app.route("/item/Q<int:item_id>")
def item_page(item_id): def item_page(item_id):
@ -188,20 +301,55 @@ def item_page(item_id):
label = get_entity_label(entity) label = get_entity_label(entity)
other = get_other(item.entity) other = get_other(item.entity)
catalog_ids = wd_catalog.find_catalog_id(entity)
catalog_detail = []
for property_id in sorted(catalog_ids):
value = first_datavalue(entity, property_id)
detail = wd_catalog.lookup(property_id, value)
catalog_detail.append(detail)
catalog_url = first_datavalue(entity, 'P973')
catalog = None
if 'P4704' in entity['claims']: if 'P4704' in entity['claims']:
saam_id = first_datavalue(entity, 'P4704') saam_id = first_datavalue(entity, 'P4704')
catalog = saam.get_catalog(saam_id) catalog = saam.get_catalog(saam_id)
saam_data = { elif 'P4709' in entity['claims']:
'keywords': catalog['keywords'], catalog_id = first_datavalue(entity, 'P4709')
'description': catalog['ld']['description'] catalog = barnesfoundation.get_catalog(catalog_id)
} elif catalog_url and 'www.dia.org' in catalog_url:
else: catalog = dia.get_catalog(catalog_url)
saam_data = None elif catalog_url and 'www.rijksmuseum.nl' in catalog_url:
catalog = rijksmuseum.get_catalog(catalog_url)
elif catalog_url and 'www.npg.org.uk' in catalog_url:
catalog = npg.get_catalog(catalog_url)
elif catalog_url and 'www.museodelprado.es' in catalog_url:
catalog = museodelprado.get_catalog(catalog_url)
if not catalog and catalog_ids:
for property_id in sorted(catalog_ids):
if property_id == 'P350':
continue # RKDimages ID
value = first_datavalue(entity, property_id)
detail = wd_catalog.lookup(property_id, value)
try:
html = get_catalog_page(property_id, value)
except requests.exceptions.SSLError:
continue # ignore this error
description = get_description_from_page(html)
if not description:
continue
catalog = {
'institution': detail['label'],
'description': description,
}
return render_template('item.html', return render_template('item.html',
qid=qid, qid=qid,
item=item, item=item,
saam_data=saam_data, catalog=catalog,
catalog_url=catalog_url,
catalog_detail=catalog_detail,
labels=find_more_props, labels=find_more_props,
entity=item.entity, entity=item.entity,
label=label, label=label,

View file

@ -4,7 +4,20 @@
{% block content %} {% block content %}
<div class="m-3"> <div class="m-3">
<p><a href="{{ url_for('random_painting') }}">random painting</a></p> <p><a href="{{ url_for('random_painting') }}">random painting</a>
{% if not username %}
| <a href="{{ url_for('start_oauth') }}">connect to Wikidata account</a>
{% endif %}
</p>
<p>username: {{ username }}</p>
{% if not username %}
<p>This tool must be connected with Wikidata to work.</p>
<p><a href="{{ url_for('start_oauth') }}" class="btn btn-primary btn-lg">Connect with Wikidata</a>
{% endif %}
<ul> <ul>
{% for pid, label in props.items() %} {% for pid, label in props.items() %}
<li><a href="{{ url_for('property_query_page', property_id=pid[1:]) }}">{{ label }}</a> <li><a href="{{ url_for('property_query_page', property_id=pid[1:]) }}">{{ label }}</a>

View file

@ -47,12 +47,38 @@
</div> </div>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
{% if catalog_detail %}
{% for detail in catalog_detail %}
<div>
<strong>{{ detail.label }}</strong>:
<a href="{{ detail.url }}">{{ detail.value }}</a>
</div>
{% endfor %}
{% endif %}
{% if saam_data %}
{% if catalog_url %}
<p>
<strong>catalog URL</strong>:
<a href="{{ catalog_url }}">{{ catalog_url }}</a>
</p>
{% endif %}
{% if catalog %}
<div class="mt-2"> <div class="mt-2">
<h4>information from the Smithsonian American Art Museum</h4> <h4>information from the {{ catalog.institution }} catalog</h4>
<div><strong>description</strong>: {{ saam_data.description }}</div> {% if catalog.description %}
<div><strong>keywords</strong>: {{ '; '.join(saam_data.keywords) }}</div> <div><strong>description</strong>: {{ catalog.description }}</div>
{% endif %}
{% if catalog.keywords %}
<div><strong>keywords</strong>
<ul>
{% for keyword in catalog.keywords %}
<li>{{ keyword }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
</div> </div>
{% endif %} {% endif %}
</div> </div>
@ -95,7 +121,7 @@
{% endraw %} {% endraw %}
</div> </div>
<div class="flex-shrink-1 vh-100"> <div class="flex-shrink-1 vh-100">
<img src="{{ image.thumburl }}" class="h-100" /> <img src="{{ image.thumburl }}" />
</div> </div>
</div> </div>