Bug fixes
This commit is contained in:
parent
c3094355d7
commit
0df666742e
321
lookup.py
321
lookup.py
|
@ -1,11 +1,16 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
from flask import Flask, render_template, request, jsonify
|
from flask import Flask, render_template, request, jsonify, redirect, url_for
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import random
|
import random
|
||||||
|
import simplejson
|
||||||
|
import psycopg2
|
||||||
|
from geopy.distance import distance
|
||||||
|
|
||||||
|
# select gid, code, name from scotland where st_contains(geom, ST_Transform(ST_SetSRID(ST_MakePoint(-4.177, 55.7644), 4326), 27700));
|
||||||
|
|
||||||
commons_cat_start = 'https://commons.wikimedia.org/wiki/Category:'
|
commons_cat_start = 'https://commons.wikimedia.org/wiki/Category:'
|
||||||
use_cache = False
|
use_cache = False
|
||||||
|
@ -23,6 +28,8 @@ headers = {
|
||||||
OVERPASS_URL = 'https://lz4.overpass-api.de'
|
OVERPASS_URL = 'https://lz4.overpass-api.de'
|
||||||
wikidata_query_api_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
|
wikidata_query_api_url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
|
||||||
wikidata_url = 'https://www.wikidata.org/w/api.php'
|
wikidata_url = 'https://www.wikidata.org/w/api.php'
|
||||||
|
wd_entity = 'http://www.wikidata.org/entity/Q'
|
||||||
|
city_of_london_qid = 'Q23311'
|
||||||
|
|
||||||
samples = [
|
samples = [
|
||||||
(50.8326, -0.2689, 'Adur'),
|
(50.8326, -0.2689, 'Adur'),
|
||||||
|
@ -82,24 +89,194 @@ def random_location():
|
||||||
|
|
||||||
return render_template('random.html', lat=lat, lon=lon, result=result, elements=elements)
|
return render_template('random.html', lat=lat, lon=lon, result=result, elements=elements)
|
||||||
|
|
||||||
def do_lookup(elements, lat, lon):
|
@app.route("/wikidata_tag")
def wikidata_tag():
    """Render ``wikidata_tag.html`` for the ``lat``/``lon`` query arguments.

    A point that falls inside a row of the ``scotland`` table is resolved
    through the Scottish parish lookup; any other point is resolved from the
    OSM elements returned by ``get_osm_elements``.

    Missing or non-numeric coordinates redirect to the index page instead of
    raising, matching ``detail_page``.
    """
    try:
        lat, lon = [float(request.args.get(param)) for param in ('lat', 'lon')]
    except (TypeError, ValueError):
        # TypeError: parameter absent (float(None));
        # ValueError: parameter present but not a number.
        return redirect(url_for('index'))

    scotland_code = get_scotland_code(lat, lon)

    if scotland_code:
        rows = lookup_scottish_parish_in_wikidata(scotland_code)
        hit = commons_from_rows(rows)
        elements = []
        result = build_dict(hit, lat, lon)
    else:
        elements = get_osm_elements(lat, lon)
        result = do_lookup(elements, lat, lon)

    return render_template('wikidata_tag.html', lat=lat, lon=lon, result=result, elements=elements)
|
||||||
|
|
||||||
|
@app.route("/detail")
def detail_page():
    """Render ``random.html`` with the lookup result for ``lat``/``lon``.

    Redirects to the index page when either coordinate is missing or is not
    a valid number.
    """
    try:
        lat, lon = [float(request.args.get(param)) for param in ('lat', 'lon')]
    except (TypeError, ValueError):
        # float(None) raises TypeError (parameter absent); float('abc')
        # raises ValueError (parameter present but malformed).
        return redirect(url_for('index'))
    reply = lat_lon_to_wikidata(lat, lon)
    return render_template('random.html', lat=lat, lon=lon, **reply)
|
||||||
|
|
||||||
|
def bounding_box_area(element):
    """Return the approximate area, in square kilometres, of an element's bounding box.

    ``element['bounds']`` must provide ``minlat``, ``minlon``, ``maxlat`` and
    ``maxlon``. The area is the product of the box's width (measured along
    its northern edge) and its height (measured along its western edge).
    """
    bbox = element['bounds']

    # Width: west to east along the same latitude.
    width = distance((bbox['maxlat'], bbox['minlon']), (bbox['maxlat'], bbox['maxlon']))
    # Height: south to north along the same longitude. The two points must
    # share a longitude, otherwise this measures the diagonal of the box.
    height = distance((bbox['minlat'], bbox['minlon']), (bbox['maxlat'], bbox['minlon']))

    return width.km * height.km
|
||||||
|
|
||||||
|
def wd_to_qid(wd):
    """Extract the QID from a SPARQL result binding.

    ``wd`` is a binding dict with ``type`` and ``value`` keys. Only bindings
    whose ``type`` is ``'uri'`` are converted (via ``wd_uri_to_qid``); any
    other binding type yields ``None``.
    """
    if wd['type'] != 'uri':
        return None
    return wd_uri_to_qid(wd['value'])
|
||||||
|
|
||||||
|
def wd_uri_to_qid(value):
    """Return the QID (e.g. ``'Q30'``) from a Wikidata entity URI.

    ``value`` must start with the module-level ``wd_entity`` prefix.

    Raises:
        ValueError: if ``value`` does not start with ``wd_entity``. An
            explicit exception is used rather than ``assert`` so the check
            still runs when Python is started with ``-O``.
    """
    if not value.startswith(wd_entity):
        raise ValueError(f'not a Wikidata entity URI: {value!r}')
    # wd_entity ends with the leading "Q", so step back one character to
    # keep it in the returned identifier.
    return value[len(wd_entity) - 1:]
|
||||||
|
|
||||||
|
def build_dict(hit, lat, lon):
    """Build the result dict returned to templates and the JSON API.

    Args:
        hit: ``None``, or a dict with ``wikidata`` and ``commons_cat`` keys
            and an optional ``admin_level`` key.
        lat, lon: the coordinates that were looked up; echoed back under
            ``coords``.

    Returns:
        When ``hit`` is ``None``: ``{'commons_cat': None, 'missing': True,
        'coords': ...}``.
        When ``hit`` has no Commons category: the same "missing" dict plus
        the ``wikidata`` and ``admin_level`` values from ``hit``.
        Otherwise: a dict with ``commons_cat`` (``title`` and ``url``),
        ``coords``, ``admin_level`` and ``wikidata``.
    """
    coords = {'lat': lat, 'lon': lon}
    if hit is None:
        return dict(commons_cat=None, missing=True, coords=coords)

    commons_cat = hit.get('commons_cat')
    if not commons_cat:
        # A hit can carry a Wikidata item that has no Commons category;
        # report it as missing instead of failing on None.replace().
        return dict(commons_cat=None, missing=True, coords=coords,
                    admin_level=hit.get('admin_level'),
                    wikidata=hit.get('wikidata'))

    url = commons_cat_start + urllib.parse.quote(commons_cat.replace(' ', '_'))
    return dict(commons_cat={'title': commons_cat, 'url': url},
                coords=coords,
                admin_level=hit.get('admin_level'),
                wikidata=hit['wikidata'])
|
||||||
|
|
||||||
|
|
||||||
|
def do_lookup(elements, lat, lon):
    """Run ``osm_lookup`` and convert its outcome into a result dict.

    Returns ``build_dict(hit, lat, lon)`` on success. If the lookup raises
    ``QueryError``, returns a dict with the failing ``query``, the response
    text under ``error`` and a ``query_url`` link to the Wikidata query
    service.
    """
    try:
        hit = osm_lookup(elements, lat, lon)
    except QueryError as e:
        return {
            'query': e.query,
            'error': e.r.text,
            # Percent-encode the query: raw SPARQL contains newlines, spaces
            # and "#" comments that do not survive inside a URL fragment.
            'query_url': 'https://query.wikidata.org/#' + urllib.parse.quote(e.query, safe=''),
        }

    return build_dict(hit, lat, lon)
|
||||||
|
|
||||||
|
def get_scotland_code(lat, lon):
    """Return the ``code`` of the ``scotland`` table row for a point, or ``None``.

    The point is first matched with ``st_contains``; if no geometry contains
    it, the search is widened with ``ST_DWithin(geom, point, 100)``.

    Args:
        lat, lon: WGS 84 coordinates as numbers or numeric strings.

    Raises:
        ValueError / TypeError: if a coordinate cannot be converted to float.
    """
    # Callers may pass raw query-string values, so coerce to float and bind
    # the values as query parameters instead of formatting them into SQL.
    lat, lon = float(lat), float(lon)

    point = 'ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 4326), 27700)'
    contains_sql = f'select code, name from scotland where st_contains(geom, {point});'
    nearby_sql = f'select code, name from scotland where ST_DWithin(geom, {point}, 100);'
    params = (lon, lat)  # ST_MakePoint takes x (longitude) first

    # NOTE(review): database credentials are hard-coded here; they should be
    # moved to configuration or the environment.
    conn = psycopg2.connect(dbname='geocode', user='geocode', password='ooK3ohgh', host='localhost')
    try:
        with conn.cursor() as cur:
            cur.execute(contains_sql, params)
            row = cur.fetchone()

            if not row:
                # No geometry contains the point: retry with the wider
                # ST_DWithin search.
                cur.execute(nearby_sql, params)
                row = cur.fetchone()
    finally:
        # Always release the connection, even if a query fails.
        conn.close()

    return row[0] if row else None
|
||||||
|
|
||||||
|
def wdqs_geosearch_query(lat, lon):
    """Query the Wikidata query service for items around a point.

    The query selects items that are instances of (a subclass of)
    ``wd:Q486972`` within ``wikibase:radius 5`` of the point, excluding
    items with an end time (``wdt:P582``), ordered by distance.

    Args:
        lat, lon: coordinates as numbers or numeric strings.

    Returns:
        The ``results.bindings`` list from the query reply.

    Raises:
        ValueError / TypeError: if a coordinate cannot be converted to float.
    """
    # Normalise through float() so that ints work and arbitrary strings can
    # never be spliced into the SPARQL text; ":f" avoids exponent notation.
    lat = f'{float(lat):f}'
    lon = f'{float(lon):f}'

    query_template = '''

SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink WHERE {
{
SELECT DISTINCT ?item ?location ?distance ?isa WHERE {
?item wdt:P31/wdt:P279* wd:Q486972.
?item wdt:P31 ?isa .
SERVICE wikibase:around {
?item wdt:P625 ?location.
bd:serviceParam wikibase:center "Point(LON LAT)"^^geo:wktLiteral;
wikibase:radius 5;
wikibase:distance ?distance.
}
}
}
MINUS { ?item wdt:P582 ?endTime . }
OPTIONAL { ?item wdt:P373 ?commonsCat. }
OPTIONAL { ?commonsSiteLink schema:about ?item;
schema:isPartOf <https://commons.wikimedia.org/>. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY (?distance)'''

    query = query_template.replace('LAT', lat).replace('LON', lon)
    reply = wdqs(query)
    return reply['results']['bindings']
|
||||||
|
|
||||||
|
def wdqs_geosearch(lat, lon):
    """Pick the nearest usable row from ``wdqs_geosearch_query``.

    Rows are examined in the order returned. A row is skipped when it has
    neither a Commons link nor a known place type, or when it lies further
    away than the limit for its type. The first remaining row is returned
    if it has a Commons category or sitelink; otherwise the search stops
    and ``None`` is returned.
    """
    default_max_dist = 1
    rows = wdqs_geosearch_query(lat, lon)
    # Maximum accepted distance per "instance of" QID.
    max_dist = {
        'Q188509': 1,  # suburb
        'Q3957': 2,  # town
        'Q532': 1,  # village
        'Q5084': 1,  # hamlet
        'Q515': 2,  # city
        'Q1549591': 3,  # big city
    }
    for candidate in rows:
        isa = wd_uri_to_qid(candidate['isa']['value'])
        has_commons = 'commonsCat' in candidate or 'commonsSiteLink' in candidate

        if not has_commons and isa not in max_dist:
            continue

        if float(candidate['distance']['value']) > max_dist.get(isa, default_max_dist):
            continue

        # The nearest acceptable place decides the outcome either way.
        return candidate if has_commons else None

    return None
|
||||||
|
|
||||||
|
def lat_lon_to_wikidata(lat, lon):
    """Resolve a coordinate pair to a result dict plus the OSM elements used.

    Order of resolution:
      1. A point inside the ``scotland`` table is resolved through the
         Scottish parish lookup.
      2. Otherwise the OSM elements for the point are looked up.
      3. If the OSM result has an ``admin_level`` below 7 (and is not the
         City of London), a nearby place from ``wdqs_geosearch`` replaces it
         when one with a Commons link is found.

    Returns:
        ``{'elements': [...], 'result': {...}}``.
    """
    scotland_code = get_scotland_code(lat, lon)

    if scotland_code:
        rows = lookup_scottish_parish_in_wikidata(scotland_code)
        hit = commons_from_rows(rows)
        return {'elements': [], 'result': build_dict(hit, lat, lon)}

    elements = get_osm_elements(lat, lon)
    result = do_lookup(elements, lat, lon)

    # "Missing" and error results carry neither 'wikidata' nor
    # 'admin_level', so read both with .get() rather than indexing.

    # special case because the City of London is admin_level=6 in OSM
    if result.get('wikidata') == city_of_london_qid:
        return {'elements': elements, 'result': result}

    admin_level = result.get('admin_level')

    if not admin_level or admin_level >= 7:
        return {'elements': elements, 'result': result}

    row = wdqs_geosearch(lat, lon)
    if row:
        hit = commons_from_rows([row])
        if hit:
            # Only replace the OSM result when the nearby place really
            # yields a Commons category.
            elements = []
            result = build_dict(hit, lat, lon)

    return {'elements': elements, 'result': result}
|
||||||
|
|
||||||
@app.route("/")
def index():
    """Front page and JSON API entry point.

    * ``?q=<lat>,<lon>`` redirects to the detail page.
    * ``?lat=..&lon=..`` returns the lookup result as JSON.
    * Anything else renders ``index.html`` with the sample locations sorted
      by name.
    """
    q = request.args.get('q')
    if q and q.strip():
        parts = [v.strip() for v in q.split(',', 1)]
        # A query without a comma cannot be split into lat/lon; fall through
        # to the normal handling instead of failing on the unpacking.
        if len(parts) == 2:
            lat, lon = parts
            return redirect(url_for('detail_page', lat=lat, lon=lon))

    lat = request.args.get('lat')
    lon = request.args.get('lon')
    if lat is None or lon is None:
        # sorted() leaves the module-level list untouched between requests.
        return render_template('index.html', samples=sorted(samples, key=lambda row: row[2]))

    return jsonify(lat_lon_to_wikidata(lat, lon)['result'])
|
|
||||||
|
|
||||||
def wikidata_api_call(params):
|
def wikidata_api_call(params):
|
||||||
call_params = {
|
call_params = {
|
||||||
|
@ -160,30 +337,72 @@ out bb tags qt;'''
|
||||||
|
|
||||||
return run_query(oql)
|
return run_query(oql)
|
||||||
|
|
||||||
def lookup_gss_in_wikidata(gss):
|
def lookup_scottish_parish_in_wikidata(code):
    """Find the Wikidata item whose ``wdt:P528`` value equals ``code``.

    Only items that are an instance of ``wd:Q5124673`` are matched. Returns
    the ``results.bindings`` list from the query reply, including the
    optional Commons sitelink and Commons category of each item.
    """
    template = '''
SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE {
?item wdt:P528 "CODE" .
?item wdt:P31 wd:Q5124673 .
OPTIONAL { ?commonsSiteLink schema:about ?item ;
schema:isPartOf <https://commons.wikimedia.org/> }
OPTIONAL { ?item wdt:P373 ?commonsCat }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
'''
    bindings = wdqs(template.replace('CODE', code))['results']['bindings']
    return bindings
|
||||||
|
|
||||||
|
def lookup_gss_in_wikidata(gss):
    """Find the Wikidata item whose ``wdt:P836`` value equals ``gss``.

    Returns the ``results.bindings`` list from the query reply, including
    the optional Commons sitelink and Commons category of each item.
    """
    template = '''
SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE {
?item wdt:P836 GSS .
OPTIONAL { ?commonsSiteLink schema:about ?item ;
schema:isPartOf <https://commons.wikimedia.org/> }
OPTIONAL { ?item wdt:P373 ?commonsCat }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
'''
    # repr() supplies the quoting for the string literal.
    gss_literal = repr(gss)
    bindings = wdqs(template.replace('GSS', gss_literal))['results']['bindings']
    return bindings
|
||||||
|
|
||||||
|
def lookup_wikidata_by_name(name, lat, lon):
    """Find Wikidata items with the English label ``name`` near a point.

    Matches items whose coordinates (``wdt:P625``) are within the distance
    filter of the point and that have a Commons category or sitelink.

    Args:
        name: exact English label to match.
        lat, lon: coordinates as numbers or numeric strings.

    Returns:
        The ``results.bindings`` list from the query reply.

    Raises:
        ValueError / TypeError: if a coordinate cannot be converted to float.
    """
    # Coerce to float so only numeric text is ever placed in the query.
    lat, lon = float(lat), float(lon)

    query = '''
SELECT DISTINCT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE {
?item rdfs:label LABEL@en .
FILTER NOT EXISTS { ?item wdt:P31 wd:Q17362920 } .# ignore Wikimedia duplicated page
OPTIONAL { ?commonsSiteLink schema:about ?item ;
schema:isPartOf <https://commons.wikimedia.org/> }
OPTIONAL { ?item wdt:P373 ?commonsCat }
?item wdt:P625 ?coords .

FILTER(geof:distance(?coords, "Point(LON LAT)"^^geo:wktLiteral) < 10)
FILTER(?commonsCat || ?commonsSiteLink)

SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
'''
    # Substitute the coordinates first and the label last: the label is
    # free text, and a name containing "LAT" or "LON" would otherwise be
    # corrupted by the later replacements.
    query = (query
             .replace('LAT', str(lat))
             .replace('LON', str(lon))
             .replace('LABEL', repr(name)))

    reply = wdqs(query)
    return reply['results']['bindings']
|
||||||
|
|
||||||
def unescape_title(t):
    """Turn a URL-style page title into display form.

    Underscores become spaces, then percent-escapes are decoded.
    """
    spaced = t.replace('_', ' ')
    return urllib.parse.unquote(spaced)
|
||||||
|
|
||||||
def get_commons_cat_from_gss(gss):
|
def commons_from_rows(rows):
    """Return the first Commons category found in SPARQL result rows.

    Each row is checked for a ``commonsCat`` binding first, then for a
    ``commonsSiteLink`` binding that points at a Commons category page.

    Returns:
        ``{'wikidata': <qid>, 'commons_cat': <category title>}`` for the
        first usable row, or ``None`` when no row has one.
    """
    for row in rows:
        if 'commonsCat' in row:
            return {'wikidata': wd_to_qid(row['item']),
                    'commons_cat': row['commonsCat']['value']}

        if 'commonsSiteLink' in row:
            site_link = row['commonsSiteLink']['value']
            # A Commons sitelink is not always a category page; slicing a
            # non-category URL by the prefix length would give a bogus title.
            if not site_link.startswith(commons_cat_start):
                continue
            cat = unescape_title(site_link[len(commons_cat_start):])
            return {'wikidata': wd_to_qid(row['item']), 'commons_cat': cat}

    return None
|
||||||
|
|
||||||
|
def get_commons_cat_from_gss(gss):
    """Look up ``gss`` in Wikidata and return its Commons hit.

    Prints the code being looked up, then passes the rows from
    ``lookup_gss_in_wikidata`` to ``commons_from_rows``.
    """
    print('GSS:', gss)
    return commons_from_rows(lookup_gss_in_wikidata(gss))
|
||||||
|
|
||||||
def get_osm_elements(lat, lon):
|
def get_osm_elements(lat, lon):
|
||||||
filename = f'cache/{lat}_{lon}.json'
|
filename = f'cache/{lat}_{lon}.json'
|
||||||
|
@ -198,24 +417,74 @@ def get_osm_elements(lat, lon):
|
||||||
|
|
||||||
return elements
|
return elements
|
||||||
|
|
||||||
def osm_lookup(elements):
|
def _admin_level_from_tags(tags):
    """Return the ``admin_level`` tag as an int, or ``None`` if absent or non-numeric."""
    value = tags.get('admin_level')
    if not value or not value.isdigit():
        return None
    try:
        return int(value)
    except ValueError:
        # Some characters satisfy isdigit() but are rejected by int().
        return None


def osm_lookup(elements, lat, lon):
    """Find a Commons category for the OSM elements covering a point.

    ``elements`` is sorted in place by bounding-box area, smallest first,
    and each element that has an admin level (or is tagged
    ``boundary=political``) is tried in turn using, in order:

      1. its ``wikidata`` tag,
      2. its ``ref:gss`` tag,
      3. a Wikidata search for its ``name`` near ``lat``/``lon`` (accepted
         only when exactly one item matches).

    If none of these succeed and exactly one element has a ``wikidata`` tag,
    that element is used as a fallback.

    Returns:
        A dict with ``wikidata``, ``commons_cat`` and ``admin_level`` keys,
        or ``None`` when nothing suitable is found.
    """
    # The lambda defers the lookup of bounding_box_area until an element is
    # actually compared.
    elements.sort(key=lambda e: bounding_box_area(e))

    for e in elements:
        if 'tags' not in e:
            continue
        tags = e['tags']
        admin_level = _admin_level_from_tags(tags)
        if not admin_level and tags.get('boundary') != 'political':
            continue

        if 'wikidata' in tags:
            qid = tags['wikidata']
            commons = qid_to_commons_category(qid)
            if commons:
                return {
                    'wikidata': qid,
                    'commons_cat': commons,
                    'admin_level': admin_level,
                }

        gss = tags.get('ref:gss')
        if gss:
            ret = get_commons_cat_from_gss(gss)
            if ret:
                ret['admin_level'] = admin_level
                return ret

        name = tags.get('name')
        if not name:
            continue
        if name.endswith(' CP'):
            name = name[:-3]
        rows = lookup_wikidata_by_name(name, lat, lon)

        # Only trust the name search when it is unambiguous.
        if len(rows) == 1:
            ret = commons_from_rows(rows)
            if ret:
                ret['admin_level'] = admin_level
                return ret

    # Fallback: a single element with a wikidata tag. Elements without
    # tags are skipped here just as in the loop above.
    tagged = [e['tags'] for e in elements if 'wikidata' in e.get('tags', {})]
    if len(tagged) != 1:
        return None

    tags = tagged[0]
    qid = tags['wikidata']
    commons = qid_to_commons_category(qid)
    if not commons:
        return None

    return {
        'wikidata': qid,
        'commons_cat': commons,
        # Report the admin level of the element that matched, not of
        # whichever element the loop happened to visit last.
        'admin_level': _admin_level_from_tags(tags),
    }
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in a new issue