From dc24e3f7e7d0dbf7f0ef79eb54ae15379b7e2297 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Fri, 16 Apr 2021 22:24:59 +0200 Subject: [PATCH] split out more code --- geocode/__init__.py | 95 +++++-------- geocode/overpass.py | 36 +++++ geocode/wikidata.py | 51 +++++++ lookup.py | 178 +++--------------------- templates/sparql/geosearch.sparql | 19 +++ templates/sparql/lookup_by_name.sparql | 14 ++ templates/sparql/lookup_gss.sparql | 8 ++ templates/sparql/scottish_parish.sparql | 8 ++ 8 files changed, 194 insertions(+), 215 deletions(-) create mode 100644 geocode/overpass.py create mode 100644 geocode/wikidata.py create mode 100644 templates/sparql/geosearch.sparql create mode 100644 templates/sparql/lookup_by_name.sparql create mode 100644 templates/sparql/lookup_gss.sparql create mode 100644 templates/sparql/scottish_parish.sparql diff --git a/geocode/__init__.py b/geocode/__init__.py index 6c235da..d1b913c 100644 --- a/geocode/__init__.py +++ b/geocode/__init__.py @@ -1,61 +1,38 @@ -import requests -import simplejson - -wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql" -OVERPASS_URL = "https://lz4.overpass-api.de" - headers = {"User-Agent": "UK gecode/0.1 (edward@4angle.com)"} -class QueryError(Exception): - def __init__(self, query, r): - self.query = query - self.r = r - - -def wikidata_api_call(params): - return requests.get( - "https://www.wikidata.org/w/api.php", - params={"format": "json", "formatversion": 2, **params}, - headers=headers, - ).json() - - -def get_entity(qid): - json_data = wikidata_api_call({"action": "wbgetentities", "ids": qid}) - - try: - entity = list(json_data["entities"].values())[0] - except KeyError: - return - if "missing" not in entity: - return entity - - -def qid_to_commons_category(qid): - entity = get_entity(qid) - try: - commons_cat = entity["claims"]["P373"][0]["mainsnak"]["datavalue"]["value"] - except Exception: - commons_cat = None - - return commons_cat - - -def wdqs(query): - r = 
requests.post( - wikidata_query_api_url, data={"query": query, "format": "json"}, headers=headers - ) - - try: - return r.json() - except simplejson.errors.JSONDecodeError: - raise QueryError(query, r) - - -def run_query(oql, error_on_rate_limit=True): - return requests.post( - OVERPASS_URL + "/api/interpreter", data=oql.encode("utf-8"), headers=headers - ) - - - +samples = [ + (50.8326, -0.2689, "Adur"), + (52.4914, -0.69645, "Corby"), + (50.893, -4.265, "Newton St Petrock"), + (51.779, 0.128, "Harlow"), + (52.387, 0.294, "Ely"), + (50.9, -1.6, "Minstead"), + (52.43, -1.11, "South Kilworth"), + (53.117, -0.202, "Tattershall Thorpe"), + (53.351, -2.701, "Halton"), + (52.421, -0.651, "Warkton"), + (51.51, -1.547, "Lambourn"), + (52.62, -1.884, "Shenstone"), + (53.309, -1.539, "Sheffield"), + (53.322, 0.032, "Haugham"), + (51.05, -2.598, "Babcary"), + (51.158, -1.906, "Berwick St James"), + (51.867, -1.204, "Weston-on-the-Green"), + (51.034, -2.005, "Ebbesbourne Wake"), + (51.07, -0.718, "Fernhurst"), + (53.059, -0.144, "Wildmore"), + (51.473, 0.221, "Dartford"), + (51.059, 0.05, "Danehill"), + (52.253, -0.122, "Papworth Everard"), + (53.498, -0.415, "West Lindsey"), + (53.392, -0.022, "Brackenborough with Little Grimsby"), + (53.463, -0.027, "Fulstow"), + (52.766, 0.31, "Terrington St Clement"), + (53.1540, -1.8034, "Hartington Town Quarter"), + (51.8532, -0.8829, "Fleet Marston"), + (51.4785, -0.354, "London Borough of Hounslow"), + (51.9687, -0.0327, "Buckland, Hertfordshire"), + (51.0804, -2.3263, "Zeals"), + (55.7644, -4.1770, "East Kilbride"), + (51.4520, -2.6210, "Bristol"), +] diff --git a/geocode/overpass.py b/geocode/overpass.py new file mode 100644 index 0000000..860ea2a --- /dev/null +++ b/geocode/overpass.py @@ -0,0 +1,36 @@ +from flask import current_app +from . 
import headers +import os +import json +import requests + +OVERPASS_URL = "https://lz4.overpass-api.de" + + +def run_query(oql): + return requests.post( + OVERPASS_URL + "/api/interpreter", data=oql.encode("utf-8"), headers=headers + ) + + +def is_in_lat_lon(lat, lon): + oql = f""" +[out:json][timeout:25]; +is_in({lat},{lon})->.a; +(way(pivot.a); rel(pivot.a);); +out bb tags qt;""" + + return run_query(oql) + + +def get_osm_elements(lat, lon): + filename = f"cache/{lat}_{lon}.json" + use_cache = current_app.config["USE_CACHE"] + + if use_cache and os.path.exists(filename): + return json.load(open(filename))["elements"] + + r = is_in_lat_lon(lat, lon) + if use_cache: + open(filename, "wb").write(r.content) + return r.json()["elements"] diff --git a/geocode/wikidata.py b/geocode/wikidata.py new file mode 100644 index 0000000..33ea0d2 --- /dev/null +++ b/geocode/wikidata.py @@ -0,0 +1,51 @@ +import requests +import simplejson +from . import headers + +wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql" + + +class QueryError(Exception): + def __init__(self, query, r): + self.query = query + self.r = r + + +def api_call(params): + return requests.get( + "https://www.wikidata.org/w/api.php", + params={"format": "json", "formatversion": 2, **params}, + headers=headers, + ).json() + + +def get_entity(qid): + json_data = api_call({"action": "wbgetentities", "ids": qid}) + + try: + entity = list(json_data["entities"].values())[0] + except KeyError: + return + if "missing" not in entity: + return entity + + +def qid_to_commons_category(qid): + entity = get_entity(qid) + try: + commons_cat = entity["claims"]["P373"][0]["mainsnak"]["datavalue"]["value"] + except Exception: + commons_cat = None + + return commons_cat + + +def wdqs(query): + r = requests.post( + wikidata_query_api_url, data={"query": query, "format": "json"}, headers=headers + ) + + try: + return r.json()["results"]["bindings"] + except simplejson.errors.JSONDecodeError: + raise 
QueryError(query, r) diff --git a/lookup.py b/lookup.py index 43f83e2..d7789ac 100755 --- a/lookup.py +++ b/lookup.py @@ -2,8 +2,8 @@ from flask import Flask, render_template, request, jsonify, redirect, url_for import geocode -import os -import json +import geocode.wikidata +import geocode.overpass import urllib.parse import random import psycopg2 @@ -12,53 +12,15 @@ from geopy.distance import distance # select gid, code, name from scotland where st_contains(geom, ST_Transform(ST_SetSRID(ST_MakePoint(-4.177, 55.7644), 4326), 27700)); commons_cat_start = "https://commons.wikimedia.org/wiki/Category:" -use_cache = True headers = {"User-Agent": "UK gecode/0.1 (edward@4angle.com)"} wd_entity = "http://www.wikidata.org/entity/Q" city_of_london_qid = "Q23311" -samples = [ - (50.8326, -0.2689, "Adur"), - (52.4914, -0.69645, "Corby"), - (50.893, -4.265, "Newton St Petrock"), - (51.779, 0.128, "Harlow"), - (52.387, 0.294, "Ely"), - (50.9, -1.6, "Minstead"), - (52.43, -1.11, "South Kilworth"), - (53.117, -0.202, "Tattershall Thorpe"), - (53.351, -2.701, "Halton"), - (52.421, -0.651, "Warkton"), - (51.51, -1.547, "Lambourn"), - (52.62, -1.884, "Shenstone"), - (53.309, -1.539, "Sheffield"), - (53.322, 0.032, "Haugham"), - (51.05, -2.598, "Babcary"), - (51.158, -1.906, "Berwick St James"), - (51.867, -1.204, "Weston-on-the-Green"), - (51.034, -2.005, "Ebbesbourne Wake"), - (51.07, -0.718, "Fernhurst"), - (53.059, -0.144, "Wildmore"), - (51.473, 0.221, "Dartford"), - (51.059, 0.05, "Danehill"), - (52.253, -0.122, "Papworth Everard"), - (53.498, -0.415, "West Lindsey"), - (53.392, -0.022, "Brackenborough with Little Grimsby"), - (53.463, -0.027, "Fulstow"), - (52.766, 0.31, "Terrington St Clement"), - (53.1540, -1.8034, "Hartington Town Quarter"), - (51.8532, -0.8829, "Fleet Marston"), - (51.4785, -0.354, "London Borough of Hounslow"), - (51.9687, -0.0327, "Buckland, Hertfordshire"), - (51.0804, -2.3263, "Zeals"), - (55.7644, -4.1770, "East Kilbride"), - (51.4520, -2.6210, 
"Bristol"), -] - app = Flask(__name__) -app.debug = True +app.config.from_object("config.default") def get_random_lat_lon(): @@ -77,7 +39,7 @@ def get_random_lat_lon(): def random_location(): lat, lon = get_random_lat_lon() - elements = get_osm_elements(lat, lon) + elements = geocode.overpass.get_osm_elements(lat, lon) result = do_lookup(elements, lat, lon) return render_template( @@ -98,7 +60,7 @@ def wikidata_tag(): elements = [] result = build_dict(hit, lat, lon) else: - elements = get_osm_elements(lat, lon) + elements = geocode.overpass.get_osm_elements(lat, lon) result = do_lookup(elements, lat, lon) return render_template( @@ -153,7 +115,7 @@ def build_dict(hit, lat, lon): def do_lookup(elements, lat, lon): try: hit = osm_lookup(elements, lat, lon) - except geocode.QueryError as e: + except geocode.wikidata.QueryError as e: return { "query": e.query, "error": e.r.text, @@ -164,9 +126,7 @@ def do_lookup(elements, lat, lon): def get_scotland_code(lat, lon): - conn = psycopg2.connect( - dbname="geocode", user="geocode", password="ooK3ohgh", host="localhost" - ) + conn = psycopg2.connect(**app.config["DB_PARAMS"]) cur = conn.cursor() point = f"ST_Transform(ST_SetSRID(ST_MakePoint({lon}, {lat}), 4326), 27700)" @@ -191,31 +151,8 @@ def wdqs_geosearch_query(lat, lon): if isinstance(lon, float): lon = f"{lon:f}" - query_template = """ - -SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink WHERE { - { - SELECT DISTINCT ?item ?location ?distance ?isa WHERE { - ?item wdt:P31/wdt:P279* wd:Q486972. - ?item wdt:P31 ?isa . - SERVICE wikibase:around { - ?item wdt:P625 ?location. - bd:serviceParam wikibase:center "Point(LON LAT)"^^geo:wktLiteral; - wikibase:radius 5; - wikibase:distance ?distance. - } - } - } - MINUS { ?item wdt:P582 ?endTime . } - OPTIONAL { ?item wdt:P373 ?commonsCat. } - OPTIONAL { ?commonsSiteLink schema:about ?item; - schema:isPartOf . } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} -} ORDER BY (?distance)""" - - query = query_template.replace("LAT", lat).replace("LON", lon) - reply = geocode.wdqs(query) - return reply["results"]["bindings"] + query = render_template("sparql/geosearch.sparql", lat=lat, lon=lon) + return geocode.wikidata.wdqs(query) def wdqs_geosearch(lat, lon): @@ -260,7 +197,7 @@ def lat_lon_to_wikidata(lat, lon): return {"elements": elements, "result": result} - elements = get_osm_elements(lat, lon) + elements = geocode.overpass.get_osm_elements(lat, lon) result = do_lookup(elements, lat, lon) # special case because the City of London is admin_level=6 in OSM @@ -290,85 +227,28 @@ def index(): lat = request.args.get("lat") lon = request.args.get("lon") - if lat is None or lon is None: - samples.sort(key=lambda row: row[2]) - return render_template("index.html", samples=samples) + if lat is not None and lon is not None: + return jsonify(lat_lon_to_wikidata(lat, lon)["result"]) - return jsonify(lat_lon_to_wikidata(lat, lon)["result"]) - - -def get_elements(oql): - return geocode.run_query(oql).json()["elements"] - - -def is_in_lat_lon(lat, lon): - oql = f""" -[out:json][timeout:25]; -is_in({lat},{lon})->.a; -(way(pivot.a); rel(pivot.a);); -out bb tags qt;""" - - return geocode.run_query(oql) + samples = sorted(geocode.samples, key=lambda row: row[2]) + return render_template("index.html", samples=samples) def lookup_scottish_parish_in_wikidata(code): - query = """ -SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { - ?item wdt:P528 "CODE" . - ?item wdt:P31 wd:Q5124673 . - OPTIONAL { ?commonsSiteLink schema:about ?item ; - schema:isPartOf } - OPTIONAL { ?item wdt:P373 ?commonsCat } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} -} -""".replace( - "CODE", code - ) - reply = geocode.wdqs(query) - return reply["results"]["bindings"] + query = render_template("sparql/scottish_parish.sparql", code=code) + return geocode.wikidata.wdqs(query) def lookup_gss_in_wikidata(gss): - query = """ -SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { - ?item wdt:P836 GSS . - OPTIONAL { ?commonsSiteLink schema:about ?item ; - schema:isPartOf } - OPTIONAL { ?item wdt:P373 ?commonsCat } - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } -} -""".replace( - "GSS", repr(gss) - ) - reply = geocode.wdqs(query) - return reply["results"]["bindings"] + query = render_template("sparql/lookup_gss.sparql", gss=gss) + return geocode.wikidata.wdqs(query) def lookup_wikidata_by_name(name, lat, lon): - query = ( - """ -SELECT DISTINCT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { - ?item rdfs:label LABEL@en . - FILTER NOT EXISTS { ?item wdt:P31 wd:Q17362920 } .# ignore Wikimedia duplicated page - OPTIONAL { ?commonsSiteLink schema:about ?item ; - schema:isPartOf } - OPTIONAL { ?item wdt:P373 ?commonsCat } - ?item wdt:P625 ?coords . - - FILTER(geof:distance(?coords, "Point(LON LAT)"^^geo:wktLiteral) < 10) - FILTER(?commonsCat || ?commonsSiteLink) - - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} -} -""".replace( - "LABEL", repr(name) - ) - .replace("LAT", str(lat)) - .replace("LON", str(lon)) + query = render_template( + "sparql/lookup_by_name.sparql", name=repr(name), lat=str(lat), lon=str(lon) ) - - reply = geocode.wdqs(query) - return reply["results"]["bindings"] + return geocode.wikidata.wdqs(query) def unescape_title(t): @@ -391,20 +271,6 @@ def get_commons_cat_from_gss(gss): return commons_from_rows(lookup_gss_in_wikidata(gss)) -def get_osm_elements(lat, lon): - filename = f"cache/{lat}_{lon}.json" - - if use_cache and os.path.exists(filename): - elements = json.load(open(filename))["elements"] - else: - r = is_in_lat_lon(lat, lon) - if use_cache: - open(filename, "wb").write(r.content) - elements = r.json()["elements"] - - return elements - - def osm_lookup(elements, lat, lon): elements.sort(key=lambda e: bounding_box_area(e)) @@ -422,7 +288,7 @@ def osm_lookup(elements, lat, lon): continue if "wikidata" in tags: qid = tags["wikidata"] - commons = geocode.qid_to_commons_category(qid) + commons = geocode.wikidata.qid_to_commons_category(qid) if commons: return { "wikidata": qid, diff --git a/templates/sparql/geosearch.sparql b/templates/sparql/geosearch.sparql new file mode 100644 index 0000000..433e072 --- /dev/null +++ b/templates/sparql/geosearch.sparql @@ -0,0 +1,19 @@ +SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink WHERE { + { + SELECT DISTINCT ?item ?location ?distance ?isa WHERE { + ?item wdt:P31/wdt:P279* wd:Q486972. + ?item wdt:P31 ?isa . + SERVICE wikibase:around { + ?item wdt:P625 ?location. + bd:serviceParam wikibase:center "Point({{ lon }} {{ lat }})"^^geo:wktLiteral; + wikibase:radius 5; + wikibase:distance ?distance. + } + } + } + MINUS { ?item wdt:P582 ?endTime . } + OPTIONAL { ?item wdt:P373 ?commonsCat. } + OPTIONAL { ?commonsSiteLink schema:about ?item; + schema:isPartOf . } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". 
} +} ORDER BY (?distance) diff --git a/templates/sparql/lookup_by_name.sparql b/templates/sparql/lookup_by_name.sparql new file mode 100644 index 0000000..3f6afc4 --- /dev/null +++ b/templates/sparql/lookup_by_name.sparql @@ -0,0 +1,14 @@ +SELECT DISTINCT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { + ?item rdfs:label {{ name }}@en . + FILTER NOT EXISTS { ?item wdt:P31 wd:Q17362920 } .# ignore Wikimedia duplicated page + OPTIONAL { ?commonsSiteLink schema:about ?item ; + schema:isPartOf } + OPTIONAL { ?item wdt:P373 ?commonsCat } + ?item wdt:P625 ?coords . + + FILTER(geof:distance(?coords, "Point({{ lon }} {{ lat }})"^^geo:wktLiteral) < 10) + FILTER(?commonsCat || ?commonsSiteLink) + + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } +} + diff --git a/templates/sparql/lookup_gss.sparql b/templates/sparql/lookup_gss.sparql new file mode 100644 index 0000000..f0ce841 --- /dev/null +++ b/templates/sparql/lookup_gss.sparql @@ -0,0 +1,8 @@ +SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { + ?item wdt:P836 "{{ gss }}" . + OPTIONAL { ?commonsSiteLink schema:about ?item ; + schema:isPartOf } + OPTIONAL { ?item wdt:P373 ?commonsCat } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } +} + diff --git a/templates/sparql/scottish_parish.sparql b/templates/sparql/scottish_parish.sparql new file mode 100644 index 0000000..d72ad1e --- /dev/null +++ b/templates/sparql/scottish_parish.sparql @@ -0,0 +1,8 @@ +SELECT ?item ?itemLabel ?commonsSiteLink ?commonsCat WHERE { + ?item wdt:P528 "{{ code }}" . + ?item wdt:P31 wd:Q5124673 . + OPTIONAL { ?commonsSiteLink schema:about ?item ; + schema:isPartOf } + OPTIONAL { ?item wdt:P373 ?commonsCat } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } +}