Compare commits

..

10 commits

Author SHA1 Message Date
Edward Betts 4375b80d45 Add .gitignore 2023-10-10 09:09:01 +01:00
Edward Betts ab81201908 Remove unused pprint import 2023-10-10 09:08:31 +01:00
Edward Betts faa0b3cb4e Bug fixes and improvements. 2023-10-10 07:27:25 +00:00
Edward Betts 89549b750f Add missing template 2022-05-18 14:13:29 +01:00
Edward Betts 5fde13608f Tidy 2022-05-18 14:12:50 +01:00
Edward Betts a5f7ba917d Bug fixes 2022-05-18 14:12:34 +01:00
Edward Betts 529611b42c Update model 2022-05-18 14:11:51 +01:00
Edward Betts c5a66abb25 Move code around 2021-04-17 19:29:09 +02:00
Edward Betts 44241751b2 Query database instead of overpass 2021-04-17 18:31:58 +02:00
Edward Betts 54b280655f Moving code around 2021-04-17 18:02:53 +02:00
11 changed files with 328 additions and 213 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
__pycache__
.mypy_cache/

25
geocode/database.py Normal file
View file

@ -0,0 +1,25 @@
from sqlalchemy import create_engine, func
from sqlalchemy.orm import scoped_session, sessionmaker
session = scoped_session(sessionmaker())
def init_db(db_url):
    """Bind the module-level scoped session to an engine for ``db_url``."""
    engine = get_engine(db_url)
    session.configure(bind=engine)
def get_engine(db_url, echo=False):
    """Create and return a SQLAlchemy engine for ``db_url``.

    The engine is created with ``pool_recycle=3600``; ``echo`` is passed
    straight through to ``create_engine``.
    """
    engine_options = {"pool_recycle": 3600, "echo": echo}
    return create_engine(db_url, **engine_options)
def init_app(app, echo=False):
    """Configure the shared session from ``app.config["DB_URL"]``.

    Also registers an app-context teardown handler that removes the scoped
    session.
    """
    engine = get_engine(app.config["DB_URL"], echo=echo)
    session.configure(bind=engine)

    def shutdown_session(exception=None):
        """Remove the scoped session when the app context is torn down."""
        session.remove()

    app.teardown_appcontext(shutdown_session)
def now_utc():
    """Return the SQL expression ``timezone('utc', now())``."""
    current_time = func.now()
    return func.timezone("utc", current_time)

57
geocode/model.py Normal file
View file

@ -0,0 +1,57 @@
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.schema import Column
from sqlalchemy.types import Integer, Float, Numeric, String
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import column_property
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy import func, cast
from geoalchemy2 import Geometry
from .database import session
Base = declarative_base()
Base.query = session.query_property()
class Polygon(Base):
    """ORM mapping for rows of the ``planet_osm_polygon`` table."""

    __tablename__ = "planet_osm_polygon"

    osm_id = Column(Integer, primary_key=True, autoincrement=False)
    admin_level = Column(String)
    way_area = Column(Float)
    tags = Column(postgresql.HSTORE)
    way = Column(Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False)
    # Computed by the database as ST_Area(way, False).
    # NOTE(review): presumably square metres, given area_in_sq_km -- confirm.
    area = column_property(func.ST_Area(way, False))

    @property
    def osm_url(self):
        """Return the openstreetmap.org URL for this polygon.

        A positive ``osm_id`` is linked as a way, anything else as a
        relation; the URL always uses the absolute value of the id.
        """
        osm_type = "way" if self.osm_id > 0 else "relation"
        return f"https://www.openstreetmap.org/{osm_type}/{abs(self.osm_id)}"

    @hybrid_property
    def area_in_sq_km(self):
        """Return ``area`` divided by 1,000,000."""
        return self.area / (1000 * 1000)

    @classmethod
    def coords_within(cls, lat, lon):
        """Return a query for polygons containing the point (``lat``, ``lon``).

        Only rows with a non-null, all-digit ``admin_level`` are included.
        Results are ordered by ``area`` ascending, then by numeric
        ``admin_level`` descending.
        """
        point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)
        matches = cls.query.filter(
            cls.admin_level.isnot(None),
            # Raw string: "\d" in a plain literal is an invalid escape sequence.
            cls.admin_level.regexp_match(r"^\d+$"),
            func.ST_Within(point, cls.way),
        )
        return matches.order_by(cls.area, cast(cls.admin_level, Integer).desc())
class Scotland(Base):
    """ORM mapping for rows of the ``scotland`` table."""

    __tablename__ = "scotland"

    gid = Column(Integer, primary_key=True)
    shape_leng = Column(Numeric)
    shape_area = Column(Numeric)
    # Short code for the area, at most 3 characters.
    code = Column(String(3))
    # NOTE(review): the c91code* columns look like 1991 census codes -- confirm.
    c91code1 = Column(String(5))
    c91code2 = Column(String(5))
    c91code3 = Column(String(5))
    c91code4 = Column(String(5))
    name = Column(String(50))
    # Boundary geometry stored as a MULTIPOLYGON in SRID 27700.
    geom = Column(Geometry("MULTIPOLYGON", srid=27700))

View file

@ -1,36 +0,0 @@
from flask import current_app
from . import headers
import os
import json
import requests
OVERPASS_URL = "https://lz4.overpass-api.de"
def run_query(oql):
    """POST the Overpass QL text ``oql`` to the interpreter and return the response."""
    endpoint = f"{OVERPASS_URL}/api/interpreter"
    payload = oql.encode("utf-8")
    return requests.post(endpoint, data=payload, headers=headers)
def is_in_lat_lon(lat, lon):
    """Run an Overpass ``is_in`` query for (``lat``, ``lon``) and return the response.

    The query requests the ways and relations pivoted from the ``is_in``
    result, output with bounding boxes and tags.
    """
    return run_query(
        f"""
[out:json][timeout:25];
is_in({lat},{lon})->.a;
(way(pivot.a); rel(pivot.a););
out bb tags qt;"""
    )
def get_osm_elements(lat, lon):
    """Return the Overpass ``elements`` list for the point (``lat``, ``lon``).

    When ``USE_CACHE`` is enabled in the Flask config, a previously saved
    response at ``cache/{lat}_{lon}.json`` is used if present; otherwise the
    fresh response body is written to that path before returning.
    """
    filename = f"cache/{lat}_{lon}.json"
    use_cache = current_app.config["USE_CACHE"]

    if use_cache and os.path.exists(filename):
        # Context manager so the cache file handle is closed promptly.
        with open(filename) as cached:
            return json.load(cached)["elements"]

    r = is_in_lat_lon(lat, lon)
    if use_cache:
        # Store the raw response bytes; closed even if the write fails.
        with open(filename, "wb") as out:
            out.write(r.content)
    return r.json()["elements"]

12
geocode/scotland.py Normal file
View file

@ -0,0 +1,12 @@
from flask import current_app
import psycopg2
def get_scotland_code(lat, lon):
    """Return the ``code`` of the ``scotland`` row containing the point, or None.

    The point (``lon``, ``lat``) is built in SRID 4326 and transformed to
    SRID 27700 before the ``st_contains`` test against ``geom``.
    """
    # Coordinates are bound as query parameters rather than interpolated into
    # the SQL text, so caller-supplied values cannot alter the statement.
    sql = (
        "select code, name from scotland where st_contains(geom, "
        "ST_Transform(ST_SetSRID(ST_MakePoint(%s, %s), 4326), 27700));"
    )
    conn = psycopg2.connect(**current_app.config["DB_PARAMS"])
    try:
        cur = conn.cursor()
        cur.execute(sql, (lon, lat))
        row = cur.fetchone()
    finally:
        # Always release the connection, including when the query raises.
        conn.close()
    return row[0] if row else None

View file

@ -1,8 +1,12 @@
from flask import render_template
import requests
import simplejson
from . import headers
import urllib.parse
wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
wd_entity = "http://www.wikidata.org/entity/Q"
commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
class QueryError(Exception):
@ -49,3 +53,110 @@ def wdqs(query):
return r.json()["results"]["bindings"]
except simplejson.errors.JSONDecodeError:
raise QueryError(query, r)
def wd_to_qid(wd):
    """Return the QID for a binding of type ``"uri"``, otherwise None.

    Expects a mapping such as
    ``{"type": "uri", "value": "http://www.wikidata.org/entity/Q30"}``.
    """
    if wd["type"] != "uri":
        return None
    return wd_uri_to_qid(wd["value"])
def wd_uri_to_qid(value):
    """Strip the entity URI prefix from ``value`` and return the QID.

    ``value`` must start with ``wd_entity``; this is checked with an assert.
    """
    assert value.startswith(wd_entity)
    # wd_entity ends in "Q", so step back one character to keep it.
    qid_start = len(wd_entity) - 1
    return value[qid_start:]
def geosearch_query(lat, lon):
    """Render the geosearch SPARQL template for the point and run it.

    Float coordinates are formatted with ``:f`` first; other values are
    passed to the template unchanged.
    """
    lat, lon = (
        f"{value:f}" if isinstance(value, float) else value for value in (lat, lon)
    )
    sparql = render_template("sparql/geosearch.sparql", lat=lat, lon=lon)
    return wdqs(sparql)
def geosearch(lat, lon):
    """Return the first acceptable geosearch row for the point, or None.

    Rows are examined in the order returned by ``geosearch_query``. A row is
    skipped when it has no Commons link and its type is not a listed place
    type, or when its distance exceeds the limit for its type. The search
    stops with no result at the first in-range listed place type that has no
    Commons link.
    """
    fallback_limit = 1
    results = geosearch_query(lat, lon)

    # Maximum accepted distance per place type.
    # NOTE(review): distance units come from the SPARQL template -- confirm.
    limits = {
        "Q188509": 1,  # suburb
        "Q3957": 2,  # town
        "Q532": 1,  # village
        "Q5084": 1,  # hamlet
        "Q515": 2,  # city
        "Q1549591": 3,  # big city
    }

    for row in results:
        isa = wd_uri_to_qid(row["isa"]["value"])
        has_commons = "commonsCat" in row or "commonsSiteLink" in row

        if not has_commons and isa not in limits:
            continue

        if float(row["distance"]["value"]) > limits.get(isa, fallback_limit):
            continue

        if not has_commons:
            break

        return row

    return None
def lookup_scottish_parish_in_wikidata(code):
    """Run the Scottish parish SPARQL template for ``code`` and return the rows."""
    return wdqs(render_template("sparql/scottish_parish.sparql", code=code))
def lookup_gss_in_wikidata(gss):
    """Run the GSS lookup SPARQL template for ``gss`` and return the rows."""
    return wdqs(render_template("sparql/lookup_gss.sparql", gss=gss))
def lookup_wikidata_by_name(name, lat, lon):
    """Run the lookup-by-name SPARQL template and return the rows.

    ``name`` is passed to the template as its ``repr``; ``lat`` and ``lon``
    are passed as strings.
    """
    template_args = {"name": repr(name), "lat": str(lat), "lon": str(lon)}
    sparql = render_template("sparql/lookup_by_name.sparql", **template_args)
    return wdqs(sparql)
def unescape_title(t):
    """Turn underscores into spaces, then percent-decode the title."""
    spaced = t.replace("_", " ")
    return urllib.parse.unquote(spaced)
def commons_from_rows(rows):
    """Return a QID and Commons category from the first usable row, or None.

    A row with ``commonsCat`` supplies the category directly. Otherwise a row
    with ``commonsSiteLink`` supplies it from the site link URL, with the
    Commons category prefix removed and the title unescaped. Rows with
    neither key are skipped.
    """
    for row in rows:
        if "commonsCat" in row:
            return dict(
                wikidata=wd_to_qid(row["item"]),
                commons_cat=row["commonsCat"]["value"],
            )
        if "commonsSiteLink" not in row:
            continue
        site_link = row["commonsSiteLink"]["value"]
        qid = wd_to_qid(row["item"])
        escaped_title = site_link[len(commons_cat_start):]
        return dict(wikidata=qid, commons_cat=unescape_title(escaped_title))
    return None
def get_commons_cat_from_gss(gss):
    """Look up ``gss`` in Wikidata and return the first QID/category hit, or None."""
    rows = lookup_gss_in_wikidata(gss)
    return commons_from_rows(rows)
def build_dict(hit, lat, lon):
    """Build the result dict for a lookup at (``lat``, ``lon``).

    With no hit, the result is marked ``missing`` and has a ``None``
    category. With a hit, the result carries the coordinates, admin level
    and Wikidata id. A ``commons_cat`` entry with title and URL is added
    only when the hit has a non-empty category.
    """
    coords = dict(lat=lat, lon=lon)
    if hit is None:
        return {"commons_cat": None, "missing": True, "coords": coords}

    title = hit["commons_cat"]
    result = {
        "coords": coords,
        "admin_level": hit.get("admin_level"),
        "wikidata": hit["wikidata"],
    }
    if title:
        # Commons URLs use underscores for spaces; the rest is percent-quoted.
        underscored = title.replace(" ", "_")
        result["commons_cat"] = {
            "title": title,
            "url": commons_cat_start + urllib.parse.quote(underscored),
        }
    return result

214
lookup.py
View file

@ -1,24 +1,14 @@
#!/usr/bin/python3
from flask import Flask, render_template, request, jsonify, redirect, url_for
from geocode import wikidata, scotland, database, model
import geocode
import geocode.wikidata
import geocode.overpass
import urllib.parse
import random
import psycopg2
from geopy.distance import distance
# select gid, code, name from scotland where st_contains(geom, ST_Transform(ST_SetSRID(ST_MakePoint(-4.177, 55.7644), 4326), 27700));
commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
wd_entity = "http://www.wikidata.org/entity/Q"
city_of_london_qid = "Q23311"
app = Flask(__name__)
app.config.from_object("config.default")
database.init_app(app)
def get_random_lat_lon():
@ -33,191 +23,54 @@ def get_random_lat_lon():
return lat, lon
def bounding_box_area(element):
bbox = element["bounds"]
x = distance((bbox["maxlat"], bbox["minlon"]), (bbox["maxlat"], bbox["maxlon"]))
y = distance((bbox["minlat"], bbox["maxlon"]), (bbox["maxlat"], bbox["minlon"]))
return x.km * y.km
def wd_to_qid(wd):
# expecting {"type": "url", "value": "https://www.wikidata.org/wiki/Q30"}
if wd["type"] == "uri":
return wd_uri_to_qid(wd["value"])
def wd_uri_to_qid(value):
assert value.startswith(wd_entity)
return value[len(wd_entity) - 1 :]
def build_dict(hit, lat, lon):
coords = {"lat": lat, "lon": lon}
if hit is None:
return dict(commons_cat=None, missing=True, coords=coords)
commons_cat = hit["commons_cat"]
url = commons_cat_start + urllib.parse.quote(commons_cat.replace(" ", "_"))
return dict(
commons_cat={"title": commons_cat, "url": url},
coords=coords,
admin_level=hit.get("admin_level"),
wikidata=hit["wikidata"],
)
def do_lookup(elements, lat, lon):
try:
hit = osm_lookup(elements, lat, lon)
except geocode.wikidata.QueryError as e:
except wikidata.QueryError as e:
return {
"query": e.query,
"error": e.r.text,
"query_url": "https://query.wikidata.org/#" + e.query,
}
return build_dict(hit, lat, lon)
def get_scotland_code(lat, lon):
conn = psycopg2.connect(**app.config["DB_PARAMS"])
cur = conn.cursor()
point = f"ST_Transform(ST_SetSRID(ST_MakePoint({lon}, {lat}), 4326), 27700)"
cur.execute(f"select code, name from scotland where st_contains(geom, {point});")
row = cur.fetchone()
# expand search, disabled for now 2020-04-20
if not row:
cur.execute(
f"select code, name from scotland where ST_DWithin(geom, {point}, 100);"
)
row = cur.fetchone()
conn.close()
if row:
return row[0]
def wdqs_geosearch_query(lat, lon):
if isinstance(lat, float):
lat = f"{lat:f}"
if isinstance(lon, float):
lon = f"{lon:f}"
query = render_template("sparql/geosearch.sparql", lat=lat, lon=lon)
return geocode.wikidata.wdqs(query)
def wdqs_geosearch(lat, lon):
default_max_dist = 1
rows = wdqs_geosearch_query(lat, lon)
max_dist = {
"Q188509": 1, # suburb
"Q3957": 2, # town
"Q532": 1, # village
"Q5084": 1, # hamlet
"Q515": 2, # city
"Q1549591": 3, # big city
}
for row in rows:
isa = wd_uri_to_qid(row["isa"]["value"])
if (
"commonsCat" not in row
and "commonsSiteLink" not in row
and isa not in max_dist
):
continue
distance = float(row["distance"]["value"])
if distance > max_dist.get(isa, default_max_dist):
continue
if "commonsCat" not in row and "commonsSiteLink" not in row:
break
return row
return wikidata.build_dict(hit, lat, lon)
def lat_lon_to_wikidata(lat, lon):
scotland_code = get_scotland_code(lat, lon)
scotland_code = scotland.get_scotland_code(lat, lon)
if scotland_code:
rows = lookup_scottish_parish_in_wikidata(scotland_code)
hit = commons_from_rows(rows)
rows = wikidata.lookup_scottish_parish_in_wikidata(scotland_code)
hit = wikidata.commons_from_rows(rows)
elements = []
result = build_dict(hit, lat, lon)
result = wikidata.build_dict(hit, lat, lon)
return {"elements": elements, "result": result}
elements = geocode.overpass.get_osm_elements(lat, lon)
elements = model.Polygon.coords_within(lat, lon)
result = do_lookup(elements, lat, lon)
# special case because the City of London is admin_level=6 in OSM
if result["wikidata"] == city_of_london_qid:
if result.get("wikidata") == city_of_london_qid:
return {"elements": elements, "result": result}
admin_level = result["admin_level"]
admin_level = result.get("admin_level")
if not admin_level or admin_level >= 7:
return {"elements": elements, "result": result}
row = wdqs_geosearch(lat, lon)
row = wikidata.geosearch(lat, lon)
if row:
hit = commons_from_rows([row])
hit = wikidata.commons_from_rows([row])
elements = []
result = build_dict(hit, lat, lon)
result = wikidata.build_dict(hit, lat, lon)
return {"elements": elements, "result": result}
def lookup_scottish_parish_in_wikidata(code):
query = render_template("sparql/scottish_parish.sparql", code=code)
return geocode.wikidata.wdqs(query)
def lookup_gss_in_wikidata(gss):
query = render_template("sparql/lookup_gss.sparql", gss=gss)
return geocode.wikidata.wdqs(query)
def lookup_wikidata_by_name(name, lat, lon):
query = render_template(
"sparql/lookup_by_name.sparql", name=repr(name), lat=str(lat), lon=str(lon)
)
return geocode.wikidata.wdqs(query)
def unescape_title(t):
return urllib.parse.unquote(t.replace("_", " "))
def commons_from_rows(rows):
for row in rows:
if "commonsCat" in row:
qid = wd_to_qid(row["item"])
return {"wikidata": qid, "commons_cat": row["commonsCat"]["value"]}
if "commonsSiteLink" in row:
site_link = row["commonsSiteLink"]["value"]
qid = wd_to_qid(row["item"])
cat = unescape_title(site_link[len(commons_cat_start) :])
return {"wikidata": qid, "commons_cat": cat}
def get_commons_cat_from_gss(gss):
return commons_from_rows(lookup_gss_in_wikidata(gss))
def osm_lookup(elements, lat, lon):
elements.sort(key=lambda e: bounding_box_area(e))
for e in elements:
if "tags" not in e:
continue
tags = e["tags"]
tags = e.tags
admin_level_tag = tags.get("admin_level")
admin_level = (
int(admin_level_tag)
@ -228,7 +81,7 @@ def osm_lookup(elements, lat, lon):
continue
if "wikidata" in tags:
qid = tags["wikidata"]
commons = geocode.wikidata.qid_to_commons_category(qid)
commons = wikidata.qid_to_commons_category(qid)
if commons:
return {
"wikidata": qid,
@ -237,7 +90,7 @@ def osm_lookup(elements, lat, lon):
}
gss = tags.get("ref:gss")
if gss:
ret = get_commons_cat_from_gss(gss)
ret = wikidata.get_commons_cat_from_gss(gss)
if ret:
ret["admin_level"] = admin_level
return ret
@ -247,22 +100,22 @@ def osm_lookup(elements, lat, lon):
continue
if name.endswith(" CP"):
name = name[:-3]
rows = lookup_wikidata_by_name(name, lat, lon)
rows = wikidata.lookup_wikidata_by_name(name, lat, lon)
if len(rows) == 1:
ret = commons_from_rows(rows)
ret = wikidata.commons_from_rows(rows)
if ret:
ret["admin_level"] = admin_level
return ret
has_wikidata_tag = [e["tags"] for e in elements if "wikidata" in e["tags"]]
has_wikidata_tag = [e.tags for e in elements if e.tags.get("wikidata")]
if len(has_wikidata_tag) != 1:
return
qid = has_wikidata_tag[0]["wikidata"]
return {
"wikidata": qid,
"commons_cat": geocode.qid_to_commons_category(qid),
"commons_cat": wikidata.qid_to_commons_category(qid),
"admin_level": admin_level,
}
@ -287,7 +140,7 @@ def index():
def random_location():
lat, lon = get_random_lat_lon()
elements = geocode.overpass.get_osm_elements(lat, lon)
elements = model.Polygon.coords_within(lat, lon)
result = do_lookup(elements, lat, lon)
return render_template(
@ -300,15 +153,15 @@ def wikidata_tag():
lat = float(request.args.get("lat"))
lon = float(request.args.get("lon"))
scotland_code = get_scotland_code(lat, lon)
scotland_code = scotland.get_scotland_code(lat, lon)
if scotland_code:
rows = lookup_scottish_parish_in_wikidata(scotland_code)
hit = commons_from_rows(rows)
rows = wikidata.lookup_scottish_parish_in_wikidata(scotland_code)
hit = wikidata.commons_from_rows(rows)
elements = []
result = build_dict(hit, lat, lon)
result = wikidata.build_dict(hit, lat, lon)
else:
elements = geocode.overpass.get_osm_elements(lat, lon)
elements = model.Polygon.coords_within(lat, lon)
result = do_lookup(elements, lat, lon)
return render_template(
@ -322,7 +175,18 @@ def detail_page():
lat, lon = [float(request.args.get(param)) for param in ("lat", "lon")]
except TypeError:
return redirect(url_for("index"))
reply = lat_lon_to_wikidata(lat, lon)
try:
reply = lat_lon_to_wikidata(lat, lon)
except wikidata.QueryError as e:
query, r = e.args
return render_template(
"query_error.html",
lat=lat,
lon=lon,
query=query,
r=r
)
return render_template("detail.html", lat=lat, lon=lon, **reply)

View file

@ -32,7 +32,7 @@
{% for element in elements %}
{% set tags = element.tags %}
<pre>{{ element | pprint }}</pre>
<pre>{{ element.tags | pprint }}</pre>
{% endfor %}

View file

@ -0,0 +1,28 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Geocode to commons</title>
</head>
<body>
<h1>Geocode coordinates to Commons Category</h1>
<p><a href="{{ url_for('index', lat=lat, lon=lon) }}">visit endpoint</a>
| <a href="https://www.openstreetmap.org/#map=17/{{lat }}/{{ lon }}">view in OSM</a>
| <a href="{{ url_for('detail_page', lat=lat, lon=lon) }}">#</a>
</p>
<h2>query</h2>
<pre>{{ query }}</pre>
<h2>reply</h2>
<pre>{{ r.text }}</pre>
</body>
</html>

View file

@ -12,8 +12,9 @@ SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSi
}
}
MINUS { ?item wdt:P582 ?endTime . }
MINUS { ?item wdt:P31 wd:Q1497375 . }
OPTIONAL { ?item wdt:P373 ?commonsCat. }
OPTIONAL { ?commonsSiteLink schema:about ?item;
schema:isPartOf <https://commons.wikimedia.org/>. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} ORDER BY (?distance)"""
} ORDER BY (?distance)

View file

@ -0,0 +1,51 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Geocode to commons</title>
<style>
.text-end { text-align: right }
</style>
</head>
<body>
<h1>Geocode coordinates to Commons Category</h1>
<p><a href="{{ url_for('index', lat=lat, lon=lon) }}">visit endpoint</a>
| <a href="https://www.openstreetmap.org/#map=17/{{lat }}/{{ lon }}">view in OSM</a>
{% if result.commons_cat %}
| <a href="{{ result.commons_cat.url }}">Commons category</a>
{% endif %}
{% if result.wikidata %}
| <a href="https://www.wikidata.org/wiki/{{ result.wikidata }}">{{ result.wikidata }}</a>
{% endif %}
| <a href="{{ url_for('detail_page', lat=lat, lon=lon) }}">#</a>
</p>
<pre>{{ result | pprint }}</pre>
{% if result.commons_cat %}
<p>({{ lat }}, {{ lon }}, {{result.commons_cat.title | pprint }}),</p>
{% endif %}
<table>
{% for e in elements %}
{% set tags = e.tags %}
<tr>
<td><a href="{{ e.osm_url }}">{{ tags.name }}</a></td>
<td>{{ tags.admin_level }}</td>
<td>{{ tags.boundary }}</td>
<td>{{ tags.designation }}</td>
<td class="text-end">{{ '{:,.0f}'.format(e.area_in_sq_km) }} km²</td>
<td>{{ 'wikidata' in tags }}</td>
</tr>
{% endfor %}
</table>
</body>
</html>