Merge branch 'main' of github.com:EdwardBetts/owl-map

Edward Betts 2023-05-14 10:30:15 +00:00
commit 6ce8b30fcc
14 changed files with 847 additions and 452 deletions

config/default.py Normal file (25 lines added)

@ -0,0 +1,25 @@
"""Sample config."""
ID_TAGGING_SCHEMA_DIR = "/var/lib/data/id-tagging-schema"
ID_PRESET_DIR = "/var/lib/data/id-tagging-schema/data/presets/"
GEOIP_DATA = "/var/lib/data/GeoIP/GeoIPCity.dat"
GEOLITE2 = "/var/lib/data/GeoLite2/GeoLite2-City.mmdb"
CLIENT_KEY = ""
CLIENT_SECRET = ""
SECRET_KEY = ""
DEFAULT_COMMENT = "+wikidata"
ADMIN_NAME = ""
ADMIN_EMAIL = ""
ADMINS = [ADMIN_EMAIL]
SMTP_HOST = "localhost"
MAIL_FROM = "osm-wikidata@localhost"
ERROR_MAIL = True
PROPAGATE_EXCEPTIONS = False
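This sample config is a plain Python module that the Flask app loads with app.config.from_object("config.default") (see web_view.py later in this diff). A minimal sketch of using it, assuming it is run from the repository root; the local override module named here is hypothetical, not part of this commit:

import flask

app = flask.Flask(__name__)
app.config.from_object("config.default")  # the values listed above
# Hypothetical site-specific overrides could live in a separate module, e.g.:
# app.config.from_object("config.local")
assert app.config["DEFAULT_COMMENT"] == "+wikidata"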

matcher/__init__.py Normal file (13 lines added)

@ -0,0 +1,13 @@
"""Match OSM and Wikidata items."""
CallParams = dict[str, str | int]
user_agent = (
"osm-wikidata/0.1 (https://github.com/EdwardBetts/osm-wikidata; edward@4angle.com)"
)
def user_agent_headers() -> dict[str, str]:
"""User-Agent headers."""
return {"User-Agent": user_agent}
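user_agent_headers() supplies the shared User-Agent string for outgoing HTTP calls (edit.py below uses it for OSM API requests). A minimal usage sketch; the Wikidata URL here is only illustrative, not taken from this commit:

import requests

from matcher import user_agent_headers

# Identify this service to the remote API via the module-level User-Agent string.
r = requests.get(
    "https://www.wikidata.org/w/api.php",
    params={"action": "query", "format": "json"},
    headers=user_agent_headers(),
    timeout=5,
)
print(r.status_code)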


@ -1,19 +1,21 @@
from sqlalchemy import func, or_, and_, text
import json
import os.path
import re
from collections import Counter, defaultdict
from flask import current_app, g
from sqlalchemy import and_, func, or_, text
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import selectinload
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import literal, union, cast, column
from sqlalchemy.sql.expression import cast, column, literal, union
from sqlalchemy.types import Float
from sqlalchemy.dialects import postgresql
from matcher.planet import point, line, polygon
from matcher import model, database, wikidata_api, wikidata
from collections import Counter, defaultdict
from flask import g, current_app
import re
import os.path
import json
from matcher import database, model, wikidata, wikidata_api
from matcher.planet import line, point, polygon
srid = 4326
re_point = re.compile(r'^POINT\((.+) (.+)\)$')
re_point = re.compile(r"^POINT\((.+) (.+)\)$")
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
tag_prefixes = {
@ -37,9 +39,9 @@ skip_tags = {
"Key:brand",
}
def get_country_iso3166_1(lat, lon):
"""
For a given lat/lon return a set of ISO country codes.
"""For a given lat/lon return a set of ISO country codes.
Also cache the country code in the global object.
@ -48,8 +50,9 @@ def get_country_iso3166_1(lat, lon):
point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
alpha2_codes = set()
q = model.Polygon.query.filter(func.ST_Covers(model.Polygon.way, point),
model.Polygon.admin_level == "2")
q = model.Polygon.query.filter(
func.ST_Covers(model.Polygon.way, point), model.Polygon.admin_level == "2"
)
for country in q:
alpha2 = country.tags.get("ISO3166-1")
if not alpha2:
@ -60,22 +63,23 @@ def get_country_iso3166_1(lat, lon):
return alpha2_codes
def is_street_number_first(lat, lon):
def is_street_number_first(lat: float, lon: float) -> bool:
"""Is lat/lon within a country that puts number first in a street address."""
if lat is None or lon is None:
return True
alpha2 = get_country_iso3166_1(lat, lon)
# Incomplete list of countries that put street number first.
alpha2_number_first = {
'GB', # United Kingdom
'IE', # Ireland
'US', # United States
'MX', # Mexico
'CA', # Canada
'FR', # France
'AU', # Australia
'NZ', # New Zealand
'ZA', # South Africa
"GB", # United Kingdom
"IE", # Ireland
"US", # United States
"MX", # Mexico
"CA", # Canada
"FR", # France
"AU", # Australia
"NZ", # New Zealand
"ZA", # South Africa
}
return bool(alpha2_number_first & alpha2)
@ -84,22 +88,26 @@ def is_street_number_first(lat, lon):
def make_envelope(bounds):
return func.ST_MakeEnvelope(*bounds, srid)
def get_bbox_centroid(bbox):
bbox = make_envelope(bbox)
centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar()
return reversed(re_point.match(centroid).groups())
def make_envelope_around_point(lat, lon, distance):
conn = database.session.connection()
p = func.ST_MakePoint(lon, lat)
s = select([
func.ST_AsText(func.ST_Project(p, distance, func.radians(0))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(90))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(180))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(270))),
])
s = select(
[
func.ST_AsText(func.ST_Project(p, distance, func.radians(0))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(90))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(180))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(270))),
]
)
row = conn.execute(s).fetchone()
coords = [[float(v) for v in re_point.match(i).groups()] for i in row]
@ -110,49 +118,64 @@ def make_envelope_around_point(lat, lon, distance):
return func.ST_MakeEnvelope(west, south, east, north, srid)
def drop_way_area(tags):
""" Remove the way_area field from a tags dict. """
def drop_way_area(tags: dict[str, str]) -> dict[str, str]:
"""Remove the way_area field from a tags dict."""
if "way_area" in tags:
del tags["way_area"]
return tags
def get_part_of(table_name, src_id, bbox):
table_map = {'point': point, 'line': line, 'polygon': polygon}
table_map = {"point": point, "line": line, "polygon": polygon}
table_alias = table_map[table_name].alias()
s = (select([polygon.c.osm_id,
polygon.c.tags,
func.ST_Area(func.ST_Collect(polygon.c.way))]).
where(and_(func.ST_Intersects(bbox, polygon.c.way),
func.ST_Covers(polygon.c.way, table_alias.c.way),
table_alias.c.osm_id == src_id,
polygon.c.tags.has_key("name"),
or_(
polygon.c.tags.has_key("landuse"),
polygon.c.tags.has_key("amenity"),
))).
group_by(polygon.c.osm_id, polygon.c.tags))
s = (
select(
[
polygon.c.osm_id,
polygon.c.tags,
func.ST_Area(func.ST_Collect(polygon.c.way)),
]
)
.where(
and_(
func.ST_Intersects(bbox, polygon.c.way),
func.ST_Covers(polygon.c.way, table_alias.c.way),
table_alias.c.osm_id == src_id,
polygon.c.tags.has_key("name"),
or_(
polygon.c.tags.has_key("landuse"),
polygon.c.tags.has_key("amenity"),
),
)
)
.group_by(polygon.c.osm_id, polygon.c.tags)
)
conn = database.session.connection()
return [{
"type": "way" if osm_id > 0 else "relation",
"id": abs(osm_id),
"tags": tags,
"area": area,
} for osm_id, tags, area in conn.execute(s)]
return [
{
"type": "way" if osm_id > 0 else "relation",
"id": abs(osm_id),
"tags": tags,
"area": area,
}
for osm_id, tags, area in conn.execute(s)
]
def get_and_save_item(qid):
""" Download an item from Wikidata and cache it in the database. """
def get_and_save_item(qid: str) -> model.Item | None:
"""Download an item from Wikidata and cache it in the database."""
entity = wikidata_api.get_entity(qid)
entity_qid = entity["id"]
if entity_qid != qid:
print(f'redirect {qid} -> {entity_qid}')
print(f"redirect {qid} -> {entity_qid}")
item = model.Item.query.get(entity_qid[1:])
return item
if "claims" not in entity:
return
return None
coords = wikidata.get_entity_coords(entity["claims"])
item_id = int(qid[1:])
@ -171,8 +194,9 @@ def get_and_save_item(qid):
return item
def get_isa_count(items):
isa_count = Counter()
def get_isa_count(items: list[model.Item]) -> list[tuple[int, int]]:
"""List of IsA counts."""
isa_count: Counter[int] = Counter()
for item in items:
if not item:
continue
@ -199,13 +223,11 @@ def get_items_in_bbox(bbox):
def get_osm_with_wikidata_tag(bbox, isa_filter=None):
bbox_str = ','.join(str(v) for v in bbox)
bbox_str = ",".join(str(v) for v in bbox)
extra_sql = ""
if isa_filter:
q = (
model.Item.query.join(model.ItemLocation)
.filter(func.ST_Covers(make_envelope(bbox),
model.ItemLocation.location))
q = model.Item.query.join(model.ItemLocation).filter(
func.ST_Covers(make_envelope(bbox), model.ItemLocation.location)
)
q = add_isa_filter(q, isa_filter)
qids = [isa.qid for isa in q]
@ -216,7 +238,8 @@ def get_osm_with_wikidata_tag(bbox, isa_filter=None):
extra_sql += f" AND tags -> 'wikidata' in ({qid_list})"
# easier than building this query with SQLAlchemy
sql = f'''
sql = (
f"""
SELECT tbl, osm_id, tags, ARRAY[ST_Y(centroid), ST_X(centroid)], geojson
FROM (
SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson
@ -235,24 +258,29 @@ UNION
HAVING st_area(st_collect(way)) < 20 * st_area(ST_MakeEnvelope({bbox_str}, {srid}))
) as anon
WHERE tags ? 'wikidata'
''' + extra_sql
"""
+ extra_sql
)
conn = database.session.connection()
result = conn.execute(text(sql))
print(sql)
point_sql = f'''
point_sql = (
f"""
SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson
FROM planet_osm_point
WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way) and tags ? 'wikidata'
''' + extra_sql
"""
+ extra_sql
)
print("point")
print(point_sql)
tagged = []
for tbl, osm_id, tags, centroid, geojson in result:
if tbl == 'point':
if tbl == "point":
osm_type = "node"
else:
osm_type = "way" if osm_id > 0 else "relation"
@ -260,15 +288,17 @@ WHERE tags ? 'wikidata'
name = tags.get("name") or tags.get("addr:housename") or "[no label]"
tagged.append({
"identifier": f"{osm_type}/{osm_id}",
"id": osm_id,
"type": osm_type,
"geojson": json.loads(geojson),
"centroid": centroid,
"name": name,
"wikidata": tags["wikidata"],
})
tagged.append(
{
"identifier": f"{osm_type}/{osm_id}",
"id": osm_id,
"type": osm_type,
"geojson": json.loads(geojson),
"centroid": centroid,
"name": name,
"wikidata": tags["wikidata"],
}
)
return tagged
@ -310,11 +340,13 @@ def get_item_tags(item):
isa, isa_path = isa_items.pop()
if not isa:
continue
isa_path = isa_path + [{'qid': isa.qid, 'label': isa.label()}]
isa_path = isa_path + [{"qid": isa.qid, "label": isa.label()}]
osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]
osm += [extra.tag_or_key
for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)]
osm += [
extra.tag_or_key
for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)
]
for i in osm:
osm_list[i].append(isa_path[:])
@ -369,14 +401,16 @@ def get_tags_for_isa_item(item):
isa, isa_path = isa_items.pop()
if not isa:
continue
isa_path = isa_path + [{'qid': isa.qid, 'label': isa.label()}]
isa_path = isa_path + [{"qid": isa.qid, "label": isa.label()}]
if isa.item_id not in items_checked_done:
items_checked.append({'qid': isa.qid, 'label': isa.label()})
items_checked.append({"qid": isa.qid, "label": isa.label()})
items_checked_done.add(isa.item_id)
osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]
osm += [extra.tag_or_key
for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)]
osm += [
extra.tag_or_key
for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)
]
for i in osm:
osm_list[i].append(isa_path[:])
@ -403,34 +437,31 @@ def get_tags_for_isa_item(item):
seen.update(isa_list)
isa_items += [(isa, isa_path) for isa in get_items(isa_list)]
return {
'tags': {key: list(values) for key, values in osm_list.items()},
'checked': items_checked,
"tags": {key: list(values) for key, values in osm_list.items()},
"checked": items_checked,
}
def add_isa_filter(q, isa_qids):
q_subclass = database.session.query(model.Item.qid).filter(
func.jsonb_path_query_array(
model.Item.claims,
'$.P279[*].mainsnak.datavalue.value.id',
).bool_op('?|')(list(isa_qids))
"$.P279[*].mainsnak.datavalue.value.id",
).bool_op("?|")(list(isa_qids))
)
subclass_qid = {qid for qid, in q_subclass.all()}
isa = func.jsonb_path_query_array(
model.Item.claims,
'$.P31[*].mainsnak.datavalue.value.id',
).bool_op('?|')
"$.P31[*].mainsnak.datavalue.value.id",
).bool_op("?|")
return q.filter(isa(list(isa_qids | subclass_qid)))
def wikidata_items_count(bounds, isa_filter=None):
q = (
model.Item.query.join(model.ItemLocation)
.filter(func.ST_Covers(make_envelope(bounds), model.ItemLocation.location))
q = model.Item.query.join(model.ItemLocation).filter(
func.ST_Covers(make_envelope(bounds), model.ItemLocation.location)
)
if isa_filter:
@ -440,12 +471,12 @@ def wikidata_items_count(bounds, isa_filter=None):
return q.count()
def wikidata_isa_counts(bounds, isa_filter=None):
db_bbox = make_envelope(bounds)
q = (
model.Item.query.join(model.ItemLocation)
.filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
q = model.Item.query.join(model.ItemLocation).filter(
func.ST_Covers(db_bbox, model.ItemLocation.location)
)
if isa_filter:
@ -474,12 +505,13 @@ def wikidata_isa_counts(bounds, isa_filter=None):
return isa_count
def get_tag_filter(tags, tag_list):
tag_filter = []
for tag_or_key in tag_list:
if tag_or_key.startswith("Key:"):
key = tag_or_key[4:]
tag_filter.append(and_(tags.has_key(key), tags[key] != 'no'))
tag_filter.append(and_(tags.has_key(key), tags[key] != "no"))
for prefix in tag_prefixes:
tag_filter.append(tags.has_key(f"{prefix}:{key}"))
@ -495,11 +527,11 @@ def get_tag_filter(tags, tag_list):
def get_preset_translations():
app = current_app
country_language = {
'AU': 'en-AU', # Australia
'GB': 'en-GB', # United Kingdom
'IE': 'en-GB', # Ireland
'IN': 'en-IN', # India
'NZ': 'en-NZ', # New Zealand
"AU": "en-AU", # Australia
"GB": "en-GB", # United Kingdom
"IE": "en-GB", # Ireland
"IN": "en-IN", # India
"NZ": "en-NZ", # New Zealand
}
ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
translation_dir = os.path.join(ts_dir, "dist", "translations")
@ -520,13 +552,14 @@ def get_preset_translations():
return {}
def get_presets_from_tags(ending, tags):
translations = get_preset_translations()
found = []
for k, v in tags.items():
if k == 'amenity' and v == 'clock' and tags.get('display') == 'sundial':
if k == "amenity" and v == "clock" and tags.get("display") == "sundial":
tag_or_key = f"Tag:{k}={v}"
found.append({"tag_or_key": tag_or_key, "name": "Sundial"})
continue
@ -604,8 +637,7 @@ def address_node_label(tags):
def get_address_nodes_within_building(osm_id, bbox_list):
q = model.Point.query.filter(
polygon.c.osm_id == osm_id,
or_(*[func.ST_Intersects(bbox, model.Point.way)
for bbox in bbox_list]),
or_(*[func.ST_Intersects(bbox, model.Point.way) for bbox in bbox_list]),
func.ST_Covers(polygon.c.way, model.Point.way),
model.Point.tags.has_key("addr:street"),
model.Point.tags.has_key("addr:housenumber"),
@ -615,8 +647,14 @@ def get_address_nodes_within_building(osm_id, bbox_list):
def osm_display_name(tags):
keys = ("bridge:name", "tunnel:name", "lock_name", "name", "addr:housename",
"inscription")
keys = (
"bridge:name",
"tunnel:name",
"lock_name",
"name",
"addr:housename",
"inscription",
)
for key in keys:
if key in tags:
return tags[key]
@ -625,6 +663,7 @@ def osm_display_name(tags):
def street_address_in_tags(tags):
return "addr:housenumber" in tags and "addr:street" in tags
def find_osm_candidates(item, limit=80, max_distance=450, names=None):
item_id = item.item_id
item_is_linear_feature = item.is_linear_feature()
@ -637,51 +676,94 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
check_is_street_number_first(item.locations[0].get_lat_lon())
bbox_list = [make_envelope_around_point(*loc.get_lat_lon(), max_distance)
for loc in item.locations]
bbox_list = [
make_envelope_around_point(*loc.get_lat_lon(), max_distance)
for loc in item.locations
]
null_area = cast(None, Float)
dist = column('dist')
tags = column('tags', postgresql.HSTORE)
dist = column("dist")
tags = column("tags", postgresql.HSTORE)
tag_list = get_item_tags(item)
# tag_filters = get_tag_filter(point.c.tags, tag_list)
# print(tag_filters)
s_point = (select([literal('point').label('t'), point.c.osm_id, point.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)).label('dist'),
func.ST_AsText(point.c.way),
func.ST_AsGeoJSON(point.c.way),
null_area]).
where(and_(
or_(*[func.ST_Intersects(bbox, point.c.way)
for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(point.c.tags, tag_list)))).
group_by(point.c.osm_id, point.c.tags, point.c.way))
s_point = (
select(
[
literal("point").label("t"),
point.c.osm_id,
point.c.tags.label("tags"),
func.min(
func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)
).label("dist"),
func.ST_AsText(point.c.way),
func.ST_AsGeoJSON(point.c.way),
null_area,
]
)
.where(
and_(
or_(*[func.ST_Intersects(bbox, point.c.way) for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(point.c.tags, tag_list)),
)
)
.group_by(point.c.osm_id, point.c.tags, point.c.way)
)
s_line = (select([literal('line').label('t'), line.c.osm_id, line.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)).label('dist'),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(line.c.way)),
null_area]).
where(and_(
or_(*[func.ST_Intersects(bbox, line.c.way) for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(line.c.tags, tag_list)))).
group_by(line.c.osm_id, line.c.tags))
s_line = (
select(
[
literal("line").label("t"),
line.c.osm_id,
line.c.tags.label("tags"),
func.min(
func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)
).label("dist"),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(line.c.way)),
null_area,
]
)
.where(
and_(
or_(*[func.ST_Intersects(bbox, line.c.way) for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(line.c.tags, tag_list)),
)
)
.group_by(line.c.osm_id, line.c.tags)
)
s_polygon = (select([literal('polygon').label('t'), polygon.c.osm_id, polygon.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)).label('dist'),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)),
func.ST_Area(func.ST_Collect(polygon.c.way))]).
where(and_(
or_(*[func.ST_Intersects(bbox, polygon.c.way) for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(polygon.c.tags, tag_list)))).
group_by(polygon.c.osm_id, polygon.c.tags).
having(func.ST_Area(func.ST_Collect(polygon.c.way)) < 20 * func.ST_Area(bbox_list[0])))
s_polygon = (
select(
[
literal("polygon").label("t"),
polygon.c.osm_id,
polygon.c.tags.label("tags"),
func.min(
func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)
).label("dist"),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)),
func.ST_Area(func.ST_Collect(polygon.c.way)),
]
)
.where(
and_(
or_(*[func.ST_Intersects(bbox, polygon.c.way) for bbox in bbox_list]),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(polygon.c.tags, tag_list)),
)
)
.group_by(polygon.c.osm_id, polygon.c.tags)
.having(
func.ST_Area(func.ST_Collect(polygon.c.way))
< 20 * func.ST_Area(bbox_list[0])
)
)
tables = ([] if item_is_linear_feature else [s_point]) + [s_line, s_polygon]
s = select([union(*tables).alias()]).where(dist < max_distance).order_by(dist)
@ -695,10 +777,14 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
s = s.where(tags.has_key("name"))
if "Key:amenity" in tag_list:
s = s.where(and_(tags["amenity"] != "bicycle_parking",
tags["amenity"] != "bicycle_repair_station",
tags["amenity"] != "atm",
tags["amenity"] != "recycling"))
s = s.where(
and_(
tags["amenity"] != "bicycle_parking",
tags["amenity"] != "bicycle_repair_station",
tags["amenity"] != "atm",
tags["amenity"] != "recycling",
)
)
if limit:
s = s.limit(limit)
@ -730,6 +816,8 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
shape = "area" if table == "polygon" else table
item_identifier_tags = item.get_identifiers_tags()
cur = {
"identifier": f"{osm_type}/{osm_id}",
"type": osm_type,
@ -748,8 +836,9 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
part_of = []
for bbox in bbox_list:
part_of += [i for i in get_part_of(table, src_id, bbox)
if i["tags"]["name"] != name]
part_of += [
i for i in get_part_of(table, src_id, bbox) if i["tags"]["name"] != name
]
if part_of:
cur["part_of"] = part_of
@ -760,9 +849,9 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
return nearby
def get_item(item_id):
""" Retrieve a Wikidata item, either from the database or from Wikidata. """
def get_item(item_id):
"""Retrieve a Wikidata item, either from the database or from Wikidata."""
item = model.Item.query.get(item_id)
return item or get_and_save_item(f"Q{item_id}")
@ -774,7 +863,7 @@ def get_item_street_addresses(item):
for claim in item.claims["P669"]:
qualifiers = claim.get("qualifiers")
if not qualifiers or 'P670' not in qualifiers:
if not qualifiers or "P670" not in qualifiers:
continue
number = qualifiers["P670"][0]["datavalue"]["value"]
@ -782,24 +871,26 @@ def get_item_street_addresses(item):
street = street_item.label()
for q in qualifiers["P670"]:
number = q["datavalue"]["value"]
address = (f"{number} {street}"
if g.street_number_first
else f"{street} {number}")
address = (
f"{number} {street}" if g.street_number_first else f"{street} {number}"
)
street_address.append(address)
return street_address
def check_is_street_number_first(latlng):
g.street_number_first = is_street_number_first(*latlng)
def item_detail(item):
unsupported_relation_types = {
'Q194356', # wind farm
'Q2175765', # tram stop
"Q194356", # wind farm
"Q2175765", # tram stop
}
locations = [list(i.get_lat_lon()) for i in item.locations]
if not hasattr(g, 'street_number_first'):
if not hasattr(g, "street_number_first"):
g.street_number_first = is_street_number_first(*locations[0])
image_filenames = item.get_claim("P18")
@ -809,20 +900,24 @@ def item_detail(item):
heritage_designation = []
for v in item.get_claim("P1435"):
if not v:
print('heritage designation missing:', item.qid)
print("heritage designation missing:", item.qid)
continue
heritage_designation_item = get_item(v["numeric-id"])
heritage_designation.append({
"qid": v["id"],
"label": heritage_designation_item.label(),
})
heritage_designation.append(
{
"qid": v["id"],
"label": heritage_designation_item.label(),
}
)
isa_items = [get_item(isa["numeric-id"]) for isa in item.get_isa()]
isa_lookup = {isa.qid: isa for isa in isa_items}
wikipedia_links = [{"lang": site[:-4], "title": link["title"]}
for site, link in sorted(item.sitelinks.items())
if site.endswith("wiki") and len(site) < 8]
wikipedia_links = [
{"lang": site[:-4], "title": link["title"]}
for site, link in sorted(item.sitelinks.items())
if site.endswith("wiki") and len(site) < 8
]
d = {
"qid": item.qid,
@ -831,7 +926,9 @@ def item_detail(item):
"markers": locations,
"image_list": image_filenames,
"street_address": street_address,
"isa_list": [{"qid": isa.qid, "label": isa.label()} for isa in isa_items if isa],
"isa_list": [
{"qid": isa.qid, "label": isa.label()} for isa in isa_items if isa
],
"closed": item.closed(),
"inception": item.time_claim("P571"),
"p1619": item.time_claim("P1619"),
@ -849,8 +946,9 @@ def item_detail(item):
unsupported = isa_lookup.keys() & unsupported_relation_types
if unsupported:
d["unsupported_relation_types"] = [isa for isa in d["isa_list"]
if isa["qid"] in isa_lookup]
d["unsupported_relation_types"] = [
isa for isa in d["isa_list"] if isa["qid"] in isa_lookup
]
return d
@ -889,7 +987,7 @@ def wikidata_items(bounds, isa_filter=None):
}
isa_count.append(isa)
return {'items': items, 'isa_count': isa_count}
return {"items": items, "isa_count": isa_count}
def missing_wikidata_items(qids, lat, lon):
@ -924,12 +1022,13 @@ def missing_wikidata_items(qids, lat, lon):
return dict(items=items, isa_count=isa_count)
def isa_incremental_search(search_terms):
en_label = func.jsonb_extract_path_text(model.Item.labels, "en", "value")
q = model.Item.query.filter(
model.Item.claims.has_key("P1282"),
en_label.ilike(f"%{search_terms}%"),
func.length(en_label) < 20,
model.Item.claims.has_key("P1282"),
en_label.ilike(f"%{search_terms}%"),
func.length(en_label) < 20,
)
print(q.statement.compile(compile_kwargs={"literal_binds": True}))
@ -943,13 +1042,18 @@ def isa_incremental_search(search_terms):
ret.append(cur)
return ret
def get_place_items(osm_type, osm_id):
src_id = osm_id * {'way': 1, 'relation': -1}[osm_type]
q = (model.Item.query
.join(model.ItemLocation)
.join(model.Polygon, func.ST_Covers(model.Polygon.way, model.ItemLocation.location))
.filter(model.Polygon.src_id == src_id))
def get_place_items(osm_type, osm_id):
src_id = osm_id * {"way": 1, "relation": -1}[osm_type]
q = (
model.Item.query.join(model.ItemLocation)
.join(
model.Polygon,
func.ST_Covers(model.Polygon.way, model.ItemLocation.location),
)
.filter(model.Polygon.src_id == src_id)
)
# sql = q.statement.compile(compile_kwargs={"literal_binds": True})
item_count = q.count()


@ -1,18 +1,25 @@
import requests
"""Use mediawiki API to look up images on Wikimedia Commons."""
import urllib.parse
from . import utils
from typing import Any
import requests
from . import CallParams, utils
commons_start = "http://commons.wikimedia.org/wiki/Special:FilePath/"
commons_url = "https://www.wikidata.org/w/api.php"
page_size = 50
def commons_uri_to_filename(uri):
def commons_uri_to_filename(uri: str) -> str:
"""Given the URI for a file on Commons, return the filename of the file."""
return urllib.parse.unquote(utils.drop_start(uri, commons_start))
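A small usage sketch for commons_uri_to_filename(), assuming the matcher package and its dependencies are importable; the example URI is illustrative:

from matcher import commons

uri = "http://commons.wikimedia.org/wiki/Special:FilePath/Tower%20Bridge.jpg"
# drop_start() strips the Special:FilePath prefix, unquote() decodes the %20.
assert commons.commons_uri_to_filename(uri) == "Tower Bridge.jpg"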
def api_call(params):
call_params = {
def api_call(params: CallParams) -> requests.Response:
"""Call the Commons API."""
call_params: CallParams = {
"format": "json",
"formatversion": 2,
**params,
@ -21,8 +28,11 @@ def api_call(params):
return requests.get(commons_url, params=call_params, timeout=5)
def image_detail(filenames, thumbheight=None, thumbwidth=None):
params = {
def image_detail(
filenames: list[str], thumbheight: int | None = None, thumbwidth: int | None = None
) -> dict[str, Any]:
"""Detail for multiple images."""
params: CallParams = {
"action": "query",
"prop": "imageinfo",
"iiprop": "url",
@ -32,7 +42,7 @@ def image_detail(filenames, thumbheight=None, thumbwidth=None):
if thumbwidth is not None:
params["iiurlwidth"] = thumbwidth
images = {}
images: dict[str, Any] = {}
for cur in utils.chunk(filenames, page_size):
call_params = params.copy()

matcher/database.py Normal file (39 lines added)

@ -0,0 +1,39 @@
"""Database functions."""
import flask
import sqlalchemy
from sqlalchemy import create_engine, func
from sqlalchemy.engine import reflection
from sqlalchemy.orm import scoped_session, sessionmaker
session: sqlalchemy.orm.scoping.scoped_session = scoped_session(sessionmaker())
def init_db(db_url: str, echo: bool = False) -> None:
"""Initialise database."""
session.configure(bind=get_engine(db_url, echo=echo))
def get_engine(db_url: str, echo: bool = False) -> sqlalchemy.engine.base.Engine:
"""Create an engine object."""
return create_engine(db_url, pool_recycle=3600, echo=echo)
def get_tables() -> list[str]:
"""Get a list of table names."""
tables: list[str] = reflection.Inspector.from_engine(session.bind).get_table_names()
return tables
def init_app(app: flask.app.Flask, echo: bool = False) -> None:
"""Initialise database connection within flask app."""
db_url = app.config["DB_URL"]
session.configure(bind=get_engine(db_url, echo=echo))
@app.teardown_appcontext
def shutdown_session(exception: Exception | None = None) -> None:
session.remove()
def now_utc():
return func.timezone("utc", func.now())
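A sketch of standalone use outside the Flask app, assuming a local PostgreSQL database named "matcher" as configured in web_view.py later in this diff:

from matcher import database, model

database.init_db("postgresql:///matcher")  # same DB_URL as web_view.py
print(database.get_tables())               # table names from the configured database
item = model.Item.query.get(1)             # model.Item is defined in matcher/model.py (not shown in this diff)
print(item)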

matcher/edit.py Normal file (71 lines added)

@ -0,0 +1,71 @@
from flask import g
from . import user_agent_headers, database, osm_oauth, mail
from .model import Changeset
import requests
import html
really_save = True
osm_api_base = "https://api.openstreetmap.org/api/0.6"
def new_changeset(comment):
return f"""
<osm>
<changeset>
<tag k="created_by" v="https://map.osm.wikidata.link/"/>
<tag k="comment" v="{html.escape(comment)}"/>
</changeset>
</osm>"""
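new_changeset() builds the XML payload used when opening an OSM changeset. A quick sketch of calling it; the comment text is arbitrary:

from matcher import edit

# Prints an <osm><changeset> document containing the created_by tag and the
# HTML-escaped comment; create_changeset() later sends it to /changeset/create.
print(edit.new_changeset("add wikidata tag"))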
def osm_request(path, **kwargs):
return osm_oauth.api_put_request(path, **kwargs)
def create_changeset(changeset):
try:
return osm_request("/changeset/create", data=changeset.encode("utf-8"))
except requests.exceptions.HTTPError as r:
print(changeset)
print(r.response.text)
raise
def close_changeset(changeset_id):
return osm_request(f"/changeset/{changeset_id}/close")
def save_element(osm_type, osm_id, element_data):
osm_path = f"/{osm_type}/{osm_id}"
r = osm_request(osm_path, data=element_data)
reply = r.text.strip()
if reply.isdigit():
return r
subject = f"matcher error saving element: {osm_path}"
username = g.user.username
body = f"""
https://www.openstreetmap.org{osm_path}
user: {username}
message user: https://www.openstreetmap.org/message/new/{username}
error:
{reply}
"""
mail.send_mail(subject, body)
def record_changeset(**kwargs):
change = Changeset(created=database.now_utc(), **kwargs)
database.session.add(change)
database.session.commit()
return change
def get_existing(osm_type, osm_id):
url = f"{osm_api_base}/{osm_type}/{osm_id}"
return requests.get(url, headers=user_agent_headers())


@ -1,13 +1,19 @@
"""Send mail to admins when there is an error."""
import logging
from logging.handlers import SMTPHandler
from logging import Formatter
from flask import request
from logging.handlers import SMTPHandler
import flask
PROJECT = "osm-wikidata"
class MatcherSMTPHandler(SMTPHandler):
def getSubject(self, record): # noqa: N802
"""Custom SMTP handler to change subject line."""
def getSubject(self, record: logging.LogRecord) -> str: # noqa: N802
"""Return subject line for error mail."""
return (
f"{PROJECT} error: {record.exc_info[0].__name__}"
if (record.exc_info and record.exc_info[0])
@ -16,12 +22,16 @@ class MatcherSMTPHandler(SMTPHandler):
class RequestFormatter(Formatter):
def format(self, record):
record.request = request
"""Custom request formatter."""
def format(self, record: logging.LogRecord) -> str:
"""Add request to log record."""
record.request = flask.request
return super().format(record)
def setup_error_mail(app):
def setup_error_mail(app: flask.Flask) -> None:
"""Configure logging to catch errors and email them."""
if not app.config.get("ERROR_MAIL"):
return
formatter = RequestFormatter(


@ -1,24 +1,32 @@
from sqlalchemy import Table, Column, Integer, String, Float, MetaData
from sqlalchemy.dialects import postgresql
"""Planet tables."""
from geoalchemy2 import Geometry
from sqlalchemy import Column, Float, Integer, MetaData, String, Table
from sqlalchemy.dialects import postgresql
metadata = MetaData()
point = Table("planet_osm_point", metadata,
point = Table(
"planet_osm_point",
metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),
Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False),
)
line = Table("planet_osm_line", metadata,
line = Table(
"planet_osm_line",
metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),
Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False),
)
polygon = Table("planet_osm_polygon", metadata,
polygon = Table(
"planet_osm_polygon",
metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),


@ -1,113 +1,139 @@
from flask import current_app, request
from itertools import islice
import os.path
"""Utility functions."""
import json
import math
import user_agents
import os.path
import re
import typing
from datetime import date
from itertools import islice
from typing import Any, cast
import flask
import user_agents
from num2words import num2words
metres_per_mile = 1609.344
feet_per_metre = 3.28084
feet_per_mile = 5280
T = typing.TypeVar("T")
def chunk(it, size):
def chunk(it: typing.Iterable[T], size: int) -> typing.Iterator[tuple[T, ...]]:
"""Split an iterable into chunks of the given size."""
it = iter(it)
return iter(lambda: tuple(islice(it, size)), ())
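chunk() is what commons.image_detail() uses to batch filenames into groups of page_size. A tiny usage sketch:

from matcher.utils import chunk

# Split six values into pairs; the final chunk may be shorter than size.
print(list(chunk([1, 2, 3, 4, 5, 6], 2)))  # [(1, 2), (3, 4), (5, 6)]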
def flatten(l):
return [item for sublist in l for item in sublist]
def flatten(top_list: list[list[T]]) -> list[T]:
"""Flatten a list."""
return [item for sub_list in top_list for item in sub_list]
def drop_start(s, start):
def drop_start(s: str, start: str) -> str:
"""Remove a string prefix, raising an error if it is missing."""
assert s.startswith(start)
return s[len(start) :]
def remove_start(s, start):
def remove_start(s: str, start: str) -> str:
"""Remove a string prefix, if present."""
return s[len(start) :] if s.startswith(start) else s
def normalize_url(url):
def normalize_url(url: str) -> str:
"""Standardize URLs to help in comparison."""
for start in "http://", "https://", "www.":
url = remove_start(url, start)
return url.rstrip("/")
def contains_digit(s):
def contains_digit(s: str) -> bool:
"""Check if string contains a digit."""
return any(c.isdigit() for c in s)
def cache_dir():
return current_app.config["CACHE_DIR"]
def cache_dir() -> str:
"""Get cache dir location."""
d: str = flask.current_app.config["CACHE_DIR"]
return d
def cache_filename(filename):
def cache_filename(filename: str) -> str:
"""Get absolute path for cache file."""
return os.path.join(cache_dir(), filename)
def load_from_cache(filename):
def load_from_cache(filename: str) -> Any:
"""Load JSON data from cache."""
return json.load(open(cache_filename(filename)))
def get_radius(default=1000):
arg_radius = request.args.get("radius")
def get_radius(default: int = 1000) -> int | None:
"""Get radius request argument with default."""
arg_radius = flask.request.args.get("radius")
return int(arg_radius) if arg_radius and arg_radius.isdigit() else default
def get_int_arg(name):
if name in request.args and request.args[name].isdigit():
return int(request.args[name])
def get_int_arg(name: str) -> int | None:
"""Get a request arg and convert it to an integer."""
v = flask.request.args.get(name)
return int(v) if v and v.isdigit() else None
def calc_chunk_size(area_in_sq_km, size=22):
def calc_chunk_size(area_in_sq_km: float, size: int = 22) -> int:
"""Work out the size of a chunk."""
side = math.sqrt(area_in_sq_km)
return max(1, math.ceil(side / size))
def file_missing_or_empty(filename):
def file_missing_or_empty(filename: str) -> bool:
"""Check if a file is missing or empty."""
return not os.path.exists(filename) or os.stat(filename).st_size == 0
def is_bot():
""" Is the current request from a web robot? """
ua = request.headers.get("User-Agent")
return ua and user_agents.parse(ua).is_bot
def is_bot() -> bool:
"""Is the current request from a web robot?"""
ua = flask.request.headers.get("User-Agent")
return bool(ua and user_agents.parse(ua).is_bot)
def log_location():
return current_app.config["LOG_DIR"]
def log_location() -> str:
"""Get log location from Flask config."""
return cast(str, flask.current_app.config["LOG_DIR"])
def good_location():
return os.path.join(log_location(), "complete")
def capfirst(value):
""" Uppercase first letter of string, leave rest as is. """
def capfirst(value: str) -> str:
"""Uppercase first letter of string, leave rest as is."""
return value[0].upper() + value[1:] if value else value
def any_upper(value):
def any_upper(value: str) -> bool:
"""Check if string contains any uppercase characters."""
return any(c.isupper() for c in value)
def find_log_file(place):
start = f"{place.place_id}_"
for f in os.scandir(good_location()):
if f.name.startswith(start):
return f.path
def get_free_space(config):
def get_free_space(config: flask.config.Config) -> int:
"""Return the amount of available free space."""
s = os.statvfs(config["FREE_SPACE_PATH"])
return s.f_bsize * s.f_bavail
def display_distance(units, dist):
def metric_display_distance(units: str, dist: float) -> str | None:
"""Convert distance from metres to the specified metric units."""
if units == "km_and_metres":
units = "km" if dist > 500 else "metres"
if units == "metres":
return f"{dist:,.0f} m"
if units == "km":
return f"{dist / 1000:,.2f} km"
return None
def display_distance(units: str, dist: float) -> str | None:
"""Convert distance from metres to the specified units."""
if units in ("miles_and_feet", "miles_and_yards"):
total_feet = dist * feet_per_metre
miles = total_feet / feet_per_mile
@ -124,20 +150,15 @@ def display_distance(units, dist):
miles = dist / metres_per_mile
return f"{miles:,.2f} miles" if miles > 0.5 else f"{dist:,.0f} metres"
if units == "km_and_metres":
units = "km" if dist > 500 else "metres"
if units == "metres":
return f"{dist:,.0f} m"
if units == "km":
return f"{dist / 1000:,.2f} km"
return metric_display_distance(units, dist)
re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I)
re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I)
re_number = re.compile(r"^(?:No\.?|Number)? ?(\d+)\b")
def is_in_range(address_range: str, address: str) -> bool:
"""Check if an address is within a range."""
re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I)
re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I)
re_number = re.compile(r"^(?:No\.?|Number)? ?(\d+)\b")
def is_in_range(address_range, address):
m_number = re_number.match(address)
if not m_number:
return False
@ -159,20 +180,27 @@ def is_in_range(address_range, address):
return False
def format_wikibase_time(v):
p = v["precision"]
class WikibaseTime(typing.TypedDict):
"""Wikibase Time dict."""
precision: int
time: str
def format_wikibase_time(v: WikibaseTime) -> str | None:
"""Format a Wikibase time value into a human-readable string."""
t = v["time"]
# TODO: handle dates with century precision (7)
# example: https://www.wikidata.org/wiki/Q108266998
if p == 11:
return date.fromisoformat(t[1:11]).strftime("%-d %B %Y")
if p == 10:
return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y")
if p == 9:
return t[1:5]
if p == 7:
century = ((int(t[:5]) - 1) // 100) + 1
end = " BC" if century < 0 else ""
return num2words(abs(century), to="ordinal_num") + " century" + end
match v["precision"]:
case 11: # year, month and day
return date.fromisoformat(t[1:11]).strftime("%-d %B %Y")
case 10: # year and month
return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y")
case 9: # year
return t[1:5]
case 7: # century
century = ((int(t[:5]) - 1) // 100) + 1
ordinal_num: str = num2words(abs(century), to="ordinal_num")
return f"{ordinal_num} century{' BC' if century < 0 else ''}"
case _: # not handled
return None
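A usage sketch for format_wikibase_time() with a day-precision value (precision 11); the date is arbitrary and the %-d directive assumes a glibc-style strftime:

from matcher import utils

v: utils.WikibaseTime = {"precision": 11, "time": "+2023-05-14T00:00:00Z"}
print(utils.format_wikibase_time(v))  # -> 14 May 2023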


@ -6,12 +6,12 @@
"test": "echo \"This template does not include a test runner by default.\" && exit 1"
},
"dependencies": {
"bootstrap": "^5.1.3",
"@popperjs/core": "^2.11.0",
"fork-awesome": "^1.2.0",
"leaflet": "^1.7.1",
"leaflet": "^1.8.0",
"leaflet-extra-markers": "^1.2.1",
"redaxios": "^0.4.1",
"vue": "^3.1.15"
"vue": "^3.2.26"
},
"devDependencies": {
"@snowpack/plugin-dotenv": "^2.1.0",


@ -1,9 +1,7 @@
import pkg from './package.json';
/** @type {import("snowpack").SnowpackUserConfig } */
export default {
mount: {
// public: {url: '/', static: true},
public: {url: '/', static: true},
frontend: {url: '/dist'},
},
plugins: [


@ -14,7 +14,7 @@
</a></div>
<div class="my-2">
<form method="POST">
<form method="GET" action="{{ url_for("refresh_item", item_id=item.item_id) }}">
<input type="hidden" name="action" value="refresh">
<input type="submit" value="refresh item" class="btn btn-sm btn-primary">
</form>


@ -4,10 +4,13 @@
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Wikidata items linked to OSM</title>
<!--
<link rel="stylesheet" href="https://unpkg.com/bootstrap@5.1.3/dist/css/bootstrap.min.css">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.7.1/dist/leaflet.css">
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.8.0/dist/leaflet.css">
<link rel="stylesheet" href="https://unpkg.com/fork-awesome@1.2.0/css/fork-awesome.min.css">
<link rel="stylesheet" href="https://unpkg.com/leaflet-extra-markers@1.2.1/dist/css/leaflet.extra-markers.min.css">
-->
<link rel="stylesheet" href="{{ url_for("static", filename="frontend/style.css") }}">
</head>
@ -16,7 +19,7 @@
{% block nav %}{{ navbar() }}{% endblock %}
<div id="app"></div>
<script src="https://unpkg.com/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" crossorigin="anonymous"></script>
<!-- <script src="https://unpkg.com/bootstrap@5.1.3/dist/js/bootstrap.bundle.min.js" crossorigin="anonymous"></script> -->
<script type="module">
import main from {{ url_for('static', filename='frontend/owl.es.js') | tojson }};


@ -1,36 +1,61 @@
#!/usr/bin/python3.9
#!/usr/bin/python3
from flask import (Flask, render_template, request, jsonify, redirect, url_for, g,
flash, session, Response, stream_with_context, abort)
import json
import re
from time import sleep, time
import flask_login
import GeoIP
import maxminddb
import requests
import sqlalchemy
from flask import (
Flask,
Response,
abort,
flash,
g,
jsonify,
redirect,
render_template,
request,
session,
stream_with_context,
url_for,
)
from lxml import etree
from requests_oauthlib import OAuth1Session
from sqlalchemy import func
from sqlalchemy.sql.expression import update
from matcher import (nominatim, model, database, commons, wikidata, wikidata_api,
osm_oauth, edit, mail, api, error_mail)
from werkzeug.debug.tbtools import get_current_traceback
from matcher import (
api,
commons,
database,
edit,
error_mail,
mail,
model,
nominatim,
osm_oauth,
wikidata,
wikidata_api,
)
from matcher.data import property_map
from time import time, sleep
from requests_oauthlib import OAuth1Session
from lxml import etree
import werkzeug.exceptions
import inspect
import flask_login
import requests
import json
import GeoIP
import re
import maxminddb
# from werkzeug.debug.tbtools import get_current_traceback
srid = 4326
re_point = re.compile(r'^POINT\((.+) (.+)\)$')
re_point = re.compile(r"^POINT\((.+) (.+)\)$")
app = Flask(__name__)
app.debug = True
app.config.from_object('config.default')
app.config.from_object("config.default")
error_mail.setup_error_mail(app)
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login_route'
osm_api_base = 'https://api.openstreetmap.org/api/0.6'
login_manager.login_view = "login_route"
osm_api_base = "https://api.openstreetmap.org/api/0.6"
maxminddb_reader = maxminddb.open_database(app.config["GEOLITE2"])
@ -38,7 +63,7 @@ DB_URL = "postgresql:///matcher"
database.init_db(DB_URL)
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
re_qid = re.compile(r'^Q\d+$')
re_qid = re.compile(r"^Q\d+$")
@app.teardown_appcontext
@ -50,37 +75,40 @@ def shutdown_session(exception=None):
def global_user():
g.user = flask_login.current_user._get_current_object()
def dict_repr_values(d):
return {key: repr(value) for key, value in d.items()}
@app.errorhandler(werkzeug.exceptions.InternalServerError)
def exception_handler(e):
tb = get_current_traceback()
last_frame = next(frame for frame in reversed(tb.frames) if not frame.is_library)
last_frame_args = inspect.getargs(last_frame.code)
if request.path.startswith("/api/"):
return cors_jsonify({
"success": False,
"error": tb.exception,
"traceback": tb.plaintext,
"locals": dict_repr_values(last_frame.locals),
"last_function": {
"name": tb.frames[-1].function_name,
"args": repr(last_frame_args),
},
}), 500
# @app.errorhandler(werkzeug.exceptions.InternalServerError)
# def exception_handler(e):
# tb = get_current_traceback()
# last_frame = next(frame for frame in reversed(tb.frames) if not frame.is_library)
# last_frame_args = inspect.getargs(last_frame.code)
# if request.path.startswith("/api/"):
# return cors_jsonify({
# "success": False,
# "error": tb.exception,
# "traceback": tb.plaintext,
# "locals": dict_repr_values(last_frame.locals),
# "last_function": {
# "name": tb.frames[-1].function_name,
# "args": repr(last_frame_args),
# },
# }), 500
#
# return render_template('show_error.html',
# tb=tb,
# last_frame=last_frame,
# last_frame_args=last_frame_args), 500
return render_template('show_error.html',
tb=tb,
last_frame=last_frame,
last_frame_args=last_frame_args), 500
def cors_jsonify(*args, **kwargs):
response = jsonify(*args, **kwargs)
response.headers["Access-Control-Allow-Origin"] = "*"
return response
def check_for_tagged_qids(qids):
tagged = set()
for qid in qids:
@ -107,12 +135,12 @@ def check_for_tagged_qid(qid):
def geoip_user_record():
gi = GeoIP.open(app.config["GEOIP_DATA"], GeoIP.GEOIP_STANDARD)
remote_ip = request.get('ip', request.remote_addr)
remote_ip = request.args.get("ip", request.remote_addr)
return gi.record_by_addr(remote_ip)
def get_user_location():
remote_ip = request.args.get('ip', request.remote_addr)
remote_ip = request.args.get("ip", request.remote_addr)
maxmind = maxminddb_reader.get(remote_ip)
return maxmind.get("location") if maxmind else None
@ -153,13 +181,15 @@ def isa_page(item_id):
subclass_list = []
for s in item.get_claim(subclass_property):
subclass = api.get_item(s["numeric-id"])
subclass_list.append({
"qid": s["id"],
"item_id": s["numeric-id"],
"label": subclass.label(),
"description": subclass.description(),
"isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
})
subclass_list.append(
{
"qid": s["id"],
"item_id": s["numeric-id"],
"label": subclass.label(),
"description": subclass.description(),
"isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
}
)
tags = api.get_tags_for_isa_item(item)
@ -253,14 +283,16 @@ def map_start_page():
lat, lon = 42.2917, -85.5872
radius = 5
return redirect(url_for(
'map_location',
lat=f'{lat:.5f}',
lon=f'{lon:.5f}',
zoom=16,
radius=radius,
ip=request.args.get('ip'),
))
return redirect(
url_for(
"map_location",
lat=f"{lat:.5f}",
lon=f"{lon:.5f}",
zoom=16,
radius=radius,
ip=request.args.get("ip"),
)
)
@app.route("/documentation")
@ -269,16 +301,14 @@ def documentation_page():
username = user.username if user.is_authenticated else None
return render_template(
"documentation.html",
active_tab="documentation",
username=username
"documentation.html", active_tab="documentation", username=username
)
@app.route("/search")
def search_page():
loc = get_user_location()
q = request.args.get('q')
q = request.args.get("q")
user = flask_login.current_user
username = user.username if user.is_authenticated else None
@ -295,6 +325,7 @@ def search_page():
q=q,
)
@app.route("/map/<int:zoom>/<float(signed=True):lat>/<float(signed=True):lon>")
def map_location(zoom, lat, lon):
qid = request.args.get("item")
@ -327,6 +358,37 @@ def map_location(zoom, lat, lon):
item_type_filter=isa_list,
)
@app.route("/item/Q<int:item_id>")
def lookup_item(item_id):
item = api.get_item(item_id)
if not item:
# TODO: show nicer page for Wikidata item not found
return abort(404)
try:
lat, lon = item.locations[0].get_lat_lon()
except IndexError:
# TODO: show nicer page for Wikidata item without coordinates
return abort(404)
return render_template(
"map.html",
active_tab="map",
zoom=16,
lat=lat,
lon=lon,
username=get_username(),
mode="map",
q=None,
qid=item.qid,
item_type_filter=[],
)
url = url_for("map_location", zoom=16, lat=lat, lon=lon, item=item.qid)
return redirect(url)
@app.route("/item/Q<int:item_id>")
def lookup_item(item_id):
item = api.get_item(item_id)
@ -396,10 +458,12 @@ def old_search_page():
def read_bounds_param():
return [float(i) for i in request.args["bounds"].split(",")]
def read_isa_filter_param():
isa_param = request.args.get('isa')
isa_param = request.args.get("isa")
if isa_param:
return set(qid.strip() for qid in isa_param.upper().split(','))
return set(qid.strip() for qid in isa_param.upper().split(","))
@app.route("/api/1/location")
def show_user_location():
@ -415,6 +479,7 @@ def api_wikidata_items_count():
t1 = time() - t0
return cors_jsonify(success=True, count=count, duration=t1)
@app.route("/api/1/isa_search")
def api_isa_search():
t0 = time()
@ -450,6 +515,7 @@ def api_wikidata_items():
t1 = time() - t0
return cors_jsonify(success=True, duration=t1, **ret)
@app.route("/api/1/place/<osm_type>/<int:osm_id>")
def api_place_items(osm_type, osm_id):
t0 = time()
@ -476,9 +542,7 @@ def api_get_item(item_id):
detail = api.item_detail(item)
t1 = time() - t0
return cors_jsonify(success=True,
duration=t1,
**detail)
return cors_jsonify(success=True, duration=t1, **detail)
@app.route("/api/1/item/Q<int:item_id>/tags")
@ -489,25 +553,23 @@ def api_get_item_tags(item_id):
osm_list = sorted(tags.keys())
t1 = time() - t0
return cors_jsonify(success=True,
qid=item.qid,
tag_or_key_list=osm_list,
tag_src=tags,
duration=t1)
return cors_jsonify(
success=True, qid=item.qid, tag_or_key_list=osm_list, tag_src=tags, duration=t1
)
def expand_street_name(from_names):
ret = set(from_names)
for name in from_names:
if any(name.startswith(st) for st in ('St ', 'St. ')):
first_space = name.find(' ')
if any(name.startswith(st) for st in ("St ", "St. ")):
first_space = name.find(" ")
ret.add("Saint" + name[first_space:])
if ', ' in name:
if ", " in name:
for n in set(ret):
comma = n.find(", ")
ret.add(name[:comma])
elif '/' in name:
elif "/" in name:
for n in set(ret):
ret.update(part.strip() for part in n.split("/"))
@ -520,14 +582,12 @@ def api_find_osm_candidates(item_id):
t0 = time()
item = model.Item.query.get(item_id)
if not item:
return cors_jsonify(success=True,
qid=f'Q{item_id}',
error="item doesn't exist")
return cors_jsonify(success=True, qid=f"Q{item_id}", error="item doesn't exist")
if not item.locations:
return cors_jsonify(success=True,
qid=f'Q{item_id}',
error="item has no coordinates")
return cors_jsonify(
success=True, qid=f"Q{item_id}", error="item has no coordinates"
)
label = item.label()
item_is_street = item.is_street()
@ -545,17 +605,15 @@ def api_find_osm_candidates(item_id):
max_distance = 1_000
limit = 40
names = None
nearby = api.find_osm_candidates(item,
limit=limit,
max_distance=max_distance,
names=names)
nearby = api.find_osm_candidates(
item, limit=limit, max_distance=max_distance, names=names
)
if (item_is_street or item_is_watercourse) and not nearby:
# nearby = [osm for osm in nearby if street_name_match(label, osm)]
# try again without name filter
nearby = api.find_osm_candidates(item, limit=100,
max_distance=1_000)
nearby = api.find_osm_candidates(item, limit=100, max_distance=1_000)
t1 = time() - t0
return cors_jsonify(
@ -563,7 +621,7 @@ def api_find_osm_candidates(item_id):
qid=item.qid,
nearby=nearby,
duration=t1,
max_distance=max_distance
max_distance=max_distance,
)
@ -572,10 +630,12 @@ def api_missing_wikidata_items():
t0 = time()
qids_arg = request.args.get("qids")
if not qids_arg:
return cors_jsonify(success=False,
error="required parameter 'qids' is missing",
items=[],
isa_count=[])
return cors_jsonify(
success=False,
error="required parameter 'qids' is missing",
items=[],
isa_count=[],
)
qids = []
for qid in qids_arg.upper().split(","):
@ -591,10 +651,7 @@ def api_missing_wikidata_items():
ret = api.missing_wikidata_items(qids, lat, lon)
t1 = time() - t0
return cors_jsonify(
success=True,
duration=t1,
**ret)
return cors_jsonify(success=True, duration=t1, **ret)
@app.route("/api/1/search")
@ -605,24 +662,28 @@ def api_search():
hit["name"] = nominatim.get_hit_name(hit)
hit["label"] = nominatim.get_hit_label(hit)
hit["address"] = list(hit["address"].items())
hit["identifier"] = f"{hit['osm_type']}/{hit['osm_id']}"
if "osm_type" in hit and "osm_id" in hit:
hit["identifier"] = f"{hit['osm_type']}/{hit['osm_id']}"
else:
print(hit)
print(q)
return cors_jsonify(success=True, hits=hits)
@app.route("/api/1/polygon/<osm_type>/<int:osm_id>")
def api_polygon(osm_type, osm_id):
obj = model.Polygon.get_osm(osm_type, osm_id)
return cors_jsonify(successful=True,
osm_type=osm_type,
osm_id=osm_id,
geojson=obj.geojson())
return cors_jsonify(
successful=True, osm_type=osm_type, osm_id=osm_id, geojson=obj.geojson()
)
@app.route("/refresh/Q<int:item_id>")
def refresh_item(item_id):
assert not model.Item.query.get(item_id)
qid = f'Q{item_id}'
qid = f"Q{item_id}"
entity = wikidata_api.get_entity(qid)
entity_qid = entity.pop("id")
assert qid == entity_qid
@ -637,100 +698,110 @@ def refresh_item(item_id):
database.session.add(item)
database.session.commit()
return 'done'
return "done"
@app.route('/login')
@app.route("/login")
def login_openstreetmap():
return redirect(url_for('start_oauth',
next=request.args.get('next')))
return redirect(url_for("start_oauth", next=request.args.get("next")))
@app.route('/logout')
@app.route("/logout")
def logout():
next_url = request.args.get('next') or url_for('map_start_page')
next_url = request.args.get("next") or url_for("map_start_page")
flask_login.logout_user()
flash('you are logged out')
flash("you are logged out")
return redirect(next_url)
@app.route('/done/')
@app.route("/done/")
def done():
flash('login successful')
return redirect(url_for('map_start_page'))
flash("login successful")
return redirect(url_for("map_start_page"))
@app.route('/oauth/start')
@app.route("/oauth/start")
def start_oauth():
next_page = request.args.get('next')
next_page = request.args.get("next")
if next_page:
session['next'] = next_page
session["next"] = next_page
client_key = app.config['CLIENT_KEY']
client_secret = app.config['CLIENT_SECRET']
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
request_token_url = 'https://www.openstreetmap.org/oauth/request_token'
request_token_url = "https://www.openstreetmap.org/oauth/request_token"
callback = url_for('oauth_callback', _external=True)
callback = url_for("oauth_callback", _external=True)
oauth = OAuth1Session(client_key,
client_secret=client_secret,
callback_uri=callback)
oauth = OAuth1Session(
client_key, client_secret=client_secret, callback_uri=callback
)
fetch_response = oauth.fetch_request_token(request_token_url)
session['owner_key'] = fetch_response.get('oauth_token')
session['owner_secret'] = fetch_response.get('oauth_token_secret')
session["owner_key"] = fetch_response.get("oauth_token")
session["owner_secret"] = fetch_response.get("oauth_token_secret")
base_authorization_url = 'https://www.openstreetmap.org/oauth/authorize'
authorization_url = oauth.authorization_url(base_authorization_url,
oauth_consumer_key=client_key)
base_authorization_url = "https://www.openstreetmap.org/oauth/authorize"
authorization_url = oauth.authorization_url(
base_authorization_url, oauth_consumer_key=client_key
)
return redirect(authorization_url)
@login_manager.user_loader
def load_user(user_id):
return model.User.query.get(user_id)
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
client_key = app.config['CLIENT_KEY']
client_secret = app.config['CLIENT_SECRET']
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(client_key,
client_secret=client_secret,
resource_owner_key=session['owner_key'],
resource_owner_secret=session['owner_secret'])
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
resource_owner_key=session["owner_key"],
resource_owner_secret=session["owner_secret"],
)
oauth_response = oauth.parse_authorization_response(request.url)
verifier = oauth_response.get('oauth_verifier')
access_token_url = 'https://www.openstreetmap.org/oauth/access_token'
oauth = OAuth1Session(client_key,
client_secret=client_secret,
resource_owner_key=session['owner_key'],
resource_owner_secret=session['owner_secret'],
verifier=verifier)
verifier = oauth_response.get("oauth_verifier")
access_token_url = "https://www.openstreetmap.org/oauth/access_token"
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
resource_owner_key=session["owner_key"],
resource_owner_secret=session["owner_secret"],
verifier=verifier,
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
session['owner_key'] = oauth_tokens.get('oauth_token')
session['owner_secret'] = oauth_tokens.get('oauth_token_secret')
session["owner_key"] = oauth_tokens.get("oauth_token")
session["owner_secret"] = oauth_tokens.get("oauth_token_secret")
r = oauth.get(osm_api_base + '/user/details')
r = oauth.get(osm_api_base + "/user/details")
info = osm_oauth.parse_userinfo_call(r.content)
user = model.User.query.filter_by(osm_id=info['id']).one_or_none()
user = model.User.query.filter_by(osm_id=info["id"]).one_or_none()
if user:
user.osm_oauth_token = oauth_tokens.get('oauth_token')
user.osm_oauth_token_secret = oauth_tokens.get('oauth_token_secret')
user.osm_oauth_token = oauth_tokens.get("oauth_token")
user.osm_oauth_token_secret = oauth_tokens.get("oauth_token_secret")
else:
user = model.User(
username=info['username'],
description=info['description'],
img=info['img'],
osm_id=info['id'],
osm_account_created=info['account_created'],
username=info["username"],
description=info["description"],
img=info["img"],
osm_id=info["id"],
osm_account_created=info["account_created"],
mock_upload=False,
)
database.session.add(user)
database.session.commit()
flask_login.login_user(user)
next_page = session.get('next') or url_for('map_start_page')
next_page = session.get("next") or url_for("map_start_page")
return redirect(next_page)
@ -738,14 +809,13 @@ def validate_edit_list(edits):
for e in edits:
assert model.Item.get_by_qid(e["qid"])
assert e["op"] in {"add", "remove", "change"}
osm_type, _, osm_id = e['osm'].partition('/')
osm_type, _, osm_id = e["osm"].partition("/")
osm_id = int(osm_id)
if osm_type == 'node':
if osm_type == "node":
assert model.Point.query.get(osm_id)
else:
src_id = osm_id if osm_type == "way" else -osm_id
assert (model.Line.query.get(src_id)
or model.Polygon.query.get(src_id))
assert model.Line.query.get(src_id) or model.Polygon.query.get(src_id)
@app.route("/api/1/edit", methods=["POST"])
@ -754,9 +824,9 @@ def api_new_edit_session():
incoming = request.json
validate_edit_list(incoming["edit_list"])
es = model.EditSession(user=user,
edit_list=incoming['edit_list'],
comment=incoming['comment'])
es = model.EditSession(
user=user, edit_list=incoming["edit_list"], comment=incoming["comment"]
)
database.session.add(es)
database.session.commit()
@ -764,13 +834,14 @@ def api_new_edit_session():
return cors_jsonify(success=True, session_id=session_id)
@app.route("/api/1/edit/<int:session_id>", methods=["POST"])
def api_edit_session(session_id):
es = model.EditSession.query.get(session_id)
assert flask_login.current_user.id == es.user_id
incoming = request.json
for f in 'edit_list', 'comment':
for f in "edit_list", "comment":
if f not in incoming:
continue
setattr(es, f, incoming[f])
@ -778,21 +849,24 @@ def api_edit_session(session_id):
return cors_jsonify(success=True, session_id=session_id)
class VersionMismatch(Exception):
pass
def osm_object(osm_type, osm_id):
if osm_type == "node":
return model.Point.query.get(osm_id)
src_id = int(osm_id) * {'way': 1, 'relation': -1}[osm_type]
src_id = int(osm_id) * {"way": 1, "relation": -1}[osm_type]
for cls in model.Line, model.Polygon:
obj = cls.query.get(src_id)
if obj:
return obj
def process_edit(changeset_id, e):
osm_type, _, osm_id = e['osm'].partition('/')
osm_type, _, osm_id = e["osm"].partition("/")
qid = e["qid"]
item_id = qid[1:]
@ -845,9 +919,7 @@ def process_edit(changeset_id, e):
cls = type(osm)
database.session.execute(
update(cls).
where(cls.src_id == osm.src_id).
values(tags=new_tags)
update(cls).where(cls.src_id == osm.src_id).values(tags=new_tags)
)
db_edit = model.ChangesetEdit(
@ -861,6 +933,7 @@ def process_edit(changeset_id, e):
return "saved"
@app.route("/api/1/save/<int:session_id>")
def api_save_changeset(session_id):
assert g.user.is_authenticated
@ -870,6 +943,18 @@ def api_save_changeset(session_id):
return api_call(session_id)
@app.route("/sql", methods=["GET", "POST"])
def run_sql():
if request.method != "POST":
return render_template("run_sql.html")
sql = request.form["sql"]
conn = database.session.connection()
result = conn.execute(sqlalchemy.text(sql))
return render_template("run_sql.html", result=result)
def api_real_save_changeset(session_id):
es = model.EditSession.query.get(session_id)
@ -920,7 +1005,8 @@ def api_real_save_changeset(session_id):
edit.close_changeset(changeset_id)
yield send("done")
return Response(stream_with_context(stream(g.user)), mimetype='text/event-stream')
return Response(stream_with_context(stream(g.user)), mimetype="text/event-stream")
def api_mock_save_changeset(session_id):
es = model.EditSession.query.get(session_id)
@ -930,7 +1016,7 @@ def api_mock_save_changeset(session_id):
return f"data: {json.dumps(data)}\n\n"
def stream(user):
print('stream')
print("stream")
changeset_id = database.session.query(func.max(model.Changeset.id) + 1).scalar()
sleep(1)
yield send("open", id=changeset_id)
@ -938,12 +1024,12 @@ def api_mock_save_changeset(session_id):
update_count = 0
print('record_changeset', changeset_id)
print("record_changeset", changeset_id)
edit.record_changeset(
id=changeset_id, user=user, comment=es.comment, update_count=update_count
)
print('edits')
print("edits")
for num, e in enumerate(es.edit_list):
print(num, e)
@ -952,12 +1038,12 @@ def api_mock_save_changeset(session_id):
yield send("saved", edit=e, num=num)
sleep(1)
print('closing')
print("closing")
yield send("closing")
sleep(1)
yield send("done")
return Response(stream(g.user), mimetype='text/event-stream')
return Response(stream(g.user), mimetype="text/event-stream")
if __name__ == "__main__":