Switch to SQLAlchemy Core for OSM objects

osm2pgsql creates tables without primary keys. Some OSM objects are
split into multiple table rows to help with PostGIS index performance.

Adjust the code to be aware of this.

Also add name matching for streets.
This commit is contained in:
Edward Betts 2021-07-22 14:47:38 +02:00
parent 8b7ab45c73
commit 867e77ab7b
3 changed files with 292 additions and 135 deletions

View file

@ -1,5 +1,10 @@
from sqlalchemy import func, or_, and_ from sqlalchemy import func, or_, and_, text
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
from sqlalchemy.sql import select
from sqlalchemy.sql.expression import literal, union, cast, column
from sqlalchemy.types import Float
from sqlalchemy.dialects import postgresql
from matcher.planet import point, line, polygon
from matcher import model, database, wikidata_api, wikidata from matcher import model, database, wikidata_api, wikidata
from matcher.data import extra_keys from matcher.data import extra_keys
from collections import Counter, defaultdict from collections import Counter, defaultdict
@ -41,37 +46,61 @@ def is_street_number_first(lat, lon):
def make_envelope(bounds): def make_envelope(bounds):
return func.ST_MakeEnvelope(*bounds, srid) return func.ST_MakeEnvelope(*bounds, srid)
def get_bbox_centroid(bbox): def get_bbox_centroid(bbox):
bbox = make_envelope(bbox) bbox = make_envelope(bbox)
centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar() centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar()
return reversed(re_point.match(centroid).groups()) return reversed(re_point.match(centroid).groups())
def make_envelope_around_point(lat, lon, distance):
conn = database.session.connection()
p = func.ST_MakePoint(lon, lat)
s = select([
func.ST_AsText(func.ST_Project(p, distance, func.radians(0))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(90))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(180))),
func.ST_AsText(func.ST_Project(p, distance, func.radians(270))),
])
row = conn.execute(s).fetchone()
coords = [[float(v) for v in re_point.match(i).groups()] for i in row]
north = coords[0][1]
east = coords[1][0]
south = coords[2][1]
west = coords[3][0]
return func.ST_MakeEnvelope(west, south, east, north, srid)
def drop_way_area(tags): def drop_way_area(tags):
if "way_area" in tags: if "way_area" in tags:
del tags["way_area"] del tags["way_area"]
return tags return tags
def get_part_of(thing, bbox): def get_part_of(table_name, src_id, bbox):
db_bbox = make_envelope(bbox) table_map = {'point': point, 'line': line, 'polygon': polygon}
ewkt = thing.as_EWKT table_alias = table_map[table_name].alias()
q = model.Polygon.query.filter(
func.ST_Intersects(db_bbox, model.Polygon.way), s = (select([polygon.c.osm_id,
func.ST_Covers(model.Polygon.way, func.ST_GeomFromEWKT(ewkt)), polygon.c.tags,
func.ST_Area(func.ST_Collect(polygon.c.way))]).
where(and_(func.ST_Intersects(bbox, polygon.c.way),
func.ST_Covers(polygon.c.way, table_alias.c.way),
table_alias.c.osm_id == src_id,
polygon.c.tags.has_key("name"),
or_( or_(
model.Polygon.tags.has_key("landuse"), polygon.c.tags.has_key("landuse"),
model.Polygon.tags.has_key("amenity"), polygon.c.tags.has_key("amenity"),
), ))).
model.Polygon.tags.has_key("name"), group_by(polygon.c.osm_id, polygon.c.tags))
)
conn = database.session.connection()
return [{ return [{
"type": polygon.type, "type": "way" if osm_id > 0 else "relation",
"id": polygon.id, "id": abs(osm_id),
"tags": drop_way_area(polygon.tags), "tags": tags,
"area": polygon.area, "area": area,
} for polygon in q] } for osm_id, tags, area in conn.execute(s)]
def get_and_save_item(qid): def get_and_save_item(qid):
entity = wikidata_api.get_entity(qid) entity = wikidata_api.get_entity(qid)
@ -127,34 +156,51 @@ def get_items_in_bbox(bbox):
def get_osm_with_wikidata_tag(bbox): def get_osm_with_wikidata_tag(bbox):
db_bbox = make_envelope(bbox) bbox_str = ','.join(str(v) for v in bbox)
# easier than building this query with SQLAlchemy
sql = f'''
SELECT tbl, osm_id, tags, ARRAY[ST_Y(centroid), ST_X(centroid)], geojson
FROM (
SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson
FROM planet_osm_point
WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
UNION
SELECT 'line' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson
FROM planet_osm_line
WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
GROUP BY osm_id, tags
UNION
SELECT 'polygon' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson
FROM planet_osm_polygon
WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
GROUP BY osm_id, tags
HAVING st_area(st_collect(way)) < 20 * st_area(ST_MakeEnvelope({bbox_str}, {srid}))
) as anon
WHERE tags ? 'wikidata'
'''
conn = database.session.connection()
result = conn.execute(text(sql))
tagged = [] tagged = []
for tbl, osm_id, tags, centroid, geojson in result:
if tbl == 'point':
osm_type = "node"
else:
osm_type = "way" if osm_id > 0 else "relation"
osm_id = abs(osm_id)
seen = set() name = tags.get("name") or tags.get("addr:housename") or "[no label]"
for cls in (model.Point, model.Polygon, model.Line):
q = cls.query.filter(
cls.tags.has_key("wikidata"),
func.ST_Intersects(db_bbox, cls.way),
func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
)
for osm in q:
if osm.identifier in seen:
continue
seen.add(osm.identifier)
name = osm.name or osm.tags.get("addr:housename") or "[no label]"
tagged.append( tagged.append({
{ "identifier": f"{osm_type}/{osm_id}",
"identifier": osm.identifier, "id": osm_id,
"id": osm.id, "type": osm_type,
"type": osm.type, "geojson": json.loads(geojson),
"geojson": osm.geojson(), "centroid": centroid,
"centroid": list(osm.get_centroid()),
"name": name, "name": name,
"wikidata": osm.tags["wikidata"], "wikidata": tags["wikidata"],
} })
)
return tagged return tagged
@ -250,73 +296,18 @@ def wikidata_isa_counts(bounds):
return isa_count return isa_count
def get_tag_filter(cls, tag_list): def get_tag_filter(tags, tag_list):
tag_filter = [] tag_filter = []
for tag_or_key in tag_list: for tag_or_key in tag_list:
if tag_or_key.startswith("Key:"): if tag_or_key.startswith("Key:"):
tag_filter.append(and_(cls.tags.has_key(tag_or_key[4:]), tag_filter.append(and_(tags.has_key(tag_or_key[4:]),
cls.tags[tag_or_key[4:]] != 'no')) tags[tag_or_key[4:]] != 'no'))
if tag_or_key.startswith("Tag:"): if tag_or_key.startswith("Tag:"):
k, _, v = tag_or_key.partition("=") k, _, v = tag_or_key.partition("=")
tag_filter.append(cls.tags[k[4:]] == v) tag_filter.append(tags[k[4:]] == v)
return tag_filter return tag_filter
def get_nearby(bbox, item, limit=60, max_distance=400):
db_bbox = make_envelope(bbox)
osm_objects = {}
distances = {}
tag_list = get_item_tags(item)
if not tag_list:
return []
item_is_street = item.is_street()
for loc in item.locations:
lat, lon = loc.get_lat_lon()
point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)
for cls in model.Point, model.Line, model.Polygon:
if item_is_street and cls == model.Point:
continue
tag_filter = get_tag_filter(cls, tag_list)
dist = func.ST_DistanceSphere(point, cls.way)
q = (cls.query.add_columns(dist.label('distance'))
.filter(
func.ST_Intersects(db_bbox, cls.way),
func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
or_(*tag_filter))
.order_by(point.distance_centroid(cls.way)))
if item_is_street:
q = q.filter(cls.tags.has_key("name"),
cls.tags["highway"] != 'bus_stop')
if "Key:amenity" in tag_list:
q = q.filter(cls.tags["amenity"] != "bicycle_parking",
cls.tags["amenity"] != "bicycle_repair_station",
cls.tags["amenity"] != "atm",
cls.tags["amenity"] != "recycling")
q = q.limit(limit)
# print(q.statement.compile(compile_kwargs={"literal_binds": True}))
for i, dist in q:
if dist > max_distance:
continue
osm_objects.setdefault(i.identifier, i)
if i.identifier not in distances or dist < distances[i.identifier]:
distances[i.identifier] = dist
nearby = [(osm_objects[identifier], dist)
for identifier, dist
in sorted(distances.items(), key=lambda i:i[1])]
return nearby[:limit]
def get_preset_translations(): def get_preset_translations():
app = current_app app = current_app
@ -338,15 +329,13 @@ def get_preset_translations():
return {} return {}
def get_presets_from_tags(osm): def get_presets_from_tags(ending, tags):
translations = get_preset_translations() translations = get_preset_translations()
found = [] found = []
endings = {model.Point: "point", model.Line: "line", model.Polygon: "area"}
ending = endings[type(osm)]
for k, v in osm.tags.items(): for k, v in tags.items():
if k == 'amenity' and v == 'clock' and osm.tags.get('display') == 'sundial': if k == 'amenity' and v == 'clock' and tags.get('display') == 'sundial':
tag_or_key = f"Tag:{k}={v}" tag_or_key = f"Tag:{k}={v}"
found.append({"tag_or_key": tag_or_key, "name": "Sundial"}) found.append({"tag_or_key": tag_or_key, "name": "Sundial"})
continue continue
@ -416,7 +405,7 @@ def address_from_tags(tags):
return " ".join(tags["addr:" + k] for k in keys) return " ".join(tags["addr:" + k] for k in keys)
def get_address_nodes_within_building(building, bbox): def old_get_address_nodes_within_building(building, bbox):
db_bbox = make_envelope(bbox) db_bbox = make_envelope(bbox)
ewkt = building.as_EWKT ewkt = building.as_EWKT
q = model.Point.query.filter( q = model.Point.query.filter(
@ -428,45 +417,149 @@ def get_address_nodes_within_building(building, bbox):
return [node.tags for node in q] return [node.tags for node in q]
def get_address_nodes_within_building(osm_id, bbox):
q = model.Point.query.filter(
polygon.c.osm_id == osm_id,
func.ST_Intersects(bbox, model.Point.way),
func.ST_Covers(polygon.c.way, model.Point.way),
model.Point.tags.has_key("addr:street"),
model.Point.tags.has_key("addr:housenumber"),
)
def find_osm_candidates(item, bounds): return [node.tags for node in q]
check_is_street_number_first(bounds)
def osm_display_name(tags):
keys = ("bridge:name", "tunnel:name", "lock_name", "name", "addr:housename",
"inscription")
for key in keys:
if key in tags:
return tags[key]
def street_address_in_tags(tags):
return "addr:housenumber" in tags and "addr:street" in tags
def find_osm_candidates(item, limit=60, max_distance=400, names=None):
item_id = item.item_id
item_is_street = item.is_street()
print("is street:", item_is_street)
check_is_street_number_first(item.locations[0].get_lat_lon())
db_bbox = or_(*[make_envelope_around_point(*loc.get_lat_lon(), max_distance)
for loc in item.locations])
null_area = cast(None, Float)
dist = column('dist')
tags = column('tags', postgresql.HSTORE)
tag_list = get_item_tags(item)
s_point = (select([literal('point'), point.c.osm_id, point.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)).label('dist'),
func.ST_AsText(point.c.way),
func.ST_AsGeoJSON(point.c.way),
null_area]).
where(and_(
func.ST_Intersects(db_bbox, point.c.way),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(point.c.tags, tag_list)))).
group_by(point.c.osm_id, point.c.tags, point.c.way))
s_line = (select([literal('line'), line.c.osm_id, line.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)).label('dist'),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(line.c.way)),
null_area]).
where(and_(
func.ST_Intersects(db_bbox, line.c.way),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(line.c.tags, tag_list)))).
group_by(line.c.osm_id, line.c.tags))
s_polygon = (select([literal('polygon'), polygon.c.osm_id, polygon.c.tags.label('tags'),
func.min(func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)).label('dist'),
func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))),
func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)),
func.ST_Area(func.ST_Collect(polygon.c.way))]).
where(and_(
func.ST_Intersects(db_bbox, polygon.c.way),
model.ItemLocation.item_id == item_id,
or_(*get_tag_filter(polygon.c.tags, tag_list)))).
group_by(polygon.c.osm_id, polygon.c.tags).
having(func.ST_Area(func.ST_Collect(polygon.c.way)) < 20 * func.ST_Area(db_bbox)))
tables = ([] if item_is_street else [s_point]) + [s_line, s_polygon]
s = select([union(*tables).alias()]).where(dist < max_distance).order_by(dist)
if names:
s = s.where(tags["name"].in_(names))
if item_is_street:
s = s.where(tags["highway"] != "bus_stop")
if not names:
s = s.where(tags.has_key("name"))
if "Key:amenity" in tag_list:
s = s.where(and_(tags["amenity"] != "bicycle_parking",
tags["amenity"] != "bicycle_repair_station",
tags["amenity"] != "atm",
tags["amenity"] != "recycling"))
if limit:
s = s.limit(limit)
print(s.compile(compile_kwargs={"literal_binds": True}))
conn = database.session.connection()
nearby = [] nearby = []
for osm, dist in get_nearby(bounds, item): for table, src_id, tags, distance, centroid, geojson, area in conn.execute(s):
tags = osm.tags osm_id = src_id
if table == "point":
osm_type = "node"
elif osm_id > 0:
osm_type = "way"
else:
osm_type = "relation"
osm_id = -osm_id
tags.pop("way_area", None) tags.pop("way_area", None)
name = osm.display_name() name = osm_display_name(tags)
if not name and osm.has_street_address: if not name and street_address_in_tags(tags):
name = address_from_tags(tags) name = address_from_tags(tags)
if isinstance(osm, model.Polygon) and "building" in osm.tags: if table == "polygon" and "building" in tags:
address_nodes = get_address_nodes_within_building(osm, bounds) address_nodes = get_address_nodes_within_building(src_id, db_bbox)
address_list = [address_from_tags(addr) for addr in address_nodes] address_list = [address_from_tags(addr) for addr in address_nodes]
else: else:
address_list = [] address_list = []
shape = "area" if table == "polygon" else table
cur = { cur = {
"identifier": osm.identifier, "identifier": f"{osm_type}/{osm_id}",
"type": osm.type, "type": osm_type,
"id": osm.id, "id": osm_id,
"distance": dist, "distance": distance,
"name": name, "name": name,
"tags": tags, "tags": tags,
"geojson": osm.geojson(), "geojson": json.loads(geojson),
"presets": get_presets_from_tags(osm), "presets": get_presets_from_tags(shape, tags),
"address_list": address_list, "address_list": address_list,
"centroid": list(osm.get_centroid()), "centroid": list(reversed(re_point.match(centroid).groups())),
} }
if hasattr(osm, 'area'): if area is not None:
cur["area"] = osm.area cur["area"] = area
part_of = [i for i in get_part_of(table, src_id, db_bbox)
if i["tags"]["name"] != name]
if part_of:
cur["part_of"] = part_of
if address := address_from_tags(tags): if address := address_from_tags(tags):
cur["address"] = address cur["address"] = address
part_of = [i for i in get_part_of(osm, bounds) if i["tags"]["name"] != name]
if part_of:
cur["part_of"] = part_of
nearby.append(cur) nearby.append(cur)
return nearby return nearby
@ -503,8 +596,8 @@ def get_item_street_addresses(item):
return street_address return street_address
def check_is_street_number_first(bounds): def check_is_street_number_first(latlng):
g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds)) g.street_number_first = is_street_number_first(*latlng)
def item_detail(item): def item_detail(item):
locations = [list(i.get_lat_lon()) for i in item.locations] locations = [list(i.get_lat_lon()) for i in item.locations]
@ -537,7 +630,7 @@ def get_markers(all_items):
def wikidata_items(bounds): def wikidata_items(bounds):
check_is_street_number_first(bounds) check_is_street_number_first(get_bbox_centroid(bounds))
q = get_items_in_bbox(bounds) q = get_items_in_bbox(bounds)
db_items = q.all() db_items = q.all()
items = get_markers(db_items) items = get_markers(db_items)

27
matcher/planet.py Normal file
View file

@ -0,0 +1,27 @@
from sqlalchemy import Table, Column, Integer, String, Float, MetaData
from sqlalchemy.dialects import postgresql
from geoalchemy2 import Geometry
metadata = MetaData()
point = Table("planet_osm_point", metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),
Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False),
)
line = Table("planet_osm_line", metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),
Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False),
)
polygon = Table("planet_osm_polygon", metadata,
Column("osm_id", Integer),
Column("name", String),
Column("tags", postgresql.HSTORE),
Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False),
Column("way_area", Float),
)

View file

@ -343,17 +343,54 @@ def api_get_item_tags(item_id):
duration=t1) duration=t1)
def expand_street_name(name):
ret = {name}
if any(name.startswith(st) for st in ('St ', 'St. ')):
first_space = name.find(' ')
ret.add("Saint" + name[first_space:])
if ', ' in name:
for n in set(ret):
comma = n.find(", ")
ret.add(name[:comma])
elif '/' in name:
for n in set(ret):
ret.extend(part.strip() for part in n.split("/"))
return ret
@app.route("/api/1/item/Q<int:item_id>/candidates") @app.route("/api/1/item/Q<int:item_id>/candidates")
def api_find_osm_candidates(item_id): def api_find_osm_candidates(item_id):
t0 = time() t0 = time()
bounds = read_bounds_param()
item = model.Item.query.get(item_id) item = model.Item.query.get(item_id)
if not item: if not item:
return cors_jsonify(success=True, return cors_jsonify(success=True,
qid=f'Q{item_id}', qid=f'Q{item_id}',
error="item doesn't exist") error="item doesn't exist")
nearby = api.find_osm_candidates(item, bounds) label = item.label()
item_is_street = item.is_street()
if item_is_street:
max_distance = 2_000
limit = None
names = expand_street_name(label)
else:
max_distance = 400
limit = 60
names = None
nearby = api.find_osm_candidates(item,
limit=limit,
max_distance=max_distance,
names=names)
if item_is_street and not nearby:
# nearby = [osm for osm in nearby if street_name_match(label, osm)]
# try again without name filter
nearby = api.find_osm_candidates(item, limit=100,
max_distance=1_000)
t1 = time() - t0 t1 = time() - t0
return cors_jsonify(success=True, qid=item.qid, nearby=nearby, duration=t1) return cors_jsonify(success=True, qid=item.qid, nearby=nearby, duration=t1)