diff --git a/matcher/api.py b/matcher/api.py index 9f32802..e2b4e27 100644 --- a/matcher/api.py +++ b/matcher/api.py @@ -1,5 +1,10 @@ -from sqlalchemy import func, or_, and_ +from sqlalchemy import func, or_, and_, text from sqlalchemy.orm import selectinload +from sqlalchemy.sql import select +from sqlalchemy.sql.expression import literal, union, cast, column +from sqlalchemy.types import Float +from sqlalchemy.dialects import postgresql +from matcher.planet import point, line, polygon from matcher import model, database, wikidata_api, wikidata from matcher.data import extra_keys from collections import Counter, defaultdict @@ -41,37 +46,61 @@ def is_street_number_first(lat, lon): def make_envelope(bounds): return func.ST_MakeEnvelope(*bounds, srid) - def get_bbox_centroid(bbox): bbox = make_envelope(bbox) centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar() return reversed(re_point.match(centroid).groups()) +def make_envelope_around_point(lat, lon, distance): + conn = database.session.connection() + + p = func.ST_MakePoint(lon, lat) + + s = select([ + func.ST_AsText(func.ST_Project(p, distance, func.radians(0))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(90))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(180))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(270))), + ]) + row = conn.execute(s).fetchone() + coords = [[float(v) for v in re_point.match(i).groups()] for i in row] + + north = coords[0][1] + east = coords[1][0] + south = coords[2][1] + west = coords[3][0] + + return func.ST_MakeEnvelope(west, south, east, north, srid) + def drop_way_area(tags): if "way_area" in tags: del tags["way_area"] return tags -def get_part_of(thing, bbox): - db_bbox = make_envelope(bbox) - ewkt = thing.as_EWKT - q = model.Polygon.query.filter( - func.ST_Intersects(db_bbox, model.Polygon.way), - func.ST_Covers(model.Polygon.way, func.ST_GeomFromEWKT(ewkt)), - or_( - model.Polygon.tags.has_key("landuse"), - model.Polygon.tags.has_key("amenity"), - ), - model.Polygon.tags.has_key("name"), - ) +def get_part_of(table_name, src_id, bbox): + table_map = {'point': point, 'line': line, 'polygon': polygon} + table_alias = table_map[table_name].alias() + s = (select([polygon.c.osm_id, + polygon.c.tags, + func.ST_Area(func.ST_Collect(polygon.c.way))]). + where(and_(func.ST_Intersects(bbox, polygon.c.way), + func.ST_Covers(polygon.c.way, table_alias.c.way), + table_alias.c.osm_id == src_id, + polygon.c.tags.has_key("name"), + or_( + polygon.c.tags.has_key("landuse"), + polygon.c.tags.has_key("amenity"), + ))). + group_by(polygon.c.osm_id, polygon.c.tags)) + + conn = database.session.connection() return [{ - "type": polygon.type, - "id": polygon.id, - "tags": drop_way_area(polygon.tags), - "area": polygon.area, - } for polygon in q] - + "type": "way" if osm_id > 0 else "relation", + "id": abs(osm_id), + "tags": tags, + "area": area, + } for osm_id, tags, area in conn.execute(s)] def get_and_save_item(qid): entity = wikidata_api.get_entity(qid) @@ -127,34 +156,51 @@ def get_items_in_bbox(bbox): def get_osm_with_wikidata_tag(bbox): - db_bbox = make_envelope(bbox) + bbox_str = ','.join(str(v) for v in bbox) + + # easier than building this query with SQLAlchemy + sql = f''' +SELECT tbl, osm_id, tags, ARRAY[ST_Y(centroid), ST_X(centroid)], geojson +FROM ( + SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson + FROM planet_osm_point + WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way) +UNION + SELECT 'line' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson + FROM planet_osm_line + WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way) + GROUP BY osm_id, tags +UNION + SELECT 'polygon' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson + FROM planet_osm_polygon + WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way) + GROUP BY osm_id, tags + HAVING st_area(st_collect(way)) < 20 * st_area(ST_MakeEnvelope({bbox_str}, {srid})) +) as anon +WHERE tags ? 'wikidata' +''' + conn = database.session.connection() + result = conn.execute(text(sql)) tagged = [] + for tbl, osm_id, tags, centroid, geojson in result: + if tbl == 'point': + osm_type = "node" + else: + osm_type = "way" if osm_id > 0 else "relation" + osm_id = abs(osm_id) - seen = set() - for cls in (model.Point, model.Polygon, model.Line): - q = cls.query.filter( - cls.tags.has_key("wikidata"), - func.ST_Intersects(db_bbox, cls.way), - func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox), - ) - for osm in q: - if osm.identifier in seen: - continue - seen.add(osm.identifier) - name = osm.name or osm.tags.get("addr:housename") or "[no label]" + name = tags.get("name") or tags.get("addr:housename") or "[no label]" - tagged.append( - { - "identifier": osm.identifier, - "id": osm.id, - "type": osm.type, - "geojson": osm.geojson(), - "centroid": list(osm.get_centroid()), - "name": name, - "wikidata": osm.tags["wikidata"], - } - ) + tagged.append({ + "identifier": f"{osm_type}/{osm_id}", + "id": osm_id, + "type": osm_type, + "geojson": json.loads(geojson), + "centroid": centroid, + "name": name, + "wikidata": tags["wikidata"], + }) return tagged @@ -250,73 +296,18 @@ def wikidata_isa_counts(bounds): return isa_count -def get_tag_filter(cls, tag_list): +def get_tag_filter(tags, tag_list): tag_filter = [] for tag_or_key in tag_list: if tag_or_key.startswith("Key:"): - tag_filter.append(and_(cls.tags.has_key(tag_or_key[4:]), - cls.tags[tag_or_key[4:]] != 'no')) + tag_filter.append(and_(tags.has_key(tag_or_key[4:]), + tags[tag_or_key[4:]] != 'no')) if tag_or_key.startswith("Tag:"): k, _, v = tag_or_key.partition("=") - tag_filter.append(cls.tags[k[4:]] == v) + tag_filter.append(tags[k[4:]] == v) return tag_filter -def get_nearby(bbox, item, limit=60, max_distance=400): - db_bbox = make_envelope(bbox) - - osm_objects = {} - distances = {} - tag_list = get_item_tags(item) - if not tag_list: - return [] - - item_is_street = item.is_street() - - for loc in item.locations: - lat, lon = loc.get_lat_lon() - point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326) - for cls in model.Point, model.Line, model.Polygon: - if item_is_street and cls == model.Point: - continue - - tag_filter = get_tag_filter(cls, tag_list) - dist = func.ST_DistanceSphere(point, cls.way) - - q = (cls.query.add_columns(dist.label('distance')) - .filter( - func.ST_Intersects(db_bbox, cls.way), - func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox), - or_(*tag_filter)) - .order_by(point.distance_centroid(cls.way))) - - if item_is_street: - q = q.filter(cls.tags.has_key("name"), - cls.tags["highway"] != 'bus_stop') - - if "Key:amenity" in tag_list: - q = q.filter(cls.tags["amenity"] != "bicycle_parking", - cls.tags["amenity"] != "bicycle_repair_station", - cls.tags["amenity"] != "atm", - cls.tags["amenity"] != "recycling") - - q = q.limit(limit) - - # print(q.statement.compile(compile_kwargs={"literal_binds": True})) - - for i, dist in q: - if dist > max_distance: - continue - osm_objects.setdefault(i.identifier, i) - if i.identifier not in distances or dist < distances[i.identifier]: - distances[i.identifier] = dist - - nearby = [(osm_objects[identifier], dist) - for identifier, dist - in sorted(distances.items(), key=lambda i:i[1])] - - return nearby[:limit] - def get_preset_translations(): app = current_app @@ -338,15 +329,13 @@ def get_preset_translations(): return {} -def get_presets_from_tags(osm): +def get_presets_from_tags(ending, tags): translations = get_preset_translations() found = [] - endings = {model.Point: "point", model.Line: "line", model.Polygon: "area"} - ending = endings[type(osm)] - for k, v in osm.tags.items(): - if k == 'amenity' and v == 'clock' and osm.tags.get('display') == 'sundial': + for k, v in tags.items(): + if k == 'amenity' and v == 'clock' and tags.get('display') == 'sundial': tag_or_key = f"Tag:{k}={v}" found.append({"tag_or_key": tag_or_key, "name": "Sundial"}) continue @@ -416,7 +405,7 @@ def address_from_tags(tags): return " ".join(tags["addr:" + k] for k in keys) -def get_address_nodes_within_building(building, bbox): +def old_get_address_nodes_within_building(building, bbox): db_bbox = make_envelope(bbox) ewkt = building.as_EWKT q = model.Point.query.filter( @@ -428,45 +417,149 @@ def get_address_nodes_within_building(building, bbox): return [node.tags for node in q] +def get_address_nodes_within_building(osm_id, bbox): + q = model.Point.query.filter( + polygon.c.osm_id == osm_id, + func.ST_Intersects(bbox, model.Point.way), + func.ST_Covers(polygon.c.way, model.Point.way), + model.Point.tags.has_key("addr:street"), + model.Point.tags.has_key("addr:housenumber"), + ) -def find_osm_candidates(item, bounds): - check_is_street_number_first(bounds) + return [node.tags for node in q] + +def osm_display_name(tags): + keys = ("bridge:name", "tunnel:name", "lock_name", "name", "addr:housename", + "inscription") + for key in keys: + if key in tags: + return tags[key] + + +def street_address_in_tags(tags): + return "addr:housenumber" in tags and "addr:street" in tags + +def find_osm_candidates(item, limit=60, max_distance=400, names=None): + item_id = item.item_id + item_is_street = item.is_street() + print("is street:", item_is_street) + + check_is_street_number_first(item.locations[0].get_lat_lon()) + + db_bbox = or_(*[make_envelope_around_point(*loc.get_lat_lon(), max_distance) + for loc in item.locations]) + + null_area = cast(None, Float) + dist = column('dist') + tags = column('tags', postgresql.HSTORE) + + tag_list = get_item_tags(item) + + s_point = (select([literal('point'), point.c.osm_id, point.c.tags.label('tags'), + func.min(func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)).label('dist'), + func.ST_AsText(point.c.way), + func.ST_AsGeoJSON(point.c.way), + null_area]). + where(and_( + func.ST_Intersects(db_bbox, point.c.way), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(point.c.tags, tag_list)))). + group_by(point.c.osm_id, point.c.tags, point.c.way)) + + s_line = (select([literal('line'), line.c.osm_id, line.c.tags.label('tags'), + func.min(func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)).label('dist'), + func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))), + func.ST_AsGeoJSON(func.ST_Collect(line.c.way)), + null_area]). + where(and_( + func.ST_Intersects(db_bbox, line.c.way), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(line.c.tags, tag_list)))). + group_by(line.c.osm_id, line.c.tags)) + + s_polygon = (select([literal('polygon'), polygon.c.osm_id, polygon.c.tags.label('tags'), + func.min(func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)).label('dist'), + func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))), + func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)), + func.ST_Area(func.ST_Collect(polygon.c.way))]). + where(and_( + func.ST_Intersects(db_bbox, polygon.c.way), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(polygon.c.tags, tag_list)))). + group_by(polygon.c.osm_id, polygon.c.tags). + having(func.ST_Area(func.ST_Collect(polygon.c.way)) < 20 * func.ST_Area(db_bbox))) + + tables = ([] if item_is_street else [s_point]) + [s_line, s_polygon] + s = select([union(*tables).alias()]).where(dist < max_distance).order_by(dist) + + if names: + s = s.where(tags["name"].in_(names)) + + if item_is_street: + s = s.where(tags["highway"] != "bus_stop") + if not names: + s = s.where(tags.has_key("name")) + + if "Key:amenity" in tag_list: + s = s.where(and_(tags["amenity"] != "bicycle_parking", + tags["amenity"] != "bicycle_repair_station", + tags["amenity"] != "atm", + tags["amenity"] != "recycling")) + + if limit: + s = s.limit(limit) + + print(s.compile(compile_kwargs={"literal_binds": True})) + + conn = database.session.connection() nearby = [] - for osm, dist in get_nearby(bounds, item): - tags = osm.tags + for table, src_id, tags, distance, centroid, geojson, area in conn.execute(s): + osm_id = src_id + if table == "point": + osm_type = "node" + elif osm_id > 0: + osm_type = "way" + else: + osm_type = "relation" + osm_id = -osm_id + tags.pop("way_area", None) - name = osm.display_name() - if not name and osm.has_street_address: + name = osm_display_name(tags) + if not name and street_address_in_tags(tags): name = address_from_tags(tags) - if isinstance(osm, model.Polygon) and "building" in osm.tags: - address_nodes = get_address_nodes_within_building(osm, bounds) + if table == "polygon" and "building" in tags: + address_nodes = get_address_nodes_within_building(src_id, db_bbox) address_list = [address_from_tags(addr) for addr in address_nodes] else: address_list = [] + + shape = "area" if table == "polygon" else table + cur = { - "identifier": osm.identifier, - "type": osm.type, - "id": osm.id, - "distance": dist, + "identifier": f"{osm_type}/{osm_id}", + "type": osm_type, + "id": osm_id, + "distance": distance, "name": name, "tags": tags, - "geojson": osm.geojson(), - "presets": get_presets_from_tags(osm), + "geojson": json.loads(geojson), + "presets": get_presets_from_tags(shape, tags), "address_list": address_list, - "centroid": list(osm.get_centroid()), + "centroid": list(reversed(re_point.match(centroid).groups())), } - if hasattr(osm, 'area'): - cur["area"] = osm.area + if area is not None: + cur["area"] = area + + part_of = [i for i in get_part_of(table, src_id, db_bbox) + if i["tags"]["name"] != name] + if part_of: + cur["part_of"] = part_of if address := address_from_tags(tags): cur["address"] = address - part_of = [i for i in get_part_of(osm, bounds) if i["tags"]["name"] != name] - if part_of: - cur["part_of"] = part_of - nearby.append(cur) return nearby @@ -503,8 +596,8 @@ def get_item_street_addresses(item): return street_address -def check_is_street_number_first(bounds): - g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds)) +def check_is_street_number_first(latlng): + g.street_number_first = is_street_number_first(*latlng) def item_detail(item): locations = [list(i.get_lat_lon()) for i in item.locations] @@ -537,7 +630,7 @@ def get_markers(all_items): def wikidata_items(bounds): - check_is_street_number_first(bounds) + check_is_street_number_first(get_bbox_centroid(bounds)) q = get_items_in_bbox(bounds) db_items = q.all() items = get_markers(db_items) diff --git a/matcher/planet.py b/matcher/planet.py new file mode 100644 index 0000000..57f7b44 --- /dev/null +++ b/matcher/planet.py @@ -0,0 +1,27 @@ +from sqlalchemy import Table, Column, Integer, String, Float, MetaData +from sqlalchemy.dialects import postgresql +from geoalchemy2 import Geometry + +metadata = MetaData() + +point = Table("planet_osm_point", metadata, + Column("osm_id", Integer), + Column("name", String), + Column("tags", postgresql.HSTORE), + Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False), +) + +line = Table("planet_osm_line", metadata, + Column("osm_id", Integer), + Column("name", String), + Column("tags", postgresql.HSTORE), + Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False), +) + +polygon = Table("planet_osm_polygon", metadata, + Column("osm_id", Integer), + Column("name", String), + Column("tags", postgresql.HSTORE), + Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False), + Column("way_area", Float), +) diff --git a/web_view.py b/web_view.py index 2120104..7ff7f61 100755 --- a/web_view.py +++ b/web_view.py @@ -343,17 +343,54 @@ def api_get_item_tags(item_id): duration=t1) +def expand_street_name(name): + ret = {name} + if any(name.startswith(st) for st in ('St ', 'St. ')): + first_space = name.find(' ') + ret.add("Saint" + name[first_space:]) + + if ', ' in name: + for n in set(ret): + comma = n.find(", ") + ret.add(name[:comma]) + elif '/' in name: + for n in set(ret): + ret.extend(part.strip() for part in n.split("/")) + + return ret + + @app.route("/api/1/item/Q/candidates") def api_find_osm_candidates(item_id): t0 = time() - bounds = read_bounds_param() item = model.Item.query.get(item_id) if not item: return cors_jsonify(success=True, qid=f'Q{item_id}', error="item doesn't exist") - nearby = api.find_osm_candidates(item, bounds) + label = item.label() + item_is_street = item.is_street() + + if item_is_street: + max_distance = 2_000 + limit = None + names = expand_street_name(label) + else: + max_distance = 400 + limit = 60 + names = None + nearby = api.find_osm_candidates(item, + limit=limit, + max_distance=max_distance, + names=names) + + if item_is_street and not nearby: + # nearby = [osm for osm in nearby if street_name_match(label, osm)] + + # try again without name filter + nearby = api.find_osm_candidates(item, limit=100, + max_distance=1_000) t1 = time() - t0 return cors_jsonify(success=True, qid=item.qid, nearby=nearby, duration=t1)