2023-05-14 22:04:26 +01:00
|
|
|
import collections
|
2023-05-13 20:57:58 +01:00
|
|
|
import json
|
|
|
|
import os.path
|
|
|
|
import re
|
2023-05-14 21:40:16 +01:00
|
|
|
import typing
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 21:42:08 +01:00
|
|
|
import flask
|
2023-05-13 20:57:58 +01:00
|
|
|
from sqlalchemy import and_, func, or_, text
|
|
|
|
from sqlalchemy.dialects import postgresql
|
2021-06-25 13:52:42 +01:00
|
|
|
from sqlalchemy.orm import selectinload
|
2021-07-22 13:47:38 +01:00
|
|
|
from sqlalchemy.sql import select
|
2023-05-13 20:57:58 +01:00
|
|
|
from sqlalchemy.sql.expression import cast, column, literal, union
|
2021-07-22 13:47:38 +01:00
|
|
|
from sqlalchemy.types import Float
|
2023-05-13 20:57:58 +01:00
|
|
|
|
|
|
|
from matcher import database, model, wikidata, wikidata_api
|
|
|
|
from matcher.planet import line, point, polygon
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
# Type alias for an OSM tag dictionary: tag key -> tag value.
TagsType = dict[str, str]

# Spatial reference ID used for all geometry: WGS 84 (EPSG:4326).
srid = 4326

# Parses PostGIS WKT output such as "POINT(-2.24 53.48)" into (x, y) groups.
re_point = re.compile(r"^POINT\((.+) (.+)\)$")

# Wikidata entity fields worth keeping when caching an entity in the database.
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}

# Prefixes matched in addition to the plain key/tag in get_tag_filter,
# e.g. "disused:amenity" as well as "amenity".
tag_prefixes = {
    "disused",
    "was",
    "abandoned",
    "demolished",
    "destroyed",
    "ruins",
    "historic",
}

# these tags are too generic, so we ignore them
skip_tags = {
    "Key:addr",
    "Key:addr:street",
    "Key:lit",
    "Key:image",
    "Key:name",
    "Key:symbol",
    "Key:brand",
}
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 19:17:16 +01:00
|
|
|
def get_country_iso3166_1(lat: float, lon: float) -> set[str]:
    """For a given lat/lon return a set of ISO country codes.

    Also cache the country code in the global object.

    Normally there should be only one country.
    """
    point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
    alpha2_codes: set[str] = set()
    # Polygons with admin_level == "2" are the country-level boundaries.
    q = model.Polygon.query.filter(
        func.ST_Covers(model.Polygon.way, point), model.Polygon.admin_level == "2"
    )
    for country in q:
        # Boundary relations without an ISO3166-1 tag are skipped.
        alpha2: str = country.tags.get("ISO3166-1")
        if not alpha2:
            continue
        alpha2_codes.add(alpha2)

    # Cache on the request-global object for later use (e.g. preset translations).
    flask.g.alpha2_codes = alpha2_codes
    return alpha2_codes
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def is_street_number_first(lat: float, lon: float) -> bool:
|
|
|
|
"""Is lat/lon within a country that puts number first in a street address."""
|
2021-06-25 13:52:42 +01:00
|
|
|
if lat is None or lon is None:
|
|
|
|
return True
|
|
|
|
|
|
|
|
alpha2 = get_country_iso3166_1(lat, lon)
|
2022-04-18 12:26:04 +01:00
|
|
|
# Incomplete list of countries that put street number first.
|
|
|
|
alpha2_number_first = {
|
2023-05-13 20:57:58 +01:00
|
|
|
"GB", # United Kingdom
|
|
|
|
"IE", # Ireland
|
|
|
|
"US", # United States
|
|
|
|
"MX", # Mexico
|
|
|
|
"CA", # Canada
|
|
|
|
"FR", # France
|
|
|
|
"AU", # Australia
|
|
|
|
"NZ", # New Zealand
|
|
|
|
"ZA", # South Africa
|
2022-04-18 12:26:04 +01:00
|
|
|
}
|
2021-06-25 13:52:42 +01:00
|
|
|
|
|
|
|
return bool(alpha2_number_first & alpha2)
|
|
|
|
|
|
|
|
|
|
|
|
def make_envelope(bounds):
    """Build a PostGIS envelope expression for bounds, in the module SRID."""
    args = list(bounds) + [srid]
    return func.ST_MakeEnvelope(*args)
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-06-25 13:52:42 +01:00
|
|
|
def get_bbox_centroid(bbox):
    """Return the centroid of the given bbox as a (lat, lon) pair.

    The database returns the centroid as WKT "POINT(x y)"; the groups are
    (x, y) == (lon, lat), so they are reversed before returning.
    """
    bbox = make_envelope(bbox)
    centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar()
    m = re_point.match(centroid)
    assert m
    # reversed() yields (lat, lon); note this is an iterator, not a list.
    return reversed(m.groups())
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 21:20:28 +01:00
|
|
|
def make_envelope_around_point(lat: float, lon: float, distance: float):
    """Build a bbox envelope around a point, extending `distance` in each
    cardinal direction (units per ST_Project -- presumably metres, confirm
    against the PostGIS docs)."""
    conn = database.session.connection()

    p = func.ST_MakePoint(lon, lat)

    # Project the point north/east/south/west of the origin; each result
    # comes back as WKT "POINT(x y)".
    s = select(
        [
            func.ST_AsText(func.ST_Project(p, distance, func.radians(0))),
            func.ST_AsText(func.ST_Project(p, distance, func.radians(90))),
            func.ST_AsText(func.ST_Project(p, distance, func.radians(180))),
            func.ST_AsText(func.ST_Project(p, distance, func.radians(270))),
        ]
    )
    row = conn.execute(s).fetchone()
    # Parse each WKT point into [x, y] floats.
    coords = [[float(v) for v in re_point.match(i).groups()] for i in row]

    # y of the north point, x of the east point, etc.
    north = coords[0][1]
    east = coords[1][0]
    south = coords[2][1]
    west = coords[3][0]

    return func.ST_MakeEnvelope(west, south, east, north, srid)
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def drop_way_area(tags: "TagsType") -> "TagsType":
    """Remove the way_area field from a tags dict.

    The dict is modified in place; it is also returned for convenience.
    """
    # pop with a default is the idiomatic "delete if present".
    tags.pop("way_area", None)
    return tags
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-11-19 15:40:11 +00:00
|
|
|
def get_part_of(table_name, src_id, bbox):
    """Find named landuse/amenity polygons that cover the given OSM object.

    :param table_name: which planet table the source object lives in
        ("point", "line" or "polygon").
    :param src_id: osm_id of the source object in that table.
    :param bbox: geometry expression limiting the search area.
    :return: list of dicts with type, id, tags and area of each covering polygon.
    """
    table_map = {"point": point, "line": line, "polygon": polygon}
    # Alias so the source object and the candidate polygons can both be
    # referenced when table_name == "polygon".
    table_alias = table_map[table_name].alias()

    s = (
        select(
            [
                polygon.c.osm_id,
                polygon.c.tags,
                func.ST_Area(func.ST_Collect(polygon.c.way)),
            ]
        )
        .where(
            and_(
                func.ST_Intersects(bbox, polygon.c.way),
                func.ST_Covers(polygon.c.way, table_alias.c.way),
                table_alias.c.osm_id == src_id,
                # Only named polygons that represent an area of land use or
                # an amenity are interesting as "part of" parents.
                polygon.c.tags.has_key("name"),
                or_(
                    polygon.c.tags.has_key("landuse"),
                    polygon.c.tags.has_key("amenity"),
                ),
            )
        )
        .group_by(polygon.c.osm_id, polygon.c.tags)
    )

    conn = database.session.connection()
    # Negative osm_id encodes a relation; positive is a way.
    return [
        {
            "type": "way" if osm_id > 0 else "relation",
            "id": abs(osm_id),
            "tags": tags,
            "area": area,
        }
        for osm_id, tags, area in conn.execute(s)
    ]
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2022-04-18 12:26:04 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def get_and_save_item(qid: str) -> model.Item | None:
    """Download an item from Wikidata and cache it in the database.

    Returns the cached/created Item, or None when the entity has no claims.
    For a redirect, returns whatever is already cached under the target QID
    (which may be None).
    """
    entity = wikidata_api.get_entity(qid)
    entity_qid = entity["id"]
    if entity_qid != qid:
        # The requested QID redirects to another entity.
        print(f"redirect {qid} -> {entity_qid}")
        item: model.Item | None = model.Item.query.get(entity_qid[1:])
        return item

    if "claims" not in entity:
        return None
    coords = wikidata.get_entity_coords(entity["claims"])

    item_id = int(qid[1:])
    # Keep only the entity fields the Item model stores.
    obj = {k: v for k, v in entity.items() if k in entity_keys}
    try:
        item = model.Item(item_id=item_id, **obj)
    except TypeError:
        # Unexpected entity shape; dump details before re-raising for debugging.
        print(qid)
        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
        print(entity.keys())
        raise

    item.locations = model.location_objects(coords)
    database.session.add(item)
    database.session.commit()

    return item
|
|
|
|
|
|
|
|
|
2023-05-14 21:40:16 +01:00
|
|
|
def get_isa_count(items: list[model.Item]) -> list[tuple[str, int]]:
    """Count IsA (P31) values across items, most common first."""
    counter: collections.Counter[str] = collections.Counter()
    for item in items:
        if not item:
            continue
        for isa in item.get_claim("P31"):
            if not isa:
                print("missing IsA:", item.qid)
                continue
            counter[isa["id"]] += 1
    return counter.most_common()
|
|
|
|
|
|
|
|
|
|
|
|
def get_items_in_bbox(bbox):
    """Query for Wikidata items whose location falls within the given bbox."""
    envelope = make_envelope(bbox)
    q = model.Item.query.join(model.ItemLocation)
    q = q.filter(func.ST_Covers(envelope, model.ItemLocation.location))
    # Eager-load locations to avoid per-item queries later.
    return q.options(selectinload(model.Item.locations))
|
|
|
|
|
|
|
|
|
2021-07-30 15:02:41 +01:00
|
|
|
def get_osm_with_wikidata_tag(bbox, isa_filter=None):
    """Find OSM objects inside bbox that carry a wikidata tag.

    When isa_filter is given, first resolve the matching Wikidata QIDs and
    restrict the OSM results to objects whose wikidata tag is one of them.

    Returns a list of dicts describing each tagged OSM object.

    Note: the previous version also built a `point_sql` query that was only
    printed to stdout and never executed; that dead debug code is removed.
    """
    bbox_str = ",".join(str(v) for v in bbox)
    extra_sql = ""
    if isa_filter:
        q = model.Item.query.join(model.ItemLocation).filter(
            func.ST_Covers(make_envelope(bbox), model.ItemLocation.location)
        )
        q = add_isa_filter(q, isa_filter)
        qids = [isa.qid for isa in q]
        if not qids:
            return []

        qid_list = ",".join(f"'{qid}'" for qid in qids)
        extra_sql += f" AND tags -> 'wikidata' in ({qid_list})"

    # easier than building this query with SQLAlchemy
    sql = (
        f"""
    SELECT tbl, osm_id, tags, ARRAY[ST_Y(centroid), ST_X(centroid)], geojson
    FROM (
        SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson
        FROM planet_osm_point
        WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
    UNION
        SELECT 'line' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson
        FROM planet_osm_line
        WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
        GROUP BY osm_id, tags
    UNION
        SELECT 'polygon' as tbl, osm_id, tags, ST_AsText(ST_Centroid(ST_Collect(way))) AS centroid, ST_AsGeoJSON(ST_Collect(way)) AS geojson
        FROM planet_osm_polygon
        WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way)
        GROUP BY osm_id, tags
        HAVING st_area(st_collect(way)) < 20 * st_area(ST_MakeEnvelope({bbox_str}, {srid}))
    ) as anon
    WHERE tags ? 'wikidata'
    """
        + extra_sql
    )
    conn = database.session.connection()
    result = conn.execute(text(sql))

    tagged = []
    for tbl, osm_id, tags, centroid, geojson in result:
        if tbl == "point":
            osm_type = "node"
        else:
            # Negative osm_id in the line/polygon tables means a relation.
            osm_type = "way" if osm_id > 0 else "relation"
            osm_id = abs(osm_id)

        name = tags.get("name") or tags.get("addr:housename") or "[no label]"

        tagged.append(
            {
                "identifier": f"{osm_type}/{osm_id}",
                "id": osm_id,
                "type": osm_type,
                "geojson": json.loads(geojson),
                "centroid": centroid,
                "name": name,
                "wikidata": tags["wikidata"],
            }
        )

    return tagged
|
|
|
|
|
|
|
|
|
2023-05-14 21:29:24 +01:00
|
|
|
def get_items(item_ids: list[int]) -> list[model.Item]:
    """Get Wikidata items with the given item IDs, fetching any not yet cached."""
    found = []
    for item_id in item_ids:
        item = model.Item.query.get(item_id)
        if not item:
            if not get_and_save_item(f"Q{item_id}"):
                continue  # could not retrieve this item; skip it
            item = model.Item.query.get(item_id)
        found.append(item)
    return found
|
|
|
|
|
|
|
|
|
2023-05-14 21:40:16 +01:00
|
|
|
class IsaPath(typing.TypedDict):
    """Component of an IsA path."""

    # Wikidata ID of the class, e.g. "Q3918"
    qid: str
    # Human-readable label for the class
    label: str
|
|
|
|
|
|
|
|
|
2023-05-14 21:29:24 +01:00
|
|
|
def get_item_tags(item: model.Item) -> dict[str, list[str]]:
    """Map expected OSM tags/keys for an item to the IsA paths that imply them.

    Walks up the class hierarchy starting from the item's IsA (P31) values,
    collecting OSM tag/key mappings (P1282) and any extra keys configured in
    ItemExtraKeys.  Returns {tag_or_key: [isa_path, ...]}.
    """
    isa_list: list[int] = [typing.cast(int, v["numeric-id"]) for v in item.get_isa()]
    # Work list of (item, path-so-far) pairs for the hierarchy walk.
    isa_items: list[tuple[model.Item, list[IsaPath]]] = [
        (isa, []) for isa in get_items(isa_list)
    ]

    osm_list = collections.defaultdict(list)

    # Classes configured to be excluded from the walk.
    skip_isa = {row[0] for row in database.session.query(model.SkipIsA.item_id)}

    # NOTE(review): 41176 is commented as building (Q41176) below, but the
    # variable here is named tram_stop_id -- confirm which QID is intended.
    tram_stop_id = 41176
    airport_id = 1248784
    aerodrome_id = 62447
    if {tram_stop_id, airport_id, aerodrome_id} & set(isa_list):
        skip_isa.add(41176)  # building (Q41176)

    # Track visited classes so the walk terminates on cycles.
    seen = set(isa_list) | skip_isa
    # Classes specific enough that the walk stops when reaching them.
    stop = {
        "Q11799049": "public institution",
        "Q7075": "library",
        "Q329683": "industrial park",
    }
    while isa_items:
        isa, isa_path = isa_items.pop()
        if not isa:
            continue
        isa_qid: str = typing.cast(str, isa.qid)
        isa_path = isa_path + [{"qid": isa_qid, "label": isa.label()}]
        # OSM tag or key (P1282), minus tags considered too generic.
        osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]

        # Extra tags/keys configured locally for this class.
        osm += [
            extra.tag_or_key
            for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)
        ]

        for i in osm:
            osm_list[i].append(isa_path[:])

        if isa_qid in stop:
            # item is specific enough, no need to keep walking the item hierarchy
            continue

        # Follow these properties to find broader classes to examine next.
        check = set()
        properties = [
            ("P279", "subclass of"),
            ("P140", "religion"),
            ("P641", "sport"),
            ("P366", "use"),
            ("P1269", "facet of"),
            # ("P361", "part of"),
        ]

        for pid, label in properties:
            check |= {v["numeric-id"] for v in (isa.get_claim(pid) or []) if v}

        print(isa.qid, isa.label(), check)
        isa_list = check - seen
        seen.update(isa_list)
        isa_items += [(isa, isa_path) for isa in get_items(isa_list)]
    return {key: list(values) for key, values in osm_list.items()}
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2021-11-13 10:44:56 +00:00
|
|
|
|
|
|
|
def get_tags_for_isa_item(item):
    """Like get_item_tags, but starting from a class item itself.

    Walks the class hierarchy from `item`, collecting OSM tag/key mappings
    (P1282) and extra keys, and also records every class visited.  Returns
    {"tags": {tag_or_key: [isa_path, ...]}, "checked": [visited classes]}.

    NOTE(review): this is a near-duplicate of get_item_tags with the extra
    "checked" bookkeeping; the two could share a helper.
    """
    isa_list = [item.item_id]
    isa_items = [(item, [])]

    osm_list = collections.defaultdict(list)

    # Classes configured to be excluded from the walk.
    skip_isa = {row[0] for row in database.session.query(model.SkipIsA.item_id)}

    # NOTE(review): 41176 is commented as building (Q41176) below, but the
    # variable here is named tram_stop_id -- confirm which QID is intended.
    tram_stop_id = 41176
    airport_id = 1248784
    aerodrome_id = 62447
    if {tram_stop_id, airport_id, aerodrome_id} & set(isa_list):
        skip_isa.add(41176)  # building (Q41176)

    # Track visited classes so the walk terminates on cycles.
    seen = set(isa_list) | skip_isa
    # Classes specific enough that the walk stops when reaching them.
    stop = {
        "Q11799049": "public institution",
        "Q7075": "library",
        "Q329683": "industrial park",
    }
    items_checked = []
    items_checked_done = set()
    while isa_items:
        isa, isa_path = isa_items.pop()
        if not isa:
            continue
        isa_path = isa_path + [{"qid": isa.qid, "label": isa.label()}]
        if isa.item_id not in items_checked_done:
            items_checked.append({"qid": isa.qid, "label": isa.label()})
            items_checked_done.add(isa.item_id)
        # OSM tag or key (P1282), minus tags considered too generic.
        osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]

        # Extra tags/keys configured locally for this class.
        osm += [
            extra.tag_or_key
            for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)
        ]

        for i in osm:
            osm_list[i].append(isa_path[:])

        if isa.qid in stop:
            # item is specific enough, no need to keep walking the item hierarchy
            continue

        # Follow these properties to find broader classes to examine next.
        check = set()
        properties = [
            ("P279", "subclass of"),
            ("P140", "religion"),
            ("P641", "sport"),
            ("P366", "use"),
            ("P1269", "facet of"),
            # ("P361", "part of"),
        ]

        for pid, label in properties:
            check |= {v["numeric-id"] for v in (isa.get_claim(pid) or []) if v}

        print(isa.qid, isa.label(), check)
        isa_list = check - seen
        seen.update(isa_list)
        isa_items += [(isa, isa_path) for isa in get_items(isa_list)]
    return {
        "tags": {key: list(values) for key, values in osm_list.items()},
        "checked": items_checked,
    }
|
|
|
|
|
|
|
|
|
2021-07-30 15:02:41 +01:00
|
|
|
def add_isa_filter(q, isa_qids):
    """Restrict an Item query to items that are instances of the given classes.

    First expands isa_qids with direct subclasses (P279), then filters on
    IsA (P31) membership using the Postgres jsonb `?|` (any-key) operator.
    """
    # Find QIDs of items whose subclass-of (P279) targets any of isa_qids.
    q_subclass = database.session.query(model.Item.qid).filter(
        func.jsonb_path_query_array(
            model.Item.claims,
            "$.P279[*].mainsnak.datavalue.value.id",
        ).bool_op("?|")(list(isa_qids))
    )

    subclass_qid = {qid for qid, in q_subclass.all()}

    # Match items whose P31 values include any of the expanded QIDs.
    isa = func.jsonb_path_query_array(
        model.Item.claims,
        "$.P31[*].mainsnak.datavalue.value.id",
    ).bool_op("?|")
    return q.filter(isa(list(isa_qids | subclass_qid)))
|
|
|
|
|
|
|
|
|
|
|
|
def wikidata_items_count(bounds, isa_filter=None):
    """Count Wikidata items located inside bounds, optionally filtered by IsA."""
    q = model.Item.query.join(model.ItemLocation).filter(
        func.ST_Covers(make_envelope(bounds), model.ItemLocation.location)
    )
    if isa_filter:
        q = add_isa_filter(q, isa_filter)
    return q.count()
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2022-04-18 12:25:38 +01:00
|
|
|
def wikidata_isa_counts(bounds, isa_filter=None):
    """Count the IsA classes of Wikidata items inside the given bounds.

    Returns a list of {"qid", "count", "label"} dicts, most common first.
    """
    db_bbox = make_envelope(bounds)

    q = model.Item.query.join(model.ItemLocation).filter(
        func.ST_Covers(db_bbox, model.ItemLocation.location)
    )

    if isa_filter:
        q = add_isa_filter(q, isa_filter)

    db_items = q.all()

    counts = get_isa_count(db_items)
    # Strip the leading "Q" to get numeric item IDs for the lookup.
    isa_ids = [qid[1:] for qid, count in counts]
    isa_items = {
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
    }
    isa_count = []
    for qid, count in counts:
        item = isa_items.get(qid)
        if not item:
            # Class not cached yet; fetch it from Wikidata.
            item = get_and_save_item(qid)

        label = item.label() if item else "[missing]"
        isa = {
            "qid": qid,
            "count": count,
            "label": label,
        }
        isa_count.append(isa)

    return isa_count
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-07-22 13:47:38 +01:00
|
|
|
def get_tag_filter(tags, tag_list):
    """Build SQLAlchemy clauses matching the given "Key:..."/"Tag:..." entries.

    For each key, also matches the lifecycle-prefixed variants from
    tag_prefixes (e.g. "disused:<key>").
    """
    clauses = []
    for tag_or_key in tag_list:
        if tag_or_key.startswith("Key:"):
            key = tag_or_key[4:]
            clauses.append(and_(tags.has_key(key), tags[key] != "no"))
            clauses += [tags.has_key(f"{prefix}:{key}") for prefix in tag_prefixes]

        if tag_or_key.startswith("Tag:"):
            k, _, v = tag_or_key[4:].partition("=")
            clauses.append(tags[k] == v)
            clauses += [tags[f"{prefix}:{k}"] == v for prefix in tag_prefixes]

    return clauses
|
|
|
|
|
|
|
|
|
|
|
|
def get_preset_translations():
    """Return translated iD preset names for the current country, or {}.

    Iterates the ISO 3166-1 alpha-2 codes cached on flask.g (set by
    get_country_iso3166_1) and returns the first matching translation set
    from the iD tagging-schema distribution.
    """
    app = flask.current_app
    country_language = {
        "AU": "en-AU",  # Australia
        "GB": "en-GB",  # United Kingdom
        "IE": "en-GB",  # Ireland
        "IN": "en-IN",  # India
        "NZ": "en-NZ",  # New Zealand
    }
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
    translation_dir = os.path.join(ts_dir, "dist", "translations")

    for code in flask.g.alpha2_codes:
        # BUG FIX: this previously called country_language.get("code"),
        # looking up the literal string "code", so it always returned None
        # and no translation was ever found.
        lang_code = country_language.get(code)
        if not lang_code:
            continue
        filename = os.path.join(translation_dir, lang_code + ".json")
        # Use a context manager so the file handle is closed promptly.
        with open(filename) as f:
            json_data = json.load(f)
        if lang_code not in json_data:
            continue

        try:
            return json_data[lang_code]["presets"]["presets"]
        except KeyError:
            pass

    return {}
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def get_presets_from_tags(ending: str, tags: TagsType) -> list[dict[str, typing.Any]]:
    """Find iD-editor presets matching the given OSM tags.

    :param ending: geometry kind used to pick variant preset files
        (e.g. "point", "line", "area").
    :param tags: the object's OSM tags.
    :return: list of {"tag_or_key", "name", ...} preset dicts.
    """
    translations = get_preset_translations()

    found: list[dict[str, typing.Any]] = []

    for k, v in tags.items():
        # Special case: sundials are reported with a fixed name.
        if k == "amenity" and v == "clock" and tags.get("display") == "sundial":
            tag_or_key = f"Tag:{k}={v}"
            found.append({"tag_or_key": tag_or_key, "name": "Sundial"})
            continue

        match = find_preset_file(k, v, ending)
        if not match:
            continue

        preset = match["preset"]
        if preset in translations:
            match["name"] = translations[preset]["name"]
        else:
            # Fall back to the untranslated name in the preset file.
            # FIX: use a context manager instead of json.load(open(...)),
            # which leaked the file handle.
            with open(match["filename"]) as f:
                match["name"] = json.load(f)["name"]

        del match["filename"]

        found.append(match)

    return found
|
|
|
|
|
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def find_preset_file(k: str, v: str, ending: str) -> dict[str, str] | None:
    """Find the iD preset file matching an OSM key/value pair.

    Tries the tag-specific preset filenames in priority order, then falls
    back to the key-level preset.  Returns None when nothing matches.
    (Refactored from four copy-pasted existence checks into one loop.)
    """
    app = flask.current_app
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
    preset_dir = os.path.join(ts_dir, "data", "presets")

    # (filename, tag_or_key, preset) in the order the original code checked.
    candidates = [
        (os.path.join(preset_dir, k, v + ".json"), f"Tag:{k}={v}", f"{k}/{v}"),
        (os.path.join(preset_dir, k, f"{v}_{ending}.json"), f"Tag:{k}={v}", f"{k}/{v}"),
        (os.path.join(preset_dir, k, "_" + v + ".json"), f"Tag:{k}={v}", f"{k}/{v}"),
        (os.path.join(preset_dir, k + ".json"), f"Key:{k}", k),
    ]

    for filename, tag_or_key, preset in candidates:
        if os.path.exists(filename):
            return {
                "tag_or_key": tag_or_key,
                "preset": preset,
                "filename": filename,
            }

    return None
|
|
|
|
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def address_from_tags(tags: TagsType) -> str | None:
    """Build a street address string from addr:* OSM tags, or None."""
    parts = ["street", "housenumber"]
    if any("addr:" + part not in tags for part in parts):
        return None

    # Country convention: number before or after the street name.
    if flask.g.street_number_first:
        parts.reverse()
    return " ".join(tags["addr:" + part] for part in parts)
|
|
|
|
|
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def address_node_label(tags: TagsType) -> str | None:
    """Label for an OSM node, based on tags."""
    address = address_from_tags(tags)
    if "name" not in tags:
        return address
    return f"{tags['name']} ({address})"
|
2021-06-25 13:52:42 +01:00
|
|
|
|
|
|
|
|
2021-07-22 16:09:04 +01:00
|
|
|
def get_address_nodes_within_building(osm_id, bbox_list):
    """Tags of address nodes inside the polygon with the given osm_id.

    Finds Point rows carrying both addr:street and addr:housenumber that are
    covered by the polygon and intersect at least one of the given bboxes.

    NOTE(review): referencing polygon.c columns in a Point query relies on
    SQLAlchemy adding an implicit cross join with the polygon table.
    """
    q = model.Point.query.filter(
        polygon.c.osm_id == osm_id,
        or_(*[func.ST_Intersects(bbox, model.Point.way) for bbox in bbox_list]),
        func.ST_Covers(polygon.c.way, model.Point.way),
        model.Point.tags.has_key("addr:street"),
        model.Point.tags.has_key("addr:housenumber"),
    )

    return [node.tags for node in q]
|
|
|
|
|
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def osm_display_name(tags: dict[str, str]) -> str | None:
|
|
|
|
"""Get name to display from OSM tags."""
|
2023-05-13 20:57:58 +01:00
|
|
|
keys = (
|
|
|
|
"bridge:name",
|
|
|
|
"tunnel:name",
|
|
|
|
"lock_name",
|
|
|
|
"name",
|
|
|
|
"addr:housename",
|
|
|
|
"inscription",
|
|
|
|
)
|
2023-05-14 22:04:26 +01:00
|
|
|
return next((tags[key] for key in keys if key in tags), None)
|
2021-07-22 13:47:38 +01:00
|
|
|
|
|
|
|
|
|
|
|
def street_address_in_tags(tags):
    """Check whether tags contain a full street address (number + street)."""
    required = ("addr:housenumber", "addr:street")
    return all(key in tags for key in required)
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-10-15 09:49:08 +01:00
|
|
|
def find_osm_candidates(item, limit=80, max_distance=450, names=None):
    """Find nearby OSM objects that could match the given Wikidata item.

    Searches the point, line and polygon planet tables (points are skipped
    for linear features) within envelopes around the item's locations,
    keeping objects whose tags match the item's expected OSM tags, ordered
    by distance.

    :param item: Wikidata item with at least one location.
    :param limit: maximum number of candidates (falsy disables the limit).
    :param max_distance: search radius passed to make_envelope_around_point
        and used as the distance cut-off.
    :param names: optional names; when given, restricts matches on
        name/old_name.
    :return: list of candidate dicts.
    """
    item_id = item.item_id
    item_is_linear_feature = item.is_linear_feature()
    item_is_street = item.is_street()
    item_names_dict = item.names()
    if item_names_dict:
        # Lower-cased names for case-insensitive name matching below.
        item_names = {n.lower() for n in item_names_dict.keys()}
    else:
        item_names = set()

    # Side effect: sets flask.g.street_number_first for address_from_tags.
    check_is_street_number_first(item.locations[0].get_lat_lon())

    # One search envelope per item location.
    bbox_list = [
        make_envelope_around_point(*loc.get_lat_lon(), max_distance)
        for loc in item.locations
    ]

    # Points/lines have no area; a typed NULL keeps the UNION columns aligned.
    null_area = cast(None, Float)
    dist = column("dist")
    tags = column("tags", postgresql.HSTORE)

    tag_list = get_item_tags(item)
    # tag_filters = get_tag_filter(point.c.tags, tag_list)
    # print(tag_filters)

    # Candidate points: min distance to any of the item's locations.
    s_point = (
        select(
            [
                literal("point").label("t"),
                point.c.osm_id,
                point.c.tags.label("tags"),
                func.min(
                    func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)
                ).label("dist"),
                func.ST_AsText(point.c.way),
                func.ST_AsGeoJSON(point.c.way),
                null_area,
            ]
        )
        .where(
            and_(
                or_(*[func.ST_Intersects(bbox, point.c.way) for bbox in bbox_list]),
                model.ItemLocation.item_id == item_id,
                or_(*get_tag_filter(point.c.tags, tag_list)),
            )
        )
        .group_by(point.c.osm_id, point.c.tags, point.c.way)
    )

    # Candidate lines: segments with the same osm_id are collected together.
    s_line = (
        select(
            [
                literal("line").label("t"),
                line.c.osm_id,
                line.c.tags.label("tags"),
                func.min(
                    func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)
                ).label("dist"),
                func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))),
                func.ST_AsGeoJSON(func.ST_Collect(line.c.way)),
                null_area,
            ]
        )
        .where(
            and_(
                or_(*[func.ST_Intersects(bbox, line.c.way) for bbox in bbox_list]),
                model.ItemLocation.item_id == item_id,
                or_(*get_tag_filter(line.c.tags, tag_list)),
            )
        )
        .group_by(line.c.osm_id, line.c.tags)
    )

    # Candidate polygons; HAVING filters out polygons vastly larger than the
    # first search envelope.
    s_polygon = (
        select(
            [
                literal("polygon").label("t"),
                polygon.c.osm_id,
                polygon.c.tags.label("tags"),
                func.min(
                    func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)
                ).label("dist"),
                func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))),
                func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)),
                func.ST_Area(func.ST_Collect(polygon.c.way)),
            ]
        )
        .where(
            and_(
                or_(*[func.ST_Intersects(bbox, polygon.c.way) for bbox in bbox_list]),
                model.ItemLocation.item_id == item_id,
                or_(*get_tag_filter(polygon.c.tags, tag_list)),
            )
        )
        .group_by(polygon.c.osm_id, polygon.c.tags)
        .having(
            func.ST_Area(func.ST_Collect(polygon.c.way))
            < 20 * func.ST_Area(bbox_list[0])
        )
    )

    # Linear features (e.g. rivers, streets) should not match point objects.
    tables = ([] if item_is_linear_feature else [s_point]) + [s_line, s_polygon]
    s = select([union(*tables).alias()]).where(dist < max_distance).order_by(dist)

    if names:
        s = s.where(or_(tags["name"].in_(names), tags["old_name"].in_(names)))

    if item_is_street:
        s = s.where(tags["highway"] != "bus_stop")
        if not names:
            s = s.where(tags.has_key("name"))

    # Exclude minor amenities that are rarely notable enough to match.
    if "Key:amenity" in tag_list:
        s = s.where(
            and_(
                tags["amenity"] != "bicycle_parking",
                tags["amenity"] != "bicycle_repair_station",
                tags["amenity"] != "atm",
                tags["amenity"] != "recycling",
            )
        )

    if limit:
        s = s.limit(limit)

    # print(s.compile(compile_kwargs={"literal_binds": True}))

    conn = database.session.connection()
    nearby = []
    # NOTE: the loop variable `tags` shadows the column() object defined above.
    for table, src_id, tags, distance, centroid, geojson, area in conn.execute(s):
        osm_id = src_id
        if table == "point":
            osm_type = "node"
        elif osm_id > 0:
            osm_type = "way"
        else:
            # Negative osm_id encodes a relation.
            osm_type = "relation"
            osm_id = -osm_id

        tags.pop("way_area", None)
        name = osm_display_name(tags)
        if not name and street_address_in_tags(tags):
            # Fall back to the street address as a display name.
            name = address_from_tags(tags)

        if table == "polygon" and "building" in tags:
            address_nodes = get_address_nodes_within_building(src_id, bbox_list)
            address_list = [address_node_label(addr) for addr in address_nodes]
        else:
            address_list = []

        shape = "area" if table == "polygon" else table

        # NOTE(review): item_identifier_tags is computed but never used here.
        item_identifier_tags = item.get_identifiers_tags()

        cur = {
            "identifier": f"{osm_type}/{osm_id}",
            "type": osm_type,
            "id": osm_id,
            "distance": distance,
            "name": name,
            "name_match": (name and name.lower() in item_names),
            "tags": tags,
            "geojson": json.loads(geojson),
            "presets": get_presets_from_tags(shape, tags),
            "address_list": address_list,
            # WKT "POINT(x y)" groups reversed to (lat, lon).
            "centroid": list(reversed(re_point.match(centroid).groups())),
        }
        if area is not None:
            cur["area"] = area

        # Named landuse/amenity polygons containing this object.
        part_of = []
        for bbox in bbox_list:
            part_of += [
                i for i in get_part_of(table, src_id, bbox) if i["tags"]["name"] != name
            ]
        if part_of:
            cur["part_of"] = part_of

        if address := address_from_tags(typing.cast(TagsType, tags)):
            cur["address"] = address

        nearby.append(cur)

    return nearby
|
|
|
|
|
2022-04-18 12:26:04 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def get_item(item_id: int) -> model.Item | None:
    """Look up a Wikidata item by numeric ID, fetching it if not stored locally.

    :param item_id: numeric Wikidata item ID (the number after the "Q")
    :return: the item, or None if it can't be retrieved
    """
    existing = model.Item.query.get(item_id)
    if existing:
        return existing
    return get_and_save_item(f"Q{item_id}")
|
2021-06-25 13:52:42 +01:00
|
|
|
|
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
def get_item_street_addresses(item: model.Item) -> list[str]:
    """Hunt for street addresses for the given item.

    Prefer the explicit street-address claim (P6375).  When that is absent,
    build addresses from "located on street" claims (P669) combined with
    their house-number qualifiers (P670), ordered according to the local
    street-number convention cached on ``flask.g``.

    :param item: the Wikidata item to extract addresses from
    :return: list of formatted street addresses (possibly empty)
    """
    street_address = [addr["text"] for addr in item.get_claim("P6375") if addr]
    if street_address or "P669" not in item.claims:
        return street_address

    assert isinstance(item.claims, dict)
    claims: wikidata.Claims = item.claims
    for claim in claims["P669"]:
        qualifiers = claim.get("qualifiers")
        if not qualifiers or "P670" not in qualifiers:
            continue
        # Fix: dropped a dead `number = qualifiers["P670"][0]...` assignment
        # here — it was unconditionally overwritten by the loop below.

        street_item = get_item(claim["mainsnak"]["datavalue"]["value"]["numeric-id"])
        assert street_item
        street = street_item.label()
        for q in qualifiers["P670"]:
            number = q["datavalue"]["value"]
            # Address format varies by locale: "123 Main St" vs "Hauptstraße 123".
            address = (
                f"{number} {street}"
                if flask.g.street_number_first
                else f"{street} {number}"
            )
            street_address.append(address)

    return street_address
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-07-22 13:47:38 +01:00
|
|
|
def check_is_street_number_first(latlng) -> None:
    """Cache on ``flask.g`` whether addresses at this location put the number first.

    :param latlng: (latitude, longitude) pair, unpacked into
        ``is_street_number_first``.
    """
    flask.g.street_number_first = is_street_number_first(*latlng)
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-07-03 12:39:37 +01:00
|
|
|
def item_detail(item):
    """Build a JSON-serializable dict describing a Wikidata item.

    Collects label, description, locations, images, street addresses,
    heritage designations, instance-of (ISA) info, Wikipedia/Commons links
    and external identifiers.

    :param item: the Wikidata item to describe
    :return: dict of item details for the front end
    """
    # ISA types the matcher can't currently handle as OSM relations.
    unsupported_relation_types = {
        "Q194356",  # wind farm
        "Q2175765",  # tram stop
    }

    locations = [list(i.get_lat_lon()) for i in item.locations]
    if not hasattr(flask.g, "street_number_first"):
        flask.g.street_number_first = is_street_number_first(*locations[0])

    image_filenames = item.get_claim("P18")

    street_address = get_item_street_addresses(item)

    heritage_designation = []
    for v in item.get_claim("P1435"):
        if not v:
            print("heritage designation missing:", item.qid)
            continue
        heritage_designation_item = get_item(v["numeric-id"])
        heritage_designation.append(
            {
                "qid": v["id"],
                "label": heritage_designation_item.label(),
            }
        )

    isa_items = [get_item(isa["numeric-id"]) for isa in item.get_isa()]
    # get_item can return None; guard so `.qid` isn't read off a missing item
    # (the isa_list comprehension below already guards the same way).
    isa_lookup = {isa.qid: isa for isa in isa_items if isa}

    wikipedia_links = [
        {"lang": site[:-4], "title": link["title"]}
        for site, link in sorted(item.sitelinks.items())
        if site.endswith("wiki") and len(site) < 8
    ]

    d = {
        "qid": item.qid,
        "label": item.label(),
        "description": item.description(),
        "markers": locations,
        "image_list": image_filenames,
        "street_address": street_address,
        "isa_list": [
            {"qid": isa.qid, "label": isa.label()} for isa in isa_items if isa
        ],
        "closed": item.closed(),
        "inception": item.time_claim("P571"),
        "p1619": item.time_claim("P1619"),
        "p576": item.time_claim("P576"),
        "heritage_designation": heritage_designation,
        "wikipedia": wikipedia_links,
        "identifiers": item.get_identifiers(),
    }

    if aliases := item.get_aliases():
        d["aliases"] = aliases

    if "commonswiki" in item.sitelinks:
        d["commons"] = item.sitelinks["commonswiki"]["title"]

    unsupported = isa_lookup.keys() & unsupported_relation_types
    if unsupported:
        # Bug fix: this previously filtered on `isa_lookup`, which contains
        # every ISA QID, so the whole isa_list was flagged as unsupported.
        # Only the QIDs in the `unsupported` intersection belong here.
        d["unsupported_relation_types"] = [
            isa for isa in d["isa_list"] if isa["qid"] in unsupported
        ]

    return d
|
2021-06-25 13:52:42 +01:00
|
|
|
|
2021-07-03 12:39:37 +01:00
|
|
|
|
|
|
|
def get_markers(all_items):
    """Return item detail dicts for every truthy item in *all_items*."""
    markers = []
    for current in all_items:
        if not current:
            continue
        markers.append(item_detail(current))
    return markers
|
2021-06-25 13:52:42 +01:00
|
|
|
|
|
|
|
|
2021-07-30 15:02:41 +01:00
|
|
|
def wikidata_items(bounds, isa_filter=None):
    """List Wikidata items inside *bounds*, plus a per-ISA item count.

    :param bounds: bounding box to search within
    :param isa_filter: optional ISA filter to restrict the query
    :return: dict with "items" (marker details) and "isa_count"
    """
    check_is_street_number_first(get_bbox_centroid(bounds))
    q = get_items_in_bbox(bounds)

    if isa_filter:
        q = add_isa_filter(q, isa_filter)

    db_items = q.all()
    markers = get_markers(db_items)

    counts = get_isa_count(db_items)
    isa_ids = [qid[1:] for qid, _ in counts]
    found = model.Item.query.filter(model.Item.item_id.in_(isa_ids))
    isa_lookup = {isa.qid: isa for isa in found}

    isa_count = []
    for qid, count in counts:
        isa_item = isa_lookup.get(qid) or get_and_save_item(qid)
        isa_count.append(
            {
                "qid": qid,
                "count": count,
                "label": isa_item.label() if isa_item else "[missing]",
            }
        )

    return {"items": markers, "isa_count": isa_count}
|
2021-06-25 13:52:42 +01:00
|
|
|
|
|
|
|
|
|
|
|
def missing_wikidata_items(qids, lat, lon):
    """Load the given QIDs (fetching any not stored locally) as markers.

    Also builds per-ISA counts across the loaded items.  *lat*/*lon* set the
    street-number ordering convention for address formatting.

    :param qids: iterable of QID strings ("Q…")
    :param lat: latitude used for the address convention lookup
    :param lon: longitude used for the address convention lookup
    :return: dict with "items" (marker details) and "isa_count"
    """
    flask.g.street_number_first = is_street_number_first(lat, lon)

    db_items = []
    for qid in qids:
        found = model.Item.query.get(qid[1:])
        db_items.append(found if found else get_and_save_item(qid))

    markers = get_markers(db_items)
    counts = get_isa_count(db_items)
    isa_ids = [qid[1:] for qid, _ in counts]
    isa_lookup = {
        isa.qid: isa
        for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
    }

    isa_count = []
    for qid, count in counts:
        isa_item = isa_lookup.get(qid) or get_and_save_item(qid)
        isa_count.append(
            {
                "qid": qid,
                "count": count,
                "label": isa_item.label() if isa_item else "[missing]",
            }
        )

    return {"items": markers, "isa_count": isa_count}
|
2021-07-30 15:02:41 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2021-07-30 15:02:41 +01:00
|
|
|
def isa_incremental_search(search_terms):
    """Search for items whose short English label matches *search_terms*.

    Restricted to items carrying an OSM tag/key claim (P1282) with labels
    shorter than 20 characters, to keep results to generic type items.

    :param search_terms: substring to match against English labels
    :return: list of {"qid", "label"} dicts
    """
    en_label = func.jsonb_extract_path_text(model.Item.labels, "en", "value")
    q = model.Item.query.filter(
        model.Item.claims.has_key("P1282"),
        en_label.ilike(f"%{search_terms}%"),
        func.length(en_label) < 20,
    )

    # print(q.statement.compile(compile_kwargs={"literal_binds": True}))

    return [{"qid": hit.qid, "label": hit.label()} for hit in q]
|
2022-04-18 12:24:16 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
|
2023-05-14 22:04:26 +01:00
|
|
|
class PlaceItems(typing.TypedDict):
    """Count and list of items located within a place."""

    # Total number of matching items.
    count: int
    # Fix: previously annotated as list[model.Item], but get_place_items
    # fills this with plain dicts of selected item attributes, never
    # model.Item instances.
    items: list[dict[str, typing.Any]]
|
|
|
|
|
|
|
|
|
|
|
|
def get_place_items(osm_type: str, osm_id: int) -> PlaceItems:
    """Return place items for given osm_type and osm_id.

    :param osm_type: "way" or "relation" (fix: was annotated ``int``, but
        the value is used as a string key below)
    :param osm_id: OSM identifier of the place polygon
    :return: count of matching items plus a dict of attributes per item
    """
    # The planet polygon table stores relations with negated IDs.
    src_id = osm_id * {"way": 1, "relation": -1}[osm_type]

    q = (
        model.Item.query.join(model.ItemLocation)
        .join(
            model.Polygon,
            func.ST_Covers(model.Polygon.way, model.ItemLocation.location),
        )
        .filter(model.Polygon.src_id == src_id)
    )
    # sql = q.statement.compile(compile_kwargs={"literal_binds": True})

    item_count = q.count()
    # Hoisted out of the loop: the key list is invariant per call.
    keys = ["item_id", "labels", "descriptions", "aliases", "sitelinks", "claims"]
    items = [{key: getattr(item, key) for key in keys} for item in q]

    return {"count": item_count, "items": items}
|