Split API logic out of web_view.py
This commit is contained in:
		
							parent
							
								
									e25aaa5ec6
								
							
						
					
					
						commit
						34b2d0fa05
					
				
							
								
								
									
										573
									
								
								matcher/api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										573
									
								
								matcher/api.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,573 @@
 | 
			
		|||
from sqlalchemy import func, or_
 | 
			
		||||
from sqlalchemy.orm import selectinload
 | 
			
		||||
from matcher import model, database, wikidata_api, wikidata
 | 
			
		||||
from matcher.data import extra_keys
 | 
			
		||||
from collections import Counter
 | 
			
		||||
from flask import g, current_app
 | 
			
		||||
import re
 | 
			
		||||
import os.path
 | 
			
		||||
import json
 | 
			
		||||
 | 
			
		||||
srid = 4326
 | 
			
		||||
re_point = re.compile(r'^POINT\((.+) (.+)\)$')
 | 
			
		||||
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_country_iso3166_1(lat, lon):
 | 
			
		||||
    point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
 | 
			
		||||
    alpha2_codes = set()
 | 
			
		||||
    q = model.Polygon.query.filter(func.ST_Covers(model.Polygon.way, point),
 | 
			
		||||
                                   model.Polygon.admin_level == "2")
 | 
			
		||||
    for country in q:
 | 
			
		||||
        alpha2 = country.tags.get("ISO3166-1")
 | 
			
		||||
        if not alpha2:
 | 
			
		||||
            continue
 | 
			
		||||
        alpha2_codes.add(alpha2)
 | 
			
		||||
 | 
			
		||||
    g.alpha2_codes = alpha2_codes
 | 
			
		||||
    return alpha2_codes
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def is_street_number_first(lat, lon):
 | 
			
		||||
    if lat is None or lon is None:
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    alpha2 = get_country_iso3166_1(lat, lon)
 | 
			
		||||
    alpha2_number_first = {'GB', 'IE', 'US', 'MX', 'CA', 'FR', 'AU', 'NZ', 'ZA'}
 | 
			
		||||
 | 
			
		||||
    return bool(alpha2_number_first & alpha2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def make_envelope(bounds):
 | 
			
		||||
    return func.ST_MakeEnvelope(*bounds, srid)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_bbox_centroid(bbox):
 | 
			
		||||
    bbox = make_envelope(bbox)
 | 
			
		||||
    centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar()
 | 
			
		||||
    return re_point.match(centroid).groups()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_part_of(thing, bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
    ewkt = thing.as_EWKT
 | 
			
		||||
    q = model.Polygon.query.filter(
 | 
			
		||||
        func.ST_Intersects(db_bbox, model.Polygon.way),
 | 
			
		||||
        func.ST_Covers(model.Polygon.way, func.ST_GeomFromEWKT(ewkt)),
 | 
			
		||||
        or_(
 | 
			
		||||
            model.Polygon.tags.has_key("landuse"),
 | 
			
		||||
            model.Polygon.tags.has_key("amenity"),
 | 
			
		||||
        ),
 | 
			
		||||
        model.Polygon.tags.has_key("name"),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return [polygon.tags for polygon in q]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_and_save_item(qid):
 | 
			
		||||
    entity = wikidata_api.get_entity(qid)
 | 
			
		||||
    entity_qid = entity["id"]
 | 
			
		||||
    if entity_qid != qid:
 | 
			
		||||
        print(f'redirect {qid} -> {entity_qid}')
 | 
			
		||||
        item = model.Item.query.get(entity_qid[1:])
 | 
			
		||||
        return item
 | 
			
		||||
 | 
			
		||||
    coords = wikidata.get_entity_coords(entity["claims"])
 | 
			
		||||
 | 
			
		||||
    item_id = int(qid[1:])
 | 
			
		||||
    obj = {k: v for k, v in entity.items() if k in entity_keys}
 | 
			
		||||
    try:
 | 
			
		||||
        item = model.Item(item_id=item_id, **obj)
 | 
			
		||||
    except TypeError:
 | 
			
		||||
        print(qid)
 | 
			
		||||
        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
 | 
			
		||||
        print(entity.keys())
 | 
			
		||||
        raise
 | 
			
		||||
    item.locations = model.location_objects(coords)
 | 
			
		||||
    database.session.add(item)
 | 
			
		||||
    database.session.commit()
 | 
			
		||||
 | 
			
		||||
    return item
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_isa_count(items):
 | 
			
		||||
    isa_count = Counter()
 | 
			
		||||
    for item in items:
 | 
			
		||||
        if not item:
 | 
			
		||||
            continue
 | 
			
		||||
        isa_list = item.get_claim("P31")
 | 
			
		||||
        for isa in isa_list:
 | 
			
		||||
            if not isa:
 | 
			
		||||
                print("missing IsA:", item.qid)
 | 
			
		||||
                continue
 | 
			
		||||
            isa_count[isa["id"]] += 1
 | 
			
		||||
 | 
			
		||||
    return isa_count.most_common()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_items_in_bbox(bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
 | 
			
		||||
        .options(selectinload(model.Item.locations))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return q
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_osm_with_wikidata_tag(bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    tagged = []
 | 
			
		||||
 | 
			
		||||
    seen = set()
 | 
			
		||||
    for cls in (model.Point, model.Polygon, model.Line):
 | 
			
		||||
        q = cls.query.filter(
 | 
			
		||||
            cls.tags.has_key("wikidata"),
 | 
			
		||||
            func.ST_Intersects(db_bbox, cls.way),
 | 
			
		||||
            func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
 | 
			
		||||
        )
 | 
			
		||||
        for osm in q:
 | 
			
		||||
            if osm.identifier in seen:
 | 
			
		||||
                continue
 | 
			
		||||
            seen.add(osm.identifier)
 | 
			
		||||
            name = osm.name or osm.tags.get("addr:housename") or "[no label]"
 | 
			
		||||
 | 
			
		||||
            tagged.append(
 | 
			
		||||
                {
 | 
			
		||||
                    "identifier": osm.identifier,
 | 
			
		||||
                    "id": osm.id,
 | 
			
		||||
                    "type": osm.type,
 | 
			
		||||
                    "url": osm.osm_url,
 | 
			
		||||
                    "geojson": osm.geojson(),
 | 
			
		||||
                    "centroid": list(osm.get_centroid()),
 | 
			
		||||
                    "name": name,
 | 
			
		||||
                    "wikidata": osm.tags["wikidata"],
 | 
			
		||||
                }
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    return tagged
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_items(item_ids):
 | 
			
		||||
    items = []
 | 
			
		||||
    for item_id in item_ids:
 | 
			
		||||
        item = model.Item.query.get(item_id)
 | 
			
		||||
        if not item:
 | 
			
		||||
            if not get_and_save_item(f"Q{item_id}"):
 | 
			
		||||
                continue
 | 
			
		||||
            item = model.Item.query.get(item_id)
 | 
			
		||||
        items.append(item)
 | 
			
		||||
 | 
			
		||||
    return items
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_item_tags(item):
 | 
			
		||||
    skip_tags = {"Key:addr:street"}
 | 
			
		||||
 | 
			
		||||
    isa_items = []
 | 
			
		||||
    isa_list = [v["numeric-id"] for v in item.get_claim("P31")]
 | 
			
		||||
    isa_items = get_items(isa_list)
 | 
			
		||||
 | 
			
		||||
    osm_list = set()
 | 
			
		||||
 | 
			
		||||
    skip_isa = {row[0] for row in database.session.query(model.SkipIsA.item_id)}
 | 
			
		||||
 | 
			
		||||
    seen = set(isa_list) | skip_isa
 | 
			
		||||
    while isa_items:
 | 
			
		||||
        isa = isa_items.pop()
 | 
			
		||||
        if not isa:
 | 
			
		||||
            continue
 | 
			
		||||
        osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]
 | 
			
		||||
        if isa.qid in extra_keys:
 | 
			
		||||
            osm += extra_keys[isa.qid]
 | 
			
		||||
 | 
			
		||||
        osm_list.update(osm)
 | 
			
		||||
 | 
			
		||||
        subclass_of = [v["numeric-id"] for v in (isa.get_claim("P279") or []) if v]
 | 
			
		||||
        isa_list = [isa_id for isa_id in subclass_of if isa_id not in seen]
 | 
			
		||||
        seen.update(isa_list)
 | 
			
		||||
        isa_items += get_items(isa_list)
 | 
			
		||||
    return sorted(osm_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def wikidata_items_count(bounds):
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(make_envelope(bounds), model.ItemLocation.location))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return q.count()
 | 
			
		||||
 | 
			
		||||
def wikidata_isa_counts(bounds):
 | 
			
		||||
 | 
			
		||||
    db_bbox = make_envelope(bounds)
 | 
			
		||||
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    db_items = q.all()
 | 
			
		||||
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
 | 
			
		||||
    return isa_count
 | 
			
		||||
 | 
			
		||||
def get_tag_filter(cls, tag_list):
 | 
			
		||||
    tag_filter = []
 | 
			
		||||
    for tag_or_key in tag_list:
 | 
			
		||||
        if tag_or_key.startswith("Key:"):
 | 
			
		||||
            tag_filter.append(cls.tags.has_key(tag_or_key[4:]))
 | 
			
		||||
        if tag_or_key.startswith("Tag:"):
 | 
			
		||||
            k, _, v = tag_or_key.partition("=")
 | 
			
		||||
            tag_filter.append(cls.tags[k[4:]] == v)
 | 
			
		||||
 | 
			
		||||
    return tag_filter
 | 
			
		||||
 | 
			
		||||
def get_nearby(bbox, item, max_distance=300):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    osm_objects = {}
 | 
			
		||||
    distances = {}
 | 
			
		||||
    tag_list = get_item_tags(item)
 | 
			
		||||
    if not tag_list:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    item_is_street = item.is_street()
 | 
			
		||||
 | 
			
		||||
    for loc in item.locations:
 | 
			
		||||
        lat, lon = loc.get_lat_lon()
 | 
			
		||||
        point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)
 | 
			
		||||
        for cls in model.Point, model.Line, model.Polygon:
 | 
			
		||||
            if item_is_street and cls == model.Point:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            tag_filter = get_tag_filter(cls, tag_list)
 | 
			
		||||
            dist = func.ST_Distance(point, cls.way)
 | 
			
		||||
 | 
			
		||||
            q = (cls.query.add_columns(dist.label('distance'))
 | 
			
		||||
                          .filter(
 | 
			
		||||
                              func.ST_Intersects(db_bbox, cls.way),
 | 
			
		||||
                              func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
 | 
			
		||||
                              or_(*tag_filter))
 | 
			
		||||
                          .order_by(point.distance_centroid(cls.way)))
 | 
			
		||||
 | 
			
		||||
            if item_is_street:
 | 
			
		||||
                q = q.filter(cls.tags.has_key("name"),
 | 
			
		||||
                             cls.tags["highway"] != 'bus_stop')
 | 
			
		||||
 | 
			
		||||
            if "Key:amenity" in tag_list:
 | 
			
		||||
                q = q.filter(cls.tags["amenity"] != "bicycle_parking",
 | 
			
		||||
                             cls.tags["amenity"] != "bicycle_repair_station",
 | 
			
		||||
                             cls.tags["amenity"] != "atm",
 | 
			
		||||
                             cls.tags["amenity"] != "recycling")
 | 
			
		||||
 | 
			
		||||
            q = q.limit(40)
 | 
			
		||||
 | 
			
		||||
            # print(q.statement.compile(compile_kwargs={"literal_binds": True}))
 | 
			
		||||
 | 
			
		||||
            for i, dist in q:
 | 
			
		||||
                if dist > max_distance:
 | 
			
		||||
                    continue
 | 
			
		||||
                osm_objects.setdefault(i.identifier, i)
 | 
			
		||||
                if i.identifier not in distances or dist < distances[i.identifier]:
 | 
			
		||||
                    distances[i.identifier] = dist
 | 
			
		||||
 | 
			
		||||
    nearby = [(osm_objects[identifier], dist)
 | 
			
		||||
              for identifier, dist
 | 
			
		||||
              in sorted(distances.items(), key=lambda i:i[1])]
 | 
			
		||||
 | 
			
		||||
    return nearby[:40]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_preset_translations():
 | 
			
		||||
    app = current_app
 | 
			
		||||
    country_language = {
 | 
			
		||||
        'AU': 'en-AU',  # Australia
 | 
			
		||||
        'GB': 'en-GB',  # United Kingdom
 | 
			
		||||
        'IE': 'en-GB',  # Ireland
 | 
			
		||||
        'IN': 'en-IN',  # India
 | 
			
		||||
        'NZ': 'en-NZ',  # New Zealand
 | 
			
		||||
    }
 | 
			
		||||
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
 | 
			
		||||
    translation_dir = os.path.join(ts_dir, "dist", "translations")
 | 
			
		||||
 | 
			
		||||
    for code in g.alpha2_codes:
 | 
			
		||||
        if code not in country_language:
 | 
			
		||||
            continue
 | 
			
		||||
        filename = os.path.join(translation_dir, country_language[code] + ".json")
 | 
			
		||||
        return json.load(open(filename))["en-GB"]["presets"]["presets"]
 | 
			
		||||
 | 
			
		||||
    return {}
 | 
			
		||||
 | 
			
		||||
def get_presets_from_tags(osm):
 | 
			
		||||
    translations = get_preset_translations()
 | 
			
		||||
 | 
			
		||||
    found = []
 | 
			
		||||
    endings = {model.Point: "point", model.Line: "line", model.Polygon: "area"}
 | 
			
		||||
    ending = endings[type(osm)]
 | 
			
		||||
 | 
			
		||||
    for k, v in osm.tags.items():
 | 
			
		||||
        if k == 'amenity' and v == 'clock' and osm.tags.get('display') == 'sundial':
 | 
			
		||||
            tag_or_key = f"Tag:{k}={v}"
 | 
			
		||||
            found.append({"tag_or_key": tag_or_key, "name": "Sundial"})
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        match = find_preset_file(k, v, ending)
 | 
			
		||||
        if not match:
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        preset = match["preset"]
 | 
			
		||||
        if preset in translations:
 | 
			
		||||
            match["name"] = translations[preset]["name"]
 | 
			
		||||
        else:
 | 
			
		||||
            match["name"] = json.load(open(match["filename"]))["name"]
 | 
			
		||||
 | 
			
		||||
        del match["filename"]
 | 
			
		||||
 | 
			
		||||
        found.append(match)
 | 
			
		||||
 | 
			
		||||
    return found
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find_preset_file(k, v, ending):
 | 
			
		||||
    app = current_app
 | 
			
		||||
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
 | 
			
		||||
    preset_dir = os.path.join(ts_dir, "data", "presets")
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, v + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, f"{v}_{ending}.json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, "_" + v + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Key:{k}",
 | 
			
		||||
            "preset": k,
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def address_from_tags(tags):
 | 
			
		||||
    keys = ["street", "housenumber"]
 | 
			
		||||
    if not all("addr:" + k in tags for k in keys):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    if g.street_number_first:
 | 
			
		||||
        keys.reverse()
 | 
			
		||||
    return " ".join(tags["addr:" + k] for k in keys)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_address_nodes_within_building(building, bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
    ewkt = building.as_EWKT
 | 
			
		||||
    q = model.Point.query.filter(
 | 
			
		||||
        func.ST_Intersects(db_bbox, model.Point.way),
 | 
			
		||||
        func.ST_Covers(func.ST_GeomFromEWKT(ewkt), model.Point.way),
 | 
			
		||||
        model.Point.tags.has_key("addr:street"),
 | 
			
		||||
        model.Point.tags.has_key("addr:housenumber"),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return [node.tags for node in q]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find_osm_candidates(item, bounds):
 | 
			
		||||
    g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds))
 | 
			
		||||
 | 
			
		||||
    nearby = []
 | 
			
		||||
    for osm, dist in get_nearby(bounds, item):
 | 
			
		||||
        tags = osm.tags
 | 
			
		||||
        tags.pop("way_area", None)
 | 
			
		||||
        name = osm.name or tags.get("addr:housename") or tags.get("inscription")
 | 
			
		||||
        if not name and "addr:housenumber" in tags and "addr:street" in tags:
 | 
			
		||||
            name = address_from_tags(tags)
 | 
			
		||||
 | 
			
		||||
        if isinstance(osm, model.Polygon) and "building" in osm.tags:
 | 
			
		||||
            address_nodes = get_address_nodes_within_building(osm, bounds)
 | 
			
		||||
            address_list = [address_from_tags(addr) for addr in address_nodes]
 | 
			
		||||
        else:
 | 
			
		||||
            address_list = []
 | 
			
		||||
        cur = {
 | 
			
		||||
            "identifier": osm.identifier,
 | 
			
		||||
            "distance": dist,
 | 
			
		||||
            "name": name,
 | 
			
		||||
            "tags": tags,
 | 
			
		||||
            "geojson": osm.geojson(),
 | 
			
		||||
            "presets": get_presets_from_tags(osm),
 | 
			
		||||
            "address_list": address_list,
 | 
			
		||||
        }
 | 
			
		||||
        if hasattr(osm, 'area'):
 | 
			
		||||
            cur["area"] = osm.area
 | 
			
		||||
 | 
			
		||||
        if address := address_from_tags(tags):
 | 
			
		||||
            cur["address"] = address
 | 
			
		||||
 | 
			
		||||
        part_of = [i["name"] for i in get_part_of(osm, bounds) if i["name"] != name]
 | 
			
		||||
        if part_of:
 | 
			
		||||
            cur["part_of"] = part_of
 | 
			
		||||
 | 
			
		||||
        nearby.append(cur)
 | 
			
		||||
 | 
			
		||||
    return nearby
 | 
			
		||||
 | 
			
		||||
def get_item(item_id):
 | 
			
		||||
    item = model.Item.query.get(item_id)
 | 
			
		||||
    if item:
 | 
			
		||||
        return item
 | 
			
		||||
    item = get_and_save_item(f"Q{item_id}")
 | 
			
		||||
    database.session.add(item)
 | 
			
		||||
    database.session.commit()
 | 
			
		||||
    return item
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_item_street_addresses(item):
 | 
			
		||||
    street_address = [addr["text"] for addr in item.get_claim("P6375")]
 | 
			
		||||
    if street_address or "P669" not in item.claims:
 | 
			
		||||
        return street_address
 | 
			
		||||
 | 
			
		||||
    for claim in item.claims["P669"]:
 | 
			
		||||
        qualifiers = claim.get("qualifiers")
 | 
			
		||||
        if not qualifiers or 'P670' not in qualifiers:
 | 
			
		||||
            continue
 | 
			
		||||
        number = qualifiers["P670"][0]["datavalue"]["value"]
 | 
			
		||||
 | 
			
		||||
        street_item = get_item(claim["mainsnak"]["datavalue"]["value"]["numeric-id"])
 | 
			
		||||
        street = street_item.label()
 | 
			
		||||
        for q in qualifiers["P670"]:
 | 
			
		||||
            number = q["datavalue"]["value"]
 | 
			
		||||
            address = (f"{number} {street}"
 | 
			
		||||
                       if g.street_number_first
 | 
			
		||||
                       else f"{street} {number}")
 | 
			
		||||
            street_address.append(address)
 | 
			
		||||
 | 
			
		||||
    return street_address
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_markers(all_items):
 | 
			
		||||
    items = []
 | 
			
		||||
    for item in all_items:
 | 
			
		||||
        if not item:
 | 
			
		||||
            continue
 | 
			
		||||
        locations = [list(i.get_lat_lon()) for i in item.locations]
 | 
			
		||||
        image_filenames = item.get_claim("P18")
 | 
			
		||||
 | 
			
		||||
        street_address = get_item_street_addresses(item)
 | 
			
		||||
 | 
			
		||||
        d = {
 | 
			
		||||
            "qid": item.qid,
 | 
			
		||||
            "label": item.label(),
 | 
			
		||||
            "description": item.description(),
 | 
			
		||||
            "markers": locations,
 | 
			
		||||
            "image_list": image_filenames,
 | 
			
		||||
            "street_address": street_address,
 | 
			
		||||
            "isa_list": [v["id"] for v in item.get_claim("P31") if v],
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if aliases := item.get_aliases():
 | 
			
		||||
            d["aliases"] = aliases
 | 
			
		||||
 | 
			
		||||
        items.append(d)
 | 
			
		||||
 | 
			
		||||
    return items
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def wikidata_items(bounds):
 | 
			
		||||
    g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds))
 | 
			
		||||
    q = get_items_in_bbox(bounds)
 | 
			
		||||
    db_items = q.all()
 | 
			
		||||
    items = get_markers(db_items)
 | 
			
		||||
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
 | 
			
		||||
    return {'items': items, 'isa_count': isa_count}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def missing_wikidata_items(qids, lat, lon):
 | 
			
		||||
    g.street_number_first = is_street_number_first(lat, lon)
 | 
			
		||||
 | 
			
		||||
    db_items = []
 | 
			
		||||
    for qid in qids:
 | 
			
		||||
        item = model.Item.query.get(qid[1:])
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
        db_items.append(item)
 | 
			
		||||
    items = get_markers(db_items)
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
 | 
			
		||||
    return dict(items=items, isa_count=isa_count)
 | 
			
		||||
							
								
								
									
										203
									
								
								matcher/data.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										203
									
								
								matcher/data.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,203 @@
 | 
			
		|||
edu = ['Tag:amenity=college', 'Tag:amenity=university', 'Tag:amenity=school',
 | 
			
		||||
        'Tag:office=educational_institution', 'Tag:building=university']
 | 
			
		||||
tall = ['Key:height', 'Key:building:levels']
 | 
			
		||||
 | 
			
		||||
extra_keys = {
 | 
			
		||||
    'Q3914': ['Tag:building=school',
 | 
			
		||||
              'Tag:building=college',
 | 
			
		||||
              'Tag:amenity=college',
 | 
			
		||||
              'Tag:office=educational_institution'],  # school
 | 
			
		||||
    'Q322563': edu,                             # vocational school
 | 
			
		||||
    'Q383092': edu,                             # film school
 | 
			
		||||
    'Q1021290': edu,                            # music school
 | 
			
		||||
    'Q1244442': edu,                            # school building
 | 
			
		||||
    'Q1469420': edu,                            # adult education centre
 | 
			
		||||
    'Q2143781': edu,                            # drama school
 | 
			
		||||
    'Q2385804': edu,                            # educational institution
 | 
			
		||||
    'Q5167149': edu,                            # cooking school
 | 
			
		||||
    'Q7894959': edu,                            # University Technical College
 | 
			
		||||
    'Q47530379': edu,                           # agricultural college
 | 
			
		||||
    'Q38723': edu,                              # higher education institution
 | 
			
		||||
    'Q11303': tall,                             # skyscraper
 | 
			
		||||
    'Q18142': tall,                             # high-rise building
 | 
			
		||||
    'Q33673393': tall,                          # multi-storey building
 | 
			
		||||
    'Q641226': ['Tag:leisure=stadium'],         # arena
 | 
			
		||||
    'Q2301048': ['Tag:aeroway=helipad'],        # special airfield
 | 
			
		||||
    'Q622425': ['Tag:amenity=pub',
 | 
			
		||||
                'Tag:amenity=music_venue'],     # nightclub
 | 
			
		||||
    'Q187456': ['Tag:amenity=pub',
 | 
			
		||||
                'Tag:amenity=nightclub'],       # bar
 | 
			
		||||
    'Q16917': ['Tag:amenity=clinic',
 | 
			
		||||
               'Tag:building=clinic'],          # hospital
 | 
			
		||||
    'Q330284': ['Tag:amenity=market'],          # marketplace
 | 
			
		||||
    'Q5307737': ['Tag:amenity=pub',
 | 
			
		||||
                 'Tag:amenity=bar'],            # drinking establishment
 | 
			
		||||
    'Q875157': ['Tag:tourism=resort'],          # resort
 | 
			
		||||
    'Q174782': ['Tag:leisure=park',
 | 
			
		||||
                'Tag:highway=pedestrian',
 | 
			
		||||
                'Tag:foot=yes',
 | 
			
		||||
                'Tag:area=yes',
 | 
			
		||||
                'Tag:amenity=market',
 | 
			
		||||
                'Tag:leisure=common'],          # square
 | 
			
		||||
    'Q34627': ['Tag:religion=jewish'],          # synagogue
 | 
			
		||||
    'Q16970': ['Tag:religion=christian'],       # church
 | 
			
		||||
    'Q32815': ['Tag:religion=islam'],           # mosque
 | 
			
		||||
    'Q811979': ['Key:building'],                # architectural structure
 | 
			
		||||
    'Q11691': ['Key:building'],                 # stock exchange
 | 
			
		||||
    'Q1329623': ['Tag:amenity=arts_centre',     # cultural centre
 | 
			
		||||
                 'Tag:amenity=community_centre'],
 | 
			
		||||
    'Q856584': ['Tag:amenity=library'],         # library building
 | 
			
		||||
    'Q11315': ['Tag:landuse=retail'],           # shopping mall
 | 
			
		||||
    'Q39658032': ['Tag:landuse=retail'],        # open air shopping centre
 | 
			
		||||
    'Q277760': ['Tag:historic=folly',
 | 
			
		||||
                'Tag:historic=city_gate'],      # gatehouse
 | 
			
		||||
    'Q180174': ['Tag:historic=folly'],          # folly
 | 
			
		||||
    'Q15243209': ['Tag:leisure=park',
 | 
			
		||||
                  'Tag:boundary=national_park'],   # historic district
 | 
			
		||||
    'Q3010369': ['Tag:historic=monument'],      # opening ceremony
 | 
			
		||||
    'Q123705': ['Tag:place=suburb'],            # neighbourhood
 | 
			
		||||
    'Q256020': ['Tag:amenity=pub'],             # inn
 | 
			
		||||
    'Q41253': ['Tag:amenity=theatre'],          # movie theater
 | 
			
		||||
    'Q17350442': ['Tag:amenity=theatre'],       # venue
 | 
			
		||||
    'Q156362': ['Tag:amenity=winery'],          # winery
 | 
			
		||||
    'Q14092': ['Tag:leisure=fitness_centre',
 | 
			
		||||
               'Tag:leisure=sports_centre'],    # gymnasium
 | 
			
		||||
    'Q27686': ['Tag:tourism=hostel',            # hotel
 | 
			
		||||
               'Tag:tourism=guest_house',
 | 
			
		||||
               'Tag:building=hotel'],
 | 
			
		||||
    'Q11707': ['Tag:amenity=cafe', 'Tag:amenity=fast_food',
 | 
			
		||||
               'Tag:shop=deli', 'Tag:shop=bakery',
 | 
			
		||||
               'Key:cuisine'],                  # restaurant
 | 
			
		||||
    'Q2360219': ['Tag:amenity=embassy'],        # permanent mission
 | 
			
		||||
    'Q27995042': ['Tag:protection_title=Wilderness Area'],  # wilderness area
 | 
			
		||||
    'Q838948': ['Tag:historic=memorial',
 | 
			
		||||
                'Tag:historic=monument'],       # work of art
 | 
			
		||||
    'Q23413': ['Tag:place=locality'],           # castle
 | 
			
		||||
    'Q28045079': ['Tag:historic=archaeological_site',
 | 
			
		||||
                  'Tag:site_type=fortification',
 | 
			
		||||
                  'Tag:embankment=yes'],        # contour fort
 | 
			
		||||
    'Q744099': ['Tag:historic=archaeological_site',
 | 
			
		||||
                'Tag:site_type=fortification',
 | 
			
		||||
                'Tag:embankment=yes'],          # hillfort
 | 
			
		||||
    'Q515': ['Tag:border_type=city'],           # city
 | 
			
		||||
    'Q1254933': ['Tag:amenity=university'],     # astronomical observatory
 | 
			
		||||
    'Q1976594': ['Tag:landuse=industrial'],     # science park
 | 
			
		||||
    'Q190928': ['Tag:landuse=industrial'],      # shipyard
 | 
			
		||||
    'Q4663385': ['Tag:historic=train_station',  # former railway station
 | 
			
		||||
                 'Tag:railway=historic_station'],
 | 
			
		||||
    'Q11997323': ['Tag:emergency=lifeboat_station'],  # lifeboat station
 | 
			
		||||
    'Q16884952': ['Tag:castle_type=stately',
 | 
			
		||||
                  'Tag:building=country_house'],  # country house
 | 
			
		||||
    'Q1343246': ['Tag:castle_type=stately',
 | 
			
		||||
                 'Tag:building=country_house'],   # English country house
 | 
			
		||||
    'Q4919932': ['Tag:castle_type=stately'],    # stately home
 | 
			
		||||
    'Q1763828': ['Tag:amenity=community_centre'],  # multi-purpose hall
 | 
			
		||||
    'Q3469910': ['Tag:amenity=community_centre'],  # performing arts center
 | 
			
		||||
    'Q57660343': ['Tag:amenity=community_centre'],  # performing arts building
 | 
			
		||||
    'Q163740': ['Tag:amenity=community_centre',  # nonprofit organization
 | 
			
		||||
                'Tag:amenity=social_facility',
 | 
			
		||||
                'Key:social_facility'],
 | 
			
		||||
    'Q41176': ['Key:building:levels'],          # building
 | 
			
		||||
    'Q44494': ['Tag:historic=mill'],            # mill
 | 
			
		||||
    'Q56822897': ['Tag:historic=mill'],         # mill building
 | 
			
		||||
    'Q2175765': ['Tag:public_transport=stop_area'],  # tram stop
 | 
			
		||||
    'Q179700': ['Tag:memorial=statue',          # statue
 | 
			
		||||
                'Tag:memorial:type=statue',
 | 
			
		||||
                'Tag:historic=memorial'],
 | 
			
		||||
    'Q1076486': ['Tag:landuse=recreation_ground'],  # sports venue
 | 
			
		||||
    'Q988108': ['Tag:amenity=community_centre',  # club
 | 
			
		||||
                'Tag:community_centre=club_home'],
 | 
			
		||||
    'Q55004558': ['Tag:service=yard',
 | 
			
		||||
                  'Tag:landuse=railway'],       # car barn
 | 
			
		||||
    'Q19563580': ['Tag:landuse=railway'],       # rail yard
 | 
			
		||||
    'Q134447': ['Tag:generator:source=nuclear'],  # nuclear power plant
 | 
			
		||||
    'Q1258086': ['Tag:leisure=park',
 | 
			
		||||
                 'Tag:boundary=national_park'],  # National Historic Site
 | 
			
		||||
    'Q32350958': ['Tag:leisure=bingo'],         # Bingo hall
 | 
			
		||||
    'Q53060': ['Tag:historic=gate',             # gate
 | 
			
		||||
               'Tag:tourism=attraction'],
 | 
			
		||||
    'Q3947': ['Tag:tourism=hotel',              # house
 | 
			
		||||
              'Tag:building=hotel',
 | 
			
		||||
              'Tag:tourism=guest_house'],
 | 
			
		||||
    'Q847017': ['Tag:leisure=sports_centre'],   # sports club
 | 
			
		||||
    'Q820477': ['Tag:landuse=quarry',
 | 
			
		||||
                'Tag:gnis:feature_type=Mine'],  # mine
 | 
			
		||||
    'Q77115': ['Tag:leisure=sports_centre'],    # community center
 | 
			
		||||
    'Q35535': ['Tag:amenity=police'],           # police
 | 
			
		||||
    'Q16560': ['Tag:tourism=attraction',        # palace
 | 
			
		||||
               'Tag:historic=yes'],
 | 
			
		||||
    'Q131734': ['Tag:amenity=pub',              # brewery
 | 
			
		||||
                'Tag:industrial=brewery'],
 | 
			
		||||
    'Q828909': ['Tag:landuse=commercial',
 | 
			
		||||
                'Tag:landuse=industrial',
 | 
			
		||||
                'Tag:historic=dockyard'],       # wharf
 | 
			
		||||
    'Q10283556': ['Tag:landuse=railway'],       # motive power depot
 | 
			
		||||
    'Q18674739': ['Tag:leisure=stadium'],       # event venue
 | 
			
		||||
    'Q20672229': ['Tag:historic=archaeological_site'],  # friary
 | 
			
		||||
    'Q207694': ['Tag:museum=art'],              # art museum
 | 
			
		||||
    'Q22698': ['Tag:leisure=dog_park',
 | 
			
		||||
               'Tag:amenity=market',
 | 
			
		||||
               'Tag:place=square',
 | 
			
		||||
               'Tag:leisure=common'],           # park
 | 
			
		||||
    'Q738570': ['Tag:place=suburb'],            # central business district
 | 
			
		||||
    'Q1133961': ['Tag:place=suburb'],           # commercial district
 | 
			
		||||
    'Q935277': ['Tag:gnis:ftype=Playa',
 | 
			
		||||
                'Tag:natural=sand'],            # salt pan
 | 
			
		||||
    'Q14253637': ['Tag:gnis:ftype=Playa',
 | 
			
		||||
                  'Tag:natural=sand'],          # dry lake
 | 
			
		||||
    'Q63099748': ['Tag:tourism=hotel',          # hotel building
 | 
			
		||||
                  'Tag:building=hotel',
 | 
			
		||||
                  'Tag:tourism=guest_house'],
 | 
			
		||||
    'Q2997369': ['Tag:leisure=park',
 | 
			
		||||
                 'Tag:highway=pedestrian',
 | 
			
		||||
                 'Tag:foot=yes',
 | 
			
		||||
                 'Tag:area=yes',
 | 
			
		||||
                 'Tag:amenity=market',
 | 
			
		||||
                 'Tag:leisure=common'],         # plaza
 | 
			
		||||
    'Q130003': ['Tag:landuse=winter_sports',    # ski resort
 | 
			
		||||
                'Tag:site=piste',
 | 
			
		||||
                'Tag:leisure=resort',
 | 
			
		||||
                'Tag:landuse=recreation_ground'],
 | 
			
		||||
    'Q4830453': ['Key:office',
 | 
			
		||||
                 'Tag:building=office'],        # business
 | 
			
		||||
    'Q43229': ['Key:office',
 | 
			
		||||
               'Tag:building=office'],          # organization
 | 
			
		||||
    'Q17084016': ['Tag:office=association',
 | 
			
		||||
                  'Tag:office=ngo'],            # nonprofit corporation
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
property_map = [
 | 
			
		||||
    ("P238", ["iata"], "IATA airport code"),
 | 
			
		||||
    ("P239", ["icao"], "ICAO airport code"),
 | 
			
		||||
    ("P240", ["faa", "ref"], "FAA airport code"),
 | 
			
		||||
    # ('P281', ['addr:postcode', 'postal_code'], 'postal code'),
 | 
			
		||||
    ("P296", ["ref", "ref:train", "railway:ref"], "station code"),
 | 
			
		||||
    ("P300", ["ISO3166-2"], "ISO 3166-2 code"),
 | 
			
		||||
    ("P359", ["ref:rce"], "Rijksmonument ID"),
 | 
			
		||||
    ("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"),
 | 
			
		||||
    ("P649", ["ref:nrhp"], "NRHP reference number"),
 | 
			
		||||
    ("P722", ["uic_ref"], "UIC station code"),
 | 
			
		||||
    ("P782", ["ref"], "LAU (local administrative unit)"),
 | 
			
		||||
    ("P836", ["ref:gss"], "UK Government Statistical Service code"),
 | 
			
		||||
    ("P856", ["website", "contact:website", "url"], "website"),
 | 
			
		||||
    ("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"),
 | 
			
		||||
    ("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"),
 | 
			
		||||
    # A UIC id can be a IBNR, but not every IBNR is an UIC id
 | 
			
		||||
    ("P954", ["uic_ref"], "IBNR ID"),
 | 
			
		||||
    ("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"),
 | 
			
		||||
    ("P1216", ["HE_ref"], "National Heritage List for England number"),
 | 
			
		||||
    ("P2253", ["ref:edubase"], "EDUBase URN"),
 | 
			
		||||
    ("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"),
 | 
			
		||||
    ("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"),
 | 
			
		||||
    ("P3562", ["seamark:light:reference"], "Admiralty number"),
 | 
			
		||||
    (
 | 
			
		||||
        "P4755",
 | 
			
		||||
        ["ref", "ref:train", "ref:crs", "crs", "nat_ref"],
 | 
			
		||||
        "UK railway station code",
 | 
			
		||||
    ),
 | 
			
		||||
    ("P4803", ["ref", "ref:train"], "Amtrak station code"),
 | 
			
		||||
    ("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"),
 | 
			
		||||
    ("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"),
 | 
			
		||||
    ("P5087", ["ref:fips"], "FIPS 5-2 numeric code (US states)"),
 | 
			
		||||
    ("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"),
 | 
			
		||||
]
 | 
			
		||||
							
								
								
									
										768
									
								
								web_view.py
									
									
									
									
									
								
							
							
						
						
									
										768
									
								
								web_view.py
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -2,16 +2,13 @@
 | 
			
		|||
 | 
			
		||||
from flask import (Flask, render_template, request, jsonify, redirect, url_for, g,
 | 
			
		||||
                   flash, session, Response)
 | 
			
		||||
from sqlalchemy import func, or_
 | 
			
		||||
from sqlalchemy.orm import selectinload
 | 
			
		||||
from sqlalchemy import func
 | 
			
		||||
from matcher import (nominatim, model, database, commons, wikidata, wikidata_api,
 | 
			
		||||
                     osm_oauth, edit, mail)
 | 
			
		||||
from collections import Counter
 | 
			
		||||
                     osm_oauth, edit, mail, api)
 | 
			
		||||
from matcher.data import property_map
 | 
			
		||||
from time import time, sleep
 | 
			
		||||
from geoalchemy2 import Geography
 | 
			
		||||
from requests_oauthlib import OAuth1Session
 | 
			
		||||
import flask_login
 | 
			
		||||
import os
 | 
			
		||||
import json
 | 
			
		||||
import GeoIP
 | 
			
		||||
import re
 | 
			
		||||
| 
						 | 
				
			
			@ -34,42 +31,6 @@ entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "last
 | 
			
		|||
 | 
			
		||||
re_qid = re.compile(r'^Q\d+$')
 | 
			
		||||
 | 
			
		||||
property_map = [
 | 
			
		||||
    ("P238", ["iata"], "IATA airport code"),
 | 
			
		||||
    ("P239", ["icao"], "ICAO airport code"),
 | 
			
		||||
    ("P240", ["faa", "ref"], "FAA airport code"),
 | 
			
		||||
    # ('P281', ['addr:postcode', 'postal_code'], 'postal code'),
 | 
			
		||||
    ("P296", ["ref", "ref:train", "railway:ref"], "station code"),
 | 
			
		||||
    ("P300", ["ISO3166-2"], "ISO 3166-2 code"),
 | 
			
		||||
    ("P359", ["ref:rce"], "Rijksmonument ID"),
 | 
			
		||||
    ("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"),
 | 
			
		||||
    ("P649", ["ref:nrhp"], "NRHP reference number"),
 | 
			
		||||
    ("P722", ["uic_ref"], "UIC station code"),
 | 
			
		||||
    ("P782", ["ref"], "LAU (local administrative unit)"),
 | 
			
		||||
    ("P836", ["ref:gss"], "UK Government Statistical Service code"),
 | 
			
		||||
    ("P856", ["website", "contact:website", "url"], "website"),
 | 
			
		||||
    ("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"),
 | 
			
		||||
    ("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"),
 | 
			
		||||
    # A UIC id can be a IBNR, but not every IBNR is an UIC id
 | 
			
		||||
    ("P954", ["uic_ref"], "IBNR ID"),
 | 
			
		||||
    ("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"),
 | 
			
		||||
    ("P1216", ["HE_ref"], "National Heritage List for England number"),
 | 
			
		||||
    ("P2253", ["ref:edubase"], "EDUBase URN"),
 | 
			
		||||
    ("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"),
 | 
			
		||||
    ("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"),
 | 
			
		||||
    ("P3562", ["seamark:light:reference"], "Admiralty number"),
 | 
			
		||||
    (
 | 
			
		||||
        "P4755",
 | 
			
		||||
        ["ref", "ref:train", "ref:crs", "crs", "nat_ref"],
 | 
			
		||||
        "UK railway station code",
 | 
			
		||||
    ),
 | 
			
		||||
    ("P4803", ["ref", "ref:train"], "Amtrak station code"),
 | 
			
		||||
    ("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"),
 | 
			
		||||
    ("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"),
 | 
			
		||||
    ("P5087", ["ref:fips"], "FIPS 5-2 numeric code (US states)"),
 | 
			
		||||
    ("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.teardown_appcontext
 | 
			
		||||
def shutdown_session(exception=None):
 | 
			
		||||
| 
						 | 
				
			
			@ -108,107 +69,6 @@ def check_for_tagged_qid(qid):
 | 
			
		|||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def make_envelope(bbox):
 | 
			
		||||
    west, south, east, north = [float(i) for i in bbox.split(",")]
 | 
			
		||||
    return func.ST_MakeEnvelope(west, south, east, north, srid)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_osm_with_wikidata_tag(bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    tagged = []
 | 
			
		||||
 | 
			
		||||
    seen = set()
 | 
			
		||||
    for cls in (model.Point, model.Polygon, model.Line):
 | 
			
		||||
        q = cls.query.filter(
 | 
			
		||||
            cls.tags.has_key("wikidata"),
 | 
			
		||||
            func.ST_Intersects(db_bbox, cls.way),
 | 
			
		||||
            func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
 | 
			
		||||
        )
 | 
			
		||||
        for osm in q:
 | 
			
		||||
            if osm.identifier in seen:
 | 
			
		||||
                continue
 | 
			
		||||
            seen.add(osm.identifier)
 | 
			
		||||
            name = osm.name or osm.tags.get("addr:housename") or "[no label]"
 | 
			
		||||
 | 
			
		||||
            tagged.append(
 | 
			
		||||
                {
 | 
			
		||||
                    "identifier": osm.identifier,
 | 
			
		||||
                    "id": osm.id,
 | 
			
		||||
                    "type": osm.type,
 | 
			
		||||
                    "url": osm.osm_url,
 | 
			
		||||
                    "geojson": osm.geojson(),
 | 
			
		||||
                    "centroid": list(osm.get_centroid()),
 | 
			
		||||
                    "name": name,
 | 
			
		||||
                    "wikidata": osm.tags["wikidata"],
 | 
			
		||||
                }
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    return tagged
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_items_in_bbox(bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
 | 
			
		||||
        .options(selectinload(model.Item.locations))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return q
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_item_street_addresses(item):
 | 
			
		||||
    street_address = [addr["text"] for addr in item.get_claim("P6375")]
 | 
			
		||||
    if street_address or "P669" not in item.claims:
 | 
			
		||||
        return street_address
 | 
			
		||||
 | 
			
		||||
    for claim in item.claims["P669"]:
 | 
			
		||||
        qualifiers = claim.get("qualifiers")
 | 
			
		||||
        if not qualifiers or 'P670' not in qualifiers:
 | 
			
		||||
            continue
 | 
			
		||||
        number = qualifiers["P670"][0]["datavalue"]["value"]
 | 
			
		||||
 | 
			
		||||
        street_item = get_item(claim["mainsnak"]["datavalue"]["value"]["numeric-id"])
 | 
			
		||||
        street = street_item.label()
 | 
			
		||||
        for q in qualifiers["P670"]:
 | 
			
		||||
            number = q["datavalue"]["value"]
 | 
			
		||||
            address = (f"{number} {street}"
 | 
			
		||||
                       if g.street_number_first
 | 
			
		||||
                       else f"{street} {number}")
 | 
			
		||||
            street_address.append(address)
 | 
			
		||||
 | 
			
		||||
    return street_address
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_markers(all_items):
 | 
			
		||||
    items = []
 | 
			
		||||
    for item in all_items:
 | 
			
		||||
        if not item:
 | 
			
		||||
            continue
 | 
			
		||||
        locations = [list(i.get_lat_lon()) for i in item.locations]
 | 
			
		||||
        image_filenames = item.get_claim("P18")
 | 
			
		||||
 | 
			
		||||
        street_address = get_item_street_addresses(item)
 | 
			
		||||
 | 
			
		||||
        d = {
 | 
			
		||||
            "qid": item.qid,
 | 
			
		||||
            "label": item.label(),
 | 
			
		||||
            "description": item.description(),
 | 
			
		||||
            "markers": locations,
 | 
			
		||||
            "image_list": image_filenames,
 | 
			
		||||
            "street_address": street_address,
 | 
			
		||||
            "isa_list": [v["id"] for v in item.get_claim("P31") if v],
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if aliases := item.get_aliases():
 | 
			
		||||
            d["aliases"] = aliases
 | 
			
		||||
 | 
			
		||||
        items.append(d)
 | 
			
		||||
 | 
			
		||||
    return items
 | 
			
		||||
 | 
			
		||||
def geoip_user_record():
 | 
			
		||||
    gi = GeoIP.open(app.config["GEOIP_DATA"], GeoIP.GEOIP_STANDARD)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -372,52 +232,9 @@ def search_page():
 | 
			
		|||
    return render_template("search.html", hits=hits, bbox_list=bbox)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_isa_count(items):
 | 
			
		||||
    isa_count = Counter()
 | 
			
		||||
    for item in items:
 | 
			
		||||
        if not item:
 | 
			
		||||
            continue
 | 
			
		||||
        isa_list = item.get_claim("P31")
 | 
			
		||||
        for isa in isa_list:
 | 
			
		||||
            if not isa:
 | 
			
		||||
                print("missing IsA:", item.qid)
 | 
			
		||||
                continue
 | 
			
		||||
            isa_count[isa["id"]] += 1
 | 
			
		||||
def read_bounds_param():
 | 
			
		||||
    return [float(i) for i in request.args["bounds"].split(",")]
 | 
			
		||||
 | 
			
		||||
    return isa_count.most_common()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_and_save_item(qid):
 | 
			
		||||
    entity = wikidata_api.get_entity(qid)
 | 
			
		||||
    entity_qid = entity["id"]
 | 
			
		||||
    if entity_qid != qid:
 | 
			
		||||
        print(f'redirect {qid} -> {entity_qid}')
 | 
			
		||||
        item = model.Item.query.get(entity_qid[1:])
 | 
			
		||||
        return item
 | 
			
		||||
 | 
			
		||||
    coords = wikidata.get_entity_coords(entity["claims"])
 | 
			
		||||
 | 
			
		||||
    item_id = int(qid[1:])
 | 
			
		||||
    obj = {k: v for k, v in entity.items() if k in entity_keys}
 | 
			
		||||
    try:
 | 
			
		||||
        item = model.Item(item_id=item_id, **obj)
 | 
			
		||||
    except TypeError:
 | 
			
		||||
        print(qid)
 | 
			
		||||
        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
 | 
			
		||||
        print(entity.keys())
 | 
			
		||||
        raise
 | 
			
		||||
    item.locations = model.location_objects(coords)
 | 
			
		||||
    database.session.add(item)
 | 
			
		||||
    database.session.commit()
 | 
			
		||||
 | 
			
		||||
    return item
 | 
			
		||||
 | 
			
		||||
def get_bbox_centroid(bbox):
 | 
			
		||||
    bbox = make_envelope(bbox)
 | 
			
		||||
    centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar()
 | 
			
		||||
    lon, lat = re_point.match(centroid).groups()
 | 
			
		||||
 | 
			
		||||
    return lat, lon
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/location")
 | 
			
		||||
def show_user_location():
 | 
			
		||||
| 
						 | 
				
			
			@ -427,52 +244,18 @@ def show_user_location():
 | 
			
		|||
@app.route("/api/1/count")
 | 
			
		||||
def api_wikidata_items_count():
 | 
			
		||||
    t0 = time()
 | 
			
		||||
    bounds = request.args.get("bounds")
 | 
			
		||||
 | 
			
		||||
    db_bbox = make_envelope(bounds)
 | 
			
		||||
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
 | 
			
		||||
    )
 | 
			
		||||
    count = api.wikidata_items_count(read_bounds_param())
 | 
			
		||||
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
    return cors_jsonify(success=True, count=q.count(), duration=t1)
 | 
			
		||||
    return cors_jsonify(success=True, count=count, duration=t1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/isa")
 | 
			
		||||
def api_wikidata_isa_counts():
 | 
			
		||||
    t0 = time()
 | 
			
		||||
 | 
			
		||||
    bbox = request.args.get("bounds")
 | 
			
		||||
    bounds = [float(i) for i in bbox.split(",")]
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    q = (
 | 
			
		||||
        model.Item.query.join(model.ItemLocation)
 | 
			
		||||
        .filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    db_items = q.all()
 | 
			
		||||
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
    bounds = read_bounds_param()
 | 
			
		||||
    isa_count = api.wikidata_isa_counts(bounds)
 | 
			
		||||
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
    return cors_jsonify(success=True, isa_count=isa_count, bounds=bounds, duration=t1)
 | 
			
		||||
| 
						 | 
				
			
			@ -480,273 +263,28 @@ def api_wikidata_isa_counts():
 | 
			
		|||
 | 
			
		||||
@app.route("/api/1/items")
 | 
			
		||||
def api_wikidata_items():
 | 
			
		||||
    bounds = request.args.get("bounds")
 | 
			
		||||
    g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds))
 | 
			
		||||
    t0 = time()
 | 
			
		||||
    q = get_items_in_bbox(bounds)
 | 
			
		||||
    db_items = q.all()
 | 
			
		||||
    items = get_markers(db_items)
 | 
			
		||||
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
    bounds = read_bounds_param()
 | 
			
		||||
    ret = api.wikidata_items(bounds)
 | 
			
		||||
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
    print(f"wikidata: {t1} seconds")
 | 
			
		||||
 | 
			
		||||
    return cors_jsonify(success=True, items=items, isa_count=isa_count, duration=t1)
 | 
			
		||||
    return cors_jsonify(success=True, duration=t1, **ret)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/osm")
 | 
			
		||||
def api_osm_objects():
 | 
			
		||||
    bounds = request.args.get("bounds")
 | 
			
		||||
    t0 = time()
 | 
			
		||||
    objects = get_osm_with_wikidata_tag(bounds)
 | 
			
		||||
    objects = api.get_osm_with_wikidata_tag(read_bounds_param())
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
    print(f"OSM: {t1} seconds")
 | 
			
		||||
    return cors_jsonify(success=True, objects=objects, duration=t1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
edu = ['Tag:amenity=college', 'Tag:amenity=university', 'Tag:amenity=school',
 | 
			
		||||
        'Tag:office=educational_institution', 'Tag:building=university']
 | 
			
		||||
tall = ['Key:height', 'Key:building:levels']
 | 
			
		||||
 | 
			
		||||
extra_keys = {
 | 
			
		||||
    'Q3914': ['Tag:building=school',
 | 
			
		||||
              'Tag:building=college',
 | 
			
		||||
              'Tag:amenity=college',
 | 
			
		||||
              'Tag:office=educational_institution'],  # school
 | 
			
		||||
    'Q322563': edu,                             # vocational school
 | 
			
		||||
    'Q383092': edu,                             # film school
 | 
			
		||||
    'Q1021290': edu,                            # music school
 | 
			
		||||
    'Q1244442': edu,                            # school building
 | 
			
		||||
    'Q1469420': edu,                            # adult education centre
 | 
			
		||||
    'Q2143781': edu,                            # drama school
 | 
			
		||||
    'Q2385804': edu,                            # educational institution
 | 
			
		||||
    'Q5167149': edu,                            # cooking school
 | 
			
		||||
    'Q7894959': edu,                            # University Technical College
 | 
			
		||||
    'Q47530379': edu,                           # agricultural college
 | 
			
		||||
    'Q38723': edu,                              # higher education institution
 | 
			
		||||
    'Q11303': tall,                             # skyscraper
 | 
			
		||||
    'Q18142': tall,                             # high-rise building
 | 
			
		||||
    'Q33673393': tall,                          # multi-storey building
 | 
			
		||||
    'Q641226': ['Tag:leisure=stadium'],         # arena
 | 
			
		||||
    'Q2301048': ['Tag:aeroway=helipad'],        # special airfield
 | 
			
		||||
    'Q622425': ['Tag:amenity=pub',
 | 
			
		||||
                'Tag:amenity=music_venue'],     # nightclub
 | 
			
		||||
    'Q187456': ['Tag:amenity=pub',
 | 
			
		||||
                'Tag:amenity=nightclub'],       # bar
 | 
			
		||||
    'Q16917': ['Tag:amenity=clinic',
 | 
			
		||||
               'Tag:building=clinic'],          # hospital
 | 
			
		||||
    'Q330284': ['Tag:amenity=market'],          # marketplace
 | 
			
		||||
    'Q5307737': ['Tag:amenity=pub',
 | 
			
		||||
                 'Tag:amenity=bar'],            # drinking establishment
 | 
			
		||||
    'Q875157': ['Tag:tourism=resort'],          # resort
 | 
			
		||||
    'Q174782': ['Tag:leisure=park',
 | 
			
		||||
                'Tag:highway=pedestrian',
 | 
			
		||||
                'Tag:foot=yes',
 | 
			
		||||
                'Tag:area=yes',
 | 
			
		||||
                'Tag:amenity=market',
 | 
			
		||||
                'Tag:leisure=common'],          # square
 | 
			
		||||
    'Q34627': ['Tag:religion=jewish'],          # synagogue
 | 
			
		||||
    'Q16970': ['Tag:religion=christian'],       # church
 | 
			
		||||
    'Q32815': ['Tag:religion=islam'],           # mosque
 | 
			
		||||
    'Q811979': ['Key:building'],                # architectural structure
 | 
			
		||||
    'Q11691': ['Key:building'],                 # stock exchange
 | 
			
		||||
    'Q1329623': ['Tag:amenity=arts_centre',     # cultural centre
 | 
			
		||||
                 'Tag:amenity=community_centre'],
 | 
			
		||||
    'Q856584': ['Tag:amenity=library'],         # library building
 | 
			
		||||
    'Q11315': ['Tag:landuse=retail'],           # shopping mall
 | 
			
		||||
    'Q39658032': ['Tag:landuse=retail'],        # open air shopping centre
 | 
			
		||||
    'Q277760': ['Tag:historic=folly',
 | 
			
		||||
                'Tag:historic=city_gate'],      # gatehouse
 | 
			
		||||
    'Q180174': ['Tag:historic=folly'],          # folly
 | 
			
		||||
    'Q15243209': ['Tag:leisure=park',
 | 
			
		||||
                  'Tag:boundary=national_park'],   # historic district
 | 
			
		||||
    'Q3010369': ['Tag:historic=monument'],      # opening ceremony
 | 
			
		||||
    'Q123705': ['Tag:place=suburb'],            # neighbourhood
 | 
			
		||||
    'Q256020': ['Tag:amenity=pub'],             # inn
 | 
			
		||||
    'Q41253': ['Tag:amenity=theatre'],          # movie theater
 | 
			
		||||
    'Q17350442': ['Tag:amenity=theatre'],       # venue
 | 
			
		||||
    'Q156362': ['Tag:amenity=winery'],          # winery
 | 
			
		||||
    'Q14092': ['Tag:leisure=fitness_centre',
 | 
			
		||||
               'Tag:leisure=sports_centre'],    # gymnasium
 | 
			
		||||
    'Q27686': ['Tag:tourism=hostel',            # hotel
 | 
			
		||||
               'Tag:tourism=guest_house',
 | 
			
		||||
               'Tag:building=hotel'],
 | 
			
		||||
    'Q11707': ['Tag:amenity=cafe', 'Tag:amenity=fast_food',
 | 
			
		||||
               'Tag:shop=deli', 'Tag:shop=bakery',
 | 
			
		||||
               'Key:cuisine'],                  # restaurant
 | 
			
		||||
    'Q2360219': ['Tag:amenity=embassy'],        # permanent mission
 | 
			
		||||
    'Q27995042': ['Tag:protection_title=Wilderness Area'],  # wilderness area
 | 
			
		||||
    'Q838948': ['Tag:historic=memorial',
 | 
			
		||||
                'Tag:historic=monument'],       # work of art
 | 
			
		||||
    'Q23413': ['Tag:place=locality'],           # castle
 | 
			
		||||
    'Q28045079': ['Tag:historic=archaeological_site',
 | 
			
		||||
                  'Tag:site_type=fortification',
 | 
			
		||||
                  'Tag:embankment=yes'],        # contour fort
 | 
			
		||||
    'Q744099': ['Tag:historic=archaeological_site',
 | 
			
		||||
                'Tag:site_type=fortification',
 | 
			
		||||
                'Tag:embankment=yes'],          # hillfort
 | 
			
		||||
    'Q515': ['Tag:border_type=city'],           # city
 | 
			
		||||
    'Q1254933': ['Tag:amenity=university'],     # astronomical observatory
 | 
			
		||||
    'Q1976594': ['Tag:landuse=industrial'],     # science park
 | 
			
		||||
    'Q190928': ['Tag:landuse=industrial'],      # shipyard
 | 
			
		||||
    'Q4663385': ['Tag:historic=train_station',  # former railway station
 | 
			
		||||
                 'Tag:railway=historic_station'],
 | 
			
		||||
    'Q11997323': ['Tag:emergency=lifeboat_station'],  # lifeboat station
 | 
			
		||||
    'Q16884952': ['Tag:castle_type=stately',
 | 
			
		||||
                  'Tag:building=country_house'],  # country house
 | 
			
		||||
    'Q1343246': ['Tag:castle_type=stately',
 | 
			
		||||
                 'Tag:building=country_house'],   # English country house
 | 
			
		||||
    'Q4919932': ['Tag:castle_type=stately'],    # stately home
 | 
			
		||||
    'Q1763828': ['Tag:amenity=community_centre'],  # multi-purpose hall
 | 
			
		||||
    'Q3469910': ['Tag:amenity=community_centre'],  # performing arts center
 | 
			
		||||
    'Q57660343': ['Tag:amenity=community_centre'],  # performing arts building
 | 
			
		||||
    'Q163740': ['Tag:amenity=community_centre',  # nonprofit organization
 | 
			
		||||
                'Tag:amenity=social_facility',
 | 
			
		||||
                'Key:social_facility'],
 | 
			
		||||
    'Q41176': ['Key:building:levels'],          # building
 | 
			
		||||
    'Q44494': ['Tag:historic=mill'],            # mill
 | 
			
		||||
    'Q56822897': ['Tag:historic=mill'],         # mill building
 | 
			
		||||
    'Q2175765': ['Tag:public_transport=stop_area'],  # tram stop
 | 
			
		||||
    'Q179700': ['Tag:memorial=statue',          # statue
 | 
			
		||||
                'Tag:memorial:type=statue',
 | 
			
		||||
                'Tag:historic=memorial'],
 | 
			
		||||
    'Q1076486': ['Tag:landuse=recreation_ground'],  # sports venue
 | 
			
		||||
    'Q988108': ['Tag:amenity=community_centre',  # club
 | 
			
		||||
                'Tag:community_centre=club_home'],
 | 
			
		||||
    'Q55004558': ['Tag:service=yard',
 | 
			
		||||
                  'Tag:landuse=railway'],       # car barn
 | 
			
		||||
    'Q19563580': ['Tag:landuse=railway'],       # rail yard
 | 
			
		||||
    'Q134447': ['Tag:generator:source=nuclear'],  # nuclear power plant
 | 
			
		||||
    'Q1258086': ['Tag:leisure=park',
 | 
			
		||||
                 'Tag:boundary=national_park'],  # National Historic Site
 | 
			
		||||
    'Q32350958': ['Tag:leisure=bingo'],         # Bingo hall
 | 
			
		||||
    'Q53060': ['Tag:historic=gate',             # gate
 | 
			
		||||
               'Tag:tourism=attraction'],
 | 
			
		||||
    'Q3947': ['Tag:tourism=hotel',              # house
 | 
			
		||||
              'Tag:building=hotel',
 | 
			
		||||
              'Tag:tourism=guest_house'],
 | 
			
		||||
    'Q847017': ['Tag:leisure=sports_centre'],   # sports club
 | 
			
		||||
    'Q820477': ['Tag:landuse=quarry',
 | 
			
		||||
                'Tag:gnis:feature_type=Mine'],  # mine
 | 
			
		||||
    'Q77115': ['Tag:leisure=sports_centre'],    # community center
 | 
			
		||||
    'Q35535': ['Tag:amenity=police'],           # police
 | 
			
		||||
    'Q16560': ['Tag:tourism=attraction',        # palace
 | 
			
		||||
               'Tag:historic=yes'],
 | 
			
		||||
    'Q131734': ['Tag:amenity=pub',              # brewery
 | 
			
		||||
                'Tag:industrial=brewery'],
 | 
			
		||||
    'Q828909': ['Tag:landuse=commercial',
 | 
			
		||||
                'Tag:landuse=industrial',
 | 
			
		||||
                'Tag:historic=dockyard'],       # wharf
 | 
			
		||||
    'Q10283556': ['Tag:landuse=railway'],       # motive power depot
 | 
			
		||||
    'Q18674739': ['Tag:leisure=stadium'],       # event venue
 | 
			
		||||
    'Q20672229': ['Tag:historic=archaeological_site'],  # friary
 | 
			
		||||
    'Q207694': ['Tag:museum=art'],              # art museum
 | 
			
		||||
    'Q22698': ['Tag:leisure=dog_park',
 | 
			
		||||
               'Tag:amenity=market',
 | 
			
		||||
               'Tag:place=square',
 | 
			
		||||
               'Tag:leisure=common'],           # park
 | 
			
		||||
    'Q738570': ['Tag:place=suburb'],            # central business district
 | 
			
		||||
    'Q1133961': ['Tag:place=suburb'],           # commercial district
 | 
			
		||||
    'Q935277': ['Tag:gnis:ftype=Playa',
 | 
			
		||||
                'Tag:natural=sand'],            # salt pan
 | 
			
		||||
    'Q14253637': ['Tag:gnis:ftype=Playa',
 | 
			
		||||
                  'Tag:natural=sand'],          # dry lake
 | 
			
		||||
    'Q63099748': ['Tag:tourism=hotel',          # hotel building
 | 
			
		||||
                  'Tag:building=hotel',
 | 
			
		||||
                  'Tag:tourism=guest_house'],
 | 
			
		||||
    'Q2997369': ['Tag:leisure=park',
 | 
			
		||||
                 'Tag:highway=pedestrian',
 | 
			
		||||
                 'Tag:foot=yes',
 | 
			
		||||
                 'Tag:area=yes',
 | 
			
		||||
                 'Tag:amenity=market',
 | 
			
		||||
                 'Tag:leisure=common'],         # plaza
 | 
			
		||||
    'Q130003': ['Tag:landuse=winter_sports',    # ski resort
 | 
			
		||||
                'Tag:site=piste',
 | 
			
		||||
                'Tag:leisure=resort',
 | 
			
		||||
                'Tag:landuse=recreation_ground'],
 | 
			
		||||
    'Q4830453': ['Key:office',
 | 
			
		||||
                 'Tag:building=office'],        # business
 | 
			
		||||
    'Q43229': ['Key:office',
 | 
			
		||||
               'Tag:building=office'],          # organization
 | 
			
		||||
    'Q17084016': ['Tag:office=association',
 | 
			
		||||
                  'Tag:office=ngo'],            # nonprofit corporation
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
skip_tags = {"Key:addr:street"}
 | 
			
		||||
 | 
			
		||||
def get_item(item_id):
 | 
			
		||||
    item = model.Item.query.get(item_id)
 | 
			
		||||
    if item:
 | 
			
		||||
        return item
 | 
			
		||||
    item = get_and_save_item(f"Q{item_id}")
 | 
			
		||||
    database.session.add(item)
 | 
			
		||||
    database.session.commit()
 | 
			
		||||
    return item
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_items(item_ids):
 | 
			
		||||
    items = []
 | 
			
		||||
    for item_id in item_ids:
 | 
			
		||||
        item = model.Item.query.get(item_id)
 | 
			
		||||
        if not item:
 | 
			
		||||
            if not get_and_save_item(f"Q{item_id}"):
 | 
			
		||||
                continue
 | 
			
		||||
            item = model.Item.query.get(item_id)
 | 
			
		||||
        items.append(item)
 | 
			
		||||
 | 
			
		||||
    return items
 | 
			
		||||
 | 
			
		||||
def get_item_tags(item):
 | 
			
		||||
    isa_items = []
 | 
			
		||||
    isa_list = [v["numeric-id"] for v in item.get_claim("P31")]
 | 
			
		||||
    isa_items = get_items(isa_list)
 | 
			
		||||
 | 
			
		||||
    osm_list = set()
 | 
			
		||||
 | 
			
		||||
    skip_isa = {row[0] for row in database.session.query(model.SkipIsA.item_id)}
 | 
			
		||||
 | 
			
		||||
    seen = set(isa_list) | skip_isa
 | 
			
		||||
    while isa_items:
 | 
			
		||||
        isa = isa_items.pop()
 | 
			
		||||
        if not isa:
 | 
			
		||||
            continue
 | 
			
		||||
        osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]
 | 
			
		||||
        if isa.qid in extra_keys:
 | 
			
		||||
            osm += extra_keys[isa.qid]
 | 
			
		||||
 | 
			
		||||
        osm_list.update(osm)
 | 
			
		||||
 | 
			
		||||
        subclass_of = [v["numeric-id"] for v in (isa.get_claim("P279") or []) if v]
 | 
			
		||||
        isa_list = [isa_id for isa_id in subclass_of if isa_id not in seen]
 | 
			
		||||
        seen.update(isa_list)
 | 
			
		||||
        isa_items += get_items(isa_list)
 | 
			
		||||
    return sorted(osm_list)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/item/Q<int:item_id>/tags")
 | 
			
		||||
def api_get_item_tags(item_id):
 | 
			
		||||
    t0 = time()
 | 
			
		||||
    item = model.Item.query.get(item_id)
 | 
			
		||||
    osm_list = get_item_tags(item)
 | 
			
		||||
    osm_list = api.get_item_tags(item)
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
 | 
			
		||||
    return cors_jsonify(success=True,
 | 
			
		||||
| 
						 | 
				
			
			@ -754,256 +292,13 @@ def api_get_item_tags(item_id):
 | 
			
		|||
                        tag_or_key_list=osm_list,
 | 
			
		||||
                        duration=t1)
 | 
			
		||||
 | 
			
		||||
def get_tag_filter(cls, tag_list):
 | 
			
		||||
    tag_filter = []
 | 
			
		||||
    for tag_or_key in tag_list:
 | 
			
		||||
        if tag_or_key.startswith("Key:"):
 | 
			
		||||
            tag_filter.append(cls.tags.has_key(tag_or_key[4:]))
 | 
			
		||||
        if tag_or_key.startswith("Tag:"):
 | 
			
		||||
            k, _, v = tag_or_key.partition("=")
 | 
			
		||||
            tag_filter.append(cls.tags[k[4:]] == v)
 | 
			
		||||
 | 
			
		||||
    return tag_filter
 | 
			
		||||
 | 
			
		||||
def get_country_iso3166_1(lat, lon):
 | 
			
		||||
    point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
 | 
			
		||||
    alpha2_codes = set()
 | 
			
		||||
    q = model.Polygon.query.filter(func.ST_Covers(model.Polygon.way, point),
 | 
			
		||||
                                   model.Polygon.admin_level == "2")
 | 
			
		||||
    for country in q:
 | 
			
		||||
        alpha2 = country.tags.get("ISO3166-1")
 | 
			
		||||
        if not alpha2:
 | 
			
		||||
            continue
 | 
			
		||||
        alpha2_codes.add(alpha2)
 | 
			
		||||
 | 
			
		||||
    g.alpha2_codes = alpha2_codes
 | 
			
		||||
    return alpha2_codes
 | 
			
		||||
 | 
			
		||||
def is_street_number_first(lat, lon):
 | 
			
		||||
    if lat is None or lon is None:
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    alpha2 = get_country_iso3166_1(lat, lon)
 | 
			
		||||
    alpha2_number_first = {'GB', 'IE', 'US', 'MX', 'CA', 'FR', 'AU', 'NZ', 'ZA'}
 | 
			
		||||
 | 
			
		||||
    return bool(alpha2_number_first & alpha2)
 | 
			
		||||
 | 
			
		||||
def get_nearby(bbox, item, max_distance=300):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
 | 
			
		||||
    osm_objects = {}
 | 
			
		||||
    distances = {}
 | 
			
		||||
    tag_list = get_item_tags(item)
 | 
			
		||||
    if not tag_list:
 | 
			
		||||
        return []
 | 
			
		||||
 | 
			
		||||
    item_is_street = item.is_street()
 | 
			
		||||
 | 
			
		||||
    for loc in item.locations:
 | 
			
		||||
        lat, lon = loc.get_lat_lon()
 | 
			
		||||
        point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)
 | 
			
		||||
        for cls in model.Point, model.Line, model.Polygon:
 | 
			
		||||
            if item_is_street and cls == model.Point:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            tag_filter = get_tag_filter(cls, tag_list)
 | 
			
		||||
            dist = func.ST_Distance(point, cls.way.cast(Geography(srid=4326)))
 | 
			
		||||
 | 
			
		||||
            q = (cls.query.add_columns(dist.label('distance'))
 | 
			
		||||
                          .filter(
 | 
			
		||||
                              func.ST_Intersects(db_bbox, cls.way),
 | 
			
		||||
                              func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
 | 
			
		||||
                              or_(*tag_filter))
 | 
			
		||||
                          .order_by(point.distance_centroid(cls.way)))
 | 
			
		||||
 | 
			
		||||
            if item_is_street:
 | 
			
		||||
                q = q.filter(cls.tags.has_key("name"),
 | 
			
		||||
                             cls.tags["highway"] != 'bus_stop')
 | 
			
		||||
 | 
			
		||||
            if "Key:amenity" in tag_list:
 | 
			
		||||
                q = q.filter(cls.tags["amenity"] != "bicycle_parking",
 | 
			
		||||
                             cls.tags["amenity"] != "bicycle_repair_station",
 | 
			
		||||
                             cls.tags["amenity"] != "atm",
 | 
			
		||||
                             cls.tags["amenity"] != "recycling")
 | 
			
		||||
 | 
			
		||||
            q = q.limit(40)
 | 
			
		||||
 | 
			
		||||
            # print(q.statement.compile(compile_kwargs={"literal_binds": True}))
 | 
			
		||||
 | 
			
		||||
            for i, dist in q:
 | 
			
		||||
                if dist > max_distance:
 | 
			
		||||
                    continue
 | 
			
		||||
                osm_objects.setdefault(i.identifier, i)
 | 
			
		||||
                if i.identifier not in distances or dist < distances[i.identifier]:
 | 
			
		||||
                    distances[i.identifier] = dist
 | 
			
		||||
 | 
			
		||||
    nearby = [(osm_objects[identifier], dist)
 | 
			
		||||
              for identifier, dist
 | 
			
		||||
              in sorted(distances.items(), key=lambda i:i[1])]
 | 
			
		||||
 | 
			
		||||
    return nearby[:40]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find_preset_file(k, v, ending):
 | 
			
		||||
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
 | 
			
		||||
    preset_dir = os.path.join(ts_dir, "data", "presets")
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, v + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, f"{v}_{ending}.json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k, "_" + v + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Tag:{k}={v}",
 | 
			
		||||
            "preset": f"{k}/{v}",
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    filename = os.path.join(preset_dir, k + ".json")
 | 
			
		||||
    if os.path.exists(filename):
 | 
			
		||||
        return {
 | 
			
		||||
            "tag_or_key": f"Key:{k}",
 | 
			
		||||
            "preset": k,
 | 
			
		||||
            "filename": filename,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
def get_preset_translations():
 | 
			
		||||
    country_language = {
 | 
			
		||||
        'AU': 'en-AU',  # Australia
 | 
			
		||||
        'GB': 'en-GB',  # United Kingdom
 | 
			
		||||
        'IE': 'en-GB',  # Ireland
 | 
			
		||||
        'IN': 'en-IN',  # India
 | 
			
		||||
        'NZ': 'en-NZ',  # New Zealand
 | 
			
		||||
    }
 | 
			
		||||
    ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"]
 | 
			
		||||
    translation_dir = os.path.join(ts_dir, "dist", "translations")
 | 
			
		||||
 | 
			
		||||
    for code in g.alpha2_codes:
 | 
			
		||||
        if code not in country_language:
 | 
			
		||||
            continue
 | 
			
		||||
        filename = os.path.join(translation_dir, country_language[code] + ".json")
 | 
			
		||||
        return json.load(open(filename))["en-GB"]["presets"]["presets"]
 | 
			
		||||
 | 
			
		||||
    return {}
 | 
			
		||||
 | 
			
		||||
def get_presets_from_tags(osm):
 | 
			
		||||
    translations = get_preset_translations()
 | 
			
		||||
 | 
			
		||||
    found = []
 | 
			
		||||
    endings = {model.Point: "point", model.Line: "line", model.Polygon: "area"}
 | 
			
		||||
    ending = endings[type(osm)]
 | 
			
		||||
 | 
			
		||||
    for k, v in osm.tags.items():
 | 
			
		||||
        if k == 'amenity' and v == 'clock' and osm.tags.get('display') == 'sundial':
 | 
			
		||||
            tag_or_key = f"Tag:{k}={v}"
 | 
			
		||||
            found.append({"tag_or_key": tag_or_key, "name": "Sundial"})
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        match = find_preset_file(k, v, ending)
 | 
			
		||||
        if not match:
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        preset = match["preset"]
 | 
			
		||||
        if preset in translations:
 | 
			
		||||
            match["name"] = translations[preset]["name"]
 | 
			
		||||
        else:
 | 
			
		||||
            match["name"] = json.load(open(match["filename"]))["name"]
 | 
			
		||||
 | 
			
		||||
        del match["filename"]
 | 
			
		||||
 | 
			
		||||
        found.append(match)
 | 
			
		||||
 | 
			
		||||
    return found
 | 
			
		||||
 | 
			
		||||
def get_address_nodes_within_building(building, bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
    ewkt = building.as_EWKT
 | 
			
		||||
    q = model.Point.query.filter(
 | 
			
		||||
        func.ST_Intersects(db_bbox, model.Point.way),
 | 
			
		||||
        func.ST_Covers(func.ST_GeomFromEWKT(ewkt), model.Point.way),
 | 
			
		||||
        model.Point.tags.has_key("addr:street"),
 | 
			
		||||
        model.Point.tags.has_key("addr:housenumber"),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return [node.tags for node in q]
 | 
			
		||||
 | 
			
		||||
def get_part_of(thing, bbox):
 | 
			
		||||
    db_bbox = make_envelope(bbox)
 | 
			
		||||
    ewkt = thing.as_EWKT
 | 
			
		||||
    q = model.Polygon.query.filter(
 | 
			
		||||
        func.ST_Intersects(db_bbox, model.Polygon.way),
 | 
			
		||||
        func.ST_Covers(model.Polygon.way, func.ST_GeomFromEWKT(ewkt)),
 | 
			
		||||
        or_(
 | 
			
		||||
            model.Polygon.tags.has_key("landuse"),
 | 
			
		||||
            model.Polygon.tags.has_key("amenity"),
 | 
			
		||||
        ),
 | 
			
		||||
        model.Polygon.tags.has_key("name"),
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return [polygon.tags for polygon in q]
 | 
			
		||||
 | 
			
		||||
def address_from_tags(tags):
 | 
			
		||||
    keys = ["street", "housenumber"]
 | 
			
		||||
    if not all("addr:" + k in tags for k in keys):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    if g.street_number_first:
 | 
			
		||||
        keys.reverse()
 | 
			
		||||
    return " ".join(tags["addr:" + k] for k in keys)
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/item/Q<int:item_id>/candidates")
 | 
			
		||||
def api_find_osm_candidates(item_id):
 | 
			
		||||
    bounds = request.args.get("bounds")
 | 
			
		||||
    g.street_number_first = is_street_number_first(*get_bbox_centroid(bounds))
 | 
			
		||||
 | 
			
		||||
    t0 = time()
 | 
			
		||||
    bounds = read_bounds_param()
 | 
			
		||||
    item = model.Item.query.get(item_id)
 | 
			
		||||
    nearby = []
 | 
			
		||||
    for osm, dist in get_nearby(bounds, item):
 | 
			
		||||
        tags = osm.tags
 | 
			
		||||
        tags.pop("way_area", None)
 | 
			
		||||
        name = osm.name or tags.get("addr:housename") or tags.get("inscription")
 | 
			
		||||
        if not name and "addr:housenumber" in tags and "addr:street" in tags:
 | 
			
		||||
            name = address_from_tags(tags)
 | 
			
		||||
 | 
			
		||||
        if isinstance(osm, model.Polygon) and "building" in osm.tags:
 | 
			
		||||
            address_nodes = get_address_nodes_within_building(osm, bounds)
 | 
			
		||||
            address_list = [address_from_tags(addr) for addr in address_nodes]
 | 
			
		||||
        else:
 | 
			
		||||
            address_list = []
 | 
			
		||||
        cur = {
 | 
			
		||||
            "identifier": osm.identifier,
 | 
			
		||||
            "distance": dist,
 | 
			
		||||
            "name": name,
 | 
			
		||||
            "tags": tags,
 | 
			
		||||
            "geojson": osm.geojson(),
 | 
			
		||||
            "presets": get_presets_from_tags(osm),
 | 
			
		||||
            "address_list": address_list,
 | 
			
		||||
        }
 | 
			
		||||
        if hasattr(osm, 'area'):
 | 
			
		||||
            cur["area"] = osm.area
 | 
			
		||||
 | 
			
		||||
        if address := address_from_tags(tags):
 | 
			
		||||
            cur["address"] = address
 | 
			
		||||
 | 
			
		||||
        part_of = [i["name"] for i in get_part_of(osm, bounds) if i["name"] != name]
 | 
			
		||||
        if part_of:
 | 
			
		||||
            cur["part_of"] = part_of
 | 
			
		||||
 | 
			
		||||
        nearby.append(cur)
 | 
			
		||||
    nearby = api.find_osm_candidates(item, bounds)
 | 
			
		||||
 | 
			
		||||
    t1 = time() - t0
 | 
			
		||||
    return cors_jsonify(success=True, qid=item.qid, nearby=nearby, duration=t1)
 | 
			
		||||
| 
						 | 
				
			
			@ -1029,36 +324,9 @@ def api_missing_wikidata_items():
 | 
			
		|||
        return jsonify(success=True, items=[], isa_count=[])
 | 
			
		||||
 | 
			
		||||
    lat, lon = request.args.get("lat"), request.args.get("lon")
 | 
			
		||||
    g.street_number_first = is_street_number_first(lat, lon)
 | 
			
		||||
 | 
			
		||||
    db_items = []
 | 
			
		||||
    for qid in qids:
 | 
			
		||||
        item = model.Item.query.get(qid[1:])
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
        db_items.append(item)
 | 
			
		||||
    items = get_markers(db_items)
 | 
			
		||||
    counts = get_isa_count(db_items)
 | 
			
		||||
    isa_ids = [qid[1:] for qid, count in counts]
 | 
			
		||||
    isa_items = {
 | 
			
		||||
        isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    isa_count = []
 | 
			
		||||
    for qid, count in counts:
 | 
			
		||||
        item = isa_items.get(qid)
 | 
			
		||||
        if not item:
 | 
			
		||||
            item = get_and_save_item(qid)
 | 
			
		||||
 | 
			
		||||
        label = item.label() if item else "[missing]"
 | 
			
		||||
        isa = {
 | 
			
		||||
            "qid": qid,
 | 
			
		||||
            "count": count,
 | 
			
		||||
            "label": label,
 | 
			
		||||
        }
 | 
			
		||||
        isa_count.append(isa)
 | 
			
		||||
 | 
			
		||||
    return cors_jsonify(success=True, items=items, isa_count=isa_count)
 | 
			
		||||
    ret = api.missing_wikidata_items(qids, lat, lon)
 | 
			
		||||
    return cors_jsonify(success=True, **ret)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/api/1/search")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue