from flask import Flask, render_template, request, jsonify, redirect, url_for
from sqlalchemy import func, or_
from sqlalchemy.orm import selectinload
from matcher import nominatim, model, database, commons, wikidata, wikidata_api
from collections import Counter
from time import time
from geoalchemy2 import Geography
import GeoIP
# Spatial reference ID handed to ST_MakeEnvelope in make_envelope().
# 4326 is the EPSG code for WGS 84 longitude/latitude.
srid = 4326
# Flask application object for this module.
app = Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally — confirm this
# module is never served in production with this setting.
app.debug = True
# Database connection URL. It is not referenced anywhere else in the visible code.
DB_URL = "postgresql:///matcher"
# Keys of a Wikidata entity that get_and_save_item() copies onto model.Item.
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
# Table of Wikidata identifier properties. Each entry is a
# (Wikidata property ID, OSM tag keys, label) tuple; identifier_page()
# unpacks the entries as `pid, osm_keys, label`.
property_map = [
("P238", ["iata"], "IATA airport code"),
("P239", ["icao"], "ICAO airport code"),
("P240", ["faa", "ref"], "FAA airport code"),
# ('P281', ['addr:postcode', 'postal_code'], 'postal code'),
("P296", ["ref", "ref:train", "railway:ref"], "station code"),
("P300", ["ISO3166-2"], "ISO 3166-2 code"),
("P359", ["ref:rce"], "Rijksmonument ID"),
("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"),
("P649", ["ref:nrhp"], "NRHP reference number"),
("P722", ["uic_ref"], "UIC station code"),
("P782", ["ref"], "LAU (local administrative unit)"),
("P836", ["ref:gss"], "UK Government Statistical Service code"),
("P856", ["website", "contact:website", "url"], "website"),
("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"),
("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"),
# A UIC ID can be an IBNR, but not every IBNR is a UIC ID.
("P954", ["uic_ref"], "IBNR ID"),
("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"),
("P1216", ["HE_ref"], "National Heritage List for England number"),
("P2253", ["ref:edubase"], "EDUBase URN"),
("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"),
("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"),
("P3562", ["seamark:light:reference"], "Admiralty number"),
# NOTE(review): the next two lines are not wrapped in a tuple and carry no
# property ID, unlike every other entry — the opening "(" and the ID appear
# to be missing. Restore them before relying on this entry.
["ref", "ref:train", "ref:crs", "crs", "nat_ref"],
"UK railway station code",
("P4803", ["ref", "ref:train"], "Amtrak station code"),
("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"),
("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"),
("P5087", ["ref:fips"], "FIPS 5-2 numeric code (US states)"),
("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"),
# NOTE(review): no closing "]" for this list is visible before the next definition.
# NOTE(review): only the signature is visible here; no body statements and no
# decorator appear in this view, so what this hook does cannot be documented
# from the code shown.
def shutdown_session(exception=None):
# For each QID in `qids`, query the Point, Polygon and Line models for rows
# whose tags["wikidata"] equals that QID, and return the `tagged` set.
# NOTE(review): as shown, nothing is ever added to `tagged`, and the
# `if q.count():` test is followed directly by the return — the statement(s)
# that record a hit appear to be missing. Confirm the intended logic.
def check_for_tagged_qids(qids):
tagged = set()
for qid in qids:
for cls in model.Point, model.Polygon, model.Line:
# One query per (qid, model class) pair.
q = cls.query.filter(cls.tags["wikidata"] == qid)
if q.count():
return tagged
# Returns the result of any(...) over the Point, Polygon and Line models, using
# conditions that test for a "wikidata" tag key and for its value equalling `qid`.
# NOTE(review): the expression inside any(...) is incomplete as shown — the
# two conditions are followed directly by the `for` clause and the closing ")"
# is absent, so the call that should wrap the conditions appears to be missing.
def check_for_tagged_qid(qid):
return any(
cls.tags.has_key("wikidata"), cls.tags["wikidata"] == qid
for cls in (model.Point, model.Polygon, model.Line)
def make_envelope(bbox):
    """Build an ``ST_MakeEnvelope`` SQL expression from a bounding-box string.

    Args:
        bbox: Four comma-separated numbers in the order
            west, south, east, north.

    Returns:
        The ``func.ST_MakeEnvelope(west, south, east, north, srid)``
        expression, using the module-level ``srid``.

    Raises:
        ValueError: If ``bbox`` does not contain exactly four values or one
            of them cannot be converted to ``float``.
    """
    west, south, east, north = map(float, bbox.split(","))
    return func.ST_MakeEnvelope(west, south, east, north, srid)
# Query the Point, Polygon and Line models for objects intersecting a bounding
# box and return the `tagged` list. `bbox` is the comma-separated string
# accepted by make_envelope().
# NOTE(review): several lines appear to be missing from this block as shown:
#   - the cls.query.filter(...) call is never closed;
#   - nothing follows `if osm.identifier in seen:` and `seen` is never updated;
#   - the "[no label]" assignment has no `else:` in front of it;
#   - the dict keys at the end are not enclosed in a literal, and nothing is
#     appended to `tagged`.
# Restore those lines before relying on this function.
def get_osm_with_wikidata_tag(bbox):
db_bbox = make_envelope(bbox)
tagged = []
seen = set()
for cls in (model.Point, model.Polygon, model.Line):
q = cls.query.filter(
# Keep geometries that intersect the box ...
func.ST_Intersects(db_bbox, cls.way),
# ... and whose area is less than 20 times the area of the box.
func.ST_Area(cls.way) < 20 * func.ST_Area(db_bbox),
for osm in q:
if osm.identifier in seen:
name = osm.name
if not name:
# Use the house name tag when the object has no name of its own.
if "addr:housename" in osm.tags:
name = osm.tags["addr:housename"]
name = "[no label]"
"identifier": osm.identifier,
"id": osm.id,
"type": osm.type,
"url": osm.osm_url,
"geojson": osm.geojson(),
"centroid": list(osm.get_centroid()),
"name": name,
"wikidata": osm.tags["wikidata"],
return tagged
# Build and return a query filtered to rows whose model.ItemLocation.location
# is covered by the envelope built from `bbox` (see make_envelope()).
# Callers receive the query object itself and call .all() on it.
# NOTE(review): the parenthesised expression starts with `.filter(...)` and is
# never closed — the query the filter is applied to appears to be missing.
def get_items_in_bbox(bbox):
db_bbox = make_envelope(bbox)
q = (
.filter(func.ST_Covers(db_bbox, model.ItemLocation.location))
return q
# Build marker dicts (qid, label, description, markers, image_list,
# street_address, isa_list) from the given items and return the `items` list.
# NOTE(review): as shown, `if "en" not in item.labels:` has no body of its
# own, the dict literal is never closed, and nothing is appended to `items`.
# Those lines appear to be missing.
def get_markers(all_items):
items = []
for item in all_items:
if "en" not in item.labels:
# Each location becomes a list built from loc.get_lat_lon().
locations = [list(i.get_lat_lon()) for i in item.locations]
image_filenames = item.get_claim("P18")
# The loop variable is rebound to the dict. The right-hand side is evaluated
# first, so item.qid, item.label() etc. still read the original object.
item = {
"qid": item.qid,
"label": item.label(),
"description": item.description(),
"markers": locations,
"image_list": image_filenames,
"street_address": item.get_claim("P6375"),
"isa_list": [v["id"] for v in item.get_claim("P31")],
return items
# Look up the requesting client's address (request.remote_addr) in a GeoIP
# city database and return a (latitude, longitude) tuple.
# NOTE(review): the database path is hard-coded to one user's home directory.
# NOTE(review): as shown, `if not gir:` has no body of its own, so the missing-
# record case falls through to the subscript on `gir`. Callers test the result
# for falsiness (map_start_page, search_map_page), so an early return for
# unknown addresses is presumably intended — confirm.
def get_user_location():
gi = GeoIP.open("/home/edward/lib/data/GeoIPCity.dat", GeoIP.GEOIP_STANDARD)
remote_ip = request.remote_addr
gir = gi.record_by_addr(remote_ip)
if not gir:
lat, lon = gir["latitude"], gir["longitude"]
return (lat, lon)
def redirect_from_root():
    """Return a redirect response to the URL of the ``map_start_page`` endpoint."""
    return redirect(url_for("map_start_page"))
def index_page():
    """Render the ``index.html`` template with no extra context."""
    return render_template("index.html")
def identifier_index():
    """Render ``identifier_index.html`` with the module-level ``property_map``."""
    return render_template("identifier_index.html", property_map=property_map)
def get_commons_image(filename):
    """Redirect to the thumbnail URL reported for ``filename``.

    Args:
        filename: Image file name passed to ``commons.image_detail``.

    Returns:
        A redirect response to the ``"thumburl"`` value of the image detail
        (requested with a 600x600 thumbnail size).

    Raises:
        KeyError: If the detail mapping has no entry for ``filename`` or the
            entry has no ``"thumburl"`` key.
    """
    detail = commons.image_detail([filename], thumbheight=600, thumbwidth=600)
    image = detail[filename]
    return redirect(image["thumburl"])
# Page through items that have a claim for property `pid` (10 per page, page
# number taken from the "page" query argument) and look up Point rows tagged
# with each item's QID.
# Raises KeyError when `pid` is not in property_map and ValueError when the
# "page" argument is not an integer.
# NOTE(review): the model.Point query inside the loop is never closed and the
# final render_template( call has no arguments — lines appear to be missing.
def identifier_page(pid):
per_page = 10
page = int(request.args.get("page", 1))
# Index property_map by property ID for a direct lookup.
property_dict = {pid: (osm_keys, label) for pid, osm_keys, label in property_map}
osm_keys, label = property_dict[pid]
wd = model.Item.query.filter(model.Item.claims.has_key(pid))
total = wd.count()
start = per_page * (page - 1)
# NOTE(review): .all() fetches every matching row before the slice is taken;
# the slicing happens in Python, not in the query.
items = wd.all()[start : per_page * page]
qids = [item.qid for item in items]
# NOTE(review): the commented-out code below is kept as found.
# pred = None
# values = set()
# for item in items:
# values |= set(item.get_claim(pid))
# for key in osm_keys:
# if key == 'ref':
# continue
# if pred is None:
# pred = model.Point.tags[key].in_(values)
# else:
# pred |= model.Point.tags[key].in_(values)
osm_points = {}
for qid in qids:
osm_points[qid] = model.Point.query.filter(
model.Point.tags["wikidata"] == qid
# This is the number of keys in osm_points, i.e. one per QID on this page.
osm_total = len(osm_points)
return render_template(
def map_location(zoom, lat, lng):
    """Render ``map.html`` for an explicit position.

    Args:
        zoom: Zoom value passed through to the template.
        lat: Latitude value passed through to the template.
        lng: Longitude value passed through to the template.

    Returns:
        The rendered template; ``time`` is the current Unix time truncated
        to an integer.
    """
    t = int(time())
    return render_template("map.html", zoom=zoom, lat=lat, lng=lng, time=t)
def map_start_page():
    """Render ``map.html`` at zoom 16, centred on the client's location.

    The position comes from ``get_user_location()``. When that returns a
    falsy value, ``lat`` and ``lng`` are passed to the template as ``None``.

    Returns:
        The rendered template; ``time`` is the current Unix time truncated
        to an integer, taken before the location lookup.
    """
    t = int(time())
    # One render call covers both cases: a missing location becomes (None, None).
    lat, lng = get_user_location() or (None, None)
    return render_template("map.html", zoom=16, lat=lat, lng=lng, time=t)
# Handle a search on the map page: with no "q" argument render map.html with
# the user's location; otherwise look the query up via nominatim.lookup().
# NOTE(review): the final render_template( call has no arguments and is never
# closed, so what is rendered for a non-empty query is not visible here.
def search_map_page():
# A falsy location result is replaced by a pair of None values.
user_lat, user_lon = get_user_location() or (None, None)
q = request.args.get("q")
if not q:
return render_template("map.html", user_lat=user_lat, user_lon=user_lon)
hits = nominatim.lookup(q)
# Remove the "geotext" key from every hit that has one.
for hit in hits:
if "geotext" in hit:
del hit["geotext"]
bbox = [hit["boundingbox"] for hit in hits]
return render_template(
def search_page():
    """Render ``search.html`` for the ``q`` query argument.

    With no (or an empty) ``q`` the template gets ``hits=None`` and
    ``bbox_list=None``. Otherwise the query is passed to
    ``nominatim.lookup``; the ``"geotext"`` key is removed from each hit and
    each hit's ``"boundingbox"`` value is collected into ``bbox_list``.
    """
    q = request.args.get("q")
    if not q:
        return render_template("search.html", hits=None, bbox_list=None)

    hits = nominatim.lookup(q)
    for hit in hits:
        # pop() with a default removes the key only when it is present.
        hit.pop("geotext", None)
    bbox = [hit["boundingbox"] for hit in hits]
    return render_template("search.html", hits=hits, bbox_list=bbox)
def get_isa_count(items):
    """Count how often each P31 claim target occurs across ``items``.

    Args:
        items: Iterable of objects exposing ``get_claim("P31")``, which must
            return an iterable of mappings that each have an ``"id"`` key.

    Returns:
        A list of ``(id, count)`` pairs ordered from most to least common,
        as produced by ``Counter.most_common()``.
    """
    isa_count = Counter()
    for item in items:
        # Counter.update() with an iterable adds one per element.
        isa_count.update(isa["id"] for isa in item.get_claim("P31"))
    return isa_count.most_common()
# Fetch entity `qid` through wikidata_api.get_entity(), build a model.Item
# from the entity keys listed in entity_keys, attach location objects derived
# from the entity's claims, and return the item.
# NOTE(review): as shown, an `except TypeError:` clause appears without a
# matching `try:`, nothing follows the redirect print inside the `if`, and no
# statement that stores the item is visible despite the function name. Those
# lines appear to be missing.
def get_and_save_item(qid):
entity = wikidata_api.get_entity(qid)
# A differing id means the API returned another entity than the one asked for.
if entity["id"] != qid:
print(f'redirect {qid} -> {entity["id"]}')
coords = wikidata.get_entity_coords(entity["claims"])
# Drop the first character of the QID to get the numeric item ID.
item_id = int(qid[1:])
obj = {k: v for k, v in entity.items() if k in entity_keys}
item = model.Item(item_id=item_id, **obj)
except TypeError:
print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
item.locations = model.location_objects(coords)
return item
# JSON endpoint: load the items inside the "bounds" query argument, turn them
# into markers, and report how often each P31 target occurs, along with the
# elapsed time in seconds.
# NOTE(review): the `isa_items` dict comprehension and the `isa = {` literal
# are never closed, and nothing is appended to `isa_count` — lines appear to
# be missing from this block.
def api_wikidata_items():
bounds = request.args.get("bounds")
t0 = time()
q = get_items_in_bbox(bounds)
db_items = q.all()
items = get_markers(db_items)
counts = get_isa_count(db_items)
# Drop the first character of each QID to get the value matched against item_id.
isa_ids = [qid[1:] for qid, count in counts]
isa_items = {
isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
isa_count = []
for qid, count in counts:
item = isa_items.get(qid)
# Items not found in the database query are fetched via get_and_save_item().
if not item:
item = get_and_save_item(qid)
label = item.label() if item else "[missing]"
isa = {
"qid": qid,
"count": count,
"label": label,
t1 = time() - t0
print(f"wikidata: {t1} seconds")
return jsonify(success=True, items=items, isa_count=isa_count, duration=t1)
def api_osm_objects():
    """Return, as JSON, the OSM objects found for the ``bounds`` argument.

    The ``bounds`` query argument is passed unchanged to
    ``get_osm_with_wikidata_tag``. The response carries ``success=True``,
    the ``objects`` list and ``duration`` (seconds spent in the lookup),
    which is also printed.
    """
    bounds = request.args.get("bounds")
    t0 = time()
    objects = get_osm_with_wikidata_tag(bounds)
    t1 = time() - t0
    print(f"OSM: {t1} seconds")
    return jsonify(success=True, objects=objects, duration=t1)
# Numeric item IDs that get_item_tags() pre-loads into its `seen` set.
skip_isa = {13226383, 16686448, 2221906}
# P1282 claim values that get_item_tags() filters out.
skip_tags = {"Key:addr:street"}
# Starting from the item's P31 targets, repeatedly load items from the
# database, read their P1282 claims (minus skip_tags) and follow their P279
# targets, then return sorted(osm_list).
# NOTE(review): as shown, `osm` is computed but never stored, `osm_list` is
# never added to (so the result would always be empty), and `seen` is never
# updated inside the loop. The lines doing that appear to be missing.
def get_item_tags(item):
isa_list = [v["numeric-id"] for v in item.get_claim("P31")]
isa_items = model.Item.query.filter(model.Item.item_id.in_(isa_list)).all()
osm_list = set()
# IDs already queued, plus the ones listed in skip_isa.
seen = set(isa_list) | skip_isa
# `isa_items` is used as a work list: pop one item, queue the unseen items it points to via P279.
while isa_items:
isa = isa_items.pop()
osm = [v for v in isa.get_claim("P1282") if v not in skip_tags]
subclass_of = [v["numeric-id"] for v in isa.get_claim("P279")]
isa_list = [isa_id for isa_id in subclass_of if isa_id not in seen]
isa_items += model.Item.query.filter(model.Item.item_id.in_(isa_list)).all()
return sorted(osm_list)
def api_get_item_tags(item_id):
    """Return, as JSON, the result of ``get_item_tags`` for one item.

    Args:
        item_id: Primary key passed to ``model.Item.query.get``.

    Returns:
        A JSON response with ``success=True``, the item's ``qid``, the
        ``tag_or_key_list`` and ``duration`` (seconds spent on the lookup).
    """
    t0 = time()
    item = model.Item.query.get(item_id)
    osm_list = get_item_tags(item)
    t1 = time() - t0
    return jsonify(success=True, qid=item.qid, tag_or_key_list=osm_list, duration=t1)
# Turn the strings returned by get_item_tags(item) into filter conditions on
# model.Polygon.tags and combine them with or_().
# NOTE(review): as shown, the `"Key:"` branch has no body, and the `"Tag:"`
# branch partitions the whole string, prefix included, on "=". Lines appear
# to be missing; confirm how the prefix is meant to be removed.
def get_tag_filter(item):
osm_list = get_item_tags(item)
tag_filter = []
for tag_or_key in osm_list:
if tag_or_key.startswith("Key:"):
if tag_or_key.startswith("Tag:"):
# Split into the part before the first "=" and the part after it.
k, _, v = tag_or_key.partition("=")
tag_filter.append(model.Polygon.tags[k] == v)
return or_(*tag_filter)
# For every location of `item`, query polygons within `max_distance` that also
# match get_tag_filter(item). Returns (object, distance) pairs sorted by
# distance, keeping the smallest distance seen for each identifier.
# NOTE(review): the parenthesised query is never closed, and the loop unpacks
# two values per row although only model.Polygon is selected in the visible
# query — the part adding the distance column appears to be missing.
def get_nearby(item, max_distance=100):
osm_objects = {}
distances = {}
tag_filter = get_tag_filter(item)
for loc in item.locations:
lat, lon = loc.get_lat_lon()
# ST_MakePoint takes longitude first. The SRID is written out as 4326 here
# rather than using the module-level `srid`.
point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)
dist = func.ST_Distance(point, model.Polygon.way.cast(Geography()))
q = (model.Polygon.query
.filter(dist < max_distance, tag_filter)
# The loop variable `dist` shadows the SQL expression of the same name above.
for i, dist in q:
osm_objects.setdefault(i.identifier, i)
if i.identifier not in distances or dist < distances[i.identifier]:
distances[i.identifier] = dist
return [(osm_objects[identifier], dist)
for identifier, dist
in sorted(distances.items(), key=lambda i:i[1])]
# JSON endpoint: load the item with the given ID, look up nearby objects with
# get_nearby() (max_distance 100) and return them with the item's QID and the
# elapsed time in seconds.
# NOTE(review): the `cur = {` literal is never closed and nothing is appended
# to `nearby`, so as shown the response list would stay empty. Lines appear
# to be missing.
def api_find_osm_candidates(item_id):
t0 = time()
item = model.Item.query.get(item_id)
max_distance = 100
nearby = []
for osm, dist in get_nearby(item, max_distance):
cur = {
"identifier": osm.identifier,
"distance": dist,
"tags": osm.tags,
"area": osm.area,
"geojson": osm.geojson(),
t1 = time() - t0
return jsonify(success=True, qid=item.qid, nearby=nearby, duration=t1)
# JSON endpoint: take a comma-separated "qids" query argument, load each item
# (fetching it with get_and_save_item() when the database lookup finds
# nothing), and return markers plus P31 counts in the same shape as
# api_wikidata_items().
# NOTE(review): request.args.get("qids") is used without a None check, so a
# request without the argument fails on .split().
# NOTE(review): nothing is appended to `db_items` or `isa_count`, and the
# `isa_items` comprehension and the `isa = {` literal are never closed —
# lines appear to be missing from this block.
def api_missing_wikidata_items():
qids_arg = request.args.get("qids")
qids = qids_arg.split(",")
# "".split(",") yields [""], hence the extra check on the first element.
if not qids or not qids[0]:
return jsonify(success=True, items=[], isa_count=[])
db_items = []
for qid in qids:
# Drop the first character of the QID to get the primary-key value.
item = model.Item.query.get(qid[1:])
if not item:
item = get_and_save_item(qid)
items = get_markers(db_items)
counts = get_isa_count(db_items)
isa_ids = [qid[1:] for qid, count in counts]
isa_items = {
isa.qid: isa for isa in model.Item.query.filter(model.Item.item_id.in_(isa_ids))
isa_count = []
for qid, count in counts:
item = isa_items.get(qid)
if not item:
item = get_and_save_item(qid)
label = item.label() if item else "[missing]"
isa = {
"qid": qid,
"count": count,
"label": label,
return jsonify(success=True, items=items, isa_count=isa_count)
def api_search():
    """Return, as JSON, the ``nominatim.lookup`` hits for the ``q`` argument.

    ``q`` is read with ``request.args["q"]``, so a request without it is
    rejected by Flask rather than handled here. Each hit has its
    ``"geotext"`` key removed and gains a ``"name"`` key taken from
    ``nominatim.get_hit_name``.
    """
    q = request.args["q"]
    hits = nominatim.lookup(q)
    for hit in hits:
        # pop() with a default removes the key only when it is present.
        hit.pop("geotext", None)
        hit["name"] = nominatim.get_hit_name(hit)
    return jsonify(hits=hits)
if __name__ == "__main__":