From c60735169908249fb6cccddc15bc8dbf66b4f3b0 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 13 May 2023 15:01:28 +0200 Subject: [PATCH 01/12] Update --- frontend/App.vue | 77 ++++++++++++++++++++--- matcher/api.py | 78 ++++++++++++++++++++++-- matcher/nominatim.py | 4 +- matcher/utils.py | 18 +----- templates/isa.html | 3 +- templates/map.html | 22 ++++--- web_view.py | 141 ++++++++++++++++++++++++++++++++++--------- 7 files changed, 274 insertions(+), 69 deletions(-) diff --git a/frontend/App.vue b/frontend/App.vue index ba87f42..77b1964 100644 --- a/frontend/App.vue +++ b/frontend/App.vue @@ -303,7 +303,7 @@ v-bind:key="isa.qid" v-for="isa in item_type_hits" href="#" - @click.prevent="item_type_filters.includes(isa) || item_type_filters.push(isa)" + @click.prevent="add_item_type_filter(isa)" > {{ isa.label }} ({{ isa.qid }}) @@ -458,12 +458,33 @@
{{ wd_item.aliases.join("; ") }} +
item coordinates + +
+ {{ marker[0].toFixed(5) }}, + {{ marker[1].toFixed(5) }} + +
+
item type
{{isa.label}} ({{isa.qid}})
+ +
+ + Wikipedia + + +
+ + {{wp.lang}} +   + +
+
street address
{{wd_item.street_address[0]}} @@ -524,6 +545,15 @@
+ +
Images on Commons +
+ {{wd_item.commons}} + +
+ @@ -548,7 +578,7 @@
No OSM matches found nearby -
+

The OSM tags/keys used as the search criteria to find matching OSM objects are listed below, along with the Wikidata item that was the source.

@@ -766,6 +796,8 @@ export default { startLon: Number, startZoom: Number, startRadius: Number, + startItem: String, + startItemTypeFilter: Array, username: String, startMode: String, q: String, @@ -994,10 +1026,26 @@ export default { } }, methods: { - api_call(endpoint, options) { + wikipedia_link(lang, title) { + var norm_title = title.replaceAll(" ", "_"); + return `https://${lang}.wikipedia.org/wiki/${norm_title}`; + }, + marker_osm_url(marker) { + var lat = marker[0].toFixed(5); + var lon = marker[1].toFixed(5); + return `https://www.openstreetmap.org/?mlat=${lat}&mlon=${lon}#map=18/${lat}/${lon}` + }, + add_item_type_filter(isa) { + if (this.item_type_filters.includes(isa)) { + return; + } + this.item_type_filters.push(isa); + this.update_map_path(); + }, + api_call(endpoint, options) { var url = `${this.api_base_url}/api/1/${endpoint}`; - return axios.get(url, options).catch(this.show_api_error_modal); - }, + return axios.get(url, options).catch(this.show_api_error_modal); + }, update_unload_warning(edit_list) { if (edit_list.length) { addEventListener("beforeunload", beforeUnloadListener, {capture: true}); @@ -1216,14 +1264,19 @@ export default { this.isa_ticked = Object.keys(this.isa_labels); }, build_map_path() { + if (this.current_item) { + return `/item/${this.current_qid}`; + } var zoom = this.map.getZoom(); var c = this.map.getCenter(); var lat = c.lat.toFixed(5); var lng = c.lng.toFixed(5); var path = `/map/${zoom}/${lat}/${lng}`; - if (this.current_item) { - path += `?item=${this.current_qid}`; + + if (this.item_type_filters.length) { + path += "?isa=" + this.item_type_filters.map((t) => t.qid).join(";"); } + return path; }, @@ -1755,6 +1808,11 @@ export default { this.zoom = this.startZoom; this.mode = this.startMode; this.changeset_comment = this.defaultComment || '+wikidata'; + console.log(this.startItemTypeFilter); + if (this.startItemTypeFilter.length) { + this.show_item_type_filter = true; + } + this.item_type_filters = 
this.startItemTypeFilter; }, mounted() { @@ -1764,7 +1822,6 @@ export default { zoom: this.zoom || 16, }; - var map = L.map("map", options); var osm_url = "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png"; var tile_url = "https://tile-c.openstreetmap.fr/hot/{z}/{x}/{y}.png"; @@ -1788,13 +1845,13 @@ export default { this.search_text = this.q.trim(); this.run_search(); } else { - this.detail_qid = this.qid_from_url(); + this.detail_qid = this.startItem; if (this.detail_qid) { this.load_wikidata_items(bounds); } else { this.auto_load(bounds); + this.update_map_path(); } - this.update_map_path(); } window.onpopstate = this.onpopstate; diff --git a/matcher/api.py b/matcher/api.py index bb84aca..2199311 100644 --- a/matcher/api.py +++ b/matcher/api.py @@ -38,6 +38,14 @@ skip_tags = { } def get_country_iso3166_1(lat, lon): + """ + For a given lat/lon return a set of ISO country codes. + + Also cache the country code in the global object. + + Normally there should be only one country. + """ + point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid) alpha2_codes = set() q = model.Polygon.query.filter(func.ST_Covers(model.Polygon.way, point), @@ -57,7 +65,18 @@ def is_street_number_first(lat, lon): return True alpha2 = get_country_iso3166_1(lat, lon) - alpha2_number_first = {'GB', 'IE', 'US', 'MX', 'CA', 'FR', 'AU', 'NZ', 'ZA'} + # Incomplete list of countries that put street number first. + alpha2_number_first = { + 'GB', # United Kingdom + 'IE', # Ireland + 'US', # United States + 'MX', # Mexico + 'CA', # Canada + 'FR', # France + 'AU', # Australia + 'NZ', # New Zealand + 'ZA', # South Africa + } return bool(alpha2_number_first & alpha2) @@ -92,6 +111,7 @@ def make_envelope_around_point(lat, lon, distance): return func.ST_MakeEnvelope(west, south, east, north, srid) def drop_way_area(tags): + """ Remove the way_area field from a tags dict. 
""" if "way_area" in tags: del tags["way_area"] return tags @@ -122,6 +142,8 @@ def get_part_of(table_name, src_id, bbox): } for osm_id, tags, area in conn.execute(s)] def get_and_save_item(qid): + """ Download an item from Wikidata and cache it in the database. """ + entity = wikidata_api.get_entity(qid) entity_qid = entity["id"] if entity_qid != qid: @@ -396,7 +418,6 @@ def add_isa_filter(q, isa_qids): ) subclass_qid = {qid for qid, in q_subclass.all()} - # print(subclass_qid) isa = func.jsonb_path_query_array( model.Item.claims, @@ -419,7 +440,7 @@ def wikidata_items_count(bounds, isa_filter=None): return q.count() -def wikidata_isa_counts(bounds): +def wikidata_isa_counts(bounds, isa_filter=None): db_bbox = make_envelope(bounds) q = ( @@ -427,6 +448,9 @@ def wikidata_isa_counts(bounds): .filter(func.ST_Covers(db_bbox, model.ItemLocation.location)) ) + if isa_filter: + q = add_isa_filter(q, isa_filter) + db_items = q.all() counts = get_isa_count(db_items) @@ -605,7 +629,11 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): item_id = item.item_id item_is_linear_feature = item.is_linear_feature() item_is_street = item.is_street() - item_names = {n.lower() for n in item.names().keys()} + item_names_dict = item.names() + if item_names_dict: + item_names = {n.lower() for n in item_names_dict.keys()} + else: + item_names = set() check_is_street_number_first(item.locations[0].get_lat_lon()) @@ -702,6 +730,8 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): shape = "area" if table == "polygon" else table + item_identifier_tags = item.get_identifiers_tags() + cur = { "identifier": f"{osm_type}/{osm_id}", "type": osm_type, @@ -733,6 +763,8 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): return nearby def get_item(item_id): + """ Retrieve a Wikidata item, either from the database or from Wikidata. 
""" + item = model.Item.query.get(item_id) return item or get_and_save_item(f"Q{item_id}") @@ -763,6 +795,11 @@ def check_is_street_number_first(latlng): g.street_number_first = is_street_number_first(*latlng) def item_detail(item): + unsupported_relation_types = { + 'Q194356', # wind farm + 'Q2175765', # tram stop + } + locations = [list(i.get_lat_lon()) for i in item.locations] if not hasattr(g, 'street_number_first'): g.street_number_first = is_street_number_first(*locations[0]) @@ -783,6 +820,11 @@ def item_detail(item): }) isa_items = [get_item(isa["numeric-id"]) for isa in item.get_isa()] + isa_lookup = {isa.qid: isa for isa in isa_items} + + wikipedia_links = [{"lang": site[:-4], "title": link["title"]} + for site, link in sorted(item.sitelinks.items()) + if site.endswith("wiki") and len(site) < 8] d = { "qid": item.qid, @@ -797,11 +839,21 @@ def item_detail(item): "p1619": item.time_claim("P1619"), "p576": item.time_claim("P576"), "heritage_designation": heritage_designation, + "wikipedia": wikipedia_links, + "identifiers": item.get_identifiers(), } if aliases := item.get_aliases(): d["aliases"] = aliases + if "commonswiki" in item.sitelinks: + d["commons"] = item.sitelinks["commonswiki"]["title"] + + unsupported = isa_lookup.keys() & unsupported_relation_types + if unsupported: + d["unsupported_relation_types"] = [isa for isa in d["isa_list"] + if isa["qid"] in isa_lookup] + return d @@ -892,3 +944,21 @@ def isa_incremental_search(search_terms): } ret.append(cur) return ret + +def get_place_items(osm_type, osm_id): + src_id = osm_id * {'way': 1, 'relation': -1}[osm_type] + + q = (model.Item.query + .join(model.ItemLocation) + .join(model.Polygon, func.ST_Covers(model.Polygon.way, model.ItemLocation.location)) + .filter(model.Polygon.src_id == src_id)) + # sql = q.statement.compile(compile_kwargs={"literal_binds": True}) + + item_count = q.count() + items = [] + for item in q: + keys = ["item_id", "labels", "descriptions", "aliases", "sitelinks", "claims"] 
+ item_dict = {key: getattr(item, key) for key in keys} + items.append(item_dict) + + return {"count": item_count, "items": items} diff --git a/matcher/nominatim.py b/matcher/nominatim.py index 5e70793..cfee85f 100644 --- a/matcher/nominatim.py +++ b/matcher/nominatim.py @@ -83,8 +83,8 @@ def get_hit_name(hit): if len(address) == 1: return n1 - country = address.pop("country") - country_code = address.pop("country_code") + country = address.pop("country", None) + country_code = address.pop("country_code", None) if country_code: country_code == country_code.lower() diff --git a/matcher/utils.py b/matcher/utils.py index 81335a0..0cace13 100644 --- a/matcher/utils.py +++ b/matcher/utils.py @@ -5,7 +5,6 @@ import json import math import user_agents import re -import pattern.en from datetime import date from num2words import num2words @@ -160,18 +159,6 @@ def is_in_range(address_range, address): return False -def pluralize_label(label): - text = label["value"] - if label["language"] != "en": - return text - - # pattern.en.pluralize has the plural of 'mine' as 'ours' - if text == "mine": - return "mines" - - return pattern.en.pluralize(text) - - def format_wikibase_time(v): p = v["precision"] t = v["time"] @@ -180,11 +167,12 @@ def format_wikibase_time(v): # example: https://www.wikidata.org/wiki/Q108266998 if p == 11: - return date.fromisoformat(t[1:11]).strftime("%d %B %Y") + return date.fromisoformat(t[1:11]).strftime("%-d %B %Y") if p == 10: return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y") if p == 9: return t[1:5] if p == 7: century = ((int(t[:5]) - 1) // 100) + 1 - return num2words(century, to="ordinal_num") + " century" + end = " BC" if century < 0 else "" + return num2words(abs(century), to="ordinal_num") + " century" + end diff --git a/templates/isa.html b/templates/isa.html index 0b0ac24..edb2950 100644 --- a/templates/isa.html +++ b/templates/isa.html @@ -4,6 +4,7 @@ {% block content %}
+ {% include "flash_msg.html" %}

{{ self.title() }}

@@ -13,7 +14,7 @@
-
+
diff --git a/templates/map.html b/templates/map.html index 5e03c0f..1c4a85c 100644 --- a/templates/map.html +++ b/templates/map.html @@ -4,26 +4,32 @@ Wikidata items linked to OSM - - - + + + {% from "navbar.html" import navbar with context %} {% block nav %}{{ navbar() }}{% endblock %}
- + diff --git a/web_view.py b/web_view.py index c20df04..239988e 100755 --- a/web_view.py +++ b/web_view.py @@ -1,12 +1,12 @@ #!/usr/bin/python3.9 from flask import (Flask, render_template, request, jsonify, redirect, url_for, g, - flash, session, Response, stream_with_context) + flash, session, Response, stream_with_context, abort, send_file) from sqlalchemy import func from sqlalchemy.sql.expression import update from matcher import (nominatim, model, database, commons, wikidata, wikidata_api, osm_oauth, edit, mail, api, error_mail) -from werkzeug.debug.tbtools import get_current_traceback +# from werkzeug.debug.tbtools import get_current_traceback from matcher.data import property_map from time import time, sleep from requests_oauthlib import OAuth1Session @@ -19,6 +19,7 @@ import json import GeoIP import re import maxminddb +import sqlalchemy srid = 4326 re_point = re.compile(r'^POINT\((.+) (.+)\)$') @@ -54,27 +55,27 @@ def dict_repr_values(d): return {key: repr(value) for key, value in d.items()} -@app.errorhandler(werkzeug.exceptions.InternalServerError) -def exception_handler(e): - tb = get_current_traceback() - last_frame = next(frame for frame in reversed(tb.frames) if not frame.is_library) - last_frame_args = inspect.getargs(last_frame.code) - if request.path.startswith("/api/"): - return cors_jsonify({ - "success": False, - "error": tb.exception, - "traceback": tb.plaintext, - "locals": dict_repr_values(last_frame.locals), - "last_function": { - "name": tb.frames[-1].function_name, - "args": repr(last_frame_args), - }, - }), 500 - - return render_template('show_error.html', - tb=tb, - last_frame=last_frame, - last_frame_args=last_frame_args), 500 +# @app.errorhandler(werkzeug.exceptions.InternalServerError) +# def exception_handler(e): +# tb = get_current_traceback() +# last_frame = next(frame for frame in reversed(tb.frames) if not frame.is_library) +# last_frame_args = inspect.getargs(last_frame.code) +# if request.path.startswith("/api/"): +# return 
cors_jsonify({ +# "success": False, +# "error": tb.exception, +# "traceback": tb.plaintext, +# "locals": dict_repr_values(last_frame.locals), +# "last_function": { +# "name": tb.frames[-1].function_name, +# "args": repr(last_frame_args), +# }, +# }), 500 +# +# return render_template('show_error.html', +# tb=tb, +# last_frame=last_frame, +# last_frame_args=last_frame_args), 500 def cors_jsonify(*args, **kwargs): response = jsonify(*args, **kwargs) @@ -113,8 +114,8 @@ def geoip_user_record(): def get_user_location(): remote_ip = request.args.get('ip', request.remote_addr) - maxmind = maxminddb_reader.get(remote_ip)["location"] - return maxmind["location"] if maxmind else None + maxmind = maxminddb_reader.get(remote_ip) + return maxmind.get("location") if maxmind else None @app.route("/") @@ -138,6 +139,12 @@ def isa_page(item_id): item = api.get_item(item_id) if request.method == "POST": + tag_or_key = request.form["tag_or_key"] + extra = model.ItemExtraKeys(item=item, tag_or_key=tag_or_key) + database.session.add(extra) + database.session.commit() + flash("extra OSM tag/key added") + return redirect(url_for(request.endpoint, item_id=item_id)) q = model.ItemExtraKeys.query.filter_by(item=item) @@ -240,12 +247,19 @@ def identifier_page(pid): def map_start_page(): loc = get_user_location() + if loc: + lat, lon = loc["latitude"], loc["longitude"] + radius = loc["accuracy_radius"] + else: + lat, lon = 42.2917, -85.5872 + radius = 5 + return redirect(url_for( 'map_location', - lat=f'{loc["latitude"]:.5f}', - lon=f'{loc["longitude"]:.5f}', + lat=f'{lat:.5f}', + lon=f'{lon:.5f}', zoom=16, - radius=loc["accuracy_radius"], + radius=radius, ip=request.args.get('ip'), )) @@ -285,9 +299,22 @@ def search_page(): @app.route("/map///") def map_location(zoom, lat, lon): qid = request.args.get("item") + isa_param = request.args.get("isa") if qid: api.get_item(qid[1:]) + isa_list = [] + if isa_param: + for isa_qid in isa_param.split(";"): + isa = api.get_item(isa_qid[1:]) + if not 
isa: + continue + cur = { + "qid": isa.qid, + "label": isa.label(), + } + isa_list.append(cur) + return render_template( "map.html", active_tab="map", @@ -298,9 +325,40 @@ def map_location(zoom, lat, lon): username=get_username(), mode="map", q=None, + item_type_filter=isa_list, ) +@app.route("/item/Q") +def lookup_item(item_id): + item = api.get_item(item_id) + if not item: + # TODO: show nicer page for Wikidata item not found + return abort(404) + + try: + lat, lon = item.locations[0].get_lat_lon() + except IndexError: + # TODO: show nicer page for Wikidata item without coordinates + return abort(404) + + return render_template( + "map.html", + active_tab="map", + zoom=16, + lat=lat, + lon=lon, + username=get_username(), + mode="map", + q=None, + qid=item.qid, + item_type_filter=[], + ) + + url = url_for("map_location", zoom=16, lat=lat, lon=lon, item=item.qid) + return redirect(url) + + @app.route("/search/map") def search_map_page(): user_lat, user_lon = get_user_location() or (None, None) @@ -394,6 +452,15 @@ def api_wikidata_items(): t1 = time() - t0 return cors_jsonify(success=True, duration=t1, **ret) +@app.route("/api/1/place//") +def api_place_items(osm_type, osm_id): + t0 = time() + + ret = api.get_place_items(osm_type, osm_id) + + t1 = time() - t0 + return cors_jsonify(success=True, duration=t1, **ret) + @app.route("/api/1/osm") def api_osm_objects(): @@ -540,7 +607,11 @@ def api_search(): hit["name"] = nominatim.get_hit_name(hit) hit["label"] = nominatim.get_hit_label(hit) hit["address"] = list(hit["address"].items()) - hit["identifier"] = f"{hit['osm_type']}/{hit['osm_id']}" + if "osm_type" in hit and "osm_id" in hit: + hit["identifier"] = f"{hit['osm_type']}/{hit['osm_id']}" + else: + print(hit) + print(q) return cors_jsonify(success=True, hits=hits) @@ -805,6 +876,18 @@ def api_save_changeset(session_id): return api_call(session_id) +@app.route("/sql", methods=["GET", "POST"]) +def run_sql(): + if request.method != "POST": + return 
render_template("run_sql.html") + + sql = request.form["sql"] + conn = database.session.connection() + result = conn.execute(sqlalchemy.text(sql)) + + return render_template("run_sql.html", result=result) + + def api_real_save_changeset(session_id): es = model.EditSession.query.get(session_id) From c232db73ebdb6010e60eb88989a3ea13fe173ee3 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 13 May 2023 15:24:49 +0200 Subject: [PATCH 02/12] Update --- package.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/package.json b/package.json index 2e463cc..2cb6641 100644 --- a/package.json +++ b/package.json @@ -6,12 +6,12 @@ "test": "echo \"This template does not include a test runner by default.\" && exit 1" }, "dependencies": { - "bootstrap": "^5.0.1", - "fork-awesome": "^1.1.7", - "leaflet": "^1.7.1", + "@popperjs/core": "^2.11.0", + "fork-awesome": "^1.2.0", + "leaflet": "^1.8.0", "leaflet-extra-markers": "^1.2.1", "redaxios": "^0.4.1", - "vue": "^3.0.11" + "vue": "^3.2.26" }, "devDependencies": { "@snowpack/plugin-dotenv": "^2.1.0", From 9f8ccf95f19d008e776c88d7fa333ad30e816b6b Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 13 May 2023 15:25:52 +0200 Subject: [PATCH 03/12] Update --- snowpack.config.mjs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/snowpack.config.mjs b/snowpack.config.mjs index f92da73..d508721 100644 --- a/snowpack.config.mjs +++ b/snowpack.config.mjs @@ -1,20 +1,10 @@ -import pkg from './package.json'; - /** @type {import("snowpack").SnowpackUserConfig } */ export default { mount: { - // public: {url: '/', static: true}, + public: {url: '/', static: true}, frontend: {url: '/dist'}, }, - plugins: [ - '@snowpack/plugin-vue', - '@snowpack/plugin-dotenv', - ['snowpack-plugin-cdn-import', { - dependencies: pkg.dependencies, - enableInDevMode: true, - // baseUrl: 'https://unpkg.com', - }] - ], + plugins: ['@snowpack/plugin-vue', '@snowpack/plugin-dotenv'], routes: [ /* Enable an 
SPA Fallback in development: */ // {"match": "routes", "src": ".*", "dest": "/index.html"}, From 733ca3aa8fefaea85caa6aa018926cc5266b50b5 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 13 May 2023 21:57:58 +0200 Subject: [PATCH 04/12] Update --- matcher/api.py | 466 +++++++++++++++++++++++++++------------------ matcher/commons.py | 13 +- matcher/planet.py | 18 +- matcher/utils.py | 100 ++++++---- 4 files changed, 366 insertions(+), 231 deletions(-) diff --git a/matcher/api.py b/matcher/api.py index 2199311..5fd8a9c 100644 --- a/matcher/api.py +++ b/matcher/api.py @@ -1,19 +1,21 @@ -from sqlalchemy import func, or_, and_, text +import json +import os.path +import re +from collections import Counter, defaultdict + +from flask import current_app, g +from sqlalchemy import and_, func, or_, text +from sqlalchemy.dialects import postgresql from sqlalchemy.orm import selectinload from sqlalchemy.sql import select -from sqlalchemy.sql.expression import literal, union, cast, column +from sqlalchemy.sql.expression import cast, column, literal, union from sqlalchemy.types import Float -from sqlalchemy.dialects import postgresql -from matcher.planet import point, line, polygon -from matcher import model, database, wikidata_api, wikidata -from collections import Counter, defaultdict -from flask import g, current_app -import re -import os.path -import json + +from matcher import database, model, wikidata, wikidata_api +from matcher.planet import line, point, polygon srid = 4326 -re_point = re.compile(r'^POINT\((.+) (.+)\)$') +re_point = re.compile(r"^POINT\((.+) (.+)\)$") entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"} tag_prefixes = { @@ -37,19 +39,19 @@ skip_tags = { "Key:brand", } -def get_country_iso3166_1(lat, lon): - """ - For a given lat/lon return a set of ISO country codes. + +def get_country_iso3166_1(lat: float, lon: float) -> set[str]: + """For a given lat/lon return a set of ISO country codes. 
Also cache the country code in the global object. Normally there should be only one country. """ - point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid) alpha2_codes = set() - q = model.Polygon.query.filter(func.ST_Covers(model.Polygon.way, point), - model.Polygon.admin_level == "2") + q = model.Polygon.query.filter( + func.ST_Covers(model.Polygon.way, point), model.Polygon.admin_level == "2" + ) for country in q: alpha2 = country.tags.get("ISO3166-1") if not alpha2: @@ -60,22 +62,23 @@ def get_country_iso3166_1(lat, lon): return alpha2_codes -def is_street_number_first(lat, lon): +def is_street_number_first(lat: float, lon: float) -> bool: + """Is lat/lon within a country that puts number first in a street address.""" if lat is None or lon is None: return True alpha2 = get_country_iso3166_1(lat, lon) # Incomplete list of countries that put street number first. alpha2_number_first = { - 'GB', # United Kingdom - 'IE', # Ireland - 'US', # United States - 'MX', # Mexico - 'CA', # Canada - 'FR', # France - 'AU', # Australia - 'NZ', # New Zealand - 'ZA', # South Africa + "GB", # United Kingdom + "IE", # Ireland + "US", # United States + "MX", # Mexico + "CA", # Canada + "FR", # France + "AU", # Australia + "NZ", # New Zealand + "ZA", # South Africa } return bool(alpha2_number_first & alpha2) @@ -84,22 +87,26 @@ def is_street_number_first(lat, lon): def make_envelope(bounds): return func.ST_MakeEnvelope(*bounds, srid) + def get_bbox_centroid(bbox): bbox = make_envelope(bbox) centroid = database.session.query(func.ST_AsText(func.ST_Centroid(bbox))).scalar() return reversed(re_point.match(centroid).groups()) + def make_envelope_around_point(lat, lon, distance): conn = database.session.connection() p = func.ST_MakePoint(lon, lat) - s = select([ - func.ST_AsText(func.ST_Project(p, distance, func.radians(0))), - func.ST_AsText(func.ST_Project(p, distance, func.radians(90))), - func.ST_AsText(func.ST_Project(p, distance, func.radians(180))), - 
func.ST_AsText(func.ST_Project(p, distance, func.radians(270))), - ]) + s = select( + [ + func.ST_AsText(func.ST_Project(p, distance, func.radians(0))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(90))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(180))), + func.ST_AsText(func.ST_Project(p, distance, func.radians(270))), + ] + ) row = conn.execute(s).fetchone() coords = [[float(v) for v in re_point.match(i).groups()] for i in row] @@ -110,49 +117,64 @@ def make_envelope_around_point(lat, lon, distance): return func.ST_MakeEnvelope(west, south, east, north, srid) -def drop_way_area(tags): - """ Remove the way_area field from a tags dict. """ + +def drop_way_area(tags: dict[str, str]) -> dict[str, str]: + """Remove the way_area field from a tags dict.""" if "way_area" in tags: del tags["way_area"] return tags + def get_part_of(table_name, src_id, bbox): - table_map = {'point': point, 'line': line, 'polygon': polygon} + table_map = {"point": point, "line": line, "polygon": polygon} table_alias = table_map[table_name].alias() - s = (select([polygon.c.osm_id, - polygon.c.tags, - func.ST_Area(func.ST_Collect(polygon.c.way))]). - where(and_(func.ST_Intersects(bbox, polygon.c.way), - func.ST_Covers(polygon.c.way, table_alias.c.way), - table_alias.c.osm_id == src_id, - polygon.c.tags.has_key("name"), - or_( - polygon.c.tags.has_key("landuse"), - polygon.c.tags.has_key("amenity"), - ))). 
- group_by(polygon.c.osm_id, polygon.c.tags)) + s = ( + select( + [ + polygon.c.osm_id, + polygon.c.tags, + func.ST_Area(func.ST_Collect(polygon.c.way)), + ] + ) + .where( + and_( + func.ST_Intersects(bbox, polygon.c.way), + func.ST_Covers(polygon.c.way, table_alias.c.way), + table_alias.c.osm_id == src_id, + polygon.c.tags.has_key("name"), + or_( + polygon.c.tags.has_key("landuse"), + polygon.c.tags.has_key("amenity"), + ), + ) + ) + .group_by(polygon.c.osm_id, polygon.c.tags) + ) conn = database.session.connection() - return [{ - "type": "way" if osm_id > 0 else "relation", - "id": abs(osm_id), - "tags": tags, - "area": area, - } for osm_id, tags, area in conn.execute(s)] + return [ + { + "type": "way" if osm_id > 0 else "relation", + "id": abs(osm_id), + "tags": tags, + "area": area, + } + for osm_id, tags, area in conn.execute(s) + ] -def get_and_save_item(qid): - """ Download an item from Wikidata and cache it in the database. """ +def get_and_save_item(qid: str) -> model.Item | None: + """Download an item from Wikidata and cache it in the database.""" entity = wikidata_api.get_entity(qid) entity_qid = entity["id"] if entity_qid != qid: - print(f'redirect {qid} -> {entity_qid}') + print(f"redirect {qid} -> {entity_qid}") item = model.Item.query.get(entity_qid[1:]) return item if "claims" not in entity: - return + return None coords = wikidata.get_entity_coords(entity["claims"]) item_id = int(qid[1:]) @@ -171,8 +193,9 @@ def get_and_save_item(qid): return item -def get_isa_count(items): - isa_count = Counter() +def get_isa_count(items: list[model.Item]) -> list[tuple[int, int]]: + """List of IsA counts.""" + isa_count: Counter[int] = Counter() for item in items: if not item: continue @@ -199,13 +222,11 @@ def get_items_in_bbox(bbox): def get_osm_with_wikidata_tag(bbox, isa_filter=None): - bbox_str = ','.join(str(v) for v in bbox) + bbox_str = ",".join(str(v) for v in bbox) extra_sql = "" if isa_filter: - q = ( - model.Item.query.join(model.ItemLocation) - 
.filter(func.ST_Covers(make_envelope(bbox), - model.ItemLocation.location)) + q = model.Item.query.join(model.ItemLocation).filter( + func.ST_Covers(make_envelope(bbox), model.ItemLocation.location) ) q = add_isa_filter(q, isa_filter) qids = [isa.qid for isa in q] @@ -216,7 +237,8 @@ def get_osm_with_wikidata_tag(bbox, isa_filter=None): extra_sql += f" AND tags -> 'wikidata' in ({qid_list})" # easier than building this query with SQLAlchemy - sql = f''' + sql = ( + f""" SELECT tbl, osm_id, tags, ARRAY[ST_Y(centroid), ST_X(centroid)], geojson FROM ( SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson @@ -235,24 +257,29 @@ UNION HAVING st_area(st_collect(way)) < 20 * st_area(ST_MakeEnvelope({bbox_str}, {srid})) ) as anon WHERE tags ? 'wikidata' -''' + extra_sql +""" + + extra_sql + ) conn = database.session.connection() result = conn.execute(text(sql)) print(sql) - point_sql = f''' + point_sql = ( + f""" SELECT 'point' as tbl, osm_id, tags, ST_AsText(ST_Centroid(way)) as centroid, ST_AsGeoJSON(way) as geojson FROM planet_osm_point WHERE ST_Intersects(ST_MakeEnvelope({bbox_str}, {srid}), way) and tags ? 'wikidata' -''' + extra_sql +""" + + extra_sql + ) print("point") print(point_sql) tagged = [] for tbl, osm_id, tags, centroid, geojson in result: - if tbl == 'point': + if tbl == "point": osm_type = "node" else: osm_type = "way" if osm_id > 0 else "relation" @@ -260,15 +287,17 @@ WHERE tags ? 
'wikidata' name = tags.get("name") or tags.get("addr:housename") or "[no label]" - tagged.append({ - "identifier": f"{osm_type}/{osm_id}", - "id": osm_id, - "type": osm_type, - "geojson": json.loads(geojson), - "centroid": centroid, - "name": name, - "wikidata": tags["wikidata"], - }) + tagged.append( + { + "identifier": f"{osm_type}/{osm_id}", + "id": osm_id, + "type": osm_type, + "geojson": json.loads(geojson), + "centroid": centroid, + "name": name, + "wikidata": tags["wikidata"], + } + ) return tagged @@ -310,11 +339,13 @@ def get_item_tags(item): isa, isa_path = isa_items.pop() if not isa: continue - isa_path = isa_path + [{'qid': isa.qid, 'label': isa.label()}] + isa_path = isa_path + [{"qid": isa.qid, "label": isa.label()}] osm = [v for v in isa.get_claim("P1282") if v not in skip_tags] - osm += [extra.tag_or_key - for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)] + osm += [ + extra.tag_or_key + for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id) + ] for i in osm: osm_list[i].append(isa_path[:]) @@ -369,14 +400,16 @@ def get_tags_for_isa_item(item): isa, isa_path = isa_items.pop() if not isa: continue - isa_path = isa_path + [{'qid': isa.qid, 'label': isa.label()}] + isa_path = isa_path + [{"qid": isa.qid, "label": isa.label()}] if isa.item_id not in items_checked_done: - items_checked.append({'qid': isa.qid, 'label': isa.label()}) + items_checked.append({"qid": isa.qid, "label": isa.label()}) items_checked_done.add(isa.item_id) osm = [v for v in isa.get_claim("P1282") if v not in skip_tags] - osm += [extra.tag_or_key - for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id)] + osm += [ + extra.tag_or_key + for extra in model.ItemExtraKeys.query.filter_by(item_id=isa.item_id) + ] for i in osm: osm_list[i].append(isa_path[:]) @@ -403,34 +436,31 @@ def get_tags_for_isa_item(item): seen.update(isa_list) isa_items += [(isa, isa_path) for isa in get_items(isa_list)] return { - 'tags': {key: list(values) for 
key, values in osm_list.items()}, - 'checked': items_checked, + "tags": {key: list(values) for key, values in osm_list.items()}, + "checked": items_checked, } def add_isa_filter(q, isa_qids): - q_subclass = database.session.query(model.Item.qid).filter( func.jsonb_path_query_array( model.Item.claims, - '$.P279[*].mainsnak.datavalue.value.id', - ).bool_op('?|')(list(isa_qids)) + "$.P279[*].mainsnak.datavalue.value.id", + ).bool_op("?|")(list(isa_qids)) ) subclass_qid = {qid for qid, in q_subclass.all()} isa = func.jsonb_path_query_array( model.Item.claims, - '$.P31[*].mainsnak.datavalue.value.id', - ).bool_op('?|') + "$.P31[*].mainsnak.datavalue.value.id", + ).bool_op("?|") return q.filter(isa(list(isa_qids | subclass_qid))) def wikidata_items_count(bounds, isa_filter=None): - - q = ( - model.Item.query.join(model.ItemLocation) - .filter(func.ST_Covers(make_envelope(bounds), model.ItemLocation.location)) + q = model.Item.query.join(model.ItemLocation).filter( + func.ST_Covers(make_envelope(bounds), model.ItemLocation.location) ) if isa_filter: @@ -440,12 +470,12 @@ def wikidata_items_count(bounds, isa_filter=None): return q.count() + def wikidata_isa_counts(bounds, isa_filter=None): db_bbox = make_envelope(bounds) - q = ( - model.Item.query.join(model.ItemLocation) - .filter(func.ST_Covers(db_bbox, model.ItemLocation.location)) + q = model.Item.query.join(model.ItemLocation).filter( + func.ST_Covers(db_bbox, model.ItemLocation.location) ) if isa_filter: @@ -474,12 +504,13 @@ def wikidata_isa_counts(bounds, isa_filter=None): return isa_count + def get_tag_filter(tags, tag_list): tag_filter = [] for tag_or_key in tag_list: if tag_or_key.startswith("Key:"): key = tag_or_key[4:] - tag_filter.append(and_(tags.has_key(key), tags[key] != 'no')) + tag_filter.append(and_(tags.has_key(key), tags[key] != "no")) for prefix in tag_prefixes: tag_filter.append(tags.has_key(f"{prefix}:{key}")) @@ -495,11 +526,11 @@ def get_tag_filter(tags, tag_list): def get_preset_translations(): 
app = current_app country_language = { - 'AU': 'en-AU', # Australia - 'GB': 'en-GB', # United Kingdom - 'IE': 'en-GB', # Ireland - 'IN': 'en-IN', # India - 'NZ': 'en-NZ', # New Zealand + "AU": "en-AU", # Australia + "GB": "en-GB", # United Kingdom + "IE": "en-GB", # Ireland + "IN": "en-IN", # India + "NZ": "en-NZ", # New Zealand } ts_dir = app.config["ID_TAGGING_SCHEMA_DIR"] translation_dir = os.path.join(ts_dir, "dist", "translations") @@ -520,13 +551,14 @@ def get_preset_translations(): return {} + def get_presets_from_tags(ending, tags): translations = get_preset_translations() found = [] for k, v in tags.items(): - if k == 'amenity' and v == 'clock' and tags.get('display') == 'sundial': + if k == "amenity" and v == "clock" and tags.get("display") == "sundial": tag_or_key = f"Tag:{k}={v}" found.append({"tag_or_key": tag_or_key, "name": "Sundial"}) continue @@ -604,8 +636,7 @@ def address_node_label(tags): def get_address_nodes_within_building(osm_id, bbox_list): q = model.Point.query.filter( polygon.c.osm_id == osm_id, - or_(*[func.ST_Intersects(bbox, model.Point.way) - for bbox in bbox_list]), + or_(*[func.ST_Intersects(bbox, model.Point.way) for bbox in bbox_list]), func.ST_Covers(polygon.c.way, model.Point.way), model.Point.tags.has_key("addr:street"), model.Point.tags.has_key("addr:housenumber"), @@ -615,8 +646,14 @@ def get_address_nodes_within_building(osm_id, bbox_list): def osm_display_name(tags): - keys = ("bridge:name", "tunnel:name", "lock_name", "name", "addr:housename", - "inscription") + keys = ( + "bridge:name", + "tunnel:name", + "lock_name", + "name", + "addr:housename", + "inscription", + ) for key in keys: if key in tags: return tags[key] @@ -625,6 +662,7 @@ def osm_display_name(tags): def street_address_in_tags(tags): return "addr:housenumber" in tags and "addr:street" in tags + def find_osm_candidates(item, limit=80, max_distance=450, names=None): item_id = item.item_id item_is_linear_feature = item.is_linear_feature() @@ -637,51 +675,94 @@ 
def find_osm_candidates(item, limit=80, max_distance=450, names=None): check_is_street_number_first(item.locations[0].get_lat_lon()) - bbox_list = [make_envelope_around_point(*loc.get_lat_lon(), max_distance) - for loc in item.locations] + bbox_list = [ + make_envelope_around_point(*loc.get_lat_lon(), max_distance) + for loc in item.locations + ] null_area = cast(None, Float) - dist = column('dist') - tags = column('tags', postgresql.HSTORE) + dist = column("dist") + tags = column("tags", postgresql.HSTORE) tag_list = get_item_tags(item) # tag_filters = get_tag_filter(point.c.tags, tag_list) # print(tag_filters) - s_point = (select([literal('point').label('t'), point.c.osm_id, point.c.tags.label('tags'), - func.min(func.ST_DistanceSphere(model.ItemLocation.location, point.c.way)).label('dist'), - func.ST_AsText(point.c.way), - func.ST_AsGeoJSON(point.c.way), - null_area]). - where(and_( - or_(*[func.ST_Intersects(bbox, point.c.way) - for bbox in bbox_list]), - model.ItemLocation.item_id == item_id, - or_(*get_tag_filter(point.c.tags, tag_list)))). - group_by(point.c.osm_id, point.c.tags, point.c.way)) + s_point = ( + select( + [ + literal("point").label("t"), + point.c.osm_id, + point.c.tags.label("tags"), + func.min( + func.ST_DistanceSphere(model.ItemLocation.location, point.c.way) + ).label("dist"), + func.ST_AsText(point.c.way), + func.ST_AsGeoJSON(point.c.way), + null_area, + ] + ) + .where( + and_( + or_(*[func.ST_Intersects(bbox, point.c.way) for bbox in bbox_list]), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(point.c.tags, tag_list)), + ) + ) + .group_by(point.c.osm_id, point.c.tags, point.c.way) + ) - s_line = (select([literal('line').label('t'), line.c.osm_id, line.c.tags.label('tags'), - func.min(func.ST_DistanceSphere(model.ItemLocation.location, line.c.way)).label('dist'), - func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))), - func.ST_AsGeoJSON(func.ST_Collect(line.c.way)), - null_area]). 
- where(and_( - or_(*[func.ST_Intersects(bbox, line.c.way) for bbox in bbox_list]), - model.ItemLocation.item_id == item_id, - or_(*get_tag_filter(line.c.tags, tag_list)))). - group_by(line.c.osm_id, line.c.tags)) + s_line = ( + select( + [ + literal("line").label("t"), + line.c.osm_id, + line.c.tags.label("tags"), + func.min( + func.ST_DistanceSphere(model.ItemLocation.location, line.c.way) + ).label("dist"), + func.ST_AsText(func.ST_Centroid(func.ST_Collect(line.c.way))), + func.ST_AsGeoJSON(func.ST_Collect(line.c.way)), + null_area, + ] + ) + .where( + and_( + or_(*[func.ST_Intersects(bbox, line.c.way) for bbox in bbox_list]), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(line.c.tags, tag_list)), + ) + ) + .group_by(line.c.osm_id, line.c.tags) + ) - s_polygon = (select([literal('polygon').label('t'), polygon.c.osm_id, polygon.c.tags.label('tags'), - func.min(func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way)).label('dist'), - func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))), - func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)), - func.ST_Area(func.ST_Collect(polygon.c.way))]). - where(and_( - or_(*[func.ST_Intersects(bbox, polygon.c.way) for bbox in bbox_list]), - model.ItemLocation.item_id == item_id, - or_(*get_tag_filter(polygon.c.tags, tag_list)))). - group_by(polygon.c.osm_id, polygon.c.tags). 
- having(func.ST_Area(func.ST_Collect(polygon.c.way)) < 20 * func.ST_Area(bbox_list[0]))) + s_polygon = ( + select( + [ + literal("polygon").label("t"), + polygon.c.osm_id, + polygon.c.tags.label("tags"), + func.min( + func.ST_DistanceSphere(model.ItemLocation.location, polygon.c.way) + ).label("dist"), + func.ST_AsText(func.ST_Centroid(func.ST_Collect(polygon.c.way))), + func.ST_AsGeoJSON(func.ST_Collect(polygon.c.way)), + func.ST_Area(func.ST_Collect(polygon.c.way)), + ] + ) + .where( + and_( + or_(*[func.ST_Intersects(bbox, polygon.c.way) for bbox in bbox_list]), + model.ItemLocation.item_id == item_id, + or_(*get_tag_filter(polygon.c.tags, tag_list)), + ) + ) + .group_by(polygon.c.osm_id, polygon.c.tags) + .having( + func.ST_Area(func.ST_Collect(polygon.c.way)) + < 20 * func.ST_Area(bbox_list[0]) + ) + ) tables = ([] if item_is_linear_feature else [s_point]) + [s_line, s_polygon] s = select([union(*tables).alias()]).where(dist < max_distance).order_by(dist) @@ -695,10 +776,14 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): s = s.where(tags.has_key("name")) if "Key:amenity" in tag_list: - s = s.where(and_(tags["amenity"] != "bicycle_parking", - tags["amenity"] != "bicycle_repair_station", - tags["amenity"] != "atm", - tags["amenity"] != "recycling")) + s = s.where( + and_( + tags["amenity"] != "bicycle_parking", + tags["amenity"] != "bicycle_repair_station", + tags["amenity"] != "atm", + tags["amenity"] != "recycling", + ) + ) if limit: s = s.limit(limit) @@ -750,8 +835,9 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): part_of = [] for bbox in bbox_list: - part_of += [i for i in get_part_of(table, src_id, bbox) - if i["tags"]["name"] != name] + part_of += [ + i for i in get_part_of(table, src_id, bbox) if i["tags"]["name"] != name + ] if part_of: cur["part_of"] = part_of @@ -762,8 +848,9 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): return nearby + def get_item(item_id): - """ 
Retrieve a Wikidata item, either from the database or from Wikidata. """ + """Retrieve a Wikidata item, either from the database or from Wikidata.""" item = model.Item.query.get(item_id) return item or get_and_save_item(f"Q{item_id}") @@ -776,7 +863,7 @@ def get_item_street_addresses(item): for claim in item.claims["P669"]: qualifiers = claim.get("qualifiers") - if not qualifiers or 'P670' not in qualifiers: + if not qualifiers or "P670" not in qualifiers: continue number = qualifiers["P670"][0]["datavalue"]["value"] @@ -784,24 +871,26 @@ def get_item_street_addresses(item): street = street_item.label() for q in qualifiers["P670"]: number = q["datavalue"]["value"] - address = (f"{number} {street}" - if g.street_number_first - else f"{street} {number}") + address = ( + f"{number} {street}" if g.street_number_first else f"{street} {number}" + ) street_address.append(address) return street_address + def check_is_street_number_first(latlng): g.street_number_first = is_street_number_first(*latlng) + def item_detail(item): unsupported_relation_types = { - 'Q194356', # wind farm - 'Q2175765', # tram stop + "Q194356", # wind farm + "Q2175765", # tram stop } locations = [list(i.get_lat_lon()) for i in item.locations] - if not hasattr(g, 'street_number_first'): + if not hasattr(g, "street_number_first"): g.street_number_first = is_street_number_first(*locations[0]) image_filenames = item.get_claim("P18") @@ -811,20 +900,24 @@ def item_detail(item): heritage_designation = [] for v in item.get_claim("P1435"): if not v: - print('heritage designation missing:', item.qid) + print("heritage designation missing:", item.qid) continue heritage_designation_item = get_item(v["numeric-id"]) - heritage_designation.append({ - "qid": v["id"], - "label": heritage_designation_item.label(), - }) + heritage_designation.append( + { + "qid": v["id"], + "label": heritage_designation_item.label(), + } + ) isa_items = [get_item(isa["numeric-id"]) for isa in item.get_isa()] isa_lookup = {isa.qid: 
isa for isa in isa_items} - wikipedia_links = [{"lang": site[:-4], "title": link["title"]} - for site, link in sorted(item.sitelinks.items()) - if site.endswith("wiki") and len(site) < 8] + wikipedia_links = [ + {"lang": site[:-4], "title": link["title"]} + for site, link in sorted(item.sitelinks.items()) + if site.endswith("wiki") and len(site) < 8 + ] d = { "qid": item.qid, @@ -833,7 +926,9 @@ def item_detail(item): "markers": locations, "image_list": image_filenames, "street_address": street_address, - "isa_list": [{"qid": isa.qid, "label": isa.label()} for isa in isa_items if isa], + "isa_list": [ + {"qid": isa.qid, "label": isa.label()} for isa in isa_items if isa + ], "closed": item.closed(), "inception": item.time_claim("P571"), "p1619": item.time_claim("P1619"), @@ -851,8 +946,9 @@ def item_detail(item): unsupported = isa_lookup.keys() & unsupported_relation_types if unsupported: - d["unsupported_relation_types"] = [isa for isa in d["isa_list"] - if isa["qid"] in isa_lookup] + d["unsupported_relation_types"] = [ + isa for isa in d["isa_list"] if isa["qid"] in isa_lookup + ] return d @@ -891,7 +987,7 @@ def wikidata_items(bounds, isa_filter=None): } isa_count.append(isa) - return {'items': items, 'isa_count': isa_count} + return {"items": items, "isa_count": isa_count} def missing_wikidata_items(qids, lat, lon): @@ -926,12 +1022,13 @@ def missing_wikidata_items(qids, lat, lon): return dict(items=items, isa_count=isa_count) + def isa_incremental_search(search_terms): en_label = func.jsonb_extract_path_text(model.Item.labels, "en", "value") q = model.Item.query.filter( - model.Item.claims.has_key("P1282"), - en_label.ilike(f"%{search_terms}%"), - func.length(en_label) < 20, + model.Item.claims.has_key("P1282"), + en_label.ilike(f"%{search_terms}%"), + func.length(en_label) < 20, ) print(q.statement.compile(compile_kwargs={"literal_binds": True})) @@ -945,13 +1042,18 @@ def isa_incremental_search(search_terms): ret.append(cur) return ret -def 
get_place_items(osm_type, osm_id): - src_id = osm_id * {'way': 1, 'relation': -1}[osm_type] - q = (model.Item.query - .join(model.ItemLocation) - .join(model.Polygon, func.ST_Covers(model.Polygon.way, model.ItemLocation.location)) - .filter(model.Polygon.src_id == src_id)) +def get_place_items(osm_type, osm_id): + src_id = osm_id * {"way": 1, "relation": -1}[osm_type] + + q = ( + model.Item.query.join(model.ItemLocation) + .join( + model.Polygon, + func.ST_Covers(model.Polygon.way, model.ItemLocation.location), + ) + .filter(model.Polygon.src_id == src_id) + ) # sql = q.statement.compile(compile_kwargs={"literal_binds": True}) item_count = q.count() diff --git a/matcher/commons.py b/matcher/commons.py index b65658c..469566c 100644 --- a/matcher/commons.py +++ b/matcher/commons.py @@ -1,5 +1,9 @@ -import requests +"""Use mediawiki API to look up images on Wikimedia Commons.""" + import urllib.parse + +import requests + from . import utils commons_start = "http://commons.wikimedia.org/wiki/Special:FilePath/" @@ -7,11 +11,13 @@ commons_url = "https://www.wikidata.org/w/api.php" page_size = 50 -def commons_uri_to_filename(uri): +def commons_uri_to_filename(uri: str) -> str: + """Given the URI for a file on commons return the filename of the file.""" return urllib.parse.unquote(utils.drop_start(uri, commons_start)) -def api_call(params): +def api_call(params: dict[str, str | int]) -> requests.models.Response: + """Make an API call.""" call_params = { "format": "json", "formatversion": 2, @@ -22,6 +28,7 @@ def api_call(params): def image_detail(filenames, thumbheight=None, thumbwidth=None): + """Detail for multiple images.""" params = { "action": "query", "prop": "imageinfo", diff --git a/matcher/planet.py b/matcher/planet.py index 57f7b44..15b8016 100644 --- a/matcher/planet.py +++ b/matcher/planet.py @@ -1,24 +1,32 @@ -from sqlalchemy import Table, Column, Integer, String, Float, MetaData -from sqlalchemy.dialects import postgresql +"""Planet tables.""" + from 
geoalchemy2 import Geometry +from sqlalchemy import Column, Float, Integer, MetaData, String, Table +from sqlalchemy.dialects import postgresql metadata = MetaData() -point = Table("planet_osm_point", metadata, +point = Table( + "planet_osm_point", + metadata, Column("osm_id", Integer), Column("name", String), Column("tags", postgresql.HSTORE), Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False), ) -line = Table("planet_osm_line", metadata, +line = Table( + "planet_osm_line", + metadata, Column("osm_id", Integer), Column("name", String), Column("tags", postgresql.HSTORE), Column("way", Geometry("GEOMETRY", srid=4326, spatial_index=True), nullable=False), ) -polygon = Table("planet_osm_polygon", metadata, +polygon = Table( + "planet_osm_polygon", + metadata, Column("osm_id", Integer), Column("name", String), Column("tags", postgresql.HSTORE), diff --git a/matcher/utils.py b/matcher/utils.py index 0cace13..8cbdb56 100644 --- a/matcher/utils.py +++ b/matcher/utils.py @@ -1,97 +1,114 @@ -from flask import current_app, request -from itertools import islice -import os.path import json import math -import user_agents +import os.path import re +import typing from datetime import date +from itertools import islice +from typing import Any, cast + +import flask +import user_agents from num2words import num2words metres_per_mile = 1609.344 feet_per_metre = 3.28084 feet_per_mile = 5280 +T = typing.TypeVar("T") -def chunk(it, size): + +def chunk(it: typing.Iterable[T], size: int) -> typing.Iterator[tuple[T, ...]]: + """Split an iterable into chunks of the given size.""" it = iter(it) return iter(lambda: tuple(islice(it, size)), ()) -def flatten(l): - return [item for sublist in l for item in sublist] +def flatten(top_list: list[list[T]]) -> list[T]: + """Flatten a list.""" + return [item for sub_list in top_list for item in sub_list] -def drop_start(s, start): +def drop_start(s: str, start: str) -> str: + """Remove string prefix, otherwise throw an 
error.""" assert s.startswith(start) return s[len(start) :] -def remove_start(s, start): +def remove_start(s: str, start: str) -> str: + """Remove a string prefix, if present.""" return s[len(start) :] if s.startswith(start) else s -def normalize_url(url): +def normalize_url(url: str) -> str: + """Standardize URLs to help in comparison.""" for start in "http://", "https://", "www.": url = remove_start(url, start) return url.rstrip("/") -def contains_digit(s): +def contains_digit(s: str) -> bool: + """Check if string contains a digit.""" return any(c.isdigit() for c in s) -def cache_dir(): - return current_app.config["CACHE_DIR"] +def cache_dir() -> str: + """Get cache dir location.""" + d: str = flask.current_app.config["CACHE_DIR"] + return d -def cache_filename(filename): +def cache_filename(filename: str) -> str: + """Get absolute path for cache file.""" return os.path.join(cache_dir(), filename) -def load_from_cache(filename): +def load_from_cache(filename: str) -> Any: + """Load JSON data from cache.""" return json.load(open(cache_filename(filename))) -def get_radius(default=1000): - arg_radius = request.args.get("radius") +def get_radius(default: int = 1000) -> int | None: + """Get radius request argument with default.""" + arg_radius = flask.request.args.get("radius") return int(arg_radius) if arg_radius and arg_radius.isdigit() else default -def get_int_arg(name): - if name in request.args and request.args[name].isdigit(): - return int(request.args[name]) +def get_int_arg(name: str) -> int | None: + """Get a request arg and convert to integer.""" + v = flask.request.args.get(name) + return int(v) if v and v.isdigit() else None -def calc_chunk_size(area_in_sq_km, size=22): +def calc_chunk_size(area_in_sq_km: float, size: int = 22) -> int: + """Work out the size of a chunk.""" side = math.sqrt(area_in_sq_km) return max(1, math.ceil(side / size)) -def file_missing_or_empty(filename): +def file_missing_or_empty(filename: str) -> bool: + """Check if a file is 
missing or empty.""" return os.path.exists(filename) or os.stat(filename).st_size == 0 -def is_bot(): - """ Is the current request from a web robot? """ - ua = request.headers.get("User-Agent") - return ua and user_agents.parse(ua).is_bot +def is_bot() -> bool: + """Is the current request from a web robot.""" + ua = flask.request.headers.get("User-Agent") + return bool(ua and user_agents.parse(ua).is_bot) -def log_location(): - return current_app.config["LOG_DIR"] +def log_location() -> str: + """Get log location from Flask config.""" + return cast(str, flask.current_app.config["LOG_DIR"]) -def good_location(): - return os.path.join(log_location(), "complete") - - -def capfirst(value): - """ Uppercase first letter of string, leave rest as is. """ +def capfirst(value: str) -> str: + """Uppercase first letter of string, leave rest as is.""" return value[0].upper() + value[1:] if value else value -def any_upper(value): +def any_upper(value: str) -> bool: + """Check if string contains any uppercase characters.""" return any(c.isupper() for c in value) @@ -102,7 +119,8 @@ def find_log_file(place): return f.path -def get_free_space(config): +def get_free_space(config: flask.config.Config) -> int: + """Return the amount of available free space.""" s = os.statvfs(config["FREE_SPACE_PATH"]) return s.f_bsize * s.f_bavail @@ -132,12 +150,12 @@ def display_distance(units, dist): return f"{dist / 1000:,.2f} km" -re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I) -re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I) -re_number = re.compile(r"^(?:No\.?|Number)? ?(\d+)\b") +def is_in_range(address_range: str, address: str) -> bool: + """Check if an address is within a range.""" + re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I) + re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I) + re_number = re.compile(r"^(?:No\.?|Number)? 
?(\d+)\b") - -def is_in_range(address_range, address): m_number = re_number.match(address) if not m_number: return False From 54151bb1bb45bed59fd30684d7d3d7914de7467a Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 10:38:11 +0200 Subject: [PATCH 05/12] Add missing code --- matcher/database.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 matcher/database.py diff --git a/matcher/database.py b/matcher/database.py new file mode 100644 index 0000000..e2e7398 --- /dev/null +++ b/matcher/database.py @@ -0,0 +1,39 @@ +"""Database functions.""" + +import flask +import sqlalchemy +from sqlalchemy import create_engine, func +from sqlalchemy.engine import reflection +from sqlalchemy.orm import scoped_session, sessionmaker + +session: sqlalchemy.orm.scoping.scoped_session = scoped_session(sessionmaker()) + + +def init_db(db_url: str, echo: bool = False) -> None: + """Initialise database.""" + session.configure(bind=get_engine(db_url, echo=echo)) + + +def get_engine(db_url: str, echo: bool = False) -> sqlalchemy.engine.base.Engine: + """Create an engine object.""" + return create_engine(db_url, pool_recycle=3600, echo=echo) + + +def get_tables() -> list[str]: + """Get a list of table names.""" + tables: list[str] = reflection.Inspector.from_engine(session.bind).get_table_names() + return tables + + +def init_app(app: flask.app.Flask, echo: bool = False) -> None: + """Initialise database connection within flask app.""" + db_url = app.config["DB_URL"] + session.configure(bind=get_engine(db_url, echo=echo)) + + @app.teardown_appcontext + def shutdown_session(exception: Exception | None = None) -> None: + session.remove() + + +def now_utc(): + return func.timezone("utc", func.now()) From 0b48f932cbc3dc91dfe0911bcd7a4777b07065b8 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 10:49:29 +0200 Subject: [PATCH 06/12] Add sample config --- config/default.py | 25 +++++++++++++++++++++++++ 1 file 
changed, 25 insertions(+) create mode 100644 config/default.py diff --git a/config/default.py b/config/default.py new file mode 100644 index 0000000..28f5052 --- /dev/null +++ b/config/default.py @@ -0,0 +1,25 @@ +"""Sample config.""" + + +ID_TAGGING_SCHEMA_DIR = "/var/lib/data/id-tagging-schema" +ID_PRESET_DIR = "/var/lib/data/id-tagging-schema/data/presets/" +GEOIP_DATA = "/var/lib/data/GeoIP/GeoIPCity.dat" +GEOLITE2 = "/var/lib/data/GeoLite2/GeoLite2-City.mmdb" + +CLIENT_KEY = "" +CLIENT_SECRET = "" + +SECRET_KEY = "" + +DEFAULT_COMMENT = "+wikidata" + +ADMIN_NAME = "" +ADMIN_EMAIL = "" +ADMINS = [ADMIN_EMAIL] + +SMTP_HOST = "localhost" +MAIL_FROM = "osm-wikidata@localhost" + +ERROR_MAIL = True + +PROPAGATE_EXCEPTIONS = False From 2e8ff40d3d35e299a67050a3530c54c5e3b7a383 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:06:08 +0200 Subject: [PATCH 07/12] Update --- matcher/__init__.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 matcher/__init__.py diff --git a/matcher/__init__.py b/matcher/__init__.py new file mode 100644 index 0000000..5e9706c --- /dev/null +++ b/matcher/__init__.py @@ -0,0 +1,13 @@ +"""Match OSM and Wikidata items.""" + + +CallParams = dict[str, str | int] + +user_agent = ( + "osm-wikidata/0.1 (https://github.com/EdwardBetts/osm-wikidata; edward@4angle.com)" +) + + +def user_agent_headers() -> dict[str, str]: + """User-Agent headers.""" + return {"User-Agent": user_agent} From fd35658e517978b501992bc2bafa82c9894be90a Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:06:34 +0200 Subject: [PATCH 08/12] Update --- matcher/utils.py | 68 +++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/matcher/utils.py b/matcher/utils.py index 8cbdb56..36bbe99 100644 --- a/matcher/utils.py +++ b/matcher/utils.py @@ -1,3 +1,5 @@ +"""Utility functions.""" + import json import math import os.path @@ -112,20 +114,26 @@ def any_upper(value: 
str) -> bool: return any(c.isupper() for c in value) -def find_log_file(place): - start = f"{place.place_id}_" - for f in os.scandir(good_location()): - if f.name.startswith(start): - return f.path - - def get_free_space(config: flask.config.Config) -> int: """Return the amount of available free space.""" s = os.statvfs(config["FREE_SPACE_PATH"]) return s.f_bsize * s.f_bavail -def display_distance(units, dist): +def metric_display_distance(units: str, dist: float) -> str | None: + """Convert distance from metres to the specified metric units.""" + if units == "km_and_metres": + units = "km" if dist > 500 else "metres" + if units == "metres": + return f"{dist:,.0f} m" + if units == "km": + return f"{dist / 1000:,.2f} km" + + return None + + +def display_distance(units: str, dist: float) -> str | None: + """Convert distance from metres to the specified units.""" if units in ("miles_and_feet", "miles_and_yards"): total_feet = dist * feet_per_metre miles = total_feet / feet_per_mile @@ -142,12 +150,7 @@ def display_distance(units, dist): miles = dist / metres_per_mile return f"{miles:,.2f} miles" if miles > 0.5 else f"{dist:,.0f} metres" - if units == "km_and_metres": - units = "km" if dist > 500 else "metres" - if units == "metres": - return f"{dist:,.0f} m" - if units == "km": - return f"{dist / 1000:,.2f} km" + return metric_display_distance(units, dist) def is_in_range(address_range: str, address: str) -> bool: @@ -177,20 +180,27 @@ def is_in_range(address_range: str, address: str) -> bool: return False -def format_wikibase_time(v): - p = v["precision"] +class WikibaseTime(typing.TypedDict): + """Wikibase Time dict.""" + + precision: int + time: str + + +def format_wikibase_time(v: WikibaseTime) -> str | None: + """Format wikibase time value into human readable string.""" t = v["time"] - # TODO: handle dates with century precision (7) - # example: https://www.wikidata.org/wiki/Q108266998 - - if p == 11: - return date.fromisoformat(t[1:11]).strftime("%-d %B %Y") - 
if p == 10: - return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y") - if p == 9: - return t[1:5] - if p == 7: - century = ((int(t[:5]) - 1) // 100) + 1 - end = " BC" if century < 0 else "" - return num2words(abs(century), to="ordinal_num") + " century" + end + match v["precision"]: + case 11: # year, month and day + return date.fromisoformat(t[1:11]).strftime("%-d %B %Y") + case 10: # year and month + return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y") + case 9: # year + return t[1:5] + case 7: # century + century = ((int(t[:5]) - 1) // 100) + 1 + ordinal_num: str = num2words(abs(century), to="ordinal_num") + return f"{ordinal_num} {century}{' BC' if century < 0 else ''}" + case _: # not handled + return None From 96002254adfeb4b084fe74251fdc8752d938f49e Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:07:14 +0200 Subject: [PATCH 09/12] Docstrings and types. --- matcher/commons.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/matcher/commons.py b/matcher/commons.py index 469566c..8c537e9 100644 --- a/matcher/commons.py +++ b/matcher/commons.py @@ -1,10 +1,11 @@ """Use mediawiki API to look up images on Wikimedia Commons.""" import urllib.parse +from typing import Any import requests -from . import utils +from . 
import CallParams, utils commons_start = "http://commons.wikimedia.org/wiki/Special:FilePath/" commons_url = "https://www.wikidata.org/w/api.php" @@ -16,9 +17,9 @@ def commons_uri_to_filename(uri: str) -> str: return urllib.parse.unquote(utils.drop_start(uri, commons_start)) -def api_call(params: dict[str, str | int]) -> requests.models.Response: - """Make an API call.""" - call_params = { +def api_call(params: CallParams) -> requests.Response: + """Call the Commons API.""" + call_params: CallParams = { "format": "json", "formatversion": 2, **params, @@ -27,9 +28,11 @@ def api_call(params: dict[str, str | int]) -> requests.models.Response: return requests.get(commons_url, params=call_params, timeout=5) -def image_detail(filenames, thumbheight=None, thumbwidth=None): +def image_detail( + filenames: list[str], thumbheight: int | None = None, thumbwidth: int | None = None +) -> dict[str, Any]: """Detail for multiple images.""" - params = { + params: CallParams = { "action": "query", "prop": "imageinfo", "iiprop": "url", @@ -39,7 +42,7 @@ def image_detail(filenames, thumbheight=None, thumbwidth=None): if thumbwidth is not None: params["iiurlwidth"] = thumbwidth - images = {} + images: dict[str, Any] = {} for cur in utils.chunk(filenames, page_size): call_params = params.copy() From a4e847355eeac154579bf7eea4a0393f1c2bc6e5 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:07:22 +0200 Subject: [PATCH 10/12] Docstrings and types. 
--- matcher/error_mail.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/matcher/error_mail.py b/matcher/error_mail.py index a7a6563..6e8c888 100644 --- a/matcher/error_mail.py +++ b/matcher/error_mail.py @@ -1,13 +1,19 @@ +"""Send mail to admins when there is an error.""" + import logging -from logging.handlers import SMTPHandler from logging import Formatter -from flask import request +from logging.handlers import SMTPHandler + +import flask PROJECT = "osm-wikidata" class MatcherSMTPHandler(SMTPHandler): - def getSubject(self, record): # noqa: N802 + """Custom SMTP handler to change subject line.""" + + def getSubject(self, record: logging.LogRecord) -> str: # noqa: N802 + """Return subject line for error mail.""" return ( f"{PROJECT} error: {record.exc_info[0].__name__}" if (record.exc_info and record.exc_info[0]) @@ -16,12 +22,16 @@ class MatcherSMTPHandler(SMTPHandler): class RequestFormatter(Formatter): - def format(self, record): - record.request = request + """Custom request formatter.""" + + def format(self, record: logging.LogRecord) -> str: + """Add request to log record.""" + record.request = flask.request return super().format(record) -def setup_error_mail(app): +def setup_error_mail(app: flask.Flask) -> None: + """Configure logging to catch errors and email them.""" if not app.config.get("ERROR_MAIL"): return formatter = RequestFormatter( From 24e053f450d35056d533896c5d2653afa10658b8 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:11:23 +0200 Subject: [PATCH 11/12] Add missing code. --- matcher/edit.py | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 matcher/edit.py diff --git a/matcher/edit.py b/matcher/edit.py new file mode 100644 index 0000000..664b408 --- /dev/null +++ b/matcher/edit.py @@ -0,0 +1,71 @@ +from flask import g +from . 
import user_agent_headers, database, osm_oauth, mail +from .model import Changeset +import requests +import html + +really_save = True +osm_api_base = "https://api.openstreetmap.org/api/0.6" + + +def new_changeset(comment): + return f""" + + + + + +""" + + +def osm_request(path, **kwargs): + return osm_oauth.api_put_request(path, **kwargs) + + +def create_changeset(changeset): + try: + return osm_request("/changeset/create", data=changeset.encode("utf-8")) + except requests.exceptions.HTTPError as r: + print(changeset) + print(r.response.text) + raise + + +def close_changeset(changeset_id): + return osm_request(f"/changeset/{changeset_id}/close") + + +def save_element(osm_type, osm_id, element_data): + osm_path = f"/{osm_type}/{osm_id}" + r = osm_request(osm_path, data=element_data) + reply = r.text.strip() + if reply.isdigit(): + return r + + subject = f"matcher error saving element: {osm_path}" + username = g.user.username + body = f""" +https://www.openstreetmap.org{osm_path} + +user: {username} +message user: https://www.openstreetmap.org/message/new/{username} + +error: +{reply} +""" + + mail.send_mail(subject, body) + + +def record_changeset(**kwargs): + change = Changeset(created=database.now_utc(), **kwargs) + + database.session.add(change) + database.session.commit() + + return change + + +def get_existing(osm_type, osm_id): + url = f"{osm_api_base}/{osm_type}/{osm_id}" + return requests.get(url, headers=user_agent_headers()) From 88a0b9f89778d7612cc3e273f0915d4242c9bbd9 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sun, 14 May 2023 11:17:30 +0200 Subject: [PATCH 12/12] Code doesn't work with python 3.9 --- web_view.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web_view.py b/web_view.py index 239988e..da77416 100755 --- a/web_view.py +++ b/web_view.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3.9 +#!/usr/bin/python3 from flask import (Flask, render_template, request, jsonify, redirect, url_for, g, flash, session, Response, 
stream_with_context, abort, send_file)