diff --git a/matcher.style b/matcher.style new file mode 100644 index 0000000..6ae348c --- /dev/null +++ b/matcher.style @@ -0,0 +1,162 @@ +# This is the default osm2pgsql .style file that comes with osm2pgsql. +# +# A .style file has 4 columns that define how OSM objects end up in tables in +# the database and what columns are created. It interacts with the command-line +# hstore options. +# +# Columns +# ======= +# +# OsmType: This is either "node", "way" or "node,way" and indicates if this tag +# applies to nodes, ways, or both. +# +# Tag: The tag +# +# DataType: The type of the column to be created. Normally "text" +# +# Flags: Flags that indicate what table the OSM object is moved into. +# +# There are 6 possible flags. These flags are used both to indicate if a column +# should be created, and if ways with the tag are assumed to be areas. The area +# assumptions can be overridden with an area=yes/no tag +# +# polygon - Create a column for this tag, and objects with the tag are areas +# +# linear - Create a column for this tag +# +# nocolumn - Override the above and don't create a column for the tag, but do +# include objects with this tag +# +# phstore - Same as polygon,nocolumn for backward compatibility +# +# delete - Drop this tag completely and don't create a column for it. This also +# prevents the tag from being added to hstore columns +# +# nocache - Deprecated and does nothing +# +# If an object has a tag that indicates it is an area or has area=yes/1, +# osm2pgsql will try to turn it into an area. If it succeeds, it places it in +# the polygon table. If it fails (e.g. not a closed way) it places it in the +# line table. +# +# Nodes are never placed into the polygon or line table and are always placed in +# the point table. +# +# Hstore +# ====== +# +# The options --hstore, --hstore-match-only, and --hstore-all interact with +# the .style file. +# +# With --hstore any tags without a column will be added to the hstore column. 
+# This will also cause all objects to be kept. +# +# With --hstore-match-only the behavior for tags is the same, but objects are +# only kept if they have a non-NULL value in one of the columns. +# +# With --hstore-all all tags are added to the hstore column unless they appear +# in the style file with a delete flag, causing duplication between the normal +# columns and the hstore column. +# +# Special database columns +# ======================== +# +# There are some special database columns that if present in the .style file +# will be populated by osm2pgsql. +# +# These are +# +# z_order - datatype int4 +# +# way_area - datatype real. The area of the way, in the units of the projection +# (e.g. square mercator meters). Only applies to areas +# +# osm_user - datatype text +# osm_uid - datatype integer +# osm_version - datatype integer +# osm_changeset - datatype integer +# osm_timestamp - datatype timestamptz(0). +# Used with the --extra-attributes option to include metadata in the database. +# If importing with both --hstore and --extra-attributes the meta-data will +# end up in the tags hstore column regardless of the style file. 
+ +# OsmType Tag DataType Flags +node,way access text linear +node,way addr:housename text linear +node,way addr:housenumber text linear +node,way addr:interpolation text linear +node,way admin_level text linear +node,way aerialway text linear +node,way aeroway text polygon +node,way amenity text polygon +node,way area text polygon # hard coded support for area=1/yes => polygon is in osm2pgsql +node,way barrier text linear +node,way bicycle text linear +node,way brand text linear +node,way bridge text linear +node,way boundary text linear +node,way building text polygon +node capital text linear +node,way construction text linear +node,way covered text linear +node,way culvert text linear +node,way cutting text linear +node,way denomination text linear +node,way disused text linear +node ele text linear +node,way embankment text linear +node,way foot text linear +node,way generator:source text linear +node,way harbour text polygon +node,way highway text linear +node,way historic text polygon +node,way horse text linear +node,way intermittent text linear +node,way junction text linear +node,way landuse text polygon +node,way layer text linear +node,way leisure text polygon +node,way lock text linear +node,way man_made text polygon +node,way military text polygon +node,way motorcar text linear +node,way name text linear +node,way natural text polygon # natural=coastline tags are discarded by a hard coded rule in osm2pgsql +node,way office text polygon +node,way oneway text linear +node,way operator text linear +node,way place text polygon +node,way population text linear +node,way power text polygon +node,way power_source text linear +node,way public_transport text polygon +node,way railway text linear +node,way ref text linear +node,way religion text linear +node,way route text linear +node,way service text linear +node,way shop text polygon +node,way sport text polygon +node,way surface text linear +node,way toll text linear +node,way tourism text polygon +node,way 
tower:type text linear +way tracktype text linear +node,way tunnel text linear +node,way water text polygon +node,way waterway text polygon +node,way wetland text polygon +node,way width text linear +node,way wood text linear +node,way z_order int4 linear # This is calculated during import +way way_area real linear # This is calculated during import + +# Area tags +# We don't make columns for these tags, but objects with them are areas. +# Mainly for use with hstore +way abandoned:aeroway text polygon,nocolumn +way abandoned:amenity text polygon,nocolumn +way abandoned:building text polygon,nocolumn +way abandoned:landuse text polygon,nocolumn +way abandoned:power text polygon,nocolumn +way area:highway text polygon,nocolumn diff --git a/matcher/__init__.py b/matcher/__init__.py index 5e9706c..80c2c41 100644 --- a/matcher/__init__.py +++ b/matcher/__init__.py @@ -4,7 +4,9 @@ CallParams = dict[str, str | int] user_agent = ( - "osm-wikidata/0.1 (https://github.com/EdwardBetts/osm-wikidata; edward@4angle.com)" + "osm-wikidata/0.2" + + " (https://github.com/EdwardBetts/osm-wikidata;" + + " edward@4angle.com)" ) diff --git a/matcher/api.py b/matcher/api.py index 5fd8a9c..d47dd69 100644 --- a/matcher/api.py +++ b/matcher/api.py @@ -40,13 +40,14 @@ skip_tags = { } -def get_country_iso3166_1(lat: float, lon: float) -> set[str]: +def get_country_iso3166_1(lat, lon): """For a given lat/lon return a set of ISO country codes. Also cache the country code in the global object. Normally there should be only one country. """ + point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid) alpha2_codes = set() q = model.Polygon.query.filter( @@ -263,7 +264,7 @@ WHERE tags ? 
'wikidata' conn = database.session.connection() result = conn.execute(text(sql)) - print(sql) + # print(sql) point_sql = ( f""" @@ -788,7 +789,7 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None): if limit: s = s.limit(limit) - print(s.compile(compile_kwargs={"literal_binds": True})) + # print(s.compile(compile_kwargs={"literal_binds": True})) conn = database.session.connection() nearby = [] @@ -1031,7 +1032,7 @@ def isa_incremental_search(search_terms): func.length(en_label) < 20, ) - print(q.statement.compile(compile_kwargs={"literal_binds": True})) + # print(q.statement.compile(compile_kwargs={"literal_binds": True})) ret = [] for item in q: diff --git a/matcher/mail.py b/matcher/mail.py index 23dd0ac..cc5fd91 100644 --- a/matcher/mail.py +++ b/matcher/mail.py @@ -1,10 +1,11 @@ -from flask import current_app, g, request, has_request_context +import smtplib +import sys +import traceback from email.mime.text import MIMEText from email.utils import formatdate, make_msgid from pprint import pformat -import smtplib -import traceback -import sys + +from flask import current_app, g, has_request_context, request def send_mail(subject, body, config=None): @@ -71,7 +72,7 @@ def open_changeset_error(session_id, changeset, r): username = g.user.username body = f""" user: {username} -page: {url} +page: {r.url} message user: https://www.openstreetmap.org/message/new/{username} diff --git a/matcher/model.py b/matcher/model.py index e72c113..4c65671 100644 --- a/matcher/model.py +++ b/matcher/model.py @@ -1,89 +1,159 @@ -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.schema import ForeignKey, Column -from sqlalchemy.orm import relationship, column_property, deferred, backref -from sqlalchemy import func -from sqlalchemy.types import Integer, String, Float, Boolean, DateTime, Text, BigInteger -from sqlalchemy.dialects import postgresql -from sqlalchemy.sql.expression import cast -from sqlalchemy.ext.hybrid import hybrid_property 
-from sqlalchemy.ext.declarative import declared_attr -from geoalchemy2 import Geometry -from collections import defaultdict -from flask_login import UserMixin -from .database import session, now_utc -from . import wikidata, utils, mail import json import re +import typing +from collections import defaultdict +from typing import Any + +from flask_login import UserMixin +from geoalchemy2 import Geometry +from sqlalchemy import func +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.associationproxy import association_proxy +from sqlalchemy.ext.declarative import declarative_base, declared_attr +from sqlalchemy.ext.hybrid import hybrid_property +from sqlalchemy.orm import backref, column_property, deferred, relationship +from sqlalchemy.orm.collections import attribute_mapped_collection +from sqlalchemy.schema import Column, ForeignKey +from sqlalchemy.sql.expression import cast +from sqlalchemy.types import BigInteger, Boolean, DateTime, Float, Integer, String, Text + +from . 
import mail, utils, wikidata +from .database import now_utc, session Base = declarative_base() Base.query = session.query_property() -re_point = re.compile(r'^POINT\((.+) (.+)\)$') +re_point = re.compile(r"^POINT\((.+) (.+)\)$") + +osm_type_enum = postgresql.ENUM( + "node", "way", "relation", name="osm_type_enum", metadata=Base.metadata +) + +re_lau_code = re.compile(r"^[A-Z]{2}([^A-Z].+)$") # 'LAU (local administrative unit)' + +property_map = [ + ("P238", ["iata"], "IATA airport code"), + ("P239", ["icao"], "ICAO airport code"), + ("P240", ["faa", "ref"], "FAA airport code"), + ("P296", ["ref", "ref:train", "railway:ref"], "station code"), + ("P300", ["ISO3166-2"], "ISO 3166-2 code"), + ("P359", ["ref:rce"], "Rijksmonument ID"), + ("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"), + ("P649", ["ref:nrhp"], "NRHP reference number"), + ("P722", ["uic_ref"], "UIC station code"), + ("P782", ["ref"], "LAU (local administrative unit)"), + ("P836", ["ref:gss"], "UK Government Statistical Service code"), + ("P856", ["website", "contact:website", "url"], "website"), + ("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"), + ("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"), + # A UIC id can be a IBNR, but not every IBNR is an UIC id + ("P954", ["uic_ref"], "IBNR ID"), + ("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"), + ("P1216", ["HE_ref"], "National Heritage List for England number"), + ("P2253", ["ref:edubase"], "EDUBase URN"), + ("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"), + ("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"), + ("P3562", ["seamark:light:reference"], "Admiralty number"), + ( + "P4755", + ["ref", "ref:train", "ref:crs", "crs", "nat_ref"], + "UK railway station code", + ), + ("P4803", ["ref", "ref:train"], "Amtrak station code"), + ("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"), + ("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"), + ("P5087", 
["ref:fips"], "FIPS 5-2 numeric code (US states)"), + ("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"), +] + +T = typing.TypeVar("T", bound="Item") -osm_type_enum = postgresql.ENUM('node', 'way', 'relation', - name='osm_type_enum', - metadata=Base.metadata) class Item(Base): + """Wikidata item.""" + __tablename__ = "item" item_id = Column(Integer, primary_key=True, autoincrement=False) labels = Column(postgresql.JSONB) descriptions = Column(postgresql.JSONB) aliases = Column(postgresql.JSONB) sitelinks = Column(postgresql.JSONB) - claims = Column(postgresql.JSONB) + claims = Column(postgresql.JSONB, nullable=False) lastrevid = Column(Integer, nullable=False, unique=True) - locations = relationship("ItemLocation", cascade="all, delete-orphan", backref="item") + locations = relationship( + "ItemLocation", cascade="all, delete-orphan", backref="item" + ) qid = column_property("Q" + cast(item_id, String)) + wiki_extracts = relationship( + "Extract", + collection_class=attribute_mapped_collection("site"), + cascade="save-update, merge, delete, delete-orphan", + backref="item", + ) + extracts = association_proxy("wiki_extracts", "extract") + @classmethod - def get_by_qid(cls, qid): + def get_by_qid(cls: typing.Type[T], qid: str) -> T | None: if qid and len(qid) > 1 and qid[0].upper() == "Q" and qid[1:].isdigit(): - return cls.query.get(qid[1:]) + obj: T = cls.query.get(qid[1:]) + return obj + return None @property - def wd_url(self): + def wd_url(self) -> str: + """Wikidata URL for item.""" return f"https://www.wikidata.org/wiki/{self.qid}" - def get_claim(self, pid): - return [i["mainsnak"]["datavalue"]["value"] if "datavalue" in i["mainsnak"] else None - for i in self.claims.get(pid, [])] + def get_claim(self, pid: str) -> list[dict[str, Any] | None]: + """List of claims for given Wikidata property ID.""" + claims = typing.cast(dict[str, list[dict[str, Any]]], self.claims) + return [ + i["mainsnak"]["datavalue"]["value"] + if "datavalue" in i["mainsnak"] + 
else None + for i in claims.get(pid, []) + ] - def label(self, lang='en'): - if lang in self.labels: - return self.labels[lang]['value'] - elif 'en' in self.labels: - return self.labels['en']['value'] + def label(self, lang: str = "en") -> str: + """Label for this Wikidata item.""" + labels = typing.cast(dict[str, dict[str, Any]], self.labels) + if lang in labels: + return typing.cast(str, labels[lang]["value"]) + elif "en" in labels: + return typing.cast(str, labels["en"]["value"]) - label_list = list(self.labels.values()) - return label_list[0]['value'] if label_list else '[no label]' + label_list = list(labels.values()) + return typing.cast(str, label_list[0]["value"]) if label_list else "[no label]" - def description(self, lang='en'): - if lang in self.descriptions: - return self.descriptions[lang]['value'] - elif 'en' in self.descriptions: - return self.descriptions['en']['value'] - return + def description(self, lang: str = "en") -> str | None: + """Return a description of the item.""" + descriptions = typing.cast(dict[str, dict[str, Any]], self.descriptions) + if lang in descriptions: + return typing.cast(str, descriptions[lang]["value"]) + elif "en" in descriptions: + return typing.cast(str, descriptions["en"]["value"]) + return None d_list = list(self.descriptions.values()) if d_list: - return d_list[0]['value'] + return d_list[0]["value"] - def get_aliases(self, lang='en'): + def get_aliases(self, lang="en"): if lang not in self.aliases: - if 'en' not in self.aliases: + if "en" not in self.aliases: return [] - lang = 'en' - return [a['value'] for a in self.aliases[lang]] + lang = "en" + return [a["value"] for a in self.aliases[lang]] def get_part_of_names(self): if not self.claims: return set() part_of_names = set() - for p361 in self.claims.get('P361', []): + for p361 in self.claims.get("P361", []): try: - part_of_id = p361['mainsnak']['datavalue']['value']['numeric-id'] + part_of_id = p361["mainsnak"]["datavalue"]["value"]["numeric-id"] except KeyError: 
continue if part_of_id == self.item_id: @@ -98,7 +168,7 @@ class Item(Base): @property def entity(self): - keys = ['labels', 'aliases', 'descriptions', 'sitelinks', 'claims'] + keys = ["labels", "aliases", "descriptions", "sitelinks", "claims"] return {key: getattr(self, key) for key in keys} def names(self, check_part_of=True): @@ -107,22 +177,24 @@ class Item(Base): d = wikidata.names_from_entity(self.entity) or defaultdict(list) for name, sources in list(d.items()): - if len(sources) == 1 and sources[0][0] == 'image': + if len(sources) == 1 and sources[0][0] == "image": continue for part_of_name in part_of_names: if not name.startswith(part_of_name): continue - prefix_removed = name[len(part_of_name):].strip() + prefix_removed = name[len(part_of_name) :].strip() if prefix_removed not in d: d[prefix_removed] = sources if self.claims: - for p6375 in self.claims.get('P6375', []): + for p6375 in self.claims.get("P6375", []): try: - street_address = p6375['mainsnak']['datavalue']['value'] + street_address = p6375["mainsnak"]["datavalue"]["value"] except KeyError: continue - d[street_address['text']].append(('P6375', street_address.get('language'))) + d[street_address["text"]].append( + ("P6375", street_address.get("language")) + ) # A terrace of buildings can be illustrated with a photo of a single building. 
# We try to determine if this is the case and avoid using the filename of the @@ -131,8 +203,11 @@ class Item(Base): def has_digit(s): return any(c.isdigit() for c in s) - image_names = {name for name, sources in d.items() - if len(sources) == 1 and sources[0][0] == 'image' and has_digit(name)} + image_names = { + name + for name, sources in d.items() + if len(sources) == 1 and sources[0][0] == "image" and has_digit(name) + } if not image_names: return dict(d) or None @@ -166,10 +241,10 @@ class Item(Base): isa_qids = self.get_isa_qids() matching_types = { - "Q12731", # dead end street - "Q34442", # road - "Q79007", # street - "Q83620", # thoroughfare + "Q12731", # dead end street + "Q34442", # road + "Q79007", # street + "Q83620", # thoroughfare "Q21000333", # shopping street "Q62685721", # pedestrian street } @@ -179,14 +254,13 @@ class Item(Base): if isa_qids is None: isa_qids = self.get_isa_qids() matching_types = { - "Q355304", # watercourse - "Q4022", # river - "Q47521", # stream - "Q1437299", # creek + "Q355304", # watercourse + "Q4022", # river + "Q47521", # stream + "Q1437299", # creek "Q63565252", # brook - "Q12284", # canal + "Q12284", # canal "Q55659167", # natural watercourse - } return bool(matching_types & set(isa_qids)) @@ -195,19 +269,29 @@ class Item(Base): return self.is_street(isa_qids) or self.is_watercourse(isa_qids) def is_tram_stop(self): - return 'Q2175765' in self.get_isa_qids() + return "Q2175765" in self.get_isa_qids() def alert_admin_about_bad_time(self, v): - body = ("Wikidata item has an unsupported time precision\n\n" - + self.wd_url + "\n\n" + "Value:\n\n" + json.dumps(v, indent=2)) + body = ( + "Wikidata item has an unsupported time precision\n\n" + + self.wd_url + + "\n\n" + + "Value:\n\n" + + json.dumps(v, indent=2) + ) mail.send_mail(f"OWL Map: bad time value in {self.qid}", body) - def closed(self): + def time_claim(self, pid): ret = [] - for v in self.get_claim("P3999"): + for v in self.get_claim(pid): if not v: continue - t = 
utils.format_wikibase_time(v) + try: + t = utils.format_wikibase_time(v) + except Exception: + self.alert_admin_about_bad_time(v) + raise + if t: ret.append(t) else: @@ -215,6 +299,84 @@ class Item(Base): return ret + def closed(self): + return self.time_claim("P3999") + + def first_paragraph_language(self, lang): + if lang not in self.sitelinks(): + return + extract = self.extracts.get(lang) + if not extract: + return + + empty_list = [ + "

", + "

\n

", + "

\n\n

", + "

\n\n

", + "

\n\n\n

", + "

.\n

", + "


", + '

\n

', + '

\n\n

', + '

\n\n\n

', + ] + + text = extract.strip() + while True: + found_empty = False + for empty in empty_list: + if text.startswith(empty): + text = text[len(empty) :].strip() + found_empty = True + if not found_empty: + break + + close_tag = "

" + first_end_p_tag = text.find(close_tag) + if first_end_p_tag == -1: + # FIXME: e-mail admin + return text + + return text[: first_end_p_tag + len(close_tag)] + + def get_identifiers_tags(self): + tags = defaultdict(list) + for claim, osm_keys, label in property_map: + values = [ + i["mainsnak"]["datavalue"]["value"] + for i in self.claims.get(claim, []) + if "datavalue" in i["mainsnak"] + ] + if not values: + continue + if claim == "P782": + values += [ + m.group(1) for m in (re_lau_code.match(v) for v in values) if m + ] + for osm_key in osm_keys: + tags[osm_key].append((values, label)) + return dict(tags) + + def get_identifiers(self): + ret = {} + for claim, osm_keys, label in property_map: + values = [ + i["mainsnak"]["datavalue"]["value"] + for i in self.claims.get(claim, []) + if "datavalue" in i["mainsnak"] + ] + if not values: + continue + if claim == "P782": + values += [ + m.group(1) for m in (re_lau_code.match(v) for v in values) if m + ] + for osm_key in osm_keys: + ret[label] = values + return ret + + # class Claim(Base): # __tablename__ = "claim" # item_id = Column(Integer, primary_key=True) @@ -222,13 +384,14 @@ class Item(Base): # position = Column(Integer, primary_key=True) # mainsnak = Column(postgresql.JSONB) -class ItemIsA(Base): - __tablename__ = 'item_isa' - item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True) - isa_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True) - item = relationship('Item', foreign_keys=[item_id]) - isa = relationship('Item', foreign_keys=[isa_id]) +class ItemIsA(Base): + __tablename__ = "item_isa" + item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True) + isa_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True) + + item = relationship("Item", foreign_keys=[item_id]) + isa = relationship("Item", foreign_keys=[isa_id]) class ItemLocation(Base): @@ -241,18 +404,21 @@ class ItemLocation(Base): qid = column_property("Q" + cast(item_id, String)) pid = 
column_property("P" + cast(item_id, String)) - def get_lat_lon(self): - return session.query(func.ST_Y(self.location), - func.ST_X(self.location)).one() + def get_lat_lon(self) -> tuple[float, float]: + """Get latitude and longitude of item.""" + loc: tuple[float, float] + loc = session.query(func.ST_Y(self.location), func.ST_X(self.location)).one() + return loc + def location_objects(coords): locations = [] for pid, coord_list in coords.items(): for num, coords in enumerate(coord_list): point = f"POINT({coords['longitude']} {coords['latitude']})" - loc = ItemLocation(property_id=int(pid[1:]), - statement_order=num, - location=point) + loc = ItemLocation( + property_id=int(pid[1:]), statement_order=num, location=point + ) locations.append(loc) return locations @@ -282,8 +448,7 @@ class MapMixin: @declared_attr def geojson_str(cls): return column_property( - func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6), - deferred=True + func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6), deferred=True ) @declared_attr @@ -292,17 +457,16 @@ class MapMixin: @hybrid_property def has_street_address(self): - return ("addr:housenumber" in self.tags - and "addr:street" in self.tags) + return "addr:housenumber" in self.tags and "addr:street" in self.tags def display_name(self): - for key in 'bridge:name', 'tunnel:name', 'lock_name': + for key in "bridge:name", "tunnel:name", "lock_name": if key in self.tags: return self.tags[key] - return (self.name - or self.tags.get("addr:housename") - or self.tags.get("inscription")) + return ( + self.name or self.tags.get("addr:housename") or self.tags.get("inscription") + ) def geojson(self): return json.loads(self.geojson_str) @@ -343,7 +507,7 @@ class Line(MapMixin, Base): @classmethod def get_osm(cls, osm_type, osm_id): - src_id = osm_id * {'way': 1, 'relation': -1}[osm_type] + src_id = osm_id * {"way": 1, "relation": -1}[osm_type] return cls.query.get(src_id) @@ -352,11 +516,12 @@ class Polygon(MapMixin, Base): @classmethod def get_osm(cls, 
osm_type, osm_id): - src_id = osm_id * {'way': 1, 'relation': -1}[osm_type] + src_id = osm_id * {"way": 1, "relation": -1}[osm_type] return cls.query.get(src_id) @property - def type(self): + def type(self) -> str: + """Polygon is either a way or a relation.""" return "way" if self.src_id > 0 else "relation" @declared_attr @@ -364,12 +529,15 @@ class Polygon(MapMixin, Base): return column_property(func.ST_Area(cls.way, False), deferred=True) @hybrid_property - def area_in_sq_km(self): + def area_in_sq_km(self) -> float: + """Size of area in square km.""" return self.area / (1000 * 1000) class User(Base, UserMixin): - __tablename__ = 'user' + """User.""" + + __tablename__ = "user" id = Column(Integer, primary_key=True) username = Column(String) password = Column(String) @@ -392,23 +560,27 @@ class User(Base, UserMixin): osm_oauth_token = Column(String) osm_oauth_token_secret = Column(String) - def is_active(self): + def is_active(self) -> bool: + """User is active.""" return self.active + class EditSession(Base): - __tablename__ = 'edit_session' + __tablename__ = "edit_session" id = Column(Integer, primary_key=True) user_id = Column(Integer, ForeignKey(User.id)) created = Column(DateTime, default=now_utc(), nullable=False) edit_list = Column(postgresql.JSONB) comment = Column(String) - user = relationship('User') - changeset = relationship('Changeset', back_populates='edit_session', uselist=False) + user = relationship("User") + changeset = relationship("Changeset", back_populates="edit_session", uselist=False) class Changeset(Base): - __tablename__ = 'changeset' + """An OSM Changeset generated by this tool.""" + + __tablename__ = "changeset" id = Column(BigInteger, primary_key=True) created = Column(DateTime) comment = Column(String) @@ -416,38 +588,62 @@ class Changeset(Base): update_count = Column(Integer, nullable=False) edit_session_id = Column(Integer, ForeignKey(EditSession.id)) - user = relationship('User', - backref=backref('changesets', - lazy='dynamic', - 
order_by='Changeset.created.desc()')) + user = relationship( + "User", + backref=backref( + "changesets", lazy="dynamic", order_by="Changeset.created.desc()" + ), + ) - edit_session = relationship('EditSession', back_populates='changeset') + edit_session = relationship("EditSession", back_populates="changeset") class ChangesetEdit(Base): - __tablename__ = 'changeset_edit' + """Record details of edits within a changeset.""" - changeset_id = Column(BigInteger, - ForeignKey('changeset.id'), - primary_key=True) + __tablename__ = "changeset_edit" + + changeset_id = Column(BigInteger, ForeignKey("changeset.id"), primary_key=True) item_id = Column(Integer, primary_key=True) osm_id = Column(BigInteger, primary_key=True) osm_type = Column(osm_type_enum, primary_key=True) saved = Column(DateTime, default=now_utc(), nullable=False) - changeset = relationship('Changeset', - backref=backref('edits', lazy='dynamic')) + changeset = relationship("Changeset", backref=backref("edits", lazy="dynamic")) + class SkipIsA(Base): - __tablename__ = 'skip_isa' - item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True) + """Ignore this item type when walking the Wikidata subclass graph.""" + + __tablename__ = "skip_isa" + item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True) + qid = column_property("Q" + cast(item_id, String)) + + item = relationship("Item") - item = relationship('Item') class ItemExtraKeys(Base): - __tablename__ = 'item_extra_keys' - item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True) + """Extra tag or key to consider for an Wikidata item type.""" + + __tablename__ = "item_extra_keys" + item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True) tag_or_key = Column(String, primary_key=True) note = Column(String) + qid = column_property("Q" + cast(item_id, String)) - item = relationship('Item') + item = relationship("Item") + + +class Extract(Base): + """First paragraph from Wikipedia.""" + + __tablename__ = 
"extract" + + item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True) + site = Column(String, primary_key=True) + extract = Column(String, nullable=False) + + def __init__(self, site: str, extract: str): + """Initialise the object.""" + self.site = site + self.extract = extract diff --git a/notes b/notes new file mode 100644 index 0000000..a97e95e --- /dev/null +++ b/notes @@ -0,0 +1,406 @@ +# vim: spell:tw=80 ft=markdown + +Extracted items from data dump that include "P625" with the quotes. There are 8,398,490 matching items. + +Nearest-Neighbour Searching + +https://postgis.net/workshops/postgis-intro/knn.html + +--- +Use recent changes API to update local Wikidata entity mirror. + +Need to handle new item, edit, delete, and undelete. + +For now we're just interested in items with coordinates, later we might care +about languages, and classes. + +At some point we might keep track of redirects. + +Deletes +------- +Is the item in our database? If not then ignore it, if yes then delete it. + +New +--- +Download full entity, check if it contains coordinates, if yes, then add to database, if not then ignore. + +Make a note of item ID and revid. Avoid downloading item again during update. + +Edits +----- +If the item is in our database and lastrevid is larger than the revid of the change then skip. + +Download full entity. + +If in our database and latest revision includes coordinates update item in +database. If no coordinates then delete from our database. + + +====== +Currently we have geographic objects represented by the Item class. We also want +information about the type of object, languages and countries. + +How about a hierarchy with Item as the base class and GeoItem as a subclass for +geographical objects. We can also have IsA, Language, and Country classes that +derive from Item. + +Countries are a subclass of GeoItem. 
+ +With the current design the Item table represents a cached copy of the latest +version of the Wikidata item, no history is stored locally. This makes it had to +keep track of changes over time. + +The same is true of the OSM data, we just keeping a copy of the most recent +version. + +Instead we could store multiple revisions of Wikidata items. We want the latest +version and any that has been considered part of a match with OSM. + +Which Wikidata revisions do we keep? + +1. latest revision +2. revision used to generate match +3. revision used in match checked by user + +Maybe a separate item revision table is too complex. We could just store JSON +from a match in a table of OSM user uploads. + +=== +All countries have a P625 statement + +=== +cable-stayed bridge (Q158555) + +There are 786 bridges on OSM tagged with bridge:structure=cable-stayed. Some of +these have a Wikidata tag but aren't tagged as a cable-stayed bridge in +Wikidata. The Wikidata could be updated to tag them as a cable-stayed bridge. +Something similar could be applied to other types. + +=== +Lots of items with coordinates don\'t have OSM tags/keys, either because they +don\'t belong on the map or there isn\'t enough info in Wikidata. + +Need to search different properties for OSM tags, at least 'instance of', +'use', 'sport' and 'religion'. + +Should start from items with an OSM tag first. Download all items with OSM tag, +then walk subclass tree and download. + +=== +Test out a new design. + +=== +Make a status page that shows the size of each database table. +=== +What should URLs look like, say I want to show lakes in lapland? + +https://osm.wikidata.link/matches?isa=Lake&location=Lapland + +=== +OSM & Wikidata pin map TODO list + +IsA list should support filtering + +=== +2021-06-17 + +Candidate list should show street address. For example: + +https://alpha.osm.wikidata.link/map/17/40.01461/-105.28196?item=Q42384818 +--- +Preset could be more specific. 
For example mosque instead of place of worship. + +id-tagging-schema/data/presets/amenity/place_of_worship/muslim.json +--- +candidates list should show object tags + +=== +2021-06-19 + +* Rename from 'alpha' to 'v2'. +* Use Flask-Babel to for i18n. Get translations from + https://www.microsoft.com/en-us/language/ +* Show total number of items +* Show place name +* Show aliases + +=== +2021-06-23 + +Planning to update user IP location code. Should grab items within city or +region. Need to handle IP that only resolves to a whole country. For example +archive.org is 207.241.224.2, and just returns USA. The USA is too big to apply +the matcher interface to. + +When trying to match the whole USA we should show the whole country and +encourage the user to zoom in. Once the + +--- +Map thoughts. Questions: + +What do we show to the user when the site loads? +What happens when the user drags the map? +What happens when the user changes zoom? +How does searching change things? + +Starting scenarios: + +User enters IP that resolves to a big country, say USA. We show a map of the +whole USA and ask them to zoom in. Once they've zoomed in we can show the total +number of items and the item type facets. + +Find item type within Cambridge: + +``` SQL +select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num +from item, item_location, planet_osm_polygon +where item.item_id = item_location.item_id and osm_id=-295355 and ST_Covers(way, location) group by isa order by num; +``` + +Also need a to show a facet for items where item type is empty + +Find item type within California: +``` SQL +select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num +from item, item_location, planet_osm_polygon +where item.item_id = item_location.item_id and osm_id=-165475 and ST_Intersects(way, location) +group by isa order by num desc limit 20; +``` +This query takes 26.5 seconds. + +England item count takes 1.5 seconds. 
+ +``` SQL +select count(distinct item_id) +from item_location, planet_osm_polygon +where osm_id=-58447 and ST_Covers(way, location); +``` + +=== +2021-06-25 + +Library buildings (Q856584) in England. Query takes 3 seconds + +``` SQL +select count(*) +from item, item_location, planet_osm_polygon as loc +where loc.osm_id=-58447 + and jsonb_path_query_array(claims, '$.P31[*].mainsnak.datavalue.value.id') ? 'Q856584' + and item.item_id = item_location.item_id + and item_location.location && loc.way; +``` +=== +2021-07-04 + +TODO +* Better error page than just 500 Internal Server Error. +* Improve handling of Wikidata items without coordinates. Use different colour + for OSM Pin. Explain situation on item detail page. No need to look for matches. +* DONE: Show spinner when looking for nearby OSM candidate matches. +* DONE: Show message if no matches found. +* Add 'building only match' switch +* Two item pins on top of each other is a problem. + +2021-07-05 + +Sometimes the selected OSM matches are incorrect. For example: + +https://v2.osm.wikidata.link/map/15/37.31390/-121.86338?item=Q83645632 + +The item is linked to a node, a way and a relation. The node shows as a pin on +the map, but isn't in the list of possible nearby matches. The way and relation +both show in the list, but aren't selected. + +2021-07-07 + +Logout link should come back to the same map location. Need to record the +location somewhere. Could be in a cookie, constant updating of the logout +URL, or have JavaScript that runs when the user follows the logout link. + +Search +Should show a spinner so the user knows something is happening. +Trigger search after first three characters have been entered. +DONE: Style search hits so not so close to search box + +Highlight chosen search result. +Close button to hide search results. +DONE: Zoom to result bounds instead of zoom level 16. +Should you be allowed to search while editing? + +DONE: Hide OSM candidate checkboxes if user not logged in. 
+
+2021-07-10
+
+Exclude ways that are part of a boundary. Example:
+
+https://v2.osm.wikidata.link/map/18/42.37903/-71.11136?item=Q14715848
+
+2021-07-16
+
+Need better handling for OSM with wikidata tag but item has no coordinates.
+
+Viewing a street shows too many yellow pins.
+https://v2.osm.wikidata.link/map/15/37.31221/-121.88869?item=Q89545422
+
+2021-07-17
+Could match on just name
+https://v2.osm.wikidata.link/map/18/50.21789/-5.28079?item=Q5288904
+
+2021-07-18
+Florida State Road 922 (Q2433226) is stored as multiple lines in the osm2pgsql
+database. Need to rebuild the database with the --multi-geometry option so there is
+only one.
+
+2021-07-19
+After a save clicking on another item without closing edit panel causes
+problems. Need to trigger close_edit_list when opening item if upload_state is
+set to 'done'
+
+2021-07-22
+
+Example of a long road: Collins Avenue (Q652775)
+https://v2.osm.wikidata.link/map/19/25.86222/-80.12032?item=Q652775
+
+2021-08-04
+Use https://vue-select.org/ for item type filter.
+Show alert with spinner while count is running.
+Maybe we want to supply the item type filter as JSON and filter in the browser,
+no need to hit the server and database.
+Write documentation for the API.
+Speed up the item detail OSM nearby option.
+Use the sidebar to show list of items in the current view, so the user can
+go through the list and check them.
+OSM object polygon size is broken
+
+2021-08-05
+
+IsA search
+
+```sql
+SELECT 'Q' || item.item_id, item.labels->'en'->>'value' FROM item WHERE
+item.claims ? 'P1282' AND lower(jsonb_extract_path_text(item.labels, 'en',
+'value')) LIKE lower('%hotel%') AND length(jsonb_extract_path_text(item.labels,
+'en', 'value')) < 20;
+```
+
+2021-09-11
+
+Notes from Pixel 2
+
+Pin at the centroid of a polygon is too busy, especially with an item that links
+to multiple OSM objects. Object outline already on map, just need to connect
+outline to Wikidata markers. 
Could try and work out corners of rectangular
+buildings. Should link to ends nearest node for linear objects.
+
+Show warning when navigating away from map with edits.
+
+See WindowEventHandlers.onbeforeunload
+
+Option to clear edit list.
+
+---
+Ignore coordinates with a Google Maps reference. Example:
+
+https://www.wikidata.org/w/index.php?title=Q66228733&oldid=992964237
+
+---
+Check history for previous wikidata tags to warn mappers if a wikidata tag
+they're adding has previously been removed.
+
+Examples:
+ https://v2.osm.wikidata.link/map/17/52.18211/0.17756?item=Q6717455
+ and https://www.openstreetmap.org/way/143741201
+ https://www.openstreetmap.org/way/684624781
+
+---
+What happens when we move the map?
+
+First we check the area visible on the map. If it is too large then there is
+nothing we can do, we give up and tell the user they need to zoom in.
+
+Otherwise we send the server a request for a count of the number of items in the
+current view. If the count is too high we abort and tell the user to zoom in.
+
+Once we know the area isn't too big and doesn't have too many items we want to
+make three requests to the server. First we make a request for the Wikidata items
+on the map and another request for OSM objects with a Wikidata tag on the map. Both
+requests run at the same time. Once both requests complete we make another
+request to check for missing Wikidata items that were linked from OSM objects.
+
+---
+This is done
+
+https://v2.osm.wikidata.link/map/18/52.23270/0.21560?item=Q55099320
+should match: https://www.openstreetmap.org/node/2000849525
+
+Look for Tag:abandoned:railway=station
+
+---
+Need better handling for Wikidata redirects. 
+
+Example: https://www.openstreetmap.org/way/130458959
+https://v2.osm.wikidata.link/map/18/51.36973/-2.81079?item=Q5117357
+
+---
+Consider 'OS grid reference'
+https://www.wikidata.org/w/index.php?title=Q27082051&oldid=1336630735
+
+---
+Check for OpenStreetMap relation ID (P402) in Wikidata
+
+Display on details page. Highlight matching relation.
+
+example: https://www.wikidata.org/wiki/Q78078847
+
+---
+TODO
+
+* DONE: Add special code for matching watercourses that works like street matching
+* DONE: Frontend should catch API errors and show them
+* DONE: API calls should return errors in JSON
+
+* Run update code from systemd
+* Stop Wikidata update code from crashing when it hits an error
+* Add an option for 'select all' for linear features
+* Add a note to details page explaining street matching
+* Upload code to GitHub
+* Candidates list jumps when first object is selected, because message appears
+  at the top of the list. Can be fixed by having a message there and replacing
+  it.
+
+IsA pages
+* Flesh out IsA pages
+* Allow users to add extra tags to IsA
+* Add option to update IsA
+
+Type filter
+* Include type filter QIDs in URL
+* Move type filter to modal box
+* Show item type description
+
+---
+Show note about relations for tram stops and windfarms
+
+---
+Show dissolved, abolished or demolished date (P576)
+https://map.osm.wikidata.link/map/18/40.74610/-73.99652?item=Q14707174
+
+---
+Get subclasses for one item type
+
+``` SQL
+select item_id, labels->'en'->'value' from item where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath) ?| '{"Q718893"}';
+```
+
+Get subclasses for items with OSM tag/key
+
+``` SQL
+select item_id, labels->'en'->'value'
+  from item
+  where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath)
+  ?| array(select 'Q' || item_id from item where claims ? 
'P1282'); +``` + +--- +Shipyard results shouldn't include place=city +https://map.osm.wikidata.link/map/18/50.89540/-1.38243?item=Q551401 diff --git a/snowpack.config.mjs b/snowpack.config.mjs index d508721..167d5bb 100644 --- a/snowpack.config.mjs +++ b/snowpack.config.mjs @@ -4,7 +4,15 @@ export default { public: {url: '/', static: true}, frontend: {url: '/dist'}, }, - plugins: ['@snowpack/plugin-vue', '@snowpack/plugin-dotenv'], + plugins: [ + '@snowpack/plugin-vue', + '@snowpack/plugin-dotenv', + ['snowpack-plugin-cdn-import', { + dependencies: pkg.dependencies, + enableInDevMode: true, + baseUrl: 'https://unpkg.com', + }] + ], routes: [ /* Enable an SPA Fallback in development: */ // {"match": "routes", "src": ".*", "dest": "/index.html"}, diff --git a/templates/flash_msg.html b/templates/flash_msg.html new file mode 100644 index 0000000..76038fa --- /dev/null +++ b/templates/flash_msg.html @@ -0,0 +1,12 @@ +{% with messages = get_flashed_messages() %} + {% if messages %} + {% for message in messages %} + + {% endfor %} + {% endif %} +{% endwith %} diff --git a/templates/show_error.html b/templates/show_error.html new file mode 100644 index 0000000..13447e7 --- /dev/null +++ b/templates/show_error.html @@ -0,0 +1,36 @@ +{% extends "base.html" %} + +{% block style %} + +{% endblock %} + +{% block content %} + +
+
+
+ +

Software error: {{ tb.exception_type }}

+
+
{{ tb.exception }}
+
+ +{% set body %} +URL: {{ request.url }} + +{{ tb.plaintext | safe }} +{% endset %} + +

Submit as an issue on GitHub (requires an account with GitHub)

+ +

Traceback (most recent call last)

+{{ tb.render_summary(include_title=False) | safe }} + +

Error in function "{{ tb.frames[-1].function_name }}": {{ last_frame_args | pprint }}

+
{{ last_frame.locals | pprint }}
+ +
+
+
+ +{% endblock %} diff --git a/tests/test_utils.py b/tests/test_utils.py index e091bc1..2064264 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,3 +12,6 @@ def test_format_wikibase_time_century(): v = {"time": "+1950-00-00T00:00:00Z", "precision": 7} assert utils.format_wikibase_time(v) == "20th century" + + v = {"time": "+1868-01-09T00:00:00Z", "precision": 11} + assert utils.format_wikibase_time(v) == "9 January 1868" diff --git a/update.py b/update.py new file mode 100755 index 0000000..3ebee24 --- /dev/null +++ b/update.py @@ -0,0 +1,159 @@ +#!/usr/bin/python3 + +"""Download Wikidata recent changes and update items in local database.""" + +import json +import typing +from time import sleep + +from matcher import database, model, wikidata, wikidata_api + +DB_URL = "postgresql:///matcher" +database.init_db(DB_URL) + +entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"} + + +class Change(typing.TypedDict): + """Dict representing an edit in recent changes.""" + + title: str + timestamp: str + redirect: dict[str, typing.Any] | None + revid: int + + +def handle_new(change: Change) -> None: + """Handle a new Wikidata item from the recent changes feed.""" + qid = change["title"] + ts = change["timestamp"] + if change["redirect"]: + print(f"{ts}: new item {qid}, since replaced with redirect") + return + item = model.Item.query.get(qid[1:]) # check if item is already loaded + if item: + return handle_edit(change) + + entity = wikidata_api.get_entity(qid) + if entity["id"] != qid: + print(f'redirect {qid} -> {entity["id"]}') + return + + if "claims" not in entity: + print(qid) + print(entity) + coords = wikidata.get_entity_coords(entity["claims"]) + if not coords: + print(f"{ts}: new item {qid} without coordinates") + return + + print(f"{ts}: new item {qid} with coordinates") + + item_id = int(qid[1:]) + obj = {k: v for k, v in entity.items() if k in entity_keys} + try: + item = model.Item(item_id=item_id, **obj) + 
except TypeError: + print(qid) + print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}') + print(entity.keys()) + raise + item.locations = model.location_objects(coords) + database.session.add(item) + + +def coords_equal(a: dict[str, typing.Any], b: dict[str, typing.Any]) -> bool: + """Deep equality comparison of nested dicts.""" + return json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True) + + +def handle_edit(change: Change) -> None: + """Process an edit from recent changes.""" + qid = change["title"] + item = model.Item.query.get(qid[1:]) + if not item: + return # item isn't in our database so it probably has no coordinates + + ts = change["timestamp"] + + if item.lastrevid >= change["revid"]: + print(f"{ts}: no need to update {qid}") + return + + entity = wikidata_api.get_entity(qid) + entity_qid = entity.pop("id") + if entity_qid != qid: + print(f"{ts}: item {qid} replaced with redirect") + database.session.delete(item) + database.session.commit() + return + + assert entity_qid == qid + existing_coords = wikidata.get_entity_coords(item.claims) + if "claims" not in entity: + return + coords = wikidata.get_entity_coords(entity["claims"]) + + if not coords_equal(existing_coords, coords): + print(f"{ts}: update item {qid}, including coordinates") + item.locations = model.location_objects(coords) + else: + print(f"{ts}: update item {qid}, no change to coordinates") + + for key in entity_keys: + setattr(item, key, entity[key]) + + +def update_timestamp(timestamp: str) -> None: + """Save timestamp to rc_timestamp.""" + out = open("rc_timestamp", "w") + print(timestamp, file=out) + out.close() + + +def update_database() -> None: + """Check recent changes and apply updates to local mirror of Wikidata.""" + with open("rc_timestamp") as f: + start = f.read().strip() + + rccontinue = None + seen = set() + while True: + r = wikidata_api.get_recent_changes(rcstart=start, rccontinue=rccontinue) + + reply = r.json() + for change in 
reply["query"]["recentchanges"]: + rctype = change["type"] + timestamp = change["timestamp"] + qid = change["title"] + if qid in seen: + continue + + if rctype == "new": + handle_new(change) + seen.add(qid) + if rctype == "edit": + handle_edit(change) + seen.add(qid) + + update_timestamp(timestamp) + print("commit") + database.session.commit() + + if "continue" not in reply: + break + + rccontinue = reply["continue"]["rccontinue"] + database.session.commit() + print("finished") + + +def main() -> None: + """Infinite loop.""" + while True: + update_database() + sleep(60) + + +if __name__ == "__main__": + main() diff --git a/web_view.py b/web_view.py index da77416..6a11554 100755 --- a/web_view.py +++ b/web_view.py @@ -1,37 +1,61 @@ #!/usr/bin/python3 -from flask import (Flask, render_template, request, jsonify, redirect, url_for, g, - flash, session, Response, stream_with_context, abort, send_file) +import json +import re +from time import sleep, time + +import flask_login +import GeoIP +import maxminddb +import requests +import sqlalchemy +from flask import ( + Flask, + Response, + abort, + flash, + g, + jsonify, + redirect, + render_template, + request, + session, + stream_with_context, + url_for, +) +from lxml import etree +from requests_oauthlib import OAuth1Session from sqlalchemy import func from sqlalchemy.sql.expression import update -from matcher import (nominatim, model, database, commons, wikidata, wikidata_api, - osm_oauth, edit, mail, api, error_mail) -# from werkzeug.debug.tbtools import get_current_traceback + +from matcher import ( + api, + commons, + database, + edit, + error_mail, + mail, + model, + nominatim, + osm_oauth, + wikidata, + wikidata_api, +) from matcher.data import property_map -from time import time, sleep -from requests_oauthlib import OAuth1Session -from lxml import etree -import werkzeug.exceptions -import inspect -import flask_login -import requests -import json -import GeoIP -import re -import maxminddb -import sqlalchemy + +# 
from werkzeug.debug.tbtools import get_current_traceback srid = 4326 -re_point = re.compile(r'^POINT\((.+) (.+)\)$') +re_point = re.compile(r"^POINT\((.+) (.+)\)$") app = Flask(__name__) app.debug = True -app.config.from_object('config.default') +app.config.from_object("config.default") error_mail.setup_error_mail(app) login_manager = flask_login.LoginManager(app) -login_manager.login_view = 'login_route' -osm_api_base = 'https://api.openstreetmap.org/api/0.6' +login_manager.login_view = "login_route" +osm_api_base = "https://api.openstreetmap.org/api/0.6" maxminddb_reader = maxminddb.open_database(app.config["GEOLITE2"]) @@ -39,7 +63,7 @@ DB_URL = "postgresql:///matcher" database.init_db(DB_URL) entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"} -re_qid = re.compile(r'^Q\d+$') +re_qid = re.compile(r"^Q\d+$") @app.teardown_appcontext @@ -51,6 +75,7 @@ def shutdown_session(exception=None): def global_user(): g.user = flask_login.current_user._get_current_object() + def dict_repr_values(d): return {key: repr(value) for key, value in d.items()} @@ -71,17 +96,19 @@ def dict_repr_values(d): # "args": repr(last_frame_args), # }, # }), 500 -# +# # return render_template('show_error.html', # tb=tb, # last_frame=last_frame, # last_frame_args=last_frame_args), 500 + def cors_jsonify(*args, **kwargs): response = jsonify(*args, **kwargs) response.headers["Access-Control-Allow-Origin"] = "*" return response + def check_for_tagged_qids(qids): tagged = set() for qid in qids: @@ -108,12 +135,12 @@ def check_for_tagged_qid(qid): def geoip_user_record(): gi = GeoIP.open(app.config["GEOIP_DATA"], GeoIP.GEOIP_STANDARD) - remote_ip = request.get('ip', request.remote_addr) + remote_ip = request.get("ip", request.remote_addr) return gi.record_by_addr(remote_ip) def get_user_location(): - remote_ip = request.args.get('ip', request.remote_addr) + remote_ip = request.args.get("ip", request.remote_addr) maxmind = maxminddb_reader.get(remote_ip) return 
maxmind.get("location") if maxmind else None @@ -154,13 +181,15 @@ def isa_page(item_id): subclass_list = [] for s in item.get_claim(subclass_property): subclass = api.get_item(s["numeric-id"]) - subclass_list.append({ - "qid": s["id"], - "item_id": s["numeric-id"], - "label": subclass.label(), - "description": subclass.description(), - "isa_page_url": url_for("isa_page", item_id=s["numeric-id"]), - }) + subclass_list.append( + { + "qid": s["id"], + "item_id": s["numeric-id"], + "label": subclass.label(), + "description": subclass.description(), + "isa_page_url": url_for("isa_page", item_id=s["numeric-id"]), + } + ) tags = api.get_tags_for_isa_item(item) @@ -254,14 +283,16 @@ def map_start_page(): lat, lon = 42.2917, -85.5872 radius = 5 - return redirect(url_for( - 'map_location', - lat=f'{lat:.5f}', - lon=f'{lon:.5f}', - zoom=16, - radius=radius, - ip=request.args.get('ip'), - )) + return redirect( + url_for( + "map_location", + lat=f"{lat:.5f}", + lon=f"{lon:.5f}", + zoom=16, + radius=radius, + ip=request.args.get("ip"), + ) + ) @app.route("/documentation") @@ -270,16 +301,14 @@ def documentation_page(): username = user.username if user.is_authenticated else None return render_template( - "documentation.html", - active_tab="documentation", - username=username + "documentation.html", active_tab="documentation", username=username ) @app.route("/search") def search_page(): loc = get_user_location() - q = request.args.get('q') + q = request.args.get("q") user = flask_login.current_user username = user.username if user.is_authenticated else None @@ -296,6 +325,7 @@ def search_page(): q=q, ) + @app.route("/map///") def map_location(zoom, lat, lon): qid = request.args.get("item") @@ -359,6 +389,36 @@ def lookup_item(item_id): return redirect(url) +@app.route("/item/Q") +def lookup_item(item_id): + item = api.get_item(item_id) + if not item: + # TODO: show nicer page for Wikidata item not found + return abort(404) + + try: + lat, lon = item.locations[0].get_lat_lon() + 
except IndexError: + # TODO: show nicer page for Wikidata item without coordinates + return abort(404) + + return render_template( + "map.html", + active_tab="map", + zoom=16, + lat=lat, + lon=lon, + username=get_username(), + mode="map", + q=None, + qid=item.qid, + item_type_filter=[], + ) + + url = url_for("map_location", zoom=16, lat=lat, lon=lon, item=item.qid) + return redirect(url) + + @app.route("/search/map") def search_map_page(): user_lat, user_lon = get_user_location() or (None, None) @@ -398,10 +458,12 @@ def old_search_page(): def read_bounds_param(): return [float(i) for i in request.args["bounds"].split(",")] + def read_isa_filter_param(): - isa_param = request.args.get('isa') + isa_param = request.args.get("isa") if isa_param: - return set(qid.strip() for qid in isa_param.upper().split(',')) + return set(qid.strip() for qid in isa_param.upper().split(",")) + @app.route("/api/1/location") def show_user_location(): @@ -417,6 +479,7 @@ def api_wikidata_items_count(): t1 = time() - t0 return cors_jsonify(success=True, count=count, duration=t1) + @app.route("/api/1/isa_search") def api_isa_search(): t0 = time() @@ -452,6 +515,7 @@ def api_wikidata_items(): t1 = time() - t0 return cors_jsonify(success=True, duration=t1, **ret) + @app.route("/api/1/place//") def api_place_items(osm_type, osm_id): t0 = time() @@ -478,9 +542,7 @@ def api_get_item(item_id): detail = api.item_detail(item) t1 = time() - t0 - return cors_jsonify(success=True, - duration=t1, - **detail) + return cors_jsonify(success=True, duration=t1, **detail) @app.route("/api/1/item/Q/tags") @@ -491,25 +553,23 @@ def api_get_item_tags(item_id): osm_list = sorted(tags.keys()) t1 = time() - t0 - return cors_jsonify(success=True, - qid=item.qid, - tag_or_key_list=osm_list, - tag_src=tags, - duration=t1) + return cors_jsonify( + success=True, qid=item.qid, tag_or_key_list=osm_list, tag_src=tags, duration=t1 + ) def expand_street_name(from_names): ret = set(from_names) for name in from_names: - if 
any(name.startswith(st) for st in ('St ', 'St. ')): - first_space = name.find(' ') + if any(name.startswith(st) for st in ("St ", "St. ")): + first_space = name.find(" ") ret.add("Saint" + name[first_space:]) - if ', ' in name: + if ", " in name: for n in set(ret): comma = n.find(", ") ret.add(name[:comma]) - elif '/' in name: + elif "/" in name: for n in set(ret): ret.extend(part.strip() for part in n.split("/")) @@ -522,14 +582,12 @@ def api_find_osm_candidates(item_id): t0 = time() item = model.Item.query.get(item_id) if not item: - return cors_jsonify(success=True, - qid=f'Q{item_id}', - error="item doesn't exist") + return cors_jsonify(success=True, qid=f"Q{item_id}", error="item doesn't exist") if not item.locations: - return cors_jsonify(success=True, - qid=f'Q{item_id}', - error="item has no coordinates") + return cors_jsonify( + success=True, qid=f"Q{item_id}", error="item has no coordinates" + ) label = item.label() item_is_street = item.is_street() @@ -547,17 +605,15 @@ def api_find_osm_candidates(item_id): max_distance = 1_000 limit = 40 names = None - nearby = api.find_osm_candidates(item, - limit=limit, - max_distance=max_distance, - names=names) + nearby = api.find_osm_candidates( + item, limit=limit, max_distance=max_distance, names=names + ) if (item_is_street or item_is_watercourse) and not nearby: # nearby = [osm for osm in nearby if street_name_match(label, osm)] # try again without name filter - nearby = api.find_osm_candidates(item, limit=100, - max_distance=1_000) + nearby = api.find_osm_candidates(item, limit=100, max_distance=1_000) t1 = time() - t0 return cors_jsonify( @@ -565,7 +621,7 @@ def api_find_osm_candidates(item_id): qid=item.qid, nearby=nearby, duration=t1, - max_distance=max_distance + max_distance=max_distance, ) @@ -574,10 +630,12 @@ def api_missing_wikidata_items(): t0 = time() qids_arg = request.args.get("qids") if not qids_arg: - return cors_jsonify(success=False, - error="required parameter 'qids' is missing", - items=[], 
- isa_count=[]) + return cors_jsonify( + success=False, + error="required parameter 'qids' is missing", + items=[], + isa_count=[], + ) qids = [] for qid in qids_arg.upper().split(","): @@ -593,10 +651,7 @@ def api_missing_wikidata_items(): ret = api.missing_wikidata_items(qids, lat, lon) t1 = time() - t0 - return cors_jsonify( - success=True, - duration=t1, - **ret) + return cors_jsonify(success=True, duration=t1, **ret) @app.route("/api/1/search") @@ -615,20 +670,20 @@ def api_search(): return cors_jsonify(success=True, hits=hits) + @app.route("/api/1/polygon//") def api_polygon(osm_type, osm_id): obj = model.Polygon.get_osm(osm_type, osm_id) - return cors_jsonify(successful=True, - osm_type=osm_type, - osm_id=osm_id, - geojson=obj.geojson()) + return cors_jsonify( + successful=True, osm_type=osm_type, osm_id=osm_id, geojson=obj.geojson() + ) @app.route("/refresh/Q") def refresh_item(item_id): assert not model.Item.query.get(item_id) - qid = f'Q{item_id}' + qid = f"Q{item_id}" entity = wikidata_api.get_entity(qid) entity_qid = entity.pop("id") assert qid == entity_qid @@ -643,100 +698,110 @@ def refresh_item(item_id): database.session.add(item) database.session.commit() - return 'done' + return "done" -@app.route('/login') + +@app.route("/login") def login_openstreetmap(): - return redirect(url_for('start_oauth', - next=request.args.get('next'))) + return redirect(url_for("start_oauth", next=request.args.get("next"))) -@app.route('/logout') + +@app.route("/logout") def logout(): - next_url = request.args.get('next') or url_for('map_start_page') + next_url = request.args.get("next") or url_for("map_start_page") flask_login.logout_user() - flash('you are logged out') + flash("you are logged out") return redirect(next_url) -@app.route('/done/') + +@app.route("/done/") def done(): - flash('login successful') - return redirect(url_for('map_start_page')) + flash("login successful") + return redirect(url_for("map_start_page")) -@app.route('/oauth/start') + 
+@app.route("/oauth/start") def start_oauth(): - next_page = request.args.get('next') + next_page = request.args.get("next") if next_page: - session['next'] = next_page + session["next"] = next_page - client_key = app.config['CLIENT_KEY'] - client_secret = app.config['CLIENT_SECRET'] + client_key = app.config["CLIENT_KEY"] + client_secret = app.config["CLIENT_SECRET"] - request_token_url = 'https://www.openstreetmap.org/oauth/request_token' + request_token_url = "https://www.openstreetmap.org/oauth/request_token" - callback = url_for('oauth_callback', _external=True) + callback = url_for("oauth_callback", _external=True) - oauth = OAuth1Session(client_key, - client_secret=client_secret, - callback_uri=callback) + oauth = OAuth1Session( + client_key, client_secret=client_secret, callback_uri=callback + ) fetch_response = oauth.fetch_request_token(request_token_url) - session['owner_key'] = fetch_response.get('oauth_token') - session['owner_secret'] = fetch_response.get('oauth_token_secret') + session["owner_key"] = fetch_response.get("oauth_token") + session["owner_secret"] = fetch_response.get("oauth_token_secret") - base_authorization_url = 'https://www.openstreetmap.org/oauth/authorize' - authorization_url = oauth.authorization_url(base_authorization_url, - oauth_consumer_key=client_key) + base_authorization_url = "https://www.openstreetmap.org/oauth/authorize" + authorization_url = oauth.authorization_url( + base_authorization_url, oauth_consumer_key=client_key + ) return redirect(authorization_url) + @login_manager.user_loader def load_user(user_id): return model.User.query.get(user_id) + @app.route("/oauth/callback", methods=["GET"]) def oauth_callback(): - client_key = app.config['CLIENT_KEY'] - client_secret = app.config['CLIENT_SECRET'] + client_key = app.config["CLIENT_KEY"] + client_secret = app.config["CLIENT_SECRET"] - oauth = OAuth1Session(client_key, - client_secret=client_secret, - resource_owner_key=session['owner_key'], - 
resource_owner_secret=session['owner_secret']) + oauth = OAuth1Session( + client_key, + client_secret=client_secret, + resource_owner_key=session["owner_key"], + resource_owner_secret=session["owner_secret"], + ) oauth_response = oauth.parse_authorization_response(request.url) - verifier = oauth_response.get('oauth_verifier') - access_token_url = 'https://www.openstreetmap.org/oauth/access_token' - oauth = OAuth1Session(client_key, - client_secret=client_secret, - resource_owner_key=session['owner_key'], - resource_owner_secret=session['owner_secret'], - verifier=verifier) + verifier = oauth_response.get("oauth_verifier") + access_token_url = "https://www.openstreetmap.org/oauth/access_token" + oauth = OAuth1Session( + client_key, + client_secret=client_secret, + resource_owner_key=session["owner_key"], + resource_owner_secret=session["owner_secret"], + verifier=verifier, + ) oauth_tokens = oauth.fetch_access_token(access_token_url) - session['owner_key'] = oauth_tokens.get('oauth_token') - session['owner_secret'] = oauth_tokens.get('oauth_token_secret') + session["owner_key"] = oauth_tokens.get("oauth_token") + session["owner_secret"] = oauth_tokens.get("oauth_token_secret") - r = oauth.get(osm_api_base + '/user/details') + r = oauth.get(osm_api_base + "/user/details") info = osm_oauth.parse_userinfo_call(r.content) - user = model.User.query.filter_by(osm_id=info['id']).one_or_none() + user = model.User.query.filter_by(osm_id=info["id"]).one_or_none() if user: - user.osm_oauth_token = oauth_tokens.get('oauth_token') - user.osm_oauth_token_secret = oauth_tokens.get('oauth_token_secret') + user.osm_oauth_token = oauth_tokens.get("oauth_token") + user.osm_oauth_token_secret = oauth_tokens.get("oauth_token_secret") else: user = model.User( - username=info['username'], - description=info['description'], - img=info['img'], - osm_id=info['id'], - osm_account_created=info['account_created'], + username=info["username"], + description=info["description"], + 
img=info["img"], + osm_id=info["id"], + osm_account_created=info["account_created"], mock_upload=False, ) database.session.add(user) database.session.commit() flask_login.login_user(user) - next_page = session.get('next') or url_for('map_start_page') + next_page = session.get("next") or url_for("map_start_page") return redirect(next_page) @@ -744,14 +809,13 @@ def validate_edit_list(edits): for e in edits: assert model.Item.get_by_qid(e["qid"]) assert e["op"] in {"add", "remove", "change"} - osm_type, _, osm_id = e['osm'].partition('/') + osm_type, _, osm_id = e["osm"].partition("/") osm_id = int(osm_id) - if osm_type == 'node': + if osm_type == "node": assert model.Point.query.get(osm_id) else: src_id = osm_id if osm_type == "way" else -osm_id - assert (model.Line.query.get(src_id) - or model.Polygon.query.get(src_id)) + assert model.Line.query.get(src_id) or model.Polygon.query.get(src_id) @app.route("/api/1/edit", methods=["POST"]) @@ -760,9 +824,9 @@ def api_new_edit_session(): incoming = request.json validate_edit_list(incoming["edit_list"]) - es = model.EditSession(user=user, - edit_list=incoming['edit_list'], - comment=incoming['comment']) + es = model.EditSession( + user=user, edit_list=incoming["edit_list"], comment=incoming["comment"] + ) database.session.add(es) database.session.commit() @@ -770,13 +834,14 @@ def api_new_edit_session(): return cors_jsonify(success=True, session_id=session_id) + @app.route("/api/1/edit/", methods=["POST"]) def api_edit_session(session_id): es = model.EditSession.query.get(session_id) assert flask_login.current_user.id == es.user_id incoming = request.json - for f in 'edit_list', 'comment': + for f in "edit_list", "comment": if f not in incoming: continue setattr(es, f, incoming[f]) @@ -784,21 +849,24 @@ def api_edit_session(session_id): return cors_jsonify(success=True, session_id=session_id) + class VersionMismatch(Exception): pass + def osm_object(osm_type, osm_id): if osm_type == "node": return 
model.Point.query.get(osm_id) - src_id = int(osm_id) * {'way': 1, 'relation': -1}[osm_type] + src_id = int(osm_id) * {"way": 1, "relation": -1}[osm_type] for cls in model.Line, model.Polygon: obj = cls.query.get(src_id) if obj: return obj + def process_edit(changeset_id, e): - osm_type, _, osm_id = e['osm'].partition('/') + osm_type, _, osm_id = e["osm"].partition("/") qid = e["qid"] item_id = qid[1:] @@ -851,9 +919,7 @@ def process_edit(changeset_id, e): cls = type(osm) database.session.execute( - update(cls). - where(cls.src_id == osm.src_id). - values(tags=new_tags) + update(cls).where(cls.src_id == osm.src_id).values(tags=new_tags) ) db_edit = model.ChangesetEdit( @@ -867,6 +933,7 @@ def process_edit(changeset_id, e): return "saved" + @app.route("/api/1/save/") def api_save_changeset(session_id): assert g.user.is_authenticated @@ -938,7 +1005,8 @@ def api_real_save_changeset(session_id): edit.close_changeset(changeset_id) yield send("done") - return Response(stream_with_context(stream(g.user)), mimetype='text/event-stream') + return Response(stream_with_context(stream(g.user)), mimetype="text/event-stream") + def api_mock_save_changeset(session_id): es = model.EditSession.query.get(session_id) @@ -948,7 +1016,7 @@ def api_mock_save_changeset(session_id): return f"data: {json.dumps(data)}\n\n" def stream(user): - print('stream') + print("stream") changeset_id = database.session.query(func.max(model.Changeset.id) + 1).scalar() sleep(1) yield send("open", id=changeset_id) @@ -956,12 +1024,12 @@ def api_mock_save_changeset(session_id): update_count = 0 - print('record_changeset', changeset_id) + print("record_changeset", changeset_id) edit.record_changeset( id=changeset_id, user=user, comment=es.comment, update_count=update_count ) - print('edits') + print("edits") for num, e in enumerate(es.edit_list): print(num, e) @@ -970,12 +1038,12 @@ def api_mock_save_changeset(session_id): yield send("saved", edit=e, num=num) sleep(1) - print('closing') + print("closing") 
yield send("closing") sleep(1) yield send("done") - return Response(stream(g.user), mimetype='text/event-stream') + return Response(stream(g.user), mimetype="text/event-stream") if __name__ == "__main__":