Merge branch 'main' of github.com:EdwardBetts/owl-map

This commit is contained in:
Edward Betts 2023-05-14 16:12:58 +02:00
commit 33bda7f54b
12 changed files with 1335 additions and 281 deletions

162
matcher.style Normal file
View file

@ -0,0 +1,162 @@
# This is the default osm2pgsql .style file that comes with osm2pgsql.
#
# A .style file has 4 columns that define how OSM objects end up in tables in
# the database and what columns are created. It interacts with the command-line
# hstore options.
#
# Columns
# =======
#
# OsmType: This is either "node", "way" or "node,way" and indicates if this tag
# applies to nodes, ways, or both.
#
# Tag: The tag
#
# DataType: The type of the column to be created. Normally "text"
#
# Flags: Flags that indicate what table the OSM object is moved into.
#
# There are 6 possible flags. These flags are used both to indicate if a column
# should be created, and if ways with the tag are assumed to be areas. The area
# assumptions can be overridden with an area=yes/no tag
#
# polygon - Create a column for this tag, and objects with the tag are areas
#
# linear - Create a column for this tag
#
# nocolumn - Override the above and don't create a column for the tag, but do
# include objects with this tag
#
# phstore - Same as polygon,nocolumn for backward compatibility
#
# delete - Drop this tag completely and don't create a column for it. This also
# prevents the tag from being added to hstore columns
#
# nocache - Deprecated and does nothing
#
# If an object has a tag that indicates it is an area or has area=yes/1,
# osm2pgsql will try to turn it into an area. If it succeeds, it places it in
# the polygon table. If it fails (e.g. not a closed way) it places it in the
# line table.
#
# Nodes are never placed into the polygon or line table and are always placed in
# the point table.
#
# Hstore
# ======
#
# The options --hstore, --hstore-match-only, and --hstore-all interact with
# the .style file.
#
# With --hstore any tags without a column will be added to the hstore column.
# This will also cause all objects to be kept.
#
# With --hstore-match-only the behavior for tags is the same, but objects are
# only kept if they have a non-NULL value in one of the columns.
#
# With --hstore-all all tags are added to the hstore column unless they appear
# in the style file with a delete flag, causing duplication between the normal
# columns and the hstore column.
#
# Special database columns
# ========================
#
# There are some special database columns that if present in the .style file
# will be populated by osm2pgsql.
#
# These are
#
# z_order - datatype int4
#
# way_area - datatype real. The area of the way, in the units of the projection
# (e.g. square mercator meters). Only applies to areas
#
# osm_user - datatype text
# osm_uid - datatype integer
# osm_version - datatype integer
# osm_changeset - datatype integer
# osm_timestamp - datatype timestamptz(0).
# Used with the --extra-attributes option to include metadata in the database.
# If importing with both --hstore and --extra-attributes the meta-data will
# end up in the tags hstore column regardless of the style file.
# OsmType Tag DataType Flags
node,way access text linear
node,way addr:housename text linear
node,way addr:housenumber text linear
node,way addr:interpolation text linear
node,way admin_level text linear
node,way aerialway text linear
node,way aeroway text polygon
node,way amenity text polygon
node,way area text polygon # hard coded support for area=1/yes => polygon is in osm2pgsql
node,way barrier text linear
node,way bicycle text linear
node,way brand text linear
node,way bridge text linear
node,way boundary text linear
node,way building text polygon
node capital text linear
node,way construction text linear
node,way covered text linear
node,way culvert text linear
node,way cutting text linear
node,way denomination text linear
node,way disused text linear
node ele text linear
node,way embankment text linear
node,way foot text linear
node,way generator:source text linear
node,way harbour text polygon
node,way highway text linear
node,way historic text polygon
node,way horse text linear
node,way intermittent text linear
node,way junction text linear
node,way landuse text polygon
node,way layer text linear
node,way leisure text polygon
node,way lock text linear
node,way man_made text polygon
node,way military text polygon
node,way motorcar text linear
node,way name text linear
node,way natural text polygon # natural=coastline tags are discarded by a hard coded rule in osm2pgsql
node,way office text polygon
node,way oneway text linear
node,way operator text linear
node,way place text polygon
node,way population text linear
node,way power text polygon
node,way power_source text linear
node,way public_transport text polygon
node,way railway text linear
node,way ref text linear
node,way religion text linear
node,way route text linear
node,way service text linear
node,way shop text polygon
node,way sport text polygon
node,way surface text linear
node,way toll text linear
node,way tourism text polygon
node,way tower:type text linear
way tracktype text linear
node,way tunnel text linear
node,way water text polygon
node,way waterway text polygon
node,way wetland text polygon
node,way width text linear
node,way wood text linear
node,way z_order int4 linear # This is calculated during import
way way_area real linear # This is calculated during import
# Area tags
# We don't make columns for these tags, but objects with them are areas.
# Mainly for use with hstore
way abandoned:aeroway text polygon,nocolumn
way abandoned:amenity text polygon,nocolumn
way abandoned:building text polygon,nocolumn
way abandoned:landuse text polygon,nocolumn
way abandoned:power text polygon,nocolumn
way area:highway text polygon,nocolumn

View file

@ -4,7 +4,9 @@
CallParams = dict[str, str | int]
user_agent = (
"osm-wikidata/0.1 (https://github.com/EdwardBetts/osm-wikidata; edward@4angle.com)"
"osm-wikidata/0.2"
+ " (https://github.com/EdwardBetts/osm-wikidata;"
+ " edward@4angle.com)"
)

View file

@ -40,13 +40,14 @@ skip_tags = {
}
def get_country_iso3166_1(lat: float, lon: float) -> set[str]:
def get_country_iso3166_1(lat, lon):
"""For a given lat/lon return a set of ISO country codes.
Also cache the country code in the global object.
Normally there should be only one country.
"""
point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
alpha2_codes = set()
q = model.Polygon.query.filter(
@ -263,7 +264,7 @@ WHERE tags ? 'wikidata'
conn = database.session.connection()
result = conn.execute(text(sql))
print(sql)
# print(sql)
point_sql = (
f"""
@ -788,7 +789,7 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
if limit:
s = s.limit(limit)
print(s.compile(compile_kwargs={"literal_binds": True}))
# print(s.compile(compile_kwargs={"literal_binds": True}))
conn = database.session.connection()
nearby = []
@ -1031,7 +1032,7 @@ def isa_incremental_search(search_terms):
func.length(en_label) < 20,
)
print(q.statement.compile(compile_kwargs={"literal_binds": True}))
# print(q.statement.compile(compile_kwargs={"literal_binds": True}))
ret = []
for item in q:

View file

@ -1,10 +1,11 @@
from flask import current_app, g, request, has_request_context
import smtplib
import sys
import traceback
from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
from pprint import pformat
import smtplib
import traceback
import sys
from flask import current_app, g, has_request_context, request
def send_mail(subject, body, config=None):
@ -71,7 +72,7 @@ def open_changeset_error(session_id, changeset, r):
username = g.user.username
body = f"""
user: {username}
page: {url}
page: {r.url}
message user: https://www.openstreetmap.org/message/new/{username}

View file

@ -1,89 +1,159 @@
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.schema import ForeignKey, Column
from sqlalchemy.orm import relationship, column_property, deferred, backref
from sqlalchemy import func
from sqlalchemy.types import Integer, String, Float, Boolean, DateTime, Text, BigInteger
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql.expression import cast
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.ext.declarative import declared_attr
from geoalchemy2 import Geometry
from collections import defaultdict
from flask_login import UserMixin
from .database import session, now_utc
from . import wikidata, utils, mail
import json
import re
import typing
from collections import defaultdict
from typing import Any
from flask_login import UserMixin
from geoalchemy2 import Geometry
from sqlalchemy import func
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import backref, column_property, deferred, relationship
from sqlalchemy.orm.collections import attribute_mapped_collection
from sqlalchemy.schema import Column, ForeignKey
from sqlalchemy.sql.expression import cast
from sqlalchemy.types import BigInteger, Boolean, DateTime, Float, Integer, String, Text
from . import mail, utils, wikidata
from .database import now_utc, session
Base = declarative_base()
Base.query = session.query_property()
re_point = re.compile(r'^POINT\((.+) (.+)\)$')
re_point = re.compile(r"^POINT\((.+) (.+)\)$")
osm_type_enum = postgresql.ENUM(
"node", "way", "relation", name="osm_type_enum", metadata=Base.metadata
)
re_lau_code = re.compile(r"^[A-Z]{2}([^A-Z].+)$") # 'LAU (local administrative unit)'
property_map = [
("P238", ["iata"], "IATA airport code"),
("P239", ["icao"], "ICAO airport code"),
("P240", ["faa", "ref"], "FAA airport code"),
("P296", ["ref", "ref:train", "railway:ref"], "station code"),
("P300", ["ISO3166-2"], "ISO 3166-2 code"),
("P359", ["ref:rce"], "Rijksmonument ID"),
("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"),
("P649", ["ref:nrhp"], "NRHP reference number"),
("P722", ["uic_ref"], "UIC station code"),
("P782", ["ref"], "LAU (local administrative unit)"),
("P836", ["ref:gss"], "UK Government Statistical Service code"),
("P856", ["website", "contact:website", "url"], "website"),
("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"),
("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"),
# A UIC id can be an IBNR, but not every IBNR is a UIC id
("P954", ["uic_ref"], "IBNR ID"),
("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"),
("P1216", ["HE_ref"], "National Heritage List for England number"),
("P2253", ["ref:edubase"], "EDUBase URN"),
("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"),
("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"),
("P3562", ["seamark:light:reference"], "Admiralty number"),
(
"P4755",
["ref", "ref:train", "ref:crs", "crs", "nat_ref"],
"UK railway station code",
),
("P4803", ["ref", "ref:train"], "Amtrak station code"),
("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"),
("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"),
("P5087", ["ref:fips"], "FIPS 5-2 numeric code (US states)"),
("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"),
]
T = typing.TypeVar("T", bound="Item")
osm_type_enum = postgresql.ENUM('node', 'way', 'relation',
name='osm_type_enum',
metadata=Base.metadata)
class Item(Base):
"""Wikidata item."""
__tablename__ = "item"
item_id = Column(Integer, primary_key=True, autoincrement=False)
labels = Column(postgresql.JSONB)
descriptions = Column(postgresql.JSONB)
aliases = Column(postgresql.JSONB)
sitelinks = Column(postgresql.JSONB)
claims = Column(postgresql.JSONB)
claims = Column(postgresql.JSONB, nullable=False)
lastrevid = Column(Integer, nullable=False, unique=True)
locations = relationship("ItemLocation", cascade="all, delete-orphan", backref="item")
locations = relationship(
"ItemLocation", cascade="all, delete-orphan", backref="item"
)
qid = column_property("Q" + cast(item_id, String))
wiki_extracts = relationship(
"Extract",
collection_class=attribute_mapped_collection("site"),
cascade="save-update, merge, delete, delete-orphan",
backref="item",
)
extracts = association_proxy("wiki_extracts", "extract")
@classmethod
def get_by_qid(cls, qid):
def get_by_qid(cls: typing.Type[T], qid: str) -> T | None:
if qid and len(qid) > 1 and qid[0].upper() == "Q" and qid[1:].isdigit():
return cls.query.get(qid[1:])
obj: T = cls.query.get(qid[1:])
return obj
return None
@property
def wd_url(self):
def wd_url(self) -> str:
"""Wikidata URL for item."""
return f"https://www.wikidata.org/wiki/{self.qid}"
def get_claim(self, pid):
return [i["mainsnak"]["datavalue"]["value"] if "datavalue" in i["mainsnak"] else None
for i in self.claims.get(pid, [])]
def get_claim(self, pid: str) -> list[dict[str, Any] | None]:
"""List of claims for given Wikidata property ID."""
claims = typing.cast(dict[str, list[dict[str, Any]]], self.claims)
return [
i["mainsnak"]["datavalue"]["value"]
if "datavalue" in i["mainsnak"]
else None
for i in claims.get(pid, [])
]
def label(self, lang='en'):
if lang in self.labels:
return self.labels[lang]['value']
elif 'en' in self.labels:
return self.labels['en']['value']
def label(self, lang: str = "en") -> str:
"""Label for this Wikidata item."""
labels = typing.cast(dict[str, dict[str, Any]], self.labels)
if lang in labels:
return typing.cast(str, labels[lang]["value"])
elif "en" in labels:
return typing.cast(str, labels["en"]["value"])
label_list = list(self.labels.values())
return label_list[0]['value'] if label_list else '[no label]'
label_list = list(labels.values())
return typing.cast(str, label_list[0]["value"]) if label_list else "[no label]"
def description(self, lang='en'):
if lang in self.descriptions:
return self.descriptions[lang]['value']
elif 'en' in self.descriptions:
return self.descriptions['en']['value']
return
def description(self, lang: str = "en") -> str | None:
"""Return a description of the item."""
descriptions = typing.cast(dict[str, dict[str, Any]], self.descriptions)
if lang in descriptions:
return typing.cast(str, descriptions[lang]["value"])
elif "en" in descriptions:
return typing.cast(str, descriptions["en"]["value"])
return None
d_list = list(self.descriptions.values())
if d_list:
return d_list[0]['value']
return d_list[0]["value"]
def get_aliases(self, lang='en'):
def get_aliases(self, lang="en"):
if lang not in self.aliases:
if 'en' not in self.aliases:
if "en" not in self.aliases:
return []
lang = 'en'
return [a['value'] for a in self.aliases[lang]]
lang = "en"
return [a["value"] for a in self.aliases[lang]]
def get_part_of_names(self):
if not self.claims:
return set()
part_of_names = set()
for p361 in self.claims.get('P361', []):
for p361 in self.claims.get("P361", []):
try:
part_of_id = p361['mainsnak']['datavalue']['value']['numeric-id']
part_of_id = p361["mainsnak"]["datavalue"]["value"]["numeric-id"]
except KeyError:
continue
if part_of_id == self.item_id:
@ -98,7 +168,7 @@ class Item(Base):
@property
def entity(self):
keys = ['labels', 'aliases', 'descriptions', 'sitelinks', 'claims']
keys = ["labels", "aliases", "descriptions", "sitelinks", "claims"]
return {key: getattr(self, key) for key in keys}
def names(self, check_part_of=True):
@ -107,22 +177,24 @@ class Item(Base):
d = wikidata.names_from_entity(self.entity) or defaultdict(list)
for name, sources in list(d.items()):
if len(sources) == 1 and sources[0][0] == 'image':
if len(sources) == 1 and sources[0][0] == "image":
continue
for part_of_name in part_of_names:
if not name.startswith(part_of_name):
continue
prefix_removed = name[len(part_of_name):].strip()
prefix_removed = name[len(part_of_name) :].strip()
if prefix_removed not in d:
d[prefix_removed] = sources
if self.claims:
for p6375 in self.claims.get('P6375', []):
for p6375 in self.claims.get("P6375", []):
try:
street_address = p6375['mainsnak']['datavalue']['value']
street_address = p6375["mainsnak"]["datavalue"]["value"]
except KeyError:
continue
d[street_address['text']].append(('P6375', street_address.get('language')))
d[street_address["text"]].append(
("P6375", street_address.get("language"))
)
# A terrace of buildings can be illustrated with a photo of a single building.
# We try to determine if this is the case and avoid using the filename of the
@ -131,8 +203,11 @@ class Item(Base):
def has_digit(s):
return any(c.isdigit() for c in s)
image_names = {name for name, sources in d.items()
if len(sources) == 1 and sources[0][0] == 'image' and has_digit(name)}
image_names = {
name
for name, sources in d.items()
if len(sources) == 1 and sources[0][0] == "image" and has_digit(name)
}
if not image_names:
return dict(d) or None
@ -166,10 +241,10 @@ class Item(Base):
isa_qids = self.get_isa_qids()
matching_types = {
"Q12731", # dead end street
"Q34442", # road
"Q79007", # street
"Q83620", # thoroughfare
"Q12731", # dead end street
"Q34442", # road
"Q79007", # street
"Q83620", # thoroughfare
"Q21000333", # shopping street
"Q62685721", # pedestrian street
}
@ -179,14 +254,13 @@ class Item(Base):
if isa_qids is None:
isa_qids = self.get_isa_qids()
matching_types = {
"Q355304", # watercourse
"Q4022", # river
"Q47521", # stream
"Q1437299", # creek
"Q355304", # watercourse
"Q4022", # river
"Q47521", # stream
"Q1437299", # creek
"Q63565252", # brook
"Q12284", # canal
"Q12284", # canal
"Q55659167", # natural watercourse
}
return bool(matching_types & set(isa_qids))
@ -195,19 +269,29 @@ class Item(Base):
return self.is_street(isa_qids) or self.is_watercourse(isa_qids)
def is_tram_stop(self):
return 'Q2175765' in self.get_isa_qids()
return "Q2175765" in self.get_isa_qids()
def alert_admin_about_bad_time(self, v):
body = ("Wikidata item has an unsupported time precision\n\n"
+ self.wd_url + "\n\n" + "Value:\n\n" + json.dumps(v, indent=2))
body = (
"Wikidata item has an unsupported time precision\n\n"
+ self.wd_url
+ "\n\n"
+ "Value:\n\n"
+ json.dumps(v, indent=2)
)
mail.send_mail(f"OWL Map: bad time value in {self.qid}", body)
def closed(self):
def time_claim(self, pid):
ret = []
for v in self.get_claim("P3999"):
for v in self.get_claim(pid):
if not v:
continue
t = utils.format_wikibase_time(v)
try:
t = utils.format_wikibase_time(v)
except Exception:
self.alert_admin_about_bad_time(v)
raise
if t:
ret.append(t)
else:
@ -215,6 +299,84 @@ class Item(Base):
return ret
def closed(self):
return self.time_claim("P3999")
def first_paragraph_language(self, lang):
    """Return the first non-empty paragraph of the stored extract for a site.

    ``lang`` is used as the key into both ``self.sitelinks`` and
    ``self.extracts``.

    Returns ``None`` when the item has no sitelink or no extract under that
    key. Otherwise returns the extract with leading empty ``<p>`` elements
    removed, cut just after the first ``</p>``. If no ``</p>`` is present
    the whole remaining text is returned.
    """
    # sitelinks is a JSONB column, so on an instance it is a mapping (or
    # None when unset) and must not be called.
    if lang not in (self.sitelinks or {}):
        return None

    extract = self.extracts.get(lang)
    if not extract:
        return None

    # Markup for paragraphs that carry no visible content.
    empty_list = (
        "<p><span></span></p>",
        "<p><span></span>\n</p>",
        "<p><span></span>\n\n</p>",
        "<p>\n<span></span>\n</p>",
        "<p>\n\n<span></span>\n</p>",
        "<p>.\n</p>",
        "<p><br></p>",
        '<p class="mw-empty-elt">\n</p>',
        '<p class="mw-empty-elt">\n\n</p>',
        '<p class="mw-empty-elt">\n\n\n</p>',
    )

    text = extract.strip()
    # Keep stripping until the text no longer starts with an empty
    # paragraph; several of them can be stacked at the front.
    while True:
        for empty in empty_list:
            if text.startswith(empty):
                text = text[len(empty) :].strip()
                break
        else:
            break

    close_tag = "</p>"
    first_end_p_tag = text.find(close_tag)
    if first_end_p_tag == -1:
        # FIXME: e-mail admin
        return text

    return text[: first_end_p_tag + len(close_tag)]
def get_identifiers_tags(self):
    """Group this item's identifier values by the OSM key they may appear under.

    Walks ``property_map``. For every property that has at least one claim
    with a ``datavalue``, a ``(values, label)`` tuple is appended under each
    OSM key listed for that property. Returns a plain ``dict`` mapping OSM
    key to a list of such tuples.
    """
    by_key = {}
    for pid, keys, description in property_map:
        found = []
        for statement in self.claims.get(pid, []):
            snak = statement["mainsnak"]
            if "datavalue" in snak:
                found.append(snak["datavalue"]["value"])

        if not found:
            continue

        if pid == "P782":
            # Also offer the part captured by re_lau_code (the code without
            # its leading two capital letters) as an alternative value.
            shortened = []
            for code in found:
                hit = re_lau_code.match(code)
                if hit:
                    shortened.append(hit.group(1))
            found.extend(shortened)

        for key in keys:
            by_key.setdefault(key, []).append((found, description))

    return by_key
def get_identifiers(self):
    """Return this item's identifier values keyed by human-readable label.

    Walks ``property_map``. Every property that has at least one claim with
    a ``datavalue`` contributes one entry mapping the property's label to
    the list of its values. Properties without values are omitted.
    """
    ret = {}
    for claim, _osm_keys, label in property_map:
        values = [
            i["mainsnak"]["datavalue"]["value"]
            for i in self.claims.get(claim, [])
            if "datavalue" in i["mainsnak"]
        ]
        if not values:
            continue

        if claim == "P782":
            # Also include the part captured by re_lau_code (the code
            # without its leading two capital letters).
            values += [
                m.group(1) for m in (re_lau_code.match(v) for v in values) if m
            ]

        # The result is keyed by label, so the OSM keys are not needed here;
        # one assignment replaces the former per-key loop that stored the
        # same entry repeatedly.
        ret[label] = values

    return ret
# class Claim(Base):
# __tablename__ = "claim"
# item_id = Column(Integer, primary_key=True)
@ -222,13 +384,14 @@ class Item(Base):
# position = Column(Integer, primary_key=True)
# mainsnak = Column(postgresql.JSONB)
class ItemIsA(Base):
__tablename__ = 'item_isa'
item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
isa_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
item = relationship('Item', foreign_keys=[item_id])
isa = relationship('Item', foreign_keys=[isa_id])
class ItemIsA(Base):
__tablename__ = "item_isa"
item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
isa_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
item = relationship("Item", foreign_keys=[item_id])
isa = relationship("Item", foreign_keys=[isa_id])
class ItemLocation(Base):
@ -241,18 +404,21 @@ class ItemLocation(Base):
qid = column_property("Q" + cast(item_id, String))
pid = column_property("P" + cast(item_id, String))
def get_lat_lon(self):
return session.query(func.ST_Y(self.location),
func.ST_X(self.location)).one()
def get_lat_lon(self) -> tuple[float, float]:
"""Get latitude and longitude of item."""
loc: tuple[float, float]
loc = session.query(func.ST_Y(self.location), func.ST_X(self.location)).one()
return loc
def location_objects(coords):
locations = []
for pid, coord_list in coords.items():
for num, coords in enumerate(coord_list):
point = f"POINT({coords['longitude']} {coords['latitude']})"
loc = ItemLocation(property_id=int(pid[1:]),
statement_order=num,
location=point)
loc = ItemLocation(
property_id=int(pid[1:]), statement_order=num, location=point
)
locations.append(loc)
return locations
@ -282,8 +448,7 @@ class MapMixin:
@declared_attr
def geojson_str(cls):
return column_property(
func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6),
deferred=True
func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6), deferred=True
)
@declared_attr
@ -292,17 +457,16 @@ class MapMixin:
@hybrid_property
def has_street_address(self):
return ("addr:housenumber" in self.tags
and "addr:street" in self.tags)
return "addr:housenumber" in self.tags and "addr:street" in self.tags
def display_name(self):
for key in 'bridge:name', 'tunnel:name', 'lock_name':
for key in "bridge:name", "tunnel:name", "lock_name":
if key in self.tags:
return self.tags[key]
return (self.name
or self.tags.get("addr:housename")
or self.tags.get("inscription"))
return (
self.name or self.tags.get("addr:housename") or self.tags.get("inscription")
)
def geojson(self):
return json.loads(self.geojson_str)
@ -343,7 +507,7 @@ class Line(MapMixin, Base):
@classmethod
def get_osm(cls, osm_type, osm_id):
src_id = osm_id * {'way': 1, 'relation': -1}[osm_type]
src_id = osm_id * {"way": 1, "relation": -1}[osm_type]
return cls.query.get(src_id)
@ -352,11 +516,12 @@ class Polygon(MapMixin, Base):
@classmethod
def get_osm(cls, osm_type, osm_id):
src_id = osm_id * {'way': 1, 'relation': -1}[osm_type]
src_id = osm_id * {"way": 1, "relation": -1}[osm_type]
return cls.query.get(src_id)
@property
def type(self):
def type(self) -> str:
"""Polygon is either a way or a relation."""
return "way" if self.src_id > 0 else "relation"
@declared_attr
@ -364,12 +529,15 @@ class Polygon(MapMixin, Base):
return column_property(func.ST_Area(cls.way, False), deferred=True)
@hybrid_property
def area_in_sq_km(self):
def area_in_sq_km(self) -> float:
"""Size of area in square km."""
return self.area / (1000 * 1000)
class User(Base, UserMixin):
__tablename__ = 'user'
"""User."""
__tablename__ = "user"
id = Column(Integer, primary_key=True)
username = Column(String)
password = Column(String)
@ -392,23 +560,27 @@ class User(Base, UserMixin):
osm_oauth_token = Column(String)
osm_oauth_token_secret = Column(String)
def is_active(self):
def is_active(self) -> bool:
"""User is active."""
return self.active
class EditSession(Base):
__tablename__ = 'edit_session'
__tablename__ = "edit_session"
id = Column(Integer, primary_key=True)
user_id = Column(Integer, ForeignKey(User.id))
created = Column(DateTime, default=now_utc(), nullable=False)
edit_list = Column(postgresql.JSONB)
comment = Column(String)
user = relationship('User')
changeset = relationship('Changeset', back_populates='edit_session', uselist=False)
user = relationship("User")
changeset = relationship("Changeset", back_populates="edit_session", uselist=False)
class Changeset(Base):
__tablename__ = 'changeset'
"""An OSM Changeset generated by this tool."""
__tablename__ = "changeset"
id = Column(BigInteger, primary_key=True)
created = Column(DateTime)
comment = Column(String)
@ -416,38 +588,62 @@ class Changeset(Base):
update_count = Column(Integer, nullable=False)
edit_session_id = Column(Integer, ForeignKey(EditSession.id))
user = relationship('User',
backref=backref('changesets',
lazy='dynamic',
order_by='Changeset.created.desc()'))
user = relationship(
"User",
backref=backref(
"changesets", lazy="dynamic", order_by="Changeset.created.desc()"
),
)
edit_session = relationship('EditSession', back_populates='changeset')
edit_session = relationship("EditSession", back_populates="changeset")
class ChangesetEdit(Base):
__tablename__ = 'changeset_edit'
"""Record details of edits within a changeset."""
changeset_id = Column(BigInteger,
ForeignKey('changeset.id'),
primary_key=True)
__tablename__ = "changeset_edit"
changeset_id = Column(BigInteger, ForeignKey("changeset.id"), primary_key=True)
item_id = Column(Integer, primary_key=True)
osm_id = Column(BigInteger, primary_key=True)
osm_type = Column(osm_type_enum, primary_key=True)
saved = Column(DateTime, default=now_utc(), nullable=False)
changeset = relationship('Changeset',
backref=backref('edits', lazy='dynamic'))
changeset = relationship("Changeset", backref=backref("edits", lazy="dynamic"))
class SkipIsA(Base):
__tablename__ = 'skip_isa'
item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
"""Ignore this item type when walking the Wikidata subclass graph."""
__tablename__ = "skip_isa"
item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
qid = column_property("Q" + cast(item_id, String))
item = relationship("Item")
item = relationship('Item')
class ItemExtraKeys(Base):
__tablename__ = 'item_extra_keys'
item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
"""Extra tag or key to consider for an Wikidata item type."""
__tablename__ = "item_extra_keys"
item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
tag_or_key = Column(String, primary_key=True)
note = Column(String)
qid = column_property("Q" + cast(item_id, String))
item = relationship('Item')
item = relationship("Item")
class Extract(Base):
    """Wikipedia first-paragraph text stored for one item on one site."""

    __tablename__ = "extract"

    # Composite primary key: the owning item plus the site.
    item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
    site = Column(String, primary_key=True)
    extract = Column(String, nullable=False)

    def __init__(self, site: str, extract: str):
        """Store the site and its extract text on the new row.

        NOTE(review): the (site, extract) positional order presumably
        matches what the ``extracts`` association proxy on Item passes when
        it creates rows -- confirm before changing it.
        """
        self.site, self.extract = site, extract

406
notes Normal file
View file

@ -0,0 +1,406 @@
# vim: spell:tw=80 ft=markdown
Extracted items from data dump that include "P625" with the quotes. There are 8,398,490 matching items.
Nearest-Neighbour Searching
https://postgis.net/workshops/postgis-intro/knn.html
---
Use recent changes API to update local Wikidata entity mirror.
Need to handle new item, edit, delete, and undelete.
For now we're just interested in items with coordinates, later we might care
about languages, and classes.
At some point we might keep track of redirects.
Deletes
-------
Is the item in our database? If not then ignore it, if yes then delete it.
New
---
Download full entity, check if it contains coordinates, if yes, then add to database, if not then ignore.
Make a note of item ID and revid. Avoid downloading item again during update.
Edits
-----
If the item is in our database and lastrevid is larger than the revid of the change then skip.
Download full entity.
If in our database and latest revision includes coordinates update item in
database. If no coordinates then delete from our database.
======
Currently we have geographic objects represented by the Item class. We also want
information about the type of object, languages and countries.
How about a hierarchy with Item as the base class and GeoItem as a subclass for
geographical objects. We can also have IsA, Language, and Country classes that
derive from Item.
Countries are a subclass of GeoItem.
With the current design the Item table represents a cached copy of the latest
version of the Wikidata item, no history is stored locally. This makes it hard to
keep track of changes over time.
The same is true of the OSM data, we just keep a copy of the most recent
version.
Instead we could store multiple revisions of Wikidata items. We want the latest
version and any that has been considered part of a match with OSM.
Which Wikidata revisions do we keep?
1. latest revision
2. revision used to generate match
3. revision used in match checked by user
Maybe a separate item revision table is too complex. We could just store JSON
from a match in a table of OSM user uploads.
===
All countries have a P625 statement
===
cable-stayed bridge (Q158555)
There are 786 bridges on OSM tagged with bridge:structure=cable-stayed. Some of
these have a Wikidata tag but aren't tagged as a cable-stayed bridge in
Wikidata. The Wikidata could be updated to tag them as a cable-stayed bridge.
Something similar could be applied to other types.
===
Lots of items with coordinates don't have OSM tags/keys, either because they
don't belong on the map or there isn't enough info in Wikidata.
Need to search different properties for OSM tags, at least 'instance of',
'use', 'sport' and 'religion'.
Should start from items with an OSM tag first. Download all items with OSM tag,
then walk subclass tree and download.
===
Test out a new design.
===
Make a status page that shows the size of each database table.
===
What should URLs look like, say I want to show lakes in lapland?
https://osm.wikidata.link/matches?isa=Lake&location=Lapland
===
OSM & Wikidata pin map TODO list
IsA list should support filtering
===
2021-06-17
Candidate list should show street address. For example:
https://alpha.osm.wikidata.link/map/17/40.01461/-105.28196?item=Q42384818
---
Preset could be more specific. For example mosque instead of place of worship.
id-tagging-schema/data/presets/amenity/place_of_worship/muslim.json
---
candidates list should show object tags
===
2021-06-19
* Rename from 'alpha' to 'v2'.
* Use Flask-Babel for i18n. Get translations from
https://www.microsoft.com/en-us/language/
* Show total number of items
* Show place name
* Show aliases
===
2021-06-23
Planning to update user IP location code. Should grab items within city or
region. Need to handle IP that only resolves to a whole country. For example
archive.org is 207.241.224.2, and just returns USA. The USA is too big to apply
the matcher interface to.
When trying to match the whole USA we should show the whole country and
encourage the user to zoom in. Once the
---
Map thoughts. Questions:
What do we show to the user when the site loads?
What happens when the user drags the map?
What happens when the user changes zoom?
How does searching change things?
Starting scenarios:
User enters IP that resolves to a big country, say USA. We show a map of the
whole USA and ask them to zoom in. Once they've zoomed in we can show the total
number of items and the item type facets.
Find item type within Cambridge:
``` SQL
select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num
from item, item_location, planet_osm_polygon
where item.item_id = item_location.item_id and osm_id=-295355 and ST_Covers(way, location) group by isa order by num;
```
Also need to show a facet for items where the item type is empty.
Find item type within California:
``` SQL
select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num
from item, item_location, planet_osm_polygon
where item.item_id = item_location.item_id and osm_id=-165475 and ST_Intersects(way, location)
group by isa order by num desc limit 20;
```
This query takes 26.5 seconds.
England item count takes 1.5 seconds.
``` SQL
select count(distinct item_id)
from item_location, planet_osm_polygon
where osm_id=-58447 and ST_Covers(way, location);
```
===
2021-06-25
Library buildings (Q856584) in England. Query takes 3 seconds
``` SQL
select count(*)
from item, item_location, planet_osm_polygon as loc
where loc.osm_id=-58447
and jsonb_path_query_array(claims, '$.P31[*].mainsnak.datavalue.value.id') ? 'Q856584'
and item.item_id = item_location.item_id
and item_location.location && loc.way;
```
===
2021-07-04
TODO
* Better error page than just 500 Internal Server Error.
* Improve handling of Wikidata items without coordinates. Use different colour
for OSM Pin. Explain situation on item detail page. No need to look for matches.
* DONE: Show spinner when looking for nearby OSM candidate matches.
* DONE: Show message if no matches found.
* Add 'building only match' switch
* Two item pins on top of each other is a problem.
2021-07-05
Sometimes the selected OSM matches are incorrect. For example:
https://v2.osm.wikidata.link/map/15/37.31390/-121.86338?item=Q83645632
The item is linked to a node, a way and a relation. The node shows as a pin on
the map, but isn't in the list of possible nearby matches. The way and relation
both show in the list, but aren't selected.
2021-07-07
Logout link should come back to the same map location. Need to record the
location somewhere. Could be in a cookie, constant updating of the logout
URL, or have JavaScript that runs when the user follows the logout link.
Search
Should show a spinner so the user knows something is happening.
Trigger search after first three characters have been entered.
DONE: Style search hits so not so close to search box
Highlight chosen search result.
Close button to hide search results.
DONE: Zoom to result bounds instead of zoom level 16.
Should you be allowed to search while editing?
DONE: Hide OSM candidate checkboxes if user not logged in.
2021-07-10
Exclude ways that are part of a boundary. Example:
https://v2.osm.wikidata.link/map/18/42.37903/-71.11136?item=Q14715848
2021-07-16
Need better handling for OSM with wikidata tag but item has no coordinates.
Viewing a street shows too many yellow pins.
https://v2.osm.wikidata.link/map/15/37.31221/-121.88869?item=Q89545422
2021-07-17
Could match on just name
https://v2.osm.wikidata.link/map/18/50.21789/-5.28079?item=Q5288904
2021-07-18
Florida State Road 922 (Q2433226) is stored as multiple lines in the osm2pgsql
database. Need to rebuild the database with the --multi-geometry option so there is
only one.
2021-07-19
After a save clicking on another item without closing edit panel causes
problems. Need to trigger close_edit_list when opening item if upload_state is
set to 'done'
2021-07-22
Example of a long road: Collins Avenue (Q652775)
https://v2.osm.wikidata.link/map/19/25.86222/-80.12032?item=Q652775
2021-08-04
Use https://vue-select.org/ for item type filter.
Show alert with spinner while count is running.
Maybe we want to supply the item type filter as JSON and filter in the browser,
no need to hit the server and database.
Write documentation for the API.
Speed up the item detail OSM nearby option.
Use the sidebar to show list of items in the current view, so the user can
go through the list and check them.
OSM object polygon size is broken
2021-08-05
IsA search
```sql
SELECT 'Q' || item.item_id, item.labels->'en'->>'value' FROM item WHERE
item.claims ? 'P1282' AND lower(jsonb_extract_path_text(item.labels, 'en',
'value')) LIKE lower('%hotel%') AND length(jsonb_extract_path_text(item.labels,
'en', 'value')) < 20;
```
2021-09-11
Notes from Pixel 2
Pin at the centroid of a polygon is too busy, especially with an item that links
to multiple OSM objects. Object outline already on map, just need to connect
outline to Wikidata markers. Could try and work out corners of rectangular
buildings. Should link to ends nearest node for linear objects.
Show warning when navigating away from map with edits.
See WindowEventHandlers.onbeforeunload
Option to clear edit list.
---
Ignore coordinates with a Google Maps reference. Example:
https://www.wikidata.org/w/index.php?title=Q66228733&oldid=992964237
---
Check history for previous wikidata tags to warn mappers if a wikidata tag
they're adding has previously been removed.
Examples:
https://v2.osm.wikidata.link/map/17/52.18211/0.17756?item=Q6717455
and https://www.openstreetmap.org/way/143741201
https://www.openstreetmap.org/way/684624781
---
What happens when we move the map?
First we check the area visible on the map. If it is too large then there is
nothing we can do, we give up and tell the user they need to zoom in.
Otherwise we send the server a request for a count of the number of items in the
current view. If the count is too high we abort and tell the user to zoom in.
Once we know the area isn't too big and doesn't have too many items we want to
make three requests to the server. First we make a request for the Wikidata items
on the map and another request for OSM objects with a Wikidata tag on the map. Both
requests run at the same time. Once both requests complete we make another
request to check for missing Wikidata items that were linked from OSM objects.
---
This is done
https://v2.osm.wikidata.link/map/18/52.23270/0.21560?item=Q55099320
should match: https://www.openstreetmap.org/node/2000849525
Look for Tag:abandoned:railway=station
---
Need better handling for Wikidata redirects.
Example: https://www.openstreetmap.org/way/130458959
https://v2.osm.wikidata.link/map/18/51.36973/-2.81079?item=Q5117357
---
Consider 'OS grid reference'
https://www.wikidata.org/w/index.php?title=Q27082051&oldid=1336630735
---
Check for OpenStreetMap relation ID (P402) in Wikidata
Display on details page. Highlight matching relation.
example: https://www.wikidata.org/wiki/Q78078847
---
TODO
* DONE: Add special code for matching watercourses that works like street matching
* DONE: Frontend should catch API errors and show them
* DONE: API calls should return errors in JSON
* Run update code from systemd
* Stop Wikidata update code from crashing when it hits an error
* Add an option for 'select all' for linear features
* Add a note to details page explaining street matching
* Upload code to GitHub
* Candidates list jumps when first object is selected, because a message appears
at the top of the list. Can be fixed by having a message there and replacing
it.
IsA pages
* Flesh out IsA pages
* Allow users to add extra tags to IsA
* Add option to update IsA
Type filter
* Include type filter QIDs in URL
* Move type filter to modal box
* Show item type description
---
Show note about relations for tram stops and windfarms
---
Show dissolved, abolished or demolished date (P576)
https://map.osm.wikidata.link/map/18/40.74610/-73.99652?item=Q14707174
---
Get subclasses for one item type
``` SQL
select item_id, labels->'en'->'value' from item where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath) ?| '{"Q718893"}';
```
Get subclasses for items with OSM tag/key
``` SQL
select item_id, labels->'en'->'value'
from item
where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath)
?| array(select 'Q' || item_id from item where claims ? 'P1282');
```
---
Shipyard results shouldn't include place=city
https://map.osm.wikidata.link/map/18/50.89540/-1.38243?item=Q551401

View file

@ -4,7 +4,15 @@ export default {
public: {url: '/', static: true},
frontend: {url: '/dist'},
},
plugins: ['@snowpack/plugin-vue', '@snowpack/plugin-dotenv'],
plugins: [
'@snowpack/plugin-vue',
'@snowpack/plugin-dotenv',
['snowpack-plugin-cdn-import', {
dependencies: pkg.dependencies,
enableInDevMode: true,
baseUrl: 'https://unpkg.com',
}]
],
routes: [
/* Enable an SPA Fallback in development: */
// {"match": "routes", "src": ".*", "dest": "/index.html"},

12
templates/flash_msg.html Normal file
View file

@ -0,0 +1,12 @@
{% with messages = get_flashed_messages() %}
{% if messages %}
{% for message in messages %}
<div class="alert alert-success alert-dismissible" role="alert">
<button type="button" class="close" data-dismiss="alert" aria-label="Close">
<span aria-hidden="true">&times;</span>
</button>
{{ message }}
</div>
{% endfor %}
{% endif %}
{% endwith %}

36
templates/show_error.html Normal file
View file

@ -0,0 +1,36 @@
{% extends "base.html" %}
{% block style %}
<link rel="stylesheet" href="{{url_for('static', filename='css/exception.css')}}" />
{% endblock %}
{% block content %}
<div class="container my-2">
<div class="row">
<div class="col">
<h1>Software error: {{ tb.exception_type }}</h1>
<div>
<pre>{{ tb.exception }}</pre>
</div>
{% set body %}
URL: {{ request.url }}
{{ tb.plaintext | safe }}
{% endset %}
<p><a class="btn btn-primary btn-lg" role="button" href="https://github.com/EdwardBetts/osm-wikidata/issues/new?title={{ tb.exception | urlencode }}&body={{ body | urlencode }}">Submit as an issue on GitHub</a> (requires an account with GitHub)</p>
<h2 class="traceback">Traceback <em>(most recent call last)</em></h2>
{{ tb.render_summary(include_title=False) | safe }}
<p>Error in function "{{ tb.frames[-1].function_name }}": {{ last_frame_args | pprint }}</p>
<pre>{{ last_frame.locals | pprint }}</pre>
</div>
</div>
</div>
{% endblock %}

View file

@ -12,3 +12,6 @@ def test_format_wikibase_time_century():
v = {"time": "+1950-00-00T00:00:00Z", "precision": 7}
assert utils.format_wikibase_time(v) == "20th century"
v = {"time": "+1868-01-09T00:00:00Z", "precision": 11}
assert utils.format_wikibase_time(v) == "9 January 1868"

159
update.py Executable file
View file

@ -0,0 +1,159 @@
#!/usr/bin/python3
"""Download Wikidata recent changes and update items in local database."""
import json
import typing
from time import sleep
from matcher import database, model, wikidata, wikidata_api
# Database URL handed to database.init_db() below (PostgreSQL, database "matcher").
DB_URL = "postgresql:///matcher"
# Runs at import time, before any of the handlers below touch model or database.session.
database.init_db(DB_URL)
# Entity fields this script stores: handle_new() keeps only these keys when
# building a model.Item, and handle_edit() copies each of them onto the item.
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
class Change(typing.TypedDict):
"""Dict representing an edit in recent changes."""
title: str
timestamp: str
redirect: dict[str, typing.Any] | None
revid: int
def handle_new(change: Change) -> None:
    """Handle a new Wikidata item from the recent changes feed.

    The item is skipped when it has since been replaced with a redirect,
    when the fetched entity has no claims, or when it has no coordinates.
    An item that is already in the local database is passed to
    handle_edit() instead. Otherwise a new model.Item with its locations is
    added to the database session; this function does not commit.
    """
    qid = change["title"]
    ts = change["timestamp"]
    if change["redirect"]:
        print(f"{ts}: new item {qid}, since replaced with redirect")
        return

    item = model.Item.query.get(qid[1:])  # check if item is already loaded
    if item:
        return handle_edit(change)

    entity = wikidata_api.get_entity(qid)
    if entity["id"] != qid:
        print(f'redirect {qid} -> {entity["id"]}')
        return

    if "claims" not in entity:
        # Log the odd entity for debugging, then skip it instead of raising
        # KeyError on entity["claims"] below; handle_edit() skips these too.
        print(qid)
        print(entity)
        return

    coords = wikidata.get_entity_coords(entity["claims"])
    if not coords:
        print(f"{ts}: new item {qid} without coordinates")
        return

    print(f"{ts}: new item {qid} with coordinates")

    item_id = int(qid[1:])
    # Keep only the entity fields that are stored on the item.
    obj = {k: v for k, v in entity.items() if k in entity_keys}
    try:
        item = model.Item(item_id=item_id, **obj)
    except TypeError:
        # Dump details of the entity that could not be turned into an Item
        # before letting the error propagate.
        print(qid)
        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
        print(entity.keys())
        raise
    item.locations = model.location_objects(coords)
    database.session.add(item)
def coords_equal(a: dict[str, typing.Any], b: dict[str, typing.Any]) -> bool:
    """Return True when *a* and *b* serialise to the same JSON text.

    Keys are sorted while serialising, so dict key order does not matter.
    """
    canonical_a = json.dumps(a, sort_keys=True)
    canonical_b = json.dumps(b, sort_keys=True)
    return canonical_a == canonical_b
def handle_edit(change: Change) -> None:
    """Apply an edit from the recent changes feed to the stored item.

    Nothing happens when the item is not in the local database or when the
    stored revision is already at least as new as the change. An item that
    has become a redirect is deleted and the deletion committed right away.
    Otherwise the stored fields are refreshed from the fetched entity, and
    the locations are rebuilt when the coordinates differ.
    """
    qid = change["title"]
    stored = model.Item.query.get(qid[1:])
    if not stored:
        return  # item isn't in our database so it probably has no coordinates

    ts = change["timestamp"]
    if stored.lastrevid >= change["revid"]:
        print(f"{ts}: no need to update {qid}")
        return

    entity = wikidata_api.get_entity(qid)
    entity_qid = entity.pop("id")
    if entity_qid != qid:
        print(f"{ts}: item {qid} replaced with redirect")
        database.session.delete(stored)
        database.session.commit()
        return

    assert entity_qid == qid
    old_coords = wikidata.get_entity_coords(stored.claims)
    if "claims" not in entity:
        return

    new_coords = wikidata.get_entity_coords(entity["claims"])
    if coords_equal(old_coords, new_coords):
        print(f"{ts}: update item {qid}, no change to coordinates")
    else:
        print(f"{ts}: update item {qid}, including coordinates")
        stored.locations = model.location_objects(new_coords)

    # Copy every stored entity field from the fresh entity onto the item.
    for field in entity_keys:
        setattr(stored, field, entity[field])
def update_timestamp(timestamp: str) -> None:
    """Save timestamp to the rc_timestamp file in the current directory.

    The file is overwritten with the timestamp followed by a newline;
    update_database() reads it back to decide where to start.
    """
    # The context manager closes the file even if the write raises.
    with open("rc_timestamp", "w") as out:
        print(timestamp, file=out)
def update_database() -> None:
    """Check recent changes and apply updates to local mirror of Wikidata.

    Reads the starting timestamp from the rc_timestamp file, then requests
    recent changes batch by batch until the reply carries no "continue"
    entry. Each item title is handled at most once per call. After every
    batch the timestamp of the last change seen is saved and the database
    session is committed.
    """
    with open("rc_timestamp") as f:
        start = f.read().strip()

    rccontinue = None
    seen: set[str] = set()
    # Stays None until a change has been seen, so an empty first batch does
    # not hit an unbound variable when saving the timestamp below.
    timestamp: str | None = None
    while True:
        r = wikidata_api.get_recent_changes(rcstart=start, rccontinue=rccontinue)

        reply = r.json()
        for change in reply["query"]["recentchanges"]:
            rctype = change["type"]
            timestamp = change["timestamp"]
            qid = change["title"]
            if qid in seen:
                continue

            if rctype == "new":
                handle_new(change)
                seen.add(qid)
            if rctype == "edit":
                handle_edit(change)
                seen.add(qid)

        if timestamp is not None:
            update_timestamp(timestamp)
        print("commit")
        database.session.commit()

        if "continue" not in reply:
            break

        rccontinue = reply["continue"]["rccontinue"]
    database.session.commit()
    print("finished")
def main() -> None:
    """Run update_database() forever, sleeping 60 seconds between passes."""
    pause_seconds = 60
    while True:
        update_database()
        sleep(pause_seconds)
if __name__ == "__main__":
main()

View file

@ -1,37 +1,61 @@
#!/usr/bin/python3
from flask import (Flask, render_template, request, jsonify, redirect, url_for, g,
flash, session, Response, stream_with_context, abort, send_file)
import json
import re
from time import sleep, time
import flask_login
import GeoIP
import maxminddb
import requests
import sqlalchemy
from flask import (
Flask,
Response,
abort,
flash,
g,
jsonify,
redirect,
render_template,
request,
session,
stream_with_context,
url_for,
)
from lxml import etree
from requests_oauthlib import OAuth1Session
from sqlalchemy import func
from sqlalchemy.sql.expression import update
from matcher import (nominatim, model, database, commons, wikidata, wikidata_api,
osm_oauth, edit, mail, api, error_mail)
# from werkzeug.debug.tbtools import get_current_traceback
from matcher import (
api,
commons,
database,
edit,
error_mail,
mail,
model,
nominatim,
osm_oauth,
wikidata,
wikidata_api,
)
from matcher.data import property_map
from time import time, sleep
from requests_oauthlib import OAuth1Session
from lxml import etree
import werkzeug.exceptions
import inspect
import flask_login
import requests
import json
import GeoIP
import re
import maxminddb
import sqlalchemy
# from werkzeug.debug.tbtools import get_current_traceback
srid = 4326
re_point = re.compile(r'^POINT\((.+) (.+)\)$')
re_point = re.compile(r"^POINT\((.+) (.+)\)$")
app = Flask(__name__)
app.debug = True
app.config.from_object('config.default')
app.config.from_object("config.default")
error_mail.setup_error_mail(app)
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login_route'
osm_api_base = 'https://api.openstreetmap.org/api/0.6'
login_manager.login_view = "login_route"
osm_api_base = "https://api.openstreetmap.org/api/0.6"
maxminddb_reader = maxminddb.open_database(app.config["GEOLITE2"])
@ -39,7 +63,7 @@ DB_URL = "postgresql:///matcher"
database.init_db(DB_URL)
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
re_qid = re.compile(r'^Q\d+$')
re_qid = re.compile(r"^Q\d+$")
@app.teardown_appcontext
@ -51,6 +75,7 @@ def shutdown_session(exception=None):
def global_user():
g.user = flask_login.current_user._get_current_object()
def dict_repr_values(d):
    """Return a new dict with the keys of *d* and each value replaced by its repr()."""
    converted = {}
    for name, val in d.items():
        converted[name] = repr(val)
    return converted
@ -71,17 +96,19 @@ def dict_repr_values(d):
# "args": repr(last_frame_args),
# },
# }), 500
#
#
# return render_template('show_error.html',
# tb=tb,
# last_frame=last_frame,
# last_frame_args=last_frame_args), 500
def cors_jsonify(*args, **kwargs):
    """Build a JSON response with jsonify() and allow any origin to read it.

    All positional and keyword arguments are forwarded to jsonify() unchanged.
    """
    resp = jsonify(*args, **kwargs)
    # Wildcard CORS header: the reply may be read from pages on any origin.
    resp.headers["Access-Control-Allow-Origin"] = "*"
    return resp
def check_for_tagged_qids(qids):
tagged = set()
for qid in qids:
@ -108,12 +135,12 @@ def check_for_tagged_qid(qid):
def geoip_user_record():
    """Look up the GeoIP record for the requesting client.

    The address is taken from the ``ip`` query parameter when present,
    otherwise from the request's remote address.
    """
    gi = GeoIP.open(app.config["GEOIP_DATA"], GeoIP.GEOIP_STANDARD)
    # Query parameters live on request.args; the request object itself has no
    # get() method. get_user_location() reads the parameter the same way.
    remote_ip = request.args.get("ip", request.remote_addr)
    return gi.record_by_addr(remote_ip)
def get_user_location():
remote_ip = request.args.get('ip', request.remote_addr)
remote_ip = request.args.get("ip", request.remote_addr)
maxmind = maxminddb_reader.get(remote_ip)
return maxmind.get("location") if maxmind else None
@ -154,13 +181,15 @@ def isa_page(item_id):
subclass_list = []
for s in item.get_claim(subclass_property):
subclass = api.get_item(s["numeric-id"])
subclass_list.append({
"qid": s["id"],
"item_id": s["numeric-id"],
"label": subclass.label(),
"description": subclass.description(),
"isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
})
subclass_list.append(
{
"qid": s["id"],
"item_id": s["numeric-id"],
"label": subclass.label(),
"description": subclass.description(),
"isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
}
)
tags = api.get_tags_for_isa_item(item)
@ -254,14 +283,16 @@ def map_start_page():
lat, lon = 42.2917, -85.5872
radius = 5
return redirect(url_for(
'map_location',
lat=f'{lat:.5f}',
lon=f'{lon:.5f}',
zoom=16,
radius=radius,
ip=request.args.get('ip'),
))
return redirect(
url_for(
"map_location",
lat=f"{lat:.5f}",
lon=f"{lon:.5f}",
zoom=16,
radius=radius,
ip=request.args.get("ip"),
)
)
@app.route("/documentation")
@ -270,16 +301,14 @@ def documentation_page():
username = user.username if user.is_authenticated else None
return render_template(
"documentation.html",
active_tab="documentation",
username=username
"documentation.html", active_tab="documentation", username=username
)
@app.route("/search")
def search_page():
loc = get_user_location()
q = request.args.get('q')
q = request.args.get("q")
user = flask_login.current_user
username = user.username if user.is_authenticated else None
@ -296,6 +325,7 @@ def search_page():
q=q,
)
@app.route("/map/<int:zoom>/<float(signed=True):lat>/<float(signed=True):lon>")
def map_location(zoom, lat, lon):
qid = request.args.get("item")
@ -359,6 +389,36 @@ def lookup_item(item_id):
return redirect(url)
@app.route("/item/Q<int:item_id>")
def lookup_item(item_id):
    """Render the map page centred on a Wikidata item's first location.

    Responds with 404 when the item is not found or has no locations.
    """
    # NOTE(review): the diff context just above this hunk shows an earlier
    # function that is also named lookup_item; confirm only one remains.
    item = api.get_item(item_id)
    if not item:
        # TODO: show nicer page for Wikidata item not found
        return abort(404)

    try:
        lat, lon = item.locations[0].get_lat_lon()
    except IndexError:
        # TODO: show nicer page for Wikidata item without coordinates
        return abort(404)

    return render_template(
        "map.html",
        active_tab="map",
        zoom=16,
        lat=lat,
        lon=lon,
        username=get_username(),
        mode="map",
        q=None,
        qid=item.qid,
        item_type_filter=[],
    )
@app.route("/search/map")
def search_map_page():
user_lat, user_lon = get_user_location() or (None, None)
@ -398,10 +458,12 @@ def old_search_page():
def read_bounds_param():
    """Parse the comma-separated ``bounds`` query parameter into a list of floats."""
    raw_bounds = request.args["bounds"]
    return list(map(float, raw_bounds.split(",")))
def read_isa_filter_param():
isa_param = request.args.get('isa')
isa_param = request.args.get("isa")
if isa_param:
return set(qid.strip() for qid in isa_param.upper().split(','))
return set(qid.strip() for qid in isa_param.upper().split(","))
@app.route("/api/1/location")
def show_user_location():
@ -417,6 +479,7 @@ def api_wikidata_items_count():
t1 = time() - t0
return cors_jsonify(success=True, count=count, duration=t1)
@app.route("/api/1/isa_search")
def api_isa_search():
t0 = time()
@ -452,6 +515,7 @@ def api_wikidata_items():
t1 = time() - t0
return cors_jsonify(success=True, duration=t1, **ret)
@app.route("/api/1/place/<osm_type>/<int:osm_id>")
def api_place_items(osm_type, osm_id):
t0 = time()
@ -478,9 +542,7 @@ def api_get_item(item_id):
detail = api.item_detail(item)
t1 = time() - t0
return cors_jsonify(success=True,
duration=t1,
**detail)
return cors_jsonify(success=True, duration=t1, **detail)
@app.route("/api/1/item/Q<int:item_id>/tags")
@ -491,25 +553,23 @@ def api_get_item_tags(item_id):
osm_list = sorted(tags.keys())
t1 = time() - t0
return cors_jsonify(success=True,
qid=item.qid,
tag_or_key_list=osm_list,
tag_src=tags,
duration=t1)
return cors_jsonify(
success=True, qid=item.qid, tag_or_key_list=osm_list, tag_src=tags, duration=t1
)
def expand_street_name(from_names):
ret = set(from_names)
for name in from_names:
if any(name.startswith(st) for st in ('St ', 'St. ')):
first_space = name.find(' ')
if any(name.startswith(st) for st in ("St ", "St. ")):
first_space = name.find(" ")
ret.add("Saint" + name[first_space:])
if ', ' in name:
if ", " in name:
for n in set(ret):
comma = n.find(", ")
ret.add(name[:comma])
elif '/' in name:
elif "/" in name:
for n in set(ret):
ret.extend(part.strip() for part in n.split("/"))
@ -522,14 +582,12 @@ def api_find_osm_candidates(item_id):
t0 = time()
item = model.Item.query.get(item_id)
if not item:
return cors_jsonify(success=True,
qid=f'Q{item_id}',
error="item doesn't exist")
return cors_jsonify(success=True, qid=f"Q{item_id}", error="item doesn't exist")
if not item.locations:
return cors_jsonify(success=True,
qid=f'Q{item_id}',
error="item has no coordinates")
return cors_jsonify(
success=True, qid=f"Q{item_id}", error="item has no coordinates"
)
label = item.label()
item_is_street = item.is_street()
@ -547,17 +605,15 @@ def api_find_osm_candidates(item_id):
max_distance = 1_000
limit = 40
names = None
nearby = api.find_osm_candidates(item,
limit=limit,
max_distance=max_distance,
names=names)
nearby = api.find_osm_candidates(
item, limit=limit, max_distance=max_distance, names=names
)
if (item_is_street or item_is_watercourse) and not nearby:
# nearby = [osm for osm in nearby if street_name_match(label, osm)]
# try again without name filter
nearby = api.find_osm_candidates(item, limit=100,
max_distance=1_000)
nearby = api.find_osm_candidates(item, limit=100, max_distance=1_000)
t1 = time() - t0
return cors_jsonify(
@ -565,7 +621,7 @@ def api_find_osm_candidates(item_id):
qid=item.qid,
nearby=nearby,
duration=t1,
max_distance=max_distance
max_distance=max_distance,
)
@ -574,10 +630,12 @@ def api_missing_wikidata_items():
t0 = time()
qids_arg = request.args.get("qids")
if not qids_arg:
return cors_jsonify(success=False,
error="required parameter 'qids' is missing",
items=[],
isa_count=[])
return cors_jsonify(
success=False,
error="required parameter 'qids' is missing",
items=[],
isa_count=[],
)
qids = []
for qid in qids_arg.upper().split(","):
@ -593,10 +651,7 @@ def api_missing_wikidata_items():
ret = api.missing_wikidata_items(qids, lat, lon)
t1 = time() - t0
return cors_jsonify(
success=True,
duration=t1,
**ret)
return cors_jsonify(success=True, duration=t1, **ret)
@app.route("/api/1/search")
@ -615,20 +670,20 @@ def api_search():
return cors_jsonify(success=True, hits=hits)
@app.route("/api/1/polygon/<osm_type>/<int:osm_id>")
def api_polygon(osm_type, osm_id):
obj = model.Polygon.get_osm(osm_type, osm_id)
return cors_jsonify(successful=True,
osm_type=osm_type,
osm_id=osm_id,
geojson=obj.geojson())
return cors_jsonify(
successful=True, osm_type=osm_type, osm_id=osm_id, geojson=obj.geojson()
)
@app.route("/refresh/Q<int:item_id>")
def refresh_item(item_id):
assert not model.Item.query.get(item_id)
qid = f'Q{item_id}'
qid = f"Q{item_id}"
entity = wikidata_api.get_entity(qid)
entity_qid = entity.pop("id")
assert qid == entity_qid
@ -643,100 +698,110 @@ def refresh_item(item_id):
database.session.add(item)
database.session.commit()
return 'done'
return "done"
@app.route('/login')
@app.route("/login")
def login_openstreetmap():
return redirect(url_for('start_oauth',
next=request.args.get('next')))
return redirect(url_for("start_oauth", next=request.args.get("next")))
@app.route('/logout')
@app.route("/logout")
def logout():
next_url = request.args.get('next') or url_for('map_start_page')
next_url = request.args.get("next") or url_for("map_start_page")
flask_login.logout_user()
flash('you are logged out')
flash("you are logged out")
return redirect(next_url)
@app.route('/done/')
@app.route("/done/")
def done():
flash('login successful')
return redirect(url_for('map_start_page'))
flash("login successful")
return redirect(url_for("map_start_page"))
@app.route('/oauth/start')
@app.route("/oauth/start")
def start_oauth():
next_page = request.args.get('next')
next_page = request.args.get("next")
if next_page:
session['next'] = next_page
session["next"] = next_page
client_key = app.config['CLIENT_KEY']
client_secret = app.config['CLIENT_SECRET']
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
request_token_url = 'https://www.openstreetmap.org/oauth/request_token'
request_token_url = "https://www.openstreetmap.org/oauth/request_token"
callback = url_for('oauth_callback', _external=True)
callback = url_for("oauth_callback", _external=True)
oauth = OAuth1Session(client_key,
client_secret=client_secret,
callback_uri=callback)
oauth = OAuth1Session(
client_key, client_secret=client_secret, callback_uri=callback
)
fetch_response = oauth.fetch_request_token(request_token_url)
session['owner_key'] = fetch_response.get('oauth_token')
session['owner_secret'] = fetch_response.get('oauth_token_secret')
session["owner_key"] = fetch_response.get("oauth_token")
session["owner_secret"] = fetch_response.get("oauth_token_secret")
base_authorization_url = 'https://www.openstreetmap.org/oauth/authorize'
authorization_url = oauth.authorization_url(base_authorization_url,
oauth_consumer_key=client_key)
base_authorization_url = "https://www.openstreetmap.org/oauth/authorize"
authorization_url = oauth.authorization_url(
base_authorization_url, oauth_consumer_key=client_key
)
return redirect(authorization_url)
@login_manager.user_loader
def load_user(user_id):
    """Return the model.User looked up by *user_id*, as given by query.get()."""
    user = model.User.query.get(user_id)
    return user
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback():
client_key = app.config['CLIENT_KEY']
client_secret = app.config['CLIENT_SECRET']
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(client_key,
client_secret=client_secret,
resource_owner_key=session['owner_key'],
resource_owner_secret=session['owner_secret'])
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
resource_owner_key=session["owner_key"],
resource_owner_secret=session["owner_secret"],
)
oauth_response = oauth.parse_authorization_response(request.url)
verifier = oauth_response.get('oauth_verifier')
access_token_url = 'https://www.openstreetmap.org/oauth/access_token'
oauth = OAuth1Session(client_key,
client_secret=client_secret,
resource_owner_key=session['owner_key'],
resource_owner_secret=session['owner_secret'],
verifier=verifier)
verifier = oauth_response.get("oauth_verifier")
access_token_url = "https://www.openstreetmap.org/oauth/access_token"
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
resource_owner_key=session["owner_key"],
resource_owner_secret=session["owner_secret"],
verifier=verifier,
)
oauth_tokens = oauth.fetch_access_token(access_token_url)
session['owner_key'] = oauth_tokens.get('oauth_token')
session['owner_secret'] = oauth_tokens.get('oauth_token_secret')
session["owner_key"] = oauth_tokens.get("oauth_token")
session["owner_secret"] = oauth_tokens.get("oauth_token_secret")
r = oauth.get(osm_api_base + '/user/details')
r = oauth.get(osm_api_base + "/user/details")
info = osm_oauth.parse_userinfo_call(r.content)
user = model.User.query.filter_by(osm_id=info['id']).one_or_none()
user = model.User.query.filter_by(osm_id=info["id"]).one_or_none()
if user:
user.osm_oauth_token = oauth_tokens.get('oauth_token')
user.osm_oauth_token_secret = oauth_tokens.get('oauth_token_secret')
user.osm_oauth_token = oauth_tokens.get("oauth_token")
user.osm_oauth_token_secret = oauth_tokens.get("oauth_token_secret")
else:
user = model.User(
username=info['username'],
description=info['description'],
img=info['img'],
osm_id=info['id'],
osm_account_created=info['account_created'],
username=info["username"],
description=info["description"],
img=info["img"],
osm_id=info["id"],
osm_account_created=info["account_created"],
mock_upload=False,
)
database.session.add(user)
database.session.commit()
flask_login.login_user(user)
next_page = session.get('next') or url_for('map_start_page')
next_page = session.get("next") or url_for("map_start_page")
return redirect(next_page)
@ -744,14 +809,13 @@ def validate_edit_list(edits):
for e in edits:
assert model.Item.get_by_qid(e["qid"])
assert e["op"] in {"add", "remove", "change"}
osm_type, _, osm_id = e['osm'].partition('/')
osm_type, _, osm_id = e["osm"].partition("/")
osm_id = int(osm_id)
if osm_type == 'node':
if osm_type == "node":
assert model.Point.query.get(osm_id)
else:
src_id = osm_id if osm_type == "way" else -osm_id
assert (model.Line.query.get(src_id)
or model.Polygon.query.get(src_id))
assert model.Line.query.get(src_id) or model.Polygon.query.get(src_id)
@app.route("/api/1/edit", methods=["POST"])
@ -760,9 +824,9 @@ def api_new_edit_session():
incoming = request.json
validate_edit_list(incoming["edit_list"])
es = model.EditSession(user=user,
edit_list=incoming['edit_list'],
comment=incoming['comment'])
es = model.EditSession(
user=user, edit_list=incoming["edit_list"], comment=incoming["comment"]
)
database.session.add(es)
database.session.commit()
@ -770,13 +834,14 @@ def api_new_edit_session():
return cors_jsonify(success=True, session_id=session_id)
@app.route("/api/1/edit/<int:session_id>", methods=["POST"])
def api_edit_session(session_id):
es = model.EditSession.query.get(session_id)
assert flask_login.current_user.id == es.user_id
incoming = request.json
for f in 'edit_list', 'comment':
for f in "edit_list", "comment":
if f not in incoming:
continue
setattr(es, f, incoming[f])
@ -784,21 +849,24 @@ def api_edit_session(session_id):
return cors_jsonify(success=True, session_id=session_id)
class VersionMismatch(Exception):
    """Exception type for a version mismatch (raise sites are outside this view)."""
def osm_object(osm_type, osm_id):
if osm_type == "node":
return model.Point.query.get(osm_id)
src_id = int(osm_id) * {'way': 1, 'relation': -1}[osm_type]
src_id = int(osm_id) * {"way": 1, "relation": -1}[osm_type]
for cls in model.Line, model.Polygon:
obj = cls.query.get(src_id)
if obj:
return obj
def process_edit(changeset_id, e):
osm_type, _, osm_id = e['osm'].partition('/')
osm_type, _, osm_id = e["osm"].partition("/")
qid = e["qid"]
item_id = qid[1:]
@ -851,9 +919,7 @@ def process_edit(changeset_id, e):
cls = type(osm)
database.session.execute(
update(cls).
where(cls.src_id == osm.src_id).
values(tags=new_tags)
update(cls).where(cls.src_id == osm.src_id).values(tags=new_tags)
)
db_edit = model.ChangesetEdit(
@ -867,6 +933,7 @@ def process_edit(changeset_id, e):
return "saved"
@app.route("/api/1/save/<int:session_id>")
def api_save_changeset(session_id):
assert g.user.is_authenticated
@ -938,7 +1005,8 @@ def api_real_save_changeset(session_id):
edit.close_changeset(changeset_id)
yield send("done")
return Response(stream_with_context(stream(g.user)), mimetype='text/event-stream')
return Response(stream_with_context(stream(g.user)), mimetype="text/event-stream")
def api_mock_save_changeset(session_id):
es = model.EditSession.query.get(session_id)
@ -948,7 +1016,7 @@ def api_mock_save_changeset(session_id):
return f"data: {json.dumps(data)}\n\n"
def stream(user):
print('stream')
print("stream")
changeset_id = database.session.query(func.max(model.Changeset.id) + 1).scalar()
sleep(1)
yield send("open", id=changeset_id)
@ -956,12 +1024,12 @@ def api_mock_save_changeset(session_id):
update_count = 0
print('record_changeset', changeset_id)
print("record_changeset", changeset_id)
edit.record_changeset(
id=changeset_id, user=user, comment=es.comment, update_count=update_count
)
print('edits')
print("edits")
for num, e in enumerate(es.edit_list):
print(num, e)
@ -970,12 +1038,12 @@ def api_mock_save_changeset(session_id):
yield send("saved", edit=e, num=num)
sleep(1)
print('closing')
print("closing")
yield send("closing")
sleep(1)
yield send("done")
return Response(stream(g.user), mimetype='text/event-stream')
return Response(stream(g.user), mimetype="text/event-stream")
if __name__ == "__main__":