Merge branch 'main' of github.com:EdwardBetts/owl-map
This commit is contained in:
commit 33bda7f54b

matcher.style (new file, 162 lines)

@@ -0,0 +1,162 @@
# This is the default osm2pgsql .style file that comes with osm2pgsql.
#
# A .style file has 4 columns that define how OSM objects end up in tables in
# the database and what columns are created. It interacts with the command-line
# hstore options.
#
# Columns
# =======
#
# OsmType: This is either "node", "way" or "node,way" and indicates if this tag
# applies to nodes, ways, or both.
#
# Tag: The tag
#
# DataType: The type of the column to be created. Normally "text"
#
# Flags: Flags that indicate what table the OSM object is moved into.
#
# There are 6 possible flags. These flags are used both to indicate if a column
# should be created, and if ways with the tag are assumed to be areas. The area
# assumptions can be overridden with an area=yes/no tag
#
# polygon - Create a column for this tag, and objects with the tag are areas
#
# linear - Create a column for this tag
#
# nocolumn - Override the above and don't create a column for the tag, but do
# include objects with this tag
#
# phstore - Same as polygon,nocolumn for backward compatibility
#
# delete - Drop this tag completely and don't create a column for it. This also
# prevents the tag from being added to hstore columns
#
# nocache - Deprecated and does nothing
#
# If an object has a tag that indicates it is an area or has area=yes/1,
# osm2pgsql will try to turn it into an area. If it succeeds, it places it in
# the polygon table. If it fails (e.g. not a closed way) it places it in the
# line table.
#
# Nodes are never placed into the polygon or line table and are always placed in
# the point table.
#
# Hstore
# ======
#
# The options --hstore, --hstore-match-only, and --hstore-all interact with
# the .style file.
#
# With --hstore any tags without a column will be added to the hstore column.
# This will also cause all objects to be kept.
#
# With --hstore-match-only the behavior for tags is the same, but objects are
# only kept if they have a non-NULL value in one of the columns.
#
# With --hstore-all all tags are added to the hstore column unless they appear
# in the style file with a delete flag, causing duplication between the normal
# columns and the hstore column.
#
# Special database columns
# ========================
#
# There are some special database columns that if present in the .style file
# will be populated by osm2pgsql.
#
# These are
#
# z_order - datatype int4
#
# way_area - datatype real. The area of the way, in the units of the projection
# (e.g. square mercator meters). Only applies to areas
#
# osm_user - datatype text
# osm_uid - datatype integer
# osm_version - datatype integer
# osm_changeset - datatype integer
# osm_timestamp - datatype timestamptz(0).
# Used with the --extra-attributes option to include metadata in the database.
# If importing with both --hstore and --extra-attributes the meta-data will
# end up in the tags hstore column regardless of the style file.

# OsmType  Tag                 DataType  Flags
node,way   access              text      linear
node,way   addr:housename      text      linear
node,way   addr:housenumber    text      linear
node,way   addr:interpolation  text      linear
node,way   admin_level         text      linear
node,way   aerialway           text      linear
node,way   aeroway             text      polygon
node,way   amenity             text      polygon
node,way   area                text      polygon # hard coded support for area=1/yes => polygon is in osm2pgsql
node,way   barrier             text      linear
node,way   bicycle             text      linear
node,way   brand               text      linear
node,way   bridge              text      linear
node,way   boundary            text      linear
node,way   building            text      polygon
node       capital             text      linear
node,way   construction        text      linear
node,way   covered             text      linear
node,way   culvert             text      linear
node,way   cutting             text      linear
node,way   denomination        text      linear
node,way   disused             text      linear
node       ele                 text      linear
node,way   embankment          text      linear
node,way   foot                text      linear
node,way   generator:source    text      linear
node,way   harbour             text      polygon
node,way   highway             text      linear
node,way   historic            text      polygon
node,way   horse               text      linear
node,way   intermittent        text      linear
node,way   junction            text      linear
node,way   landuse             text      polygon
node,way   layer               text      linear
node,way   leisure             text      polygon
node,way   lock                text      linear
node,way   man_made            text      polygon
node,way   military            text      polygon
node,way   motorcar            text      linear
node,way   name                text      linear
node,way   natural             text      polygon # natural=coastline tags are discarded by a hard coded rule in osm2pgsql
node,way   office              text      polygon
node,way   oneway              text      linear
node,way   operator            text      linear
node,way   place               text      polygon
node,way   population          text      linear
node,way   power               text      polygon
node,way   power_source        text      linear
node,way   public_transport    text      polygon
node,way   railway             text      linear
node,way   ref                 text      linear
node,way   religion            text      linear
node,way   route               text      linear
node,way   service             text      linear
node,way   shop                text      polygon
node,way   sport               text      polygon
node,way   surface             text      linear
node,way   toll                text      linear
node,way   tourism             text      polygon
node,way   tower:type          text      linear
way        tracktype           text      linear
node,way   tunnel              text      linear
node,way   water               text      polygon
node,way   waterway            text      polygon
node,way   wetland             text      polygon
node,way   width               text      linear
node,way   wood                text      linear
node,way   z_order             int4      linear # This is calculated during import
way        way_area            real      linear # This is calculated during import

# Area tags
# We don't make columns for these tags, but objects with them are areas.
# Mainly for use with hstore
way        abandoned:aeroway   text      polygon,nocolumn
way        abandoned:amenity   text      polygon,nocolumn
way        abandoned:building  text      polygon,nocolumn
way        abandoned:landuse   text      polygon,nocolumn
way        abandoned:power     text      polygon,nocolumn
way        area:highway        text      polygon,nocolumn
@@ -4,7 +4,9 @@
 CallParams = dict[str, str | int]

 user_agent = (
-    "osm-wikidata/0.1 (https://github.com/EdwardBetts/osm-wikidata; edward@4angle.com)"
+    "osm-wikidata/0.2"
+    " (https://github.com/EdwardBetts/osm-wikidata;"
+    " edward@4angle.com)"
 )
@@ -40,13 +40,14 @@ skip_tags = {
 }


-def get_country_iso3166_1(lat, lon):
+def get_country_iso3166_1(lat: float, lon: float) -> set[str]:
     """For a given lat/lon return a set of ISO country codes.

     Also cache the country code in the global object.

     Normally there should be only one country.
     """
     point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), srid)
     alpha2_codes = set()
     q = model.Polygon.query.filter(
@@ -263,7 +264,7 @@ WHERE tags ? 'wikidata'
     conn = database.session.connection()
     result = conn.execute(text(sql))

-    print(sql)
+    # print(sql)

     point_sql = (
         f"""
@@ -788,7 +789,7 @@ def find_osm_candidates(item, limit=80, max_distance=450, names=None):
     if limit:
         s = s.limit(limit)

-    print(s.compile(compile_kwargs={"literal_binds": True}))
+    # print(s.compile(compile_kwargs={"literal_binds": True}))

     conn = database.session.connection()
     nearby = []
@@ -1031,7 +1032,7 @@ def isa_incremental_search(search_terms):
         func.length(en_label) < 20,
     )

-    print(q.statement.compile(compile_kwargs={"literal_binds": True}))
+    # print(q.statement.compile(compile_kwargs={"literal_binds": True}))

     ret = []
     for item in q:
@@ -1,10 +1,11 @@
-from flask import current_app, g, request, has_request_context
+import smtplib
+import sys
+import traceback
 from email.mime.text import MIMEText
 from email.utils import formatdate, make_msgid
 from pprint import pformat
-import smtplib
-import traceback
-import sys
+
+from flask import current_app, g, has_request_context, request


 def send_mail(subject, body, config=None):
@@ -71,7 +72,7 @@ def open_changeset_error(session_id, changeset, r):
     username = g.user.username
     body = f"""
 user: {username}
-page: {url}
+page: {r.url}

 message user: https://www.openstreetmap.org/message/new/{username}
matcher/model.py (430 lines changed)

@@ -1,89 +1,159 @@
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.schema import ForeignKey, Column
-from sqlalchemy.orm import relationship, column_property, deferred, backref
-from sqlalchemy import func
-from sqlalchemy.types import Integer, String, Float, Boolean, DateTime, Text, BigInteger
-from sqlalchemy.dialects import postgresql
-from sqlalchemy.sql.expression import cast
-from sqlalchemy.ext.hybrid import hybrid_property
-from sqlalchemy.ext.declarative import declared_attr
-from geoalchemy2 import Geometry
-from collections import defaultdict
-from flask_login import UserMixin
-from .database import session, now_utc
-from . import wikidata, utils, mail
 import json
 import re
+import typing
+from collections import defaultdict
+from typing import Any
+
+from flask_login import UserMixin
+from geoalchemy2 import Geometry
+from sqlalchemy import func
+from sqlalchemy.dialects import postgresql
+from sqlalchemy.ext.associationproxy import association_proxy
+from sqlalchemy.ext.declarative import declarative_base, declared_attr
+from sqlalchemy.ext.hybrid import hybrid_property
+from sqlalchemy.orm import backref, column_property, deferred, relationship
+from sqlalchemy.orm.collections import attribute_mapped_collection
+from sqlalchemy.schema import Column, ForeignKey
+from sqlalchemy.sql.expression import cast
+from sqlalchemy.types import BigInteger, Boolean, DateTime, Float, Integer, String, Text
+
+from . import mail, utils, wikidata
+from .database import now_utc, session

 Base = declarative_base()
 Base.query = session.query_property()

-re_point = re.compile(r'^POINT\((.+) (.+)\)$')
+re_point = re.compile(r"^POINT\((.+) (.+)\)$")
+
+osm_type_enum = postgresql.ENUM(
+    "node", "way", "relation", name="osm_type_enum", metadata=Base.metadata
+)
+
+re_lau_code = re.compile(r"^[A-Z]{2}([^A-Z].+)$")  # 'LAU (local administrative unit)'
+
+property_map = [
+    ("P238", ["iata"], "IATA airport code"),
+    ("P239", ["icao"], "ICAO airport code"),
+    ("P240", ["faa", "ref"], "FAA airport code"),
+    ("P296", ["ref", "ref:train", "railway:ref"], "station code"),
+    ("P300", ["ISO3166-2"], "ISO 3166-2 code"),
+    ("P359", ["ref:rce"], "Rijksmonument ID"),
+    ("P590", ["ref:gnis", "GNISID", "gnis:id", "gnis:feature_id"], "USGS GNIS ID"),
+    ("P649", ["ref:nrhp"], "NRHP reference number"),
+    ("P722", ["uic_ref"], "UIC station code"),
+    ("P782", ["ref"], "LAU (local administrative unit)"),
+    ("P836", ["ref:gss"], "UK Government Statistical Service code"),
+    ("P856", ["website", "contact:website", "url"], "website"),
+    ("P882", ["nist:fips_code"], "FIPS 6-4 (US counties)"),
+    ("P901", ["ref:fips"], "FIPS 10-4 (countries and regions)"),
+    # A UIC id can be a IBNR, but not every IBNR is an UIC id
+    ("P954", ["uic_ref"], "IBNR ID"),
+    ("P981", ["ref:woonplaatscode"], "BAG code for Dutch residencies"),
+    ("P1216", ["HE_ref"], "National Heritage List for England number"),
+    ("P2253", ["ref:edubase"], "EDUBase URN"),
+    ("P2815", ["esr:user", "ref", "ref:train"], "ESR station code"),
+    ("P3425", ["ref", "ref:SIC"], "Natura 2000 site ID"),
+    ("P3562", ["seamark:light:reference"], "Admiralty number"),
+    (
+        "P4755",
+        ["ref", "ref:train", "ref:crs", "crs", "nat_ref"],
+        "UK railway station code",
+    ),
+    ("P4803", ["ref", "ref:train"], "Amtrak station code"),
+    ("P6082", ["nycdoitt:bin"], "NYC Building Identification Number"),
+    ("P5086", ["ref"], "FIPS 5-2 alpha code (US states)"),
+    ("P5087", ["ref:fips"], "FIPS 5-2 numeric code (US states)"),
+    ("P5208", ["ref:bag"], "BAG building ID for Dutch buildings"),
+]
+
+T = typing.TypeVar("T", bound="Item")

-osm_type_enum = postgresql.ENUM('node', 'way', 'relation',
-                                name='osm_type_enum',
-                                metadata=Base.metadata)

 class Item(Base):
+    """Wikidata item."""
+
     __tablename__ = "item"
     item_id = Column(Integer, primary_key=True, autoincrement=False)
     labels = Column(postgresql.JSONB)
     descriptions = Column(postgresql.JSONB)
     aliases = Column(postgresql.JSONB)
     sitelinks = Column(postgresql.JSONB)
-    claims = Column(postgresql.JSONB)
+    claims = Column(postgresql.JSONB, nullable=False)
     lastrevid = Column(Integer, nullable=False, unique=True)
-    locations = relationship("ItemLocation", cascade="all, delete-orphan", backref="item")
+    locations = relationship(
+        "ItemLocation", cascade="all, delete-orphan", backref="item"
+    )
     qid = column_property("Q" + cast(item_id, String))

+    wiki_extracts = relationship(
+        "Extract",
+        collection_class=attribute_mapped_collection("site"),
+        cascade="save-update, merge, delete, delete-orphan",
+        backref="item",
+    )
+    extracts = association_proxy("wiki_extracts", "extract")
+
     @classmethod
-    def get_by_qid(cls, qid):
+    def get_by_qid(cls: typing.Type[T], qid: str) -> T | None:
         if qid and len(qid) > 1 and qid[0].upper() == "Q" and qid[1:].isdigit():
-            return cls.query.get(qid[1:])
+            obj: T = cls.query.get(qid[1:])
+            return obj
+        return None

     @property
-    def wd_url(self):
+    def wd_url(self) -> str:
+        """Wikidata URL for item."""
         return f"https://www.wikidata.org/wiki/{self.qid}"

-    def get_claim(self, pid):
-        return [i["mainsnak"]["datavalue"]["value"] if "datavalue" in i["mainsnak"] else None
-                for i in self.claims.get(pid, [])]
+    def get_claim(self, pid: str) -> list[dict[str, Any] | None]:
+        """List of claims for given Wikidata property ID."""
+        claims = typing.cast(dict[str, list[dict[str, Any]]], self.claims)
+        return [
+            i["mainsnak"]["datavalue"]["value"]
+            if "datavalue" in i["mainsnak"]
+            else None
+            for i in claims.get(pid, [])
+        ]

-    def label(self, lang='en'):
-        if lang in self.labels:
-            return self.labels[lang]['value']
-        elif 'en' in self.labels:
-            return self.labels['en']['value']
+    def label(self, lang: str = "en") -> str:
+        """Label for this Wikidata item."""
+        labels = typing.cast(dict[str, dict[str, Any]], self.labels)
+        if lang in labels:
+            return typing.cast(str, labels[lang]["value"])
+        elif "en" in labels:
+            return typing.cast(str, labels["en"]["value"])

-        label_list = list(self.labels.values())
-        return label_list[0]['value'] if label_list else '[no label]'
+        label_list = list(labels.values())
+        return typing.cast(str, label_list[0]["value"]) if label_list else "[no label]"

-    def description(self, lang='en'):
-        if lang in self.descriptions:
-            return self.descriptions[lang]['value']
-        elif 'en' in self.descriptions:
-            return self.descriptions['en']['value']
-        return
+    def description(self, lang: str = "en") -> str | None:
+        """Return a description of the item."""
+        descriptions = typing.cast(dict[str, dict[str, Any]], self.descriptions)
+        if lang in descriptions:
+            return typing.cast(str, descriptions[lang]["value"])
+        elif "en" in descriptions:
+            return typing.cast(str, descriptions["en"]["value"])
+        return None

         d_list = list(self.descriptions.values())
         if d_list:
-            return d_list[0]['value']
+            return d_list[0]["value"]

-    def get_aliases(self, lang='en'):
+    def get_aliases(self, lang="en"):
         if lang not in self.aliases:
-            if 'en' not in self.aliases:
+            if "en" not in self.aliases:
                 return []
-            lang = 'en'
-        return [a['value'] for a in self.aliases[lang]]
+            lang = "en"
+        return [a["value"] for a in self.aliases[lang]]

     def get_part_of_names(self):
         if not self.claims:
             return set()

         part_of_names = set()
-        for p361 in self.claims.get('P361', []):
+        for p361 in self.claims.get("P361", []):
             try:
-                part_of_id = p361['mainsnak']['datavalue']['value']['numeric-id']
+                part_of_id = p361["mainsnak"]["datavalue"]["value"]["numeric-id"]
             except KeyError:
                 continue
             if part_of_id == self.item_id:
@@ -98,7 +168,7 @@ class Item(Base):

     @property
     def entity(self):
-        keys = ['labels', 'aliases', 'descriptions', 'sitelinks', 'claims']
+        keys = ["labels", "aliases", "descriptions", "sitelinks", "claims"]
         return {key: getattr(self, key) for key in keys}

     def names(self, check_part_of=True):
@@ -107,22 +177,24 @@ class Item(Base):
         d = wikidata.names_from_entity(self.entity) or defaultdict(list)

         for name, sources in list(d.items()):
-            if len(sources) == 1 and sources[0][0] == 'image':
+            if len(sources) == 1 and sources[0][0] == "image":
                 continue
             for part_of_name in part_of_names:
                 if not name.startswith(part_of_name):
                     continue
-                prefix_removed = name[len(part_of_name):].strip()
+                prefix_removed = name[len(part_of_name) :].strip()
                 if prefix_removed not in d:
                     d[prefix_removed] = sources

         if self.claims:
-            for p6375 in self.claims.get('P6375', []):
+            for p6375 in self.claims.get("P6375", []):
                 try:
-                    street_address = p6375['mainsnak']['datavalue']['value']
+                    street_address = p6375["mainsnak"]["datavalue"]["value"]
                 except KeyError:
                     continue
-                d[street_address['text']].append(('P6375', street_address.get('language')))
+                d[street_address["text"]].append(
+                    ("P6375", street_address.get("language"))
+                )

         # A terrace of buildings can be illustrated with a photo of a single building.
         # We try to determine if this is the case and avoid using the filename of the
@@ -131,8 +203,11 @@ class Item(Base):
         def has_digit(s):
             return any(c.isdigit() for c in s)

-        image_names = {name for name, sources in d.items()
-                       if len(sources) == 1 and sources[0][0] == 'image' and has_digit(name)}
+        image_names = {
+            name
+            for name, sources in d.items()
+            if len(sources) == 1 and sources[0][0] == "image" and has_digit(name)
+        }
         if not image_names:
             return dict(d) or None
@@ -166,10 +241,10 @@ class Item(Base):
         isa_qids = self.get_isa_qids()

         matching_types = {
             "Q12731",  # dead end street
             "Q34442",  # road
             "Q79007",  # street
             "Q83620",  # thoroughfare
             "Q21000333",  # shopping street
             "Q62685721",  # pedestrian street
         }
@@ -179,14 +254,13 @@ class Item(Base):
         if isa_qids is None:
             isa_qids = self.get_isa_qids()
         matching_types = {
             "Q355304",  # watercourse
             "Q4022",  # river
             "Q47521",  # stream
             "Q1437299",  # creek
             "Q63565252",  # brook
             "Q12284",  # canal
             "Q55659167",  # natural watercourse
-
         }
         return bool(matching_types & set(isa_qids))
@@ -195,19 +269,29 @@ class Item(Base):
         return self.is_street(isa_qids) or self.is_watercourse(isa_qids)

     def is_tram_stop(self):
-        return 'Q2175765' in self.get_isa_qids()
+        return "Q2175765" in self.get_isa_qids()

     def alert_admin_about_bad_time(self, v):
-        body = ("Wikidata item has an unsupported time precision\n\n"
-                + self.wd_url + "\n\n" + "Value:\n\n" + json.dumps(v, indent=2))
+        body = (
+            "Wikidata item has an unsupported time precision\n\n"
+            + self.wd_url
+            + "\n\n"
+            + "Value:\n\n"
+            + json.dumps(v, indent=2)
+        )
         mail.send_mail(f"OWL Map: bad time value in {self.qid}", body)

-    def closed(self):
+    def time_claim(self, pid):
         ret = []
-        for v in self.get_claim("P3999"):
+        for v in self.get_claim(pid):
             if not v:
                 continue
-            t = utils.format_wikibase_time(v)
+            try:
+                t = utils.format_wikibase_time(v)
+            except Exception:
+                self.alert_admin_about_bad_time(v)
+                raise
+
             if t:
                 ret.append(t)
             else:
@@ -215,6 +299,84 @@ class Item(Base):

         return ret

+    def closed(self):
+        return self.time_claim("P3999")
+
+    def first_paragraph_language(self, lang):
+        if lang not in self.sitelinks():
+            return
+        extract = self.extracts.get(lang)
+        if not extract:
+            return
+
+        empty_list = [
+            "<p><span></span></p>",
+            "<p><span></span>\n</p>",
+            "<p><span></span>\n\n</p>",
+            "<p>\n<span></span>\n</p>",
+            "<p>\n\n<span></span>\n</p>",
+            "<p>.\n</p>",
+            "<p><br></p>",
+            '<p class="mw-empty-elt">\n</p>',
+            '<p class="mw-empty-elt">\n\n</p>',
+            '<p class="mw-empty-elt">\n\n\n</p>',
+        ]
+
+        text = extract.strip()
+        while True:
+            found_empty = False
+            for empty in empty_list:
+                if text.startswith(empty):
+                    text = text[len(empty) :].strip()
+                    found_empty = True
+            if not found_empty:
+                break
+
+        close_tag = "</p>"
+        first_end_p_tag = text.find(close_tag)
+        if first_end_p_tag == -1:
+            # FIXME: e-mail admin
+            return text
+
+        return text[: first_end_p_tag + len(close_tag)]
+
+    def get_identifiers_tags(self):
+        tags = defaultdict(list)
+        for claim, osm_keys, label in property_map:
+            values = [
+                i["mainsnak"]["datavalue"]["value"]
+                for i in self.claims.get(claim, [])
+                if "datavalue" in i["mainsnak"]
+            ]
+            if not values:
+                continue
+            if claim == "P782":
+                values += [
+                    m.group(1) for m in (re_lau_code.match(v) for v in values) if m
+                ]
+            for osm_key in osm_keys:
+                tags[osm_key].append((values, label))
+        return dict(tags)
+
+    def get_identifiers(self):
+        ret = {}
+        for claim, osm_keys, label in property_map:
+            values = [
+                i["mainsnak"]["datavalue"]["value"]
+                for i in self.claims.get(claim, [])
+                if "datavalue" in i["mainsnak"]
+            ]
+            if not values:
+                continue
+            if claim == "P782":
+                values += [
+                    m.group(1) for m in (re_lau_code.match(v) for v in values) if m
+                ]
+            for osm_key in osm_keys:
+                ret[label] = values
+        return ret
+
+
 # class Claim(Base):
 #     __tablename__ = "claim"
 #     item_id = Column(Integer, primary_key=True)
@@ -222,13 +384,14 @@ class Item(Base):
 #     position = Column(Integer, primary_key=True)
 #     mainsnak = Column(postgresql.JSONB)

-class ItemIsA(Base):
-    __tablename__ = 'item_isa'
-    item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
-    isa_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
-
-    item = relationship('Item', foreign_keys=[item_id])
-    isa = relationship('Item', foreign_keys=[isa_id])
+
+class ItemIsA(Base):
+    __tablename__ = "item_isa"
+    item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
+    isa_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
+
+    item = relationship("Item", foreign_keys=[item_id])
+    isa = relationship("Item", foreign_keys=[isa_id])


 class ItemLocation(Base):
@@ -241,18 +404,21 @@ class ItemLocation(Base):
     qid = column_property("Q" + cast(item_id, String))
     pid = column_property("P" + cast(item_id, String))

-    def get_lat_lon(self):
-        return session.query(func.ST_Y(self.location),
-                             func.ST_X(self.location)).one()
+    def get_lat_lon(self) -> tuple[float, float]:
+        """Get latitude and longitude of item."""
+        loc: tuple[float, float]
+        loc = session.query(func.ST_Y(self.location), func.ST_X(self.location)).one()
+        return loc


 def location_objects(coords):
     locations = []
     for pid, coord_list in coords.items():
         for num, coords in enumerate(coord_list):
             point = f"POINT({coords['longitude']} {coords['latitude']})"
-            loc = ItemLocation(property_id=int(pid[1:]),
-                               statement_order=num,
-                               location=point)
+            loc = ItemLocation(
+                property_id=int(pid[1:]), statement_order=num, location=point
+            )
             locations.append(loc)
     return locations
@@ -282,8 +448,7 @@ class MapMixin:
     @declared_attr
     def geojson_str(cls):
         return column_property(
-            func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6),
-            deferred=True
+            func.ST_AsGeoJSON(cls.way, maxdecimaldigits=6), deferred=True
         )

     @declared_attr
@@ -292,17 +457,16 @@ class MapMixin:

     @hybrid_property
     def has_street_address(self):
-        return ("addr:housenumber" in self.tags
-                and "addr:street" in self.tags)
+        return "addr:housenumber" in self.tags and "addr:street" in self.tags

     def display_name(self):
-        for key in 'bridge:name', 'tunnel:name', 'lock_name':
+        for key in "bridge:name", "tunnel:name", "lock_name":
             if key in self.tags:
                 return self.tags[key]

-        return (self.name
-                or self.tags.get("addr:housename")
-                or self.tags.get("inscription"))
+        return (
+            self.name or self.tags.get("addr:housename") or self.tags.get("inscription")
+        )

     def geojson(self):
         return json.loads(self.geojson_str)
@@ -343,7 +507,7 @@ class Line(MapMixin, Base):

     @classmethod
     def get_osm(cls, osm_type, osm_id):
-        src_id = osm_id * {'way': 1, 'relation': -1}[osm_type]
+        src_id = osm_id * {"way": 1, "relation": -1}[osm_type]
         return cls.query.get(src_id)

@@ -352,11 +516,12 @@ class Polygon(MapMixin, Base):

     @classmethod
     def get_osm(cls, osm_type, osm_id):
-        src_id = osm_id * {'way': 1, 'relation': -1}[osm_type]
+        src_id = osm_id * {"way": 1, "relation": -1}[osm_type]
         return cls.query.get(src_id)

     @property
-    def type(self):
+    def type(self) -> str:
+        """Polygon is either a way or a relation."""
         return "way" if self.src_id > 0 else "relation"

     @declared_attr
@@ -364,12 +529,15 @@ class Polygon(MapMixin, Base):
         return column_property(func.ST_Area(cls.way, False), deferred=True)

     @hybrid_property
-    def area_in_sq_km(self):
+    def area_in_sq_km(self) -> float:
+        """Size of area in square km."""
         return self.area / (1000 * 1000)


 class User(Base, UserMixin):
-    __tablename__ = 'user'
+    """User."""
+
+    __tablename__ = "user"
     id = Column(Integer, primary_key=True)
     username = Column(String)
     password = Column(String)
@@ -392,23 +560,27 @@ class User(Base, UserMixin):
     osm_oauth_token = Column(String)
     osm_oauth_token_secret = Column(String)

-    def is_active(self):
+    def is_active(self) -> bool:
+        """User is active."""
         return self.active


 class EditSession(Base):
-    __tablename__ = 'edit_session'
+    __tablename__ = "edit_session"
     id = Column(Integer, primary_key=True)
     user_id = Column(Integer, ForeignKey(User.id))
     created = Column(DateTime, default=now_utc(), nullable=False)
     edit_list = Column(postgresql.JSONB)
     comment = Column(String)

-    user = relationship('User')
-    changeset = relationship('Changeset', back_populates='edit_session', uselist=False)
+    user = relationship("User")
+    changeset = relationship("Changeset", back_populates="edit_session", uselist=False)


 class Changeset(Base):
-    __tablename__ = 'changeset'
+    """An OSM Changeset generated by this tool."""
+
+    __tablename__ = "changeset"
     id = Column(BigInteger, primary_key=True)
     created = Column(DateTime)
     comment = Column(String)
@@ -416,38 +588,62 @@ class Changeset(Base):
     update_count = Column(Integer, nullable=False)
     edit_session_id = Column(Integer, ForeignKey(EditSession.id))

-    user = relationship('User',
-                        backref=backref('changesets',
-                                        lazy='dynamic',
-                                        order_by='Changeset.created.desc()'))
+    user = relationship(
+        "User",
+        backref=backref(
+            "changesets", lazy="dynamic", order_by="Changeset.created.desc()"
+        ),
+    )

-    edit_session = relationship('EditSession', back_populates='changeset')
+    edit_session = relationship("EditSession", back_populates="changeset")


 class ChangesetEdit(Base):
-    __tablename__ = 'changeset_edit'
-    changeset_id = Column(BigInteger,
-                          ForeignKey('changeset.id'),
-                          primary_key=True)
+    """Record details of edits within a changeset."""
+
+    __tablename__ = "changeset_edit"
+
+    changeset_id = Column(BigInteger, ForeignKey("changeset.id"), primary_key=True)
     item_id = Column(Integer, primary_key=True)
     osm_id = Column(BigInteger, primary_key=True)
     osm_type = Column(osm_type_enum, primary_key=True)
     saved = Column(DateTime, default=now_utc(), nullable=False)

-    changeset = relationship('Changeset',
-                             backref=backref('edits', lazy='dynamic'))
+    changeset = relationship("Changeset", backref=backref("edits", lazy="dynamic"))


 class SkipIsA(Base):
-    __tablename__ = 'skip_isa'
-    item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
+    """Ignore this item type when walking the Wikidata subclass graph."""

-    item = relationship('Item')
+    __tablename__ = "skip_isa"
+    item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
+    qid = column_property("Q" + cast(item_id, String))
+
+    item = relationship("Item")


 class ItemExtraKeys(Base):
-    __tablename__ = 'item_extra_keys'
-    item_id = Column(Integer, ForeignKey('item.item_id'), primary_key=True)
+    """Extra tag or key to consider for an Wikidata item type."""
+
+    __tablename__ = "item_extra_keys"
+    item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
     tag_or_key = Column(String, primary_key=True)
     note = Column(String)
+    qid = column_property("Q" + cast(item_id, String))

-    item = relationship('Item')
+    item = relationship("Item")
+
+
+class Extract(Base):
+    """First paragraph from Wikipedia."""
+
+    __tablename__ = "extract"
+
+    item_id = Column(Integer, ForeignKey("item.item_id"), primary_key=True)
+    site = Column(String, primary_key=True)
+    extract = Column(String, nullable=False)
+
+    def __init__(self, site: str, extract: str):
+        """Initialise the object."""
+        self.site = site
+        self.extract = extract
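The new wiki_extracts relationship pairs an attribute-mapped collection (keyed
on site) with an association proxy that exposes just the extract strings. A
hedged usage sketch, not code from the commit; it assumes the database session
is already configured, and Q42 is an arbitrary example QID:

```python
# Minimal sketch of the Extract model plus the Item.extracts association proxy.
from matcher import model

item = model.Item.get_by_qid("Q42")  # Q42 is an arbitrary example

# Assigning through the proxy creates an Extract("enwiki", ...) row: for
# dict-based collections the proxy's creator receives the key and value,
# which matches Extract.__init__(site, extract).
item.extracts["enwiki"] = "<p>First paragraph.</p>"

print(item.extracts["enwiki"])              # "<p>First paragraph.</p>"
print(item.wiki_extracts["enwiki"].site)    # "enwiki", the underlying row
```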
notes (new file, 406 lines)

@@ -0,0 +1,406 @@
# vim: spell:tw=80 ft=markdown

Extracted items from the data dump that include "P625" (with the quotes). There
are 8,398,490 matching items.

Nearest-Neighbour Searching

https://postgis.net/workshops/postgis-intro/knn.html
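A hedged sketch of how that KNN ordering could be used from this project's
SQLAlchemy models; ItemLocation and session come from the matcher package, the
`<->` operator is the PostGIS KNN distance operator from the workshop above,
and SRID 4326 is an assumption:

```python
# Minimal sketch: order ItemLocation rows by KNN distance to a point.
from sqlalchemy import func

from matcher import model
from matcher.database import session


def nearest_items(lon: float, lat: float, limit: int = 10) -> list:
    """Return the ItemLocation rows closest to the given point."""
    point = func.ST_SetSRID(func.ST_MakePoint(lon, lat), 4326)  # assumed SRID
    return (
        session.query(model.ItemLocation)
        .order_by(model.ItemLocation.location.op("<->")(point))
        .limit(limit)
        .all()
    )
```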
---
Use the recent changes API to update the local Wikidata entity mirror.

Need to handle new item, edit, delete, and undelete.

For now we're just interested in items with coordinates; later we might care
about languages and classes.

At some point we might keep track of redirects.

Deletes
-------
Is the item in our database? If not then ignore it, if yes then delete it.

New
---
Download the full entity and check if it contains coordinates; if yes, then add
it to the database, if not then ignore it.

Make a note of item ID and revid. Avoid downloading the item again during the
update.

Edits
-----
If the item is in our database and its lastrevid is larger than the revid of
the change then skip it.

Download the full entity.

If it is in our database and the latest revision includes coordinates, update
the item in the database. If there are no coordinates then delete it from our
database. A sketch of this update loop is below.
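A hedged sketch of that loop, not the project's actual updater. The
recentchanges parameters are standard MediaWiki API; Item, session, and the
get_entity/upsert_item helpers are hypothetical stand-ins for this codebase's
real functions:

```python
# Poll the Wikidata recentchanges API and apply the new/edit/delete rules.
import requests

API_URL = "https://www.wikidata.org/w/api.php"


def get_recent_changes(start: str) -> list[dict]:
    """Fetch recent changes from the Wikidata API, oldest first."""
    params = {
        "action": "query",
        "list": "recentchanges",
        "rcstart": start,
        "rcdir": "newer",
        "rcprop": "title|ids|loginfo",
        "format": "json",
        "formatversion": 2,
    }
    return requests.get(API_URL, params=params).json()["query"]["recentchanges"]


def apply_change(rc: dict) -> None:
    item = Item.get_by_qid(rc["title"])  # hypothetical lookup helper
    if rc["type"] == "log" and rc.get("logaction") == "delete":
        if item:  # deletes: only act if the item is in our database
            session.delete(item)
        return
    if item and item.lastrevid >= rc["revid"]:
        return  # we already have this revision or a newer one: skip
    entity = get_entity(rc["title"])  # hypothetical: download full entity
    has_coords = "P625" in entity.get("claims", {})
    if item and not has_coords:
        session.delete(item)  # an edit removed the coordinates
    elif has_coords:
        upsert_item(entity)  # hypothetical: add or update the mirror copy
```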
======
Currently we have geographic objects represented by the Item class. We also want
information about the type of object, languages, and countries.

How about a hierarchy with Item as the base class and GeoItem as a subclass for
geographical objects? We can also have IsA, Language, and Country classes that
derive from Item.

Countries are a subclass of GeoItem.

With the current design the Item table represents a cached copy of the latest
version of the Wikidata item; no history is stored locally. This makes it hard
to keep track of changes over time.

The same is true of the OSM data: we just keep a copy of the most recent
version.

Instead we could store multiple revisions of Wikidata items. We want the latest
version and any that have been considered part of a match with OSM.

Which Wikidata revisions do we keep?

1. latest revision
2. revision used to generate match
3. revision used in match checked by user

Maybe a separate item revision table is too complex. We could just store JSON
from a match in a table of OSM user uploads.
===
All countries have a P625 statement

===
cable-stayed bridge (Q158555)

There are 786 bridges on OSM tagged with bridge:structure=cable-stayed. Some of
these have a Wikidata tag but aren't tagged as a cable-stayed bridge in
Wikidata. The Wikidata could be updated to tag them as a cable-stayed bridge.
Something similar could be applied to other types.

===
Lots of items with coordinates don't have OSM tags/keys, either because they
don't belong on the map or there isn't enough info in Wikidata.

Need to search different properties for OSM tags, at least 'instance of',
'use', 'sport' and 'religion'.

Should start from items with an OSM tag first. Download all items with OSM tag,
then walk subclass tree and download.

===
Test out a new design.

===
Make a status page that shows the size of each database table.
===
What should URLs look like, say I want to show lakes in Lapland?

https://osm.wikidata.link/matches?isa=Lake&location=Lapland
===
OSM & Wikidata pin map TODO list

IsA list should support filtering

===
2021-06-17

Candidate list should show street address. For example:

https://alpha.osm.wikidata.link/map/17/40.01461/-105.28196?item=Q42384818
---
Preset could be more specific. For example mosque instead of place of worship.

id-tagging-schema/data/presets/amenity/place_of_worship/muslim.json
---
candidates list should show object tags
===
2021-06-19

* Rename from 'alpha' to 'v2'.
* Use Flask-Babel for i18n. Get translations from
  https://www.microsoft.com/en-us/language/
* Show total number of items
* Show place name
* Show aliases

===
2021-06-23

Planning to update user IP location code. Should grab items within city or
region. Need to handle an IP that only resolves to a whole country. For example
archive.org is 207.241.224.2, and just returns USA. The USA is too big to apply
the matcher interface to.

When trying to match the whole USA we should show the whole country and
encourage the user to zoom in. Once the
---
Map thoughts. Questions:

What do we show to the user when the site loads?
What happens when the user drags the map?
What happens when the user changes zoom?
How does searching change things?

Starting scenarios:

User arrives with an IP that resolves to a big country, say USA. We show a map
of the whole USA and ask them to zoom in. Once they've zoomed in we can show
the total number of items and the item type facets.

Find item type within Cambridge:

``` SQL
select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num
from item, item_location, planet_osm_polygon
where item.item_id = item_location.item_id and osm_id=-295355 and ST_Covers(way, location) group by isa order by num;
```

Also need to show a facet for items where item type is empty

Find item type within California:
``` SQL
select jsonb_path_query(claims, '$.P31[*].mainsnak.datavalue.value.id') as isa, count(*) as num
from item, item_location, planet_osm_polygon
where item.item_id = item_location.item_id and osm_id=-165475 and ST_Intersects(way, location)
group by isa order by num desc limit 20;
```
This query takes 26.5 seconds.

England item count takes 1.5 seconds.

``` SQL
select count(distinct item_id)
from item_location, planet_osm_polygon
where osm_id=-58447 and ST_Covers(way, location);
```
===
2021-06-25

Library buildings (Q856584) in England. Query takes 3 seconds

``` SQL
select count(*)
from item, item_location, planet_osm_polygon as loc
where loc.osm_id=-58447
and jsonb_path_query_array(claims, '$.P31[*].mainsnak.datavalue.value.id') ? 'Q856584'
and item.item_id = item_location.item_id
and item_location.location && loc.way;
```
===
2021-07-04

TODO
* Better error page than just 500 Internal Server Error.
* Improve handling of Wikidata items without coordinates. Use a different
  colour for the OSM pin. Explain the situation on the item detail page. No
  need to look for matches.
* DONE: Show spinner when looking for nearby OSM candidate matches.
* DONE: Show message if no matches found.
* Add 'building only match' switch
* Two item pins on top of each other is a problem.
2021-07-05

Sometimes the selected OSM matches are incorrect. For example:

https://v2.osm.wikidata.link/map/15/37.31390/-121.86338?item=Q83645632

The item is linked to a node, a way and a relation. The node shows as a pin on
the map, but isn't in the list of possible nearby matches. The way and relation
both show in the list, but aren't selected.

2021-07-07

Logout link should come back to the same map location. Need to record the
location somewhere. Could be in a cookie, constant updating of the logout
URL, or have JavaScript that runs when the user follows the logout link.

Search
Should show a spinner so the user knows something is happening.
Trigger search after the first three characters have been entered.
DONE: Style search hits so they're not so close to the search box

Highlight chosen search result.
Close button to hide search results.
DONE: Zoom to result bounds instead of zoom level 16.
Should you be allowed to search while editing?

DONE: Hide OSM candidate checkboxes if user not logged in.

2021-07-10

Exclude ways that are part of a boundary. Example:

https://v2.osm.wikidata.link/map/18/42.37903/-71.11136?item=Q14715848
2021-07-16

Need better handling for OSM objects with a wikidata tag where the item has no
coordinates.

Viewing a street shows too many yellow pins.
https://v2.osm.wikidata.link/map/15/37.31221/-121.88869?item=Q89545422

2021-07-17
Could match on just name
https://v2.osm.wikidata.link/map/18/50.21789/-5.28079?item=Q5288904

2021-07-18
Florida State Road 922 (Q2433226) is stored as multiple lines in the osm2pgsql
database. Need to rebuild the database with the --multi-geometry option so
there is only one.

2021-07-19
After a save, clicking on another item without closing the edit panel causes
problems. Need to trigger close_edit_list when opening an item if upload_state
is set to 'done'

2021-07-22

Example of a long road: Collins Avenue (Q652775)
https://v2.osm.wikidata.link/map/19/25.86222/-80.12032?item=Q652775

2021-08-04
Use https://vue-select.org/ for the item type filter.
Show an alert with a spinner while the count is running.
Maybe we want to supply the item type filter as JSON and filter in the browser,
no need to hit the server and database.
Write documentation for the API.
Speed up the item detail OSM nearby option.
Use the sidebar to show a list of items in the current view, so the user can
go through the list and check them.
OSM object polygon size is broken

2021-08-05

IsA search

```sql
SELECT 'Q' || item.item_id, item.labels->'en'->>'value' FROM item WHERE
item.claims ? 'P1282' AND lower(jsonb_extract_path_text(item.labels, 'en',
'value')) LIKE lower('%hotel%') AND length(jsonb_extract_path_text(item.labels,
'en', 'value')) < 20;
```

2021-09-11

Notes from Pixel 2

Pin at the centroid of a polygon is too busy, especially with an item that
links to multiple OSM objects. The object outline is already on the map, just
need to connect the outline to Wikidata markers. Could try and work out corners
of rectangular buildings. Should link to the nearest end node for linear
objects.

Show warning when navigating away from map with edits.

See WindowEventHandlers.onbeforeunload

Option to clear edit list.
---
Ignore coordinates with a Google Maps reference. Example:

https://www.wikidata.org/w/index.php?title=Q66228733&oldid=992964237

---
Check history for previous wikidata tags to warn mappers if a wikidata tag
they're adding has previously been removed. A sketch of such a check is below.

Examples:
https://v2.osm.wikidata.link/map/17/52.18211/0.17756?item=Q6717455
and https://www.openstreetmap.org/way/143741201
https://www.openstreetmap.org/way/684624781
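A minimal sketch of that check against the public OSM API 0.6 element-history
endpoint; this is an illustration, not code from this repo:

```python
# Detect a wikidata tag that was present in an earlier version and later removed.
import requests


def wikidata_tag_was_removed(osm_type: str, osm_id: int) -> bool:
    """True if some earlier version had a wikidata tag that a later one dropped,
    e.g. wikidata_tag_was_removed("way", 143741201)."""
    url = f"https://api.openstreetmap.org/api/0.6/{osm_type}/{osm_id}/history.json"
    elements = requests.get(url).json()["elements"]
    seen = False
    for version in elements:  # versions are returned oldest first
        has_tag = "wikidata" in version.get("tags", {})
        if seen and not has_tag:
            return True
        seen = seen or has_tag
    return False
```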
---
What happens when we move the map?

First we check the area visible on the map. If it is too large then there is
nothing we can do; we give up and tell the user they need to zoom in.

Otherwise we send the server a request for a count of the number of items in the
current view. If the count is too high we abort and tell the user to zoom in.

Once we know the area isn't too big and doesn't have too many items we make
three requests to the server. First we make a request for the Wikidata items on
the map and another request for OSM objects with a Wikidata tag on the map. Both
requests run at the same time. Once both requests complete we make another
request to check for missing Wikidata items that were linked from OSM objects.
A sketch of this sequence is below.
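A hedged sketch of that request sequence, written with asyncio/aiohttp for
clarity; the endpoint paths and response shapes are illustrative guesses, not
the site's real API:

```python
# Two concurrent requests, then one follow-up for missing linked items.
import asyncio

import aiohttp

BASE = "https://v2.osm.wikidata.link/api/1"  # assumed base URL


async def fetch(http: aiohttp.ClientSession, url: str, params: dict) -> list:
    async with http.get(url, params=params) as resp:
        return await resp.json()


async def load_map_area(bounds: dict[str, str]) -> dict:
    async with aiohttp.ClientSession() as http:
        # The two independent requests run at the same time.
        items, osm = await asyncio.gather(
            fetch(http, f"{BASE}/items", bounds),
            fetch(http, f"{BASE}/osm_objects", bounds),
        )
        # Then check for Wikidata items linked from OSM but not yet loaded.
        linked = {o["wikidata"] for o in osm if "wikidata" in o}
        missing = linked - {i["qid"] for i in items}
        extra = await fetch(http, f"{BASE}/missing", {"qids": ",".join(missing)})
    return {"items": items + extra, "osm": osm}
```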
---
This is done

https://v2.osm.wikidata.link/map/18/52.23270/0.21560?item=Q55099320
should match: https://www.openstreetmap.org/node/2000849525

Look for Tag:abandoned:railway=station

---
Need better handling for Wikidata redirects.

Example: https://www.openstreetmap.org/way/130458959
https://v2.osm.wikidata.link/map/18/51.36973/-2.81079?item=Q5117357

---
Consider 'OS grid reference'
https://www.wikidata.org/w/index.php?title=Q27082051&oldid=1336630735

---
Check for OpenStreetMap relation ID (P402) in Wikidata

Display on details page. Highlight matching relation. A sketch of reading P402
from an item's claims is below.

example: https://www.wikidata.org/wiki/Q78078847
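A minimal sketch of reading P402 values, reusing the Item.get_claim method from
the matcher/model.py diff above; P402 values are external-id strings:

```python
# Extract OpenStreetMap relation IDs (P402) claimed on a Wikidata item.
from matcher import model


def osm_relation_ids(item: model.Item) -> list[int]:
    """Relation IDs from the item's P402 claims, e.g. [62414]."""
    return [int(v) for v in item.get_claim("P402") if v]
```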
---
|
||||||
|
TODO
|
||||||
|
|
||||||
|
* DONE: Add special code for matching watercourses that works like street matching
|
||||||
|
* DONE: Frontend should catch API errors and show them
|
||||||
|
* DONE: API calls should return errors in JSON
|
||||||
|
|
||||||
|
* Run update code from systemd
|
||||||
|
* Stop Wikidata update code from crashing when it hits an error
|
||||||
|
* Add an option for 'select all' for linear features
|
||||||
|
* Add a note to details page explaining street matching
|
||||||
|
* Upload code to GitHub
|
||||||
|
* Candidates list jumps when first object is selected, because message appears
|
||||||
|
at the top the list. Can be fixed by having a message there and replacing
|
||||||
|
it.
|
||||||
|
|
||||||
|
IsA pages
|
||||||
|
* Flesh out IsA pages
|
||||||
|
* Allow users to add extra tags to IsA
|
||||||
|
* Add option to update IsA
|
||||||
|
|
||||||
|
Type filter
|
||||||
|
* Include type filter QIDs in URL
|
||||||
|
* Move type filter to modal box
|
||||||
|
* Show item type description
|
||||||
|
|
||||||
|
---
|
||||||
|
Show note about relations for tram stops and windfarms
|
||||||
|
|
||||||
|
---
|
||||||
|
Show dissolved, abolished or demolished date (P576)
|
||||||
|
https://map.osm.wikidata.link/map/18/40.74610/-73.99652?item=Q14707174

---

Get subclasses for one item type

``` SQL
select item_id, labels->'en'->'value'
from item
where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath)
      ?| '{"Q718893"}';
```

Get subclasses for items with OSM tag/key

``` SQL
select item_id, labels->'en'->'value'
from item
where jsonb_path_query_array(claims, '$."P279"[*]."mainsnak"."datavalue"."value"."id"'::jsonpath)
      ?| array(select 'Q' || item_id from item where claims ? 'P1282');
```

---

Shipyard results shouldn't include place=city
https://map.osm.wikidata.link/map/18/50.89540/-1.38243?item=Q551401

@@ -4,7 +4,15 @@ export default {
     public: {url: '/', static: true},
     frontend: {url: '/dist'},
   },
-  plugins: ['@snowpack/plugin-vue', '@snowpack/plugin-dotenv'],
+  plugins: [
+    '@snowpack/plugin-vue',
+    '@snowpack/plugin-dotenv',
+    ['snowpack-plugin-cdn-import', {
+      dependencies: pkg.dependencies,
+      enableInDevMode: true,
+      baseUrl: 'https://unpkg.com',
+    }]
+  ],
   routes: [
     /* Enable an SPA Fallback in development: */
     // {"match": "routes", "src": ".*", "dest": "/index.html"},

templates/flash_msg.html (new file, 12 lines)

@@ -0,0 +1,12 @@
+{% with messages = get_flashed_messages() %}
+  {% if messages %}
+    {% for message in messages %}
+      <div class="alert alert-success alert-dismissible" role="alert">
+        <button type="button" class="close" data-dismiss="alert" aria-label="Close">
+          <span aria-hidden="true">×</span>
+        </button>
+        {{ message }}
+      </div>
+    {% endfor %}
+  {% endif %}
+{% endwith %}

templates/show_error.html (new file, 36 lines)

@@ -0,0 +1,36 @@
+{% extends "base.html" %}
+
+{% block style %}
+<link rel="stylesheet" href="{{url_for('static', filename='css/exception.css')}}" />
+{% endblock %}
+
+{% block content %}
+
+<div class="container my-2">
+  <div class="row">
+    <div class="col">
+
+      <h1>Software error: {{ tb.exception_type }}</h1>
+      <div>
+        <pre>{{ tb.exception }}</pre>
+      </div>
+
+      {% set body %}
+URL: {{ request.url }}
+
+{{ tb.plaintext | safe }}
+      {% endset %}
+
+      <p><a class="btn btn-primary btn-lg" role="button" href="https://github.com/EdwardBetts/osm-wikidata/issues/new?title={{ tb.exception | urlencode }}&body={{ body | urlencode }}">Submit as an issue on GitHub</a> (requires an account with GitHub)</p>
+
+      <h2 class="traceback">Traceback <em>(most recent call last)</em></h2>
+      {{ tb.render_summary(include_title=False) | safe }}
+
+      <p>Error in function "{{ tb.frames[-1].function_name }}": {{ last_frame_args | pprint }}</p>
+      <pre>{{ last_frame.locals | pprint }}</pre>
+
+    </div>
+  </div>
+</div>
+
+{% endblock %}

@@ -12,3 +12,6 @@ def test_format_wikibase_time_century():
 
     v = {"time": "+1950-00-00T00:00:00Z", "precision": 7}
     assert utils.format_wikibase_time(v) == "20th century"
+
+    v = {"time": "+1868-01-09T00:00:00Z", "precision": 11}
+    assert utils.format_wikibase_time(v) == "9 January 1868"

update.py (new executable file, 159 lines)

@@ -0,0 +1,159 @@
+#!/usr/bin/python3
+
+"""Download Wikidata recent changes and update items in local database."""
+
+import json
+import typing
+from time import sleep
+
+from matcher import database, model, wikidata, wikidata_api
+
+DB_URL = "postgresql:///matcher"
+database.init_db(DB_URL)
+
+entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
+
+
+class Change(typing.TypedDict):
+    """Dict representing an edit in recent changes."""
+
+    title: str
+    timestamp: str
+    redirect: dict[str, typing.Any] | None
+    revid: int
+
+
+def handle_new(change: Change) -> None:
+    """Handle a new Wikidata item from the recent changes feed."""
+    qid = change["title"]
+    ts = change["timestamp"]
+    if change["redirect"]:
+        print(f"{ts}: new item {qid}, since replaced with redirect")
+        return
+    item = model.Item.query.get(qid[1:])  # check if item is already loaded
+    if item:
+        return handle_edit(change)
+
+    entity = wikidata_api.get_entity(qid)
+    if entity["id"] != qid:
+        print(f'redirect {qid} -> {entity["id"]}')
+        return
+
+    if "claims" not in entity:
+        print(qid)
+        print(entity)
+    coords = wikidata.get_entity_coords(entity["claims"])
+    if not coords:
+        print(f"{ts}: new item {qid} without coordinates")
+        return
+
+    print(f"{ts}: new item {qid} with coordinates")
+
+    item_id = int(qid[1:])
+    obj = {k: v for k, v in entity.items() if k in entity_keys}
+    try:
+        item = model.Item(item_id=item_id, **obj)
+    except TypeError:
+        print(qid)
+        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
+        print(entity.keys())
+        raise
+    item.locations = model.location_objects(coords)
+    database.session.add(item)
+
+
+def coords_equal(a: dict[str, typing.Any], b: dict[str, typing.Any]) -> bool:
+    """Deep equality comparison of nested dicts."""
+    return json.dumps(a, sort_keys=True) == json.dumps(b, sort_keys=True)
+
+
+def handle_edit(change: Change) -> None:
+    """Process an edit from recent changes."""
+    qid = change["title"]
+    item = model.Item.query.get(qid[1:])
+    if not item:
+        return  # item isn't in our database so it probably has no coordinates
+
+    ts = change["timestamp"]
+
+    if item.lastrevid >= change["revid"]:
+        print(f"{ts}: no need to update {qid}")
+        return
+
+    entity = wikidata_api.get_entity(qid)
+    entity_qid = entity.pop("id")
+    if entity_qid != qid:
+        print(f"{ts}: item {qid} replaced with redirect")
+        database.session.delete(item)
+        database.session.commit()
+        return
+
+    assert entity_qid == qid
+    existing_coords = wikidata.get_entity_coords(item.claims)
+    if "claims" not in entity:
+        return
+    coords = wikidata.get_entity_coords(entity["claims"])
+
+    if not coords_equal(existing_coords, coords):
+        print(f"{ts}: update item {qid}, including coordinates")
+        item.locations = model.location_objects(coords)
+    else:
+        print(f"{ts}: update item {qid}, no change to coordinates")
+
+    for key in entity_keys:
+        setattr(item, key, entity[key])
+
+
+def update_timestamp(timestamp: str) -> None:
+    """Save timestamp to rc_timestamp."""
+    out = open("rc_timestamp", "w")
+    print(timestamp, file=out)
+    out.close()
+
+
+def update_database() -> None:
+    """Check recent changes and apply updates to local mirror of Wikidata."""
+    with open("rc_timestamp") as f:
+        start = f.read().strip()
+
+    rccontinue = None
+    seen = set()
+    while True:
+        r = wikidata_api.get_recent_changes(rcstart=start, rccontinue=rccontinue)
+
+        reply = r.json()
+        for change in reply["query"]["recentchanges"]:
+            rctype = change["type"]
+            timestamp = change["timestamp"]
+            qid = change["title"]
+            if qid in seen:
+                continue
+
+            if rctype == "new":
+                handle_new(change)
+                seen.add(qid)
+            if rctype == "edit":
+                handle_edit(change)
+                seen.add(qid)
+
+        update_timestamp(timestamp)
+        print("commit")
+        database.session.commit()
+
+        if "continue" not in reply:
+            break
+
+        rccontinue = reply["continue"]["rccontinue"]
+    database.session.commit()
+    print("finished")
+
+
+def main() -> None:
+    """Infinite loop."""
+    while True:
+        update_database()
+        sleep(60)
+
+
+if __name__ == "__main__":
+    main()

web_view.py (372 lines changed)

@@ -1,37 +1,61 @@
 #!/usr/bin/python3
 
-from flask import (Flask, render_template, request, jsonify, redirect, url_for, g,
-                   flash, session, Response, stream_with_context, abort, send_file)
+import json
+import re
+from time import sleep, time
+
+import flask_login
+import GeoIP
+import maxminddb
+import requests
+import sqlalchemy
+from flask import (
+    Flask,
+    Response,
+    abort,
+    flash,
+    g,
+    jsonify,
+    redirect,
+    render_template,
+    request,
+    session,
+    stream_with_context,
+    url_for,
+)
+from lxml import etree
+from requests_oauthlib import OAuth1Session
 from sqlalchemy import func
 from sqlalchemy.sql.expression import update
-from matcher import (nominatim, model, database, commons, wikidata, wikidata_api,
-                     osm_oauth, edit, mail, api, error_mail)
-# from werkzeug.debug.tbtools import get_current_traceback
+
+from matcher import (
+    api,
+    commons,
+    database,
+    edit,
+    error_mail,
+    mail,
+    model,
+    nominatim,
+    osm_oauth,
+    wikidata,
+    wikidata_api,
+)
 from matcher.data import property_map
-from time import time, sleep
-from requests_oauthlib import OAuth1Session
-from lxml import etree
-import werkzeug.exceptions
-import inspect
-import flask_login
-import requests
-import json
-import GeoIP
-import re
-import maxminddb
-import sqlalchemy
+
+# from werkzeug.debug.tbtools import get_current_traceback
 
 srid = 4326
-re_point = re.compile(r'^POINT\((.+) (.+)\)$')
+re_point = re.compile(r"^POINT\((.+) (.+)\)$")
 
 app = Flask(__name__)
 app.debug = True
-app.config.from_object('config.default')
+app.config.from_object("config.default")
 error_mail.setup_error_mail(app)
 
 login_manager = flask_login.LoginManager(app)
-login_manager.login_view = 'login_route'
-osm_api_base = 'https://api.openstreetmap.org/api/0.6'
+login_manager.login_view = "login_route"
+osm_api_base = "https://api.openstreetmap.org/api/0.6"
 
 maxminddb_reader = maxminddb.open_database(app.config["GEOLITE2"])
 
@@ -39,7 +63,7 @@ DB_URL = "postgresql:///matcher"
 database.init_db(DB_URL)
 entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
 
-re_qid = re.compile(r'^Q\d+$')
+re_qid = re.compile(r"^Q\d+$")
 
 
 @app.teardown_appcontext
@@ -51,6 +75,7 @@ def shutdown_session(exception=None):
 def global_user():
     g.user = flask_login.current_user._get_current_object()
 
+
 def dict_repr_values(d):
     return {key: repr(value) for key, value in d.items()}
 
@@ -77,11 +102,13 @@ def dict_repr_values(d):
 # last_frame=last_frame,
 # last_frame_args=last_frame_args), 500
 
+
 def cors_jsonify(*args, **kwargs):
     response = jsonify(*args, **kwargs)
     response.headers["Access-Control-Allow-Origin"] = "*"
     return response
 
+
 def check_for_tagged_qids(qids):
     tagged = set()
     for qid in qids:
@@ -108,12 +135,12 @@ def check_for_tagged_qid(qid):
 def geoip_user_record():
     gi = GeoIP.open(app.config["GEOIP_DATA"], GeoIP.GEOIP_STANDARD)
 
-    remote_ip = request.get('ip', request.remote_addr)
+    remote_ip = request.get("ip", request.remote_addr)
     return gi.record_by_addr(remote_ip)
 
 
 def get_user_location():
-    remote_ip = request.args.get('ip', request.remote_addr)
+    remote_ip = request.args.get("ip", request.remote_addr)
     maxmind = maxminddb_reader.get(remote_ip)
     return maxmind.get("location") if maxmind else None
 
@@ -154,13 +181,15 @@ def isa_page(item_id):
     subclass_list = []
     for s in item.get_claim(subclass_property):
         subclass = api.get_item(s["numeric-id"])
-        subclass_list.append({
-            "qid": s["id"],
-            "item_id": s["numeric-id"],
-            "label": subclass.label(),
-            "description": subclass.description(),
-            "isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
-        })
+        subclass_list.append(
+            {
+                "qid": s["id"],
+                "item_id": s["numeric-id"],
+                "label": subclass.label(),
+                "description": subclass.description(),
+                "isa_page_url": url_for("isa_page", item_id=s["numeric-id"]),
+            }
+        )
 
     tags = api.get_tags_for_isa_item(item)
 
@@ -254,14 +283,16 @@ def map_start_page():
     lat, lon = 42.2917, -85.5872
     radius = 5
 
-    return redirect(url_for(
-        'map_location',
-        lat=f'{lat:.5f}',
-        lon=f'{lon:.5f}',
-        zoom=16,
-        radius=radius,
-        ip=request.args.get('ip'),
-    ))
+    return redirect(
+        url_for(
+            "map_location",
+            lat=f"{lat:.5f}",
+            lon=f"{lon:.5f}",
+            zoom=16,
+            radius=radius,
+            ip=request.args.get("ip"),
+        )
+    )
 
 
 @app.route("/documentation")
@@ -270,16 +301,14 @@ def documentation_page():
     username = user.username if user.is_authenticated else None
 
     return render_template(
-        "documentation.html",
-        active_tab="documentation",
-        username=username
+        "documentation.html", active_tab="documentation", username=username
     )
 
 
 @app.route("/search")
 def search_page():
     loc = get_user_location()
-    q = request.args.get('q')
+    q = request.args.get("q")
 
     user = flask_login.current_user
     username = user.username if user.is_authenticated else None
@@ -296,6 +325,7 @@ def search_page():
         q=q,
     )
 
+
 @app.route("/map/<int:zoom>/<float(signed=True):lat>/<float(signed=True):lon>")
 def map_location(zoom, lat, lon):
     qid = request.args.get("item")
@@ -359,6 +389,36 @@ def lookup_item(item_id):
     return redirect(url)
 
 
+@app.route("/item/Q<int:item_id>")
+def lookup_item(item_id):
+    item = api.get_item(item_id)
+    if not item:
+        # TODO: show nicer page for Wikidata item not found
+        return abort(404)
+
+    try:
+        lat, lon = item.locations[0].get_lat_lon()
+    except IndexError:
+        # TODO: show nicer page for Wikidata item without coordinates
+        return abort(404)
+
+    return render_template(
+        "map.html",
+        active_tab="map",
+        zoom=16,
+        lat=lat,
+        lon=lon,
+        username=get_username(),
+        mode="map",
+        q=None,
+        qid=item.qid,
+        item_type_filter=[],
+    )
+
+    url = url_for("map_location", zoom=16, lat=lat, lon=lon, item=item.qid)
+    return redirect(url)
+
+
 @app.route("/search/map")
 def search_map_page():
     user_lat, user_lon = get_user_location() or (None, None)
@@ -398,10 +458,12 @@ def old_search_page():
 def read_bounds_param():
     return [float(i) for i in request.args["bounds"].split(",")]
 
+
 def read_isa_filter_param():
-    isa_param = request.args.get('isa')
+    isa_param = request.args.get("isa")
     if isa_param:
-        return set(qid.strip() for qid in isa_param.upper().split(','))
+        return set(qid.strip() for qid in isa_param.upper().split(","))
 
+
 @app.route("/api/1/location")
 def show_user_location():
@@ -417,6 +479,7 @@ def api_wikidata_items_count():
     t1 = time() - t0
     return cors_jsonify(success=True, count=count, duration=t1)
 
+
 @app.route("/api/1/isa_search")
 def api_isa_search():
     t0 = time()
@@ -452,6 +515,7 @@ def api_wikidata_items():
     t1 = time() - t0
     return cors_jsonify(success=True, duration=t1, **ret)
 
+
 @app.route("/api/1/place/<osm_type>/<int:osm_id>")
 def api_place_items(osm_type, osm_id):
     t0 = time()
@@ -478,9 +542,7 @@ def api_get_item(item_id):
     detail = api.item_detail(item)
     t1 = time() - t0
 
-    return cors_jsonify(success=True,
-                        duration=t1,
-                        **detail)
+    return cors_jsonify(success=True, duration=t1, **detail)
 
 
 @app.route("/api/1/item/Q<int:item_id>/tags")
@@ -491,25 +553,23 @@ def api_get_item_tags(item_id):
     osm_list = sorted(tags.keys())
     t1 = time() - t0
 
-    return cors_jsonify(success=True,
-                        qid=item.qid,
-                        tag_or_key_list=osm_list,
-                        tag_src=tags,
-                        duration=t1)
+    return cors_jsonify(
+        success=True, qid=item.qid, tag_or_key_list=osm_list, tag_src=tags, duration=t1
+    )
 
 
 def expand_street_name(from_names):
     ret = set(from_names)
     for name in from_names:
-        if any(name.startswith(st) for st in ('St ', 'St. ')):
-            first_space = name.find(' ')
+        if any(name.startswith(st) for st in ("St ", "St. ")):
+            first_space = name.find(" ")
             ret.add("Saint" + name[first_space:])
 
-        if ', ' in name:
+        if ", " in name:
             for n in set(ret):
                 comma = n.find(", ")
                 ret.add(name[:comma])
-        elif '/' in name:
+        elif "/" in name:
             for n in set(ret):
                 ret.extend(part.strip() for part in n.split("/"))
 
@@ -522,14 +582,12 @@ def api_find_osm_candidates(item_id):
     t0 = time()
     item = model.Item.query.get(item_id)
     if not item:
-        return cors_jsonify(success=True,
-                            qid=f'Q{item_id}',
-                            error="item doesn't exist")
+        return cors_jsonify(success=True, qid=f"Q{item_id}", error="item doesn't exist")
 
     if not item.locations:
-        return cors_jsonify(success=True,
-                            qid=f'Q{item_id}',
-                            error="item has no coordinates")
+        return cors_jsonify(
+            success=True, qid=f"Q{item_id}", error="item has no coordinates"
        )
 
     label = item.label()
     item_is_street = item.is_street()
@@ -547,17 +605,15 @@ def api_find_osm_candidates(item_id):
         max_distance = 1_000
         limit = 40
         names = None
-    nearby = api.find_osm_candidates(item,
-                                     limit=limit,
-                                     max_distance=max_distance,
-                                     names=names)
+    nearby = api.find_osm_candidates(
+        item, limit=limit, max_distance=max_distance, names=names
+    )
 
     if (item_is_street or item_is_watercourse) and not nearby:
         # nearby = [osm for osm in nearby if street_name_match(label, osm)]
 
         # try again without name filter
-        nearby = api.find_osm_candidates(item, limit=100,
-                                         max_distance=1_000)
+        nearby = api.find_osm_candidates(item, limit=100, max_distance=1_000)
 
     t1 = time() - t0
     return cors_jsonify(
@@ -565,7 +621,7 @@ def api_find_osm_candidates(item_id):
         qid=item.qid,
         nearby=nearby,
         duration=t1,
-        max_distance=max_distance
+        max_distance=max_distance,
     )
 
 
@@ -574,10 +630,12 @@ def api_missing_wikidata_items():
     t0 = time()
     qids_arg = request.args.get("qids")
     if not qids_arg:
-        return cors_jsonify(success=False,
-                            error="required parameter 'qids' is missing",
-                            items=[],
-                            isa_count=[])
+        return cors_jsonify(
+            success=False,
+            error="required parameter 'qids' is missing",
+            items=[],
+            isa_count=[],
+        )
 
     qids = []
     for qid in qids_arg.upper().split(","):
@@ -593,10 +651,7 @@ def api_missing_wikidata_items():
 
     ret = api.missing_wikidata_items(qids, lat, lon)
     t1 = time() - t0
-    return cors_jsonify(
-        success=True,
-        duration=t1,
-        **ret)
+    return cors_jsonify(success=True, duration=t1, **ret)
 
 
 @app.route("/api/1/search")
@@ -615,20 +670,20 @@ def api_search():
 
     return cors_jsonify(success=True, hits=hits)
 
+
 @app.route("/api/1/polygon/<osm_type>/<int:osm_id>")
 def api_polygon(osm_type, osm_id):
     obj = model.Polygon.get_osm(osm_type, osm_id)
-    return cors_jsonify(successful=True,
-                        osm_type=osm_type,
-                        osm_id=osm_id,
-                        geojson=obj.geojson())
+    return cors_jsonify(
+        successful=True, osm_type=osm_type, osm_id=osm_id, geojson=obj.geojson()
+    )
 
 
 @app.route("/refresh/Q<int:item_id>")
 def refresh_item(item_id):
     assert not model.Item.query.get(item_id)
 
-    qid = f'Q{item_id}'
+    qid = f"Q{item_id}"
     entity = wikidata_api.get_entity(qid)
     entity_qid = entity.pop("id")
     assert qid == entity_qid
@@ -643,100 +698,110 @@ def refresh_item(item_id):
     database.session.add(item)
     database.session.commit()
 
-    return 'done'
+    return "done"
 
-@app.route('/login')
+
+@app.route("/login")
 def login_openstreetmap():
-    return redirect(url_for('start_oauth',
-                            next=request.args.get('next')))
+    return redirect(url_for("start_oauth", next=request.args.get("next")))
 
-@app.route('/logout')
+
+@app.route("/logout")
 def logout():
-    next_url = request.args.get('next') or url_for('map_start_page')
+    next_url = request.args.get("next") or url_for("map_start_page")
     flask_login.logout_user()
-    flash('you are logged out')
+    flash("you are logged out")
     return redirect(next_url)
 
-@app.route('/done/')
+
+@app.route("/done/")
 def done():
-    flash('login successful')
-    return redirect(url_for('map_start_page'))
+    flash("login successful")
+    return redirect(url_for("map_start_page"))
 
-@app.route('/oauth/start')
+
+@app.route("/oauth/start")
 def start_oauth():
-    next_page = request.args.get('next')
+    next_page = request.args.get("next")
     if next_page:
-        session['next'] = next_page
+        session["next"] = next_page
 
-    client_key = app.config['CLIENT_KEY']
-    client_secret = app.config['CLIENT_SECRET']
+    client_key = app.config["CLIENT_KEY"]
+    client_secret = app.config["CLIENT_SECRET"]
 
-    request_token_url = 'https://www.openstreetmap.org/oauth/request_token'
+    request_token_url = "https://www.openstreetmap.org/oauth/request_token"
 
-    callback = url_for('oauth_callback', _external=True)
+    callback = url_for("oauth_callback", _external=True)
 
-    oauth = OAuth1Session(client_key,
-                          client_secret=client_secret,
-                          callback_uri=callback)
+    oauth = OAuth1Session(
+        client_key, client_secret=client_secret, callback_uri=callback
+    )
     fetch_response = oauth.fetch_request_token(request_token_url)
 
-    session['owner_key'] = fetch_response.get('oauth_token')
-    session['owner_secret'] = fetch_response.get('oauth_token_secret')
+    session["owner_key"] = fetch_response.get("oauth_token")
+    session["owner_secret"] = fetch_response.get("oauth_token_secret")
 
-    base_authorization_url = 'https://www.openstreetmap.org/oauth/authorize'
-    authorization_url = oauth.authorization_url(base_authorization_url,
-                                                oauth_consumer_key=client_key)
+    base_authorization_url = "https://www.openstreetmap.org/oauth/authorize"
+    authorization_url = oauth.authorization_url(
+        base_authorization_url, oauth_consumer_key=client_key
+    )
    return redirect(authorization_url)
 
 
 @login_manager.user_loader
 def load_user(user_id):
     return model.User.query.get(user_id)
 
 
 @app.route("/oauth/callback", methods=["GET"])
 def oauth_callback():
-    client_key = app.config['CLIENT_KEY']
-    client_secret = app.config['CLIENT_SECRET']
+    client_key = app.config["CLIENT_KEY"]
+    client_secret = app.config["CLIENT_SECRET"]
 
-    oauth = OAuth1Session(client_key,
-                          client_secret=client_secret,
-                          resource_owner_key=session['owner_key'],
-                          resource_owner_secret=session['owner_secret'])
+    oauth = OAuth1Session(
+        client_key,
+        client_secret=client_secret,
+        resource_owner_key=session["owner_key"],
+        resource_owner_secret=session["owner_secret"],
+    )
 
     oauth_response = oauth.parse_authorization_response(request.url)
-    verifier = oauth_response.get('oauth_verifier')
-    access_token_url = 'https://www.openstreetmap.org/oauth/access_token'
-    oauth = OAuth1Session(client_key,
-                          client_secret=client_secret,
-                          resource_owner_key=session['owner_key'],
-                          resource_owner_secret=session['owner_secret'],
-                          verifier=verifier)
+    verifier = oauth_response.get("oauth_verifier")
+    access_token_url = "https://www.openstreetmap.org/oauth/access_token"
+    oauth = OAuth1Session(
+        client_key,
+        client_secret=client_secret,
+        resource_owner_key=session["owner_key"],
+        resource_owner_secret=session["owner_secret"],
+        verifier=verifier,
+    )
 
     oauth_tokens = oauth.fetch_access_token(access_token_url)
-    session['owner_key'] = oauth_tokens.get('oauth_token')
-    session['owner_secret'] = oauth_tokens.get('oauth_token_secret')
+    session["owner_key"] = oauth_tokens.get("oauth_token")
+    session["owner_secret"] = oauth_tokens.get("oauth_token_secret")
 
-    r = oauth.get(osm_api_base + '/user/details')
+    r = oauth.get(osm_api_base + "/user/details")
     info = osm_oauth.parse_userinfo_call(r.content)
 
-    user = model.User.query.filter_by(osm_id=info['id']).one_or_none()
+    user = model.User.query.filter_by(osm_id=info["id"]).one_or_none()
 
     if user:
-        user.osm_oauth_token = oauth_tokens.get('oauth_token')
-        user.osm_oauth_token_secret = oauth_tokens.get('oauth_token_secret')
+        user.osm_oauth_token = oauth_tokens.get("oauth_token")
+        user.osm_oauth_token_secret = oauth_tokens.get("oauth_token_secret")
     else:
         user = model.User(
-            username=info['username'],
-            description=info['description'],
-            img=info['img'],
-            osm_id=info['id'],
-            osm_account_created=info['account_created'],
+            username=info["username"],
+            description=info["description"],
+            img=info["img"],
+            osm_id=info["id"],
+            osm_account_created=info["account_created"],
             mock_upload=False,
         )
     database.session.add(user)
     database.session.commit()
     flask_login.login_user(user)
 
-    next_page = session.get('next') or url_for('map_start_page')
+    next_page = session.get("next") or url_for("map_start_page")
     return redirect(next_page)
 
 
@@ -744,14 +809,13 @@ def validate_edit_list(edits):
     for e in edits:
         assert model.Item.get_by_qid(e["qid"])
         assert e["op"] in {"add", "remove", "change"}
-        osm_type, _, osm_id = e['osm'].partition('/')
+        osm_type, _, osm_id = e["osm"].partition("/")
         osm_id = int(osm_id)
-        if osm_type == 'node':
+        if osm_type == "node":
             assert model.Point.query.get(osm_id)
         else:
             src_id = osm_id if osm_type == "way" else -osm_id
-            assert (model.Line.query.get(src_id)
-                    or model.Polygon.query.get(src_id))
+            assert model.Line.query.get(src_id) or model.Polygon.query.get(src_id)
 
 
 @app.route("/api/1/edit", methods=["POST"])
@@ -760,9 +824,9 @@ def api_new_edit_session():
     incoming = request.json
 
     validate_edit_list(incoming["edit_list"])
-    es = model.EditSession(user=user,
-                           edit_list=incoming['edit_list'],
-                           comment=incoming['comment'])
+    es = model.EditSession(
+        user=user, edit_list=incoming["edit_list"], comment=incoming["comment"]
+    )
     database.session.add(es)
     database.session.commit()
 
@@ -770,13 +834,14 @@ def api_new_edit_session():
 
     return cors_jsonify(success=True, session_id=session_id)
 
+
 @app.route("/api/1/edit/<int:session_id>", methods=["POST"])
 def api_edit_session(session_id):
     es = model.EditSession.query.get(session_id)
     assert flask_login.current_user.id == es.user_id
     incoming = request.json
 
-    for f in 'edit_list', 'comment':
+    for f in "edit_list", "comment":
         if f not in incoming:
             continue
         setattr(es, f, incoming[f])
@@ -784,21 +849,24 @@ def api_edit_session(session_id):
 
     return cors_jsonify(success=True, session_id=session_id)
 
+
 class VersionMismatch(Exception):
     pass
 
+
 def osm_object(osm_type, osm_id):
     if osm_type == "node":
         return model.Point.query.get(osm_id)
 
-    src_id = int(osm_id) * {'way': 1, 'relation': -1}[osm_type]
+    src_id = int(osm_id) * {"way": 1, "relation": -1}[osm_type]
     for cls in model.Line, model.Polygon:
         obj = cls.query.get(src_id)
         if obj:
             return obj
 
+
 def process_edit(changeset_id, e):
-    osm_type, _, osm_id = e['osm'].partition('/')
+    osm_type, _, osm_id = e["osm"].partition("/")
     qid = e["qid"]
     item_id = qid[1:]
 
@@ -851,9 +919,7 @@ def process_edit(changeset_id, e):
 
     cls = type(osm)
     database.session.execute(
-        update(cls).
-        where(cls.src_id == osm.src_id).
-        values(tags=new_tags)
+        update(cls).where(cls.src_id == osm.src_id).values(tags=new_tags)
    )
 
     db_edit = model.ChangesetEdit(
@@ -867,6 +933,7 @@ def process_edit(changeset_id, e):
 
     return "saved"
 
+
 @app.route("/api/1/save/<int:session_id>")
 def api_save_changeset(session_id):
     assert g.user.is_authenticated
@@ -938,7 +1005,8 @@ def api_real_save_changeset(session_id):
         edit.close_changeset(changeset_id)
         yield send("done")
 
-    return Response(stream_with_context(stream(g.user)), mimetype='text/event-stream')
+    return Response(stream_with_context(stream(g.user)), mimetype="text/event-stream")
+
 
 def api_mock_save_changeset(session_id):
     es = model.EditSession.query.get(session_id)
@@ -948,7 +1016,7 @@ def api_mock_save_changeset(session_id):
         return f"data: {json.dumps(data)}\n\n"
 
     def stream(user):
-        print('stream')
+        print("stream")
         changeset_id = database.session.query(func.max(model.Changeset.id) + 1).scalar()
         sleep(1)
         yield send("open", id=changeset_id)
@@ -956,12 +1024,12 @@ def api_mock_save_changeset(session_id):
 
         update_count = 0
 
-        print('record_changeset', changeset_id)
+        print("record_changeset", changeset_id)
         edit.record_changeset(
             id=changeset_id, user=user, comment=es.comment, update_count=update_count
         )
 
-        print('edits')
+        print("edits")
 
         for num, e in enumerate(es.edit_list):
             print(num, e)
@@ -970,12 +1038,12 @@ def api_mock_save_changeset(session_id):
             yield send("saved", edit=e, num=num)
             sleep(1)
 
-        print('closing')
+        print("closing")
         yield send("closing")
         sleep(1)
         yield send("done")
 
-    return Response(stream(g.user), mimetype='text/event-stream')
+    return Response(stream(g.user), mimetype="text/event-stream")
 
 
 if __name__ == "__main__":