Compare commits

...

14 commits

7 changed files with 100 additions and 45 deletions

View file

@ -941,7 +941,11 @@ def get_item(item_id: int) -> model.Item | None:
def get_item_street_addresses(item: model.Item) -> list[str]:
"""Hunt for street addresses for the given item."""
street_address = [addr["text"] for addr in item.get_claim("P6375") if addr]
p6375 = item.get_claim("P6375")
assert isinstance(p6375, list)
street_address: list[str] = [
typing.cast(str, addr["text"]) for addr in p6375 if addr
]
if street_address or "P669" not in item.claims:
return street_address
@ -951,6 +955,9 @@ def get_item_street_addresses(item: model.Item) -> list[str]:
qualifiers = claim.get("qualifiers")
if not qualifiers or "P670" not in qualifiers:
continue
if "datavalue" not in qualifiers["P670"][0]: # 'no value' for P670
assert qualifiers["P670"][0]["snaktype"] == "novalue"
continue
number = qualifiers["P670"][0]["datavalue"]["value"]
street_item = get_item(claim["mainsnak"]["datavalue"]["value"]["numeric-id"])

View file

@ -107,7 +107,7 @@ class Item(Base):
aliases = Column(postgresql.JSONB)
sitelinks = Column(postgresql.JSONB)
claims = Column(postgresql.JSONB, nullable=False)
lastrevid = Column(Integer, nullable=False, unique=True)
lastrevid = Column(BigInteger, nullable=False, unique=True)
locations: Mapped[list["ItemLocation"]] = relationship(
"ItemLocation", cascade="all, delete-orphan", backref="item"
)

View file

@ -1,67 +1,82 @@
"""OSM Authentication."""
import json
import typing
from datetime import datetime
from urllib.parse import urlencode
import flask
import lxml.etree
from flask import current_app, g, session
from requests_oauthlib import OAuth1Session
import requests
from requests_oauthlib import OAuth2Session
from . import user_agent_headers
from .model import User
osm_api_base = "https://api.openstreetmap.org/api/0.6"
scope = ["read_prefs", "write_api"]
def api_put_request(path, **kwargs):
user = g.user
assert user.is_authenticated
oauth = OAuth1Session(
current_app.config["CLIENT_KEY"],
client_secret=current_app.config["CLIENT_SECRET"],
resource_owner_key=user.osm_oauth_token,
resource_owner_secret=user.osm_oauth_token_secret,
def get_session() -> OAuth2Session:
    """Build an OAuth2 session for talking to the OSM API.

    The token is taken from the Flask session key ``"oauth_token"``. When that
    key is missing or empty, the token is decoded from the JSON held in
    ``flask.g.user.osm_oauth_token`` and stored back into the Flask session.
    """
    oauth_token = flask.session.get("oauth_token")
    if not oauth_token:
        current_user = flask.g.user
        assert current_user.is_authenticated
        oauth_token = json.loads(current_user.osm_oauth_token)
        flask.session["oauth_token"] = oauth_token

    redirect_uri = flask.url_for("oauth_callback", _external=True)
    client_id = flask.current_app.config["CLIENT_KEY"]
    return OAuth2Session(
        client_id,
        redirect_uri=redirect_uri,
        scope=scope,
        token=oauth_token,
    )
def api_put_request(path: str, **kwargs: typing.Any) -> requests.Response:
    """Issue a PUT to the OSM API endpoint at *path*.

    Extra keyword arguments are forwarded unchanged to the session's
    ``request`` call; the headers come from ``user_agent_headers()``.
    """
    osm = get_session()
    target = osm_api_base + path
    return osm.request("PUT", target, headers=user_agent_headers(), **kwargs)
def api_request(path: str, **params: typing.Any) -> requests.Response:
    """Send an authenticated GET request to the OSM API.

    *path* is appended to ``osm_api_base``; any keyword arguments are
    URL-encoded into the query string. The request is made through the
    OAuth2 session from ``get_session()`` with a 4 second timeout.
    """
    url = osm_api_base + path
    if params:
        url += "?" + urlencode(params)

    oauth = get_session()
    # Identify the application the same way api_put_request does.
    return oauth.get(url, headers=user_agent_headers(), timeout=4)
def parse_iso_date(value: str) -> datetime:
    """Parse a timestamp of the form ``YYYY-MM-DDTHH:MM:SSZ``.

    The trailing ``Z`` is matched literally, so the returned datetime is
    naive (no tzinfo). Raises ValueError when *value* does not match the
    format.
    """
    return datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
def parse_userinfo_call(xml):
def parse_userinfo_call(xml: bytes) -> dict[str, typing.Any]:
"""Parse userinfo call."""
root = lxml.etree.fromstring(xml)
user = root[0]
img = user.find(".//img")
account_created = parse_iso_date(user.get("account_created"))
account_created_date = user.get("account_created")
assert account_created_date
account_created = parse_iso_date(account_created_date)
assert user.tag == "user"
id_str = user.get("id")
assert id_str and isinstance(id_str, str)
return {
"account_created": account_created,
"id": int(user.get("id")),
"id": int(id_str),
"username": user.get("display_name"),
"description": user.findtext(".//description"),
"img": (img.get("href") if img is not None else None),
@ -70,10 +85,10 @@ def parse_userinfo_call(xml):
def get_username() -> str | None:
    """Get username of current user.

    Returns None when the Flask session holds no ``user_id`` or when that
    id no longer matches a row in the users table.
    """
    if "user_id" not in flask.session:
        return None  # not authorized

    user = User.query.get(flask.session["user_id"])
    if user is None:
        return None  # stale session: no such user any more
    return typing.cast(str, user.username)

View file

@ -204,7 +204,7 @@ def format_wikibase_time(v: WikibaseTime) -> str | None:
case 7: # century
century = ((int(t[:5]) - 1) // 100) + 1
ordinal_num = num2words(abs(century), to="ordinal_num")
return f"{ordinal_num} {century}{' BC' if century < 0 else ''}"
return f"{ordinal_num} century{' BC' if century < 0 else ''}"
case 6: # millennium
millennium = ((int(t[:5]) - 1) // 1000) + 1
ordinal_num = num2words(abs(millennium), to="ordinal_num")

View file

@ -1,17 +1,30 @@
"""Test matcher utils."""
from matcher import utils
def test_format_wikibase_time_year() -> None:
    """Test passing a year to format_wikibase_time."""
    v = {"time": "+1950-00-00T00:00:00Z", "precision": 9}
    assert utils.format_wikibase_time(v) == "1950"
def test_format_wikibase_time_century() -> None:
    """Test passing centuries to format_wikibase_time."""
    # A year that ends a century exactly: 800 is still the 8th century.
    v = {"time": "+0800-00-00T00:00:00Z", "precision": 7}
    assert utils.format_wikibase_time(v) == "8th century"

    # A year in the middle of a century.
    v = {"time": "+1950-00-00T00:00:00Z", "precision": 7}
    assert utils.format_wikibase_time(v) == "20th century"
def test_format_wikibase_time_decade() -> None:
    """Test passing a decade to format_wikibase_time."""
    v = {"time": "+1910-00-00T00:00:00Z", "precision": 8}
    assert utils.format_wikibase_time(v) == "1910s"
def test_format_wikibase_time_day() -> None:
    """Check that a precision-11 value is formatted as a full date."""
    day_value = {"time": "+1868-01-09T00:00:00Z", "precision": 11}
    formatted = utils.format_wikibase_time(day_value)
    assert formatted == "9 January 1868"

View file

@ -6,6 +6,8 @@ import json
import typing
from time import sleep
import requests.exceptions
from matcher import model, wikidata, wikidata_api
from matcher.database import init_db, session
@ -81,7 +83,14 @@ def handle_edit(change: Change) -> None:
print(f"{ts}: no need to update {qid}")
return
entity = wikidata_api.get_entity(qid)
for attempt in range(100):
try:
entity = wikidata_api.get_entity(qid)
except requests.exceptions.ConnectionError:
print("connection error, retrying.")
sleep(10)
else:
break
entity_qid = entity.pop("id")
if entity_qid != qid:
print(f"{ts}: item {qid} replaced with redirect")
@ -123,6 +132,15 @@ def update_database() -> None:
r = wikidata_api.get_recent_changes(rcstart=start, rccontinue=rccontinue)
reply = r.json()
if (
"error" in reply
and reply["error"]["code"] == "internal_api_error_DBQueryTimeoutError"
):
print(reply)
sleep(10)
continue
if "query" not in reply:
print(reply)
for change in reply["query"]["recentchanges"]:
rctype = change["type"]
timestamp = change["timestamp"]

View file

@ -244,9 +244,12 @@ def identifier_index():
@app.route("/commons/<filename>")
def get_commons_image(filename):
    """Redirect to a thumbnail of the Commons image *filename*.

    Responds with 404 when *filename* is the literal string "null" or when
    ``commons.image_detail`` returns no entry for it.
    """
    if filename == "null":
        flask.abort(404)

    detail = commons.image_detail([filename], thumbheight=1200, thumbwidth=1200)
    # Check membership before indexing so an unknown file is a 404, not a KeyError.
    if filename not in detail:
        flask.abort(404)
    return flask.redirect(detail[filename]["thumburl"])
@app.route("/identifier/<pid>")
@ -552,7 +555,8 @@ def api_get_item_tags(item_id):
)
def expand_street_name(from_names):
def expand_street_name(from_names: typing.Collection[str]) -> set[str]:
"""Expand street name."""
ret = set(from_names)
for name in from_names:
if any(name.startswith(st) for st in ("St ", "St. ")):
@ -560,12 +564,10 @@ def expand_street_name(from_names):
ret.add("Saint" + name[first_space:])
if ", " in name:
for n in set(ret):
comma = n.find(", ")
ret.add(name[:comma])
comma = name.find(", ")
ret.add(name[:comma])
elif "/" in name:
for n in set(ret):
ret.extend(part.strip() for part in n.split("/"))
ret.update(part.strip() for part in name.split("/"))
ret.update({"The " + name for name in ret if not name.startswith("The ")})
return ret
@ -676,7 +678,7 @@ def api_polygon(osm_type, osm_id):
@app.route("/refresh/Q<int:item_id>")
def refresh_item(item_id: int) -> str:
"""Refresh the local mirror of a Wikidata item."""
existing = model.Item.query.get(item_id)
item = model.Item.query.get(item_id)
qid = f"Q{item_id}"
entity = wikidata_api.get_entity(qid)
@ -686,9 +688,9 @@ def refresh_item(item_id: int) -> str:
coords = wikidata.get_entity_coords(entity["claims"])
obj = {k: v for k, v in entity.items() if k in entity_keys}
if existing:
if item:
for k, v in obj.items():
setattr(model, k, v)
setattr(item, k, v)
else:
item = model.Item(item_id=item_id, **obj)
database.session.add(item)