Compare commits
No commits in common. "93a4572a5d9dd6d8d81ab80d18cbc90bd3ed863a" and "b430411b7edc4d56bc07fcf1b6eeec9ee21282f3" have entirely different histories.
93a4572a5d
...
b430411b7e
6 changed files with 14 additions and 359 deletions
|
|
@ -15,8 +15,7 @@ def send_to_admin(subject: str, body: str) -> None:
|
||||||
|
|
||||||
msg["Subject"] = subject
|
msg["Subject"] = subject
|
||||||
msg["To"] = ", ".join(app.config["ADMINS"])
|
msg["To"] = ", ".join(app.config["ADMINS"])
|
||||||
mail_from_name = app.config.get("MAIL_FROM_NAME", mail_from)
|
msg["From"] = f'{app.config["MAIL_FROM_NAME"]} <{app.config["MAIL_FROM"]}>'
|
||||||
msg["From"] = f"{mail_from_name} <{mail_from}>"
|
|
||||||
msg["Date"] = formatdate()
|
msg["Date"] = formatdate()
|
||||||
msg["Message-ID"] = make_msgid()
|
msg["Message-ID"] = make_msgid()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,5 @@
|
||||||
"""Wikidata API functions."""
|
"""Wikidata API functions."""
|
||||||
|
|
||||||
import os
|
|
||||||
from pathlib import Path
|
|
||||||
import typing
|
import typing
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
|
|
@ -12,37 +10,10 @@ from flask import render_template
|
||||||
from requests.exceptions import JSONDecodeError, RequestException
|
from requests.exceptions import JSONDecodeError, RequestException
|
||||||
|
|
||||||
from . import headers, mail
|
from . import headers, mail
|
||||||
from .wikimedia_api_logging import WikimediaApiLogConfig, WikimediaRequestTimer
|
|
||||||
|
|
||||||
wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
|
wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
|
||||||
wikidata_api_url = "https://www.wikidata.org/w/api.php"
|
|
||||||
wd_entity = "http://www.wikidata.org/entity/Q"
|
wd_entity = "http://www.wikidata.org/entity/Q"
|
||||||
commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
|
commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
|
||||||
wikimedia_log_config = WikimediaApiLogConfig(
|
|
||||||
tool="geocode",
|
|
||||||
log_path=Path(
|
|
||||||
os.environ.get(
|
|
||||||
"GEOCODE_WIKIMEDIA_API_LOG", "/var/log/geocode/wikimedia-api.jsonl"
|
|
||||||
)
|
|
||||||
),
|
|
||||||
user_agent=headers["User-Agent"],
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def logged_get(url: str, **kwargs: typing.Any) -> requests.Response:
|
|
||||||
"""Make a Wikimedia API request and log one JSONL metric line."""
|
|
||||||
with WikimediaRequestTimer(wikimedia_log_config, "GET", url) as timer:
|
|
||||||
r = requests.get(url, **kwargs)
|
|
||||||
timer.log_response(r.status_code, r.url)
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
def logged_post(url: str, **kwargs: typing.Any) -> requests.Response:
|
|
||||||
"""Make a Wikimedia API request and log one JSONL metric line."""
|
|
||||||
with WikimediaRequestTimer(wikimedia_log_config, "POST", url) as timer:
|
|
||||||
r = requests.post(url, **kwargs)
|
|
||||||
timer.log_response(r.status_code, r.url)
|
|
||||||
return r
|
|
||||||
|
|
||||||
|
|
||||||
def giveup(details: backoff.types.Details) -> None:
|
def giveup(details: backoff.types.Details) -> None:
|
||||||
|
|
@ -50,10 +21,7 @@ def giveup(details: backoff.types.Details) -> None:
|
||||||
last_exception = details["exception"] # type: ignore
|
last_exception = details["exception"] # type: ignore
|
||||||
if last_exception and isinstance(last_exception, APIResponseError):
|
if last_exception and isinstance(last_exception, APIResponseError):
|
||||||
body = f"Error making Wikidata API call\n\n{last_exception.response.text}"
|
body = f"Error making Wikidata API call\n\n{last_exception.response.text}"
|
||||||
try:
|
mail.send_to_admin("Geocode error", body)
|
||||||
mail.send_to_admin("Geocode error", body)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class QueryError(Exception):
|
class QueryError(Exception):
|
||||||
|
|
@ -61,7 +29,6 @@ class QueryError(Exception):
|
||||||
|
|
||||||
def __init__(self, query: str, r: requests.Response):
|
def __init__(self, query: str, r: requests.Response):
|
||||||
"""Init."""
|
"""Init."""
|
||||||
super().__init__(query, r)
|
|
||||||
self.query = query
|
self.query = query
|
||||||
self.r = r
|
self.r = r
|
||||||
|
|
||||||
|
|
@ -69,33 +36,12 @@ class QueryError(Exception):
|
||||||
class APIResponseError(Exception):
|
class APIResponseError(Exception):
|
||||||
"""Custom exception for API errors with response text."""
|
"""Custom exception for API errors with response text."""
|
||||||
|
|
||||||
def __init__(
|
def __init__(self, message: str, response: requests.Response):
|
||||||
self, message: str, response: requests.Response, detail: str | None = None
|
|
||||||
):
|
|
||||||
"""Init."""
|
"""Init."""
|
||||||
super().__init__(message)
|
super().__init__(message)
|
||||||
self.message = message
|
|
||||||
self.detail = detail or message
|
|
||||||
self.response = response
|
self.response = response
|
||||||
|
|
||||||
|
|
||||||
def mediawiki_error_message(
|
|
||||||
response: requests.Response, json_data: dict[str, typing.Any] | None = None
|
|
||||||
) -> str:
|
|
||||||
"""Extract the useful error message from a MediaWiki API response."""
|
|
||||||
if json_data and isinstance(json_data.get("error"), dict):
|
|
||||||
error = json_data["error"]
|
|
||||||
if isinstance(error.get("info"), str):
|
|
||||||
return typing.cast(str, error["info"])
|
|
||||||
if isinstance(error.get("code"), str):
|
|
||||||
return typing.cast(str, error["code"])
|
|
||||||
|
|
||||||
if response.text:
|
|
||||||
return response.text.strip()
|
|
||||||
|
|
||||||
return response.reason or f"HTTP {response.status_code}"
|
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(
|
@backoff.on_exception(
|
||||||
backoff.expo,
|
backoff.expo,
|
||||||
(RequestException, APIResponseError),
|
(RequestException, APIResponseError),
|
||||||
|
|
@ -106,23 +52,12 @@ def api_call(params: dict[str, str | int]) -> dict[str, typing.Any]:
|
||||||
"""Wikidata API call."""
|
"""Wikidata API call."""
|
||||||
api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params}
|
api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params}
|
||||||
try:
|
try:
|
||||||
r = logged_get(
|
r = requests.get(
|
||||||
wikidata_api_url,
|
"https://www.wikidata.org/w/api.php", params=api_params, headers=headers
|
||||||
params=api_params,
|
|
||||||
headers=headers,
|
|
||||||
)
|
)
|
||||||
json_data = typing.cast(dict[str, typing.Any], r.json())
|
return typing.cast(dict[str, typing.Any], r.json())
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
detail = mediawiki_error_message(r)
|
raise APIResponseError("Failed to decode JSON", r)
|
||||||
message = f"Wikidata API error (HTTP {r.status_code}): {detail}"
|
|
||||||
raise APIResponseError(message, r, detail)
|
|
||||||
|
|
||||||
if not r.ok or "error" in json_data:
|
|
||||||
detail = mediawiki_error_message(r, json_data)
|
|
||||||
message = f"Wikidata API error (HTTP {r.status_code}): {detail}"
|
|
||||||
raise APIResponseError(message, r, detail)
|
|
||||||
|
|
||||||
return json_data
|
|
||||||
|
|
||||||
|
|
||||||
def get_entity(qid: str) -> dict[str, typing.Any] | None:
|
def get_entity(qid: str) -> dict[str, typing.Any] | None:
|
||||||
|
|
@ -172,16 +107,13 @@ Row = dict[str, dict[str, typing.Any]]
|
||||||
Hit = dict[str, str | int | None]
|
Hit = dict[str, str | int | None]
|
||||||
|
|
||||||
|
|
||||||
@backoff.on_exception(backoff.expo, (QueryError, RequestException), max_tries=5)
|
@backoff.on_exception(backoff.expo, QueryError, max_tries=5)
|
||||||
def wdqs(query: str) -> list[Row]:
|
def wdqs(query: str) -> list[Row]:
|
||||||
"""Pass query to the Wikidata Query Service."""
|
"""Pass query to the Wikidata Query Service."""
|
||||||
r = logged_post(
|
r = requests.post(
|
||||||
wikidata_query_api_url, data={"query": query, "format": "json"}, headers=headers
|
wikidata_query_api_url, data={"query": query, "format": "json"}, headers=headers
|
||||||
)
|
)
|
||||||
|
|
||||||
if not r.ok:
|
|
||||||
raise QueryError(query, r)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return typing.cast(list[Row], r.json()["results"]["bindings"])
|
return typing.cast(list[Row], r.json()["results"]["bindings"])
|
||||||
except requests.exceptions.JSONDecodeError:
|
except requests.exceptions.JSONDecodeError:
|
||||||
|
|
|
||||||
|
|
@ -1,169 +0,0 @@
|
||||||
"""JSONL logging helpers for Wikimedia API request metrics."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import socket
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from datetime import UTC, datetime
|
|
||||||
from pathlib import Path
|
|
||||||
from types import TracebackType
|
|
||||||
from urllib.parse import parse_qs, urlparse
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class WikimediaApiLogConfig:
|
|
||||||
"""Configuration for Wikimedia API request logging."""
|
|
||||||
|
|
||||||
tool: str
|
|
||||||
log_path: Path
|
|
||||||
user_agent: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class WikimediaApiRequestMetric:
|
|
||||||
"""Details of one Wikimedia API request."""
|
|
||||||
|
|
||||||
tool: str
|
|
||||||
url: str
|
|
||||||
method: str
|
|
||||||
status_code: int | None
|
|
||||||
elapsed_ms: int
|
|
||||||
user_agent: str
|
|
||||||
error: str | None = None
|
|
||||||
|
|
||||||
|
|
||||||
_logger_cache: dict[Path, logging.Logger] = {}
|
|
||||||
|
|
||||||
|
|
||||||
def setup_wikimedia_api_logger(log_path: Path) -> logging.Logger:
|
|
||||||
"""Create a JSONL logger for Wikimedia API request metrics."""
|
|
||||||
if log_path in _logger_cache:
|
|
||||||
return _logger_cache[log_path]
|
|
||||||
|
|
||||||
logger_name = f"wikimedia_api_metrics.{log_path}"
|
|
||||||
logger = logging.getLogger(logger_name)
|
|
||||||
logger.setLevel(logging.INFO)
|
|
||||||
logger.propagate = False
|
|
||||||
|
|
||||||
if not logger.handlers:
|
|
||||||
try:
|
|
||||||
log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
||||||
handler: logging.Handler = logging.FileHandler(log_path)
|
|
||||||
except OSError:
|
|
||||||
handler = logging.NullHandler()
|
|
||||||
handler.setFormatter(logging.Formatter("%(message)s"))
|
|
||||||
logger.addHandler(handler)
|
|
||||||
|
|
||||||
_logger_cache[log_path] = logger
|
|
||||||
return logger
|
|
||||||
|
|
||||||
|
|
||||||
def get_mediawiki_action(url: str) -> str | None:
|
|
||||||
"""Extract the MediaWiki API action from a URL, if present."""
|
|
||||||
parsed = urlparse(url)
|
|
||||||
query = parse_qs(parsed.query)
|
|
||||||
values = query.get("action")
|
|
||||||
|
|
||||||
if not values:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return values[0]
|
|
||||||
|
|
||||||
|
|
||||||
def build_log_record(metric: WikimediaApiRequestMetric) -> dict[str, object]:
|
|
||||||
"""Build a JSON-serialisable log record for one API request."""
|
|
||||||
parsed = urlparse(metric.url)
|
|
||||||
|
|
||||||
record: dict[str, object] = {
|
|
||||||
"ts": datetime.now(UTC).isoformat(),
|
|
||||||
"tool": metric.tool,
|
|
||||||
"host": socket.gethostname(),
|
|
||||||
"pid": os.getpid(),
|
|
||||||
"method": metric.method,
|
|
||||||
"api_host": parsed.netloc,
|
|
||||||
"path": parsed.path,
|
|
||||||
"action": get_mediawiki_action(metric.url),
|
|
||||||
"status_code": metric.status_code,
|
|
||||||
"elapsed_ms": metric.elapsed_ms,
|
|
||||||
"user_agent": metric.user_agent,
|
|
||||||
}
|
|
||||||
|
|
||||||
if metric.error is not None:
|
|
||||||
record["error"] = metric.error
|
|
||||||
|
|
||||||
return record
|
|
||||||
|
|
||||||
|
|
||||||
def log_wikimedia_api_request(
|
|
||||||
logger: logging.Logger,
|
|
||||||
metric: WikimediaApiRequestMetric,
|
|
||||||
) -> None:
|
|
||||||
"""Write one Wikimedia API request metric as a JSONL log line."""
|
|
||||||
record = build_log_record(metric)
|
|
||||||
logger.info(json.dumps(record, separators=(",", ":"), sort_keys=True))
|
|
||||||
|
|
||||||
|
|
||||||
class WikimediaRequestTimer:
|
|
||||||
"""Context manager for timing and logging a Wikimedia API request."""
|
|
||||||
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
config: WikimediaApiLogConfig,
|
|
||||||
method: str,
|
|
||||||
url: str,
|
|
||||||
) -> None:
|
|
||||||
self.config = config
|
|
||||||
self.method = method
|
|
||||||
self.url = url
|
|
||||||
self.started = 0.0
|
|
||||||
self.logger = setup_wikimedia_api_logger(config.log_path)
|
|
||||||
|
|
||||||
def __enter__(self) -> "WikimediaRequestTimer":
|
|
||||||
"""Start timing a request."""
|
|
||||||
self.started = time.monotonic()
|
|
||||||
return self
|
|
||||||
|
|
||||||
def __exit__(
|
|
||||||
self,
|
|
||||||
exc_type: type[BaseException] | None,
|
|
||||||
exc: BaseException | None,
|
|
||||||
traceback: TracebackType | None,
|
|
||||||
) -> bool:
|
|
||||||
"""Log failed requests when an exception escapes."""
|
|
||||||
if exc is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
elapsed_ms = int((time.monotonic() - self.started) * 1000)
|
|
||||||
|
|
||||||
log_wikimedia_api_request(
|
|
||||||
self.logger,
|
|
||||||
WikimediaApiRequestMetric(
|
|
||||||
tool=self.config.tool,
|
|
||||||
url=self.url,
|
|
||||||
method=self.method,
|
|
||||||
status_code=None,
|
|
||||||
elapsed_ms=elapsed_ms,
|
|
||||||
user_agent=self.config.user_agent,
|
|
||||||
error=type(exc).__name__,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
def log_response(self, status_code: int, final_url: str | None = None) -> None:
|
|
||||||
"""Log a completed request."""
|
|
||||||
elapsed_ms = int((time.monotonic() - self.started) * 1000)
|
|
||||||
|
|
||||||
log_wikimedia_api_request(
|
|
||||||
self.logger,
|
|
||||||
WikimediaApiRequestMetric(
|
|
||||||
tool=self.config.tool,
|
|
||||||
url=final_url or self.url,
|
|
||||||
method=self.method,
|
|
||||||
status_code=status_code,
|
|
||||||
elapsed_ms=elapsed_ms,
|
|
||||||
user_agent=self.config.user_agent,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
36
lookup.py
36
lookup.py
|
|
@ -5,12 +5,12 @@ import inspect
|
||||||
import json
|
import json
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
|
import socket
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
import typing
|
import typing
|
||||||
from time import time
|
from time import time
|
||||||
|
|
||||||
import requests.exceptions
|
|
||||||
import sqlalchemy.exc
|
import sqlalchemy.exc
|
||||||
import werkzeug.debug.tbtools
|
import werkzeug.debug.tbtools
|
||||||
from flask import Flask, jsonify, redirect, render_template, request, url_for
|
from flask import Flask, jsonify, redirect, render_template, request, url_for
|
||||||
|
|
@ -376,33 +376,7 @@ def index() -> str | Response:
|
||||||
return jsonify(coords={"lat": lat, "lon": lon}, error=error_msg)
|
return jsonify(coords={"lat": lat, "lon": lon}, error=error_msg)
|
||||||
|
|
||||||
needs_commons = request.args.get("needs_commons", "true").lower() != "false"
|
needs_commons = request.args.get("needs_commons", "true").lower() != "false"
|
||||||
try:
|
result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"]
|
||||||
result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"]
|
|
||||||
except (wikidata.QueryError, wikidata.APIResponseError) as e:
|
|
||||||
r = e.r if isinstance(e, wikidata.QueryError) else e.response
|
|
||||||
if r.status_code == 429:
|
|
||||||
extra = (
|
|
||||||
{"Retry-After": r.headers["Retry-After"]}
|
|
||||||
if "Retry-After" in r.headers
|
|
||||||
else {}
|
|
||||||
)
|
|
||||||
error = (
|
|
||||||
e.detail
|
|
||||||
if isinstance(e, wikidata.APIResponseError)
|
|
||||||
else wikidata.mediawiki_error_message(r)
|
|
||||||
)
|
|
||||||
return jsonify(error=error), 429, extra
|
|
||||||
if isinstance(e, wikidata.APIResponseError):
|
|
||||||
return jsonify(error=e.message), 503
|
|
||||||
return (
|
|
||||||
jsonify(
|
|
||||||
error=f"Wikidata query failed (HTTP {r.status_code}), "
|
|
||||||
+ "please try again later"
|
|
||||||
),
|
|
||||||
503,
|
|
||||||
)
|
|
||||||
except requests.exceptions.RequestException:
|
|
||||||
return jsonify(error="Could not connect to Wikidata, please try again later"), 503
|
|
||||||
result.pop("element", None)
|
result.pop("element", None)
|
||||||
result.pop("geojson", None)
|
result.pop("geojson", None)
|
||||||
if logging_enabled:
|
if logging_enabled:
|
||||||
|
|
@ -411,7 +385,7 @@ def index() -> str | Response:
|
||||||
lat=lat,
|
lat=lat,
|
||||||
lon=lon,
|
lon=lon,
|
||||||
remote_addr=remote_addr,
|
remote_addr=remote_addr,
|
||||||
fqdn=None,
|
fqdn=socket.getfqdn(remote_addr) if remote_addr else None,
|
||||||
result=result,
|
result=result,
|
||||||
response_time_ms=int((time() - t0) * 1000),
|
response_time_ms=int((time() - t0) * 1000),
|
||||||
)
|
)
|
||||||
|
|
@ -474,10 +448,6 @@ def build_detail_page(lat: float, lon: float, needs_commons: bool = True) -> str
|
||||||
except wikidata.QueryError as e:
|
except wikidata.QueryError as e:
|
||||||
query, r = e.args
|
query, r = e.args
|
||||||
return render_template("query_error.html", lat=lat, lon=lon, query=query, r=r)
|
return render_template("query_error.html", lat=lat, lon=lon, query=query, r=r)
|
||||||
except wikidata.APIResponseError as e:
|
|
||||||
return render_template(
|
|
||||||
"query_error.html", lat=lat, lon=lon, error=e.message, r=e.response
|
|
||||||
)
|
|
||||||
|
|
||||||
element = reply["result"].pop("element", None)
|
element = reply["result"].pop("element", None)
|
||||||
geojson = reply["result"].pop("geojson", None)
|
geojson = reply["result"].pop("geojson", None)
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@
|
||||||
<h1 class="mb-1">Geocode to Commons Category</h1>
|
<h1 class="mb-1">Geocode to Commons Category</h1>
|
||||||
<p class="text-muted mb-3">Convert latitude/longitude to a Wikidata item and Wikimedia Commons category.</p>
|
<p class="text-muted mb-3">Convert latitude/longitude to a Wikidata item and Wikimedia Commons category.</p>
|
||||||
|
|
||||||
<form class="d-flex gap-2 align-items-center mb-3" action="{{ url_for('index') }}">
|
<form class="d-flex gap-2 align-items-center mb-3" action="/">
|
||||||
<label for="q" class="text-nowrap text-muted small">Lat, Lon</label>
|
<label for="q" class="text-nowrap text-muted small">Lat, Lon</label>
|
||||||
<input id="q" name="q" class="form-control form-control-sm" style="max-width:18rem" placeholder="e.g. 54.375, -2.999">
|
<input id="q" name="q" class="form-control form-control-sm" style="max-width:18rem" placeholder="e.g. 54.375, -2.999">
|
||||||
<button type="submit" class="btn btn-sm btn-primary">Go</button>
|
<button type="submit" class="btn btn-sm btn-primary">Go</button>
|
||||||
|
|
|
||||||
|
|
@ -1,19 +1,8 @@
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pytest_mock
|
import pytest_mock
|
||||||
import requests
|
import requests
|
||||||
import responses
|
import responses
|
||||||
from geocode import headers
|
from geocode.wikidata import APIResponseError, QueryError, api_call, wdqs
|
||||||
from geocode.wikimedia_api_logging import WikimediaApiLogConfig
|
|
||||||
from geocode.wikidata import (
|
|
||||||
APIResponseError,
|
|
||||||
QueryError,
|
|
||||||
api_call,
|
|
||||||
mediawiki_error_message,
|
|
||||||
wdqs,
|
|
||||||
)
|
|
||||||
|
|
||||||
max_tries = 5
|
max_tries = 5
|
||||||
|
|
||||||
|
|
@ -64,72 +53,6 @@ def test_api_call_retries_on_connection_error(
|
||||||
assert mocked_sleep.call_count == max_tries - 1
|
assert mocked_sleep.call_count == max_tries - 1
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
|
|
||||||
def test_api_call_uses_mediawiki_error_message(
|
|
||||||
mocker: pytest_mock.plugin.MockerFixture,
|
|
||||||
) -> None:
|
|
||||||
"""Test MediaWiki API error messages are preserved."""
|
|
||||||
mocker.patch("time.sleep", return_value=None)
|
|
||||||
mocker.patch("geocode.mail.send_to_admin")
|
|
||||||
|
|
||||||
responses.add(
|
|
||||||
responses.GET,
|
|
||||||
"https://www.wikidata.org/w/api.php",
|
|
||||||
json={"error": {"code": "ratelimited", "info": "Too many requests"}},
|
|
||||||
status=429,
|
|
||||||
headers={"Retry-After": "10"},
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(APIResponseError) as exc_info:
|
|
||||||
api_call({"action": "wbgetentities", "ids": "Q42"})
|
|
||||||
|
|
||||||
assert exc_info.value.detail == "Too many requests"
|
|
||||||
assert str(exc_info.value) == "Wikidata API error (HTTP 429): Too many requests"
|
|
||||||
|
|
||||||
|
|
||||||
def test_mediawiki_error_message_falls_back_to_response_text() -> None:
|
|
||||||
"""Test plain-text MediaWiki API errors are preserved."""
|
|
||||||
response = requests.Response()
|
|
||||||
response.status_code = 429
|
|
||||||
response.reason = "Too Many Requests"
|
|
||||||
response._content = b"Please slow down"
|
|
||||||
|
|
||||||
assert mediawiki_error_message(response) == "Please slow down"
|
|
||||||
|
|
||||||
|
|
||||||
@responses.activate
|
|
||||||
def test_api_call_logs_wikimedia_request(
|
|
||||||
mocker: pytest_mock.plugin.MockerFixture, tmp_path: Path
|
|
||||||
) -> None:
|
|
||||||
"""Test Wikimedia API requests are logged as JSONL metrics."""
|
|
||||||
log_path = tmp_path / "wikimedia-api.jsonl"
|
|
||||||
mocker.patch(
|
|
||||||
"geocode.wikidata.wikimedia_log_config",
|
|
||||||
WikimediaApiLogConfig(
|
|
||||||
tool="geocode",
|
|
||||||
log_path=log_path,
|
|
||||||
user_agent=headers["User-Agent"],
|
|
||||||
),
|
|
||||||
)
|
|
||||||
responses.add(
|
|
||||||
responses.GET,
|
|
||||||
"https://www.wikidata.org/w/api.php",
|
|
||||||
json={"entities": {"Q42": {"id": "Q42"}}},
|
|
||||||
status=200,
|
|
||||||
)
|
|
||||||
|
|
||||||
api_call({"action": "wbgetentities", "ids": "Q42"})
|
|
||||||
|
|
||||||
record = json.loads(log_path.read_text().strip())
|
|
||||||
assert record["tool"] == "geocode"
|
|
||||||
assert record["method"] == "GET"
|
|
||||||
assert record["api_host"] == "www.wikidata.org"
|
|
||||||
assert record["path"] == "/w/api.php"
|
|
||||||
assert record["action"] == "wbgetentities"
|
|
||||||
assert record["status_code"] == 200
|
|
||||||
assert record["user_agent"] == headers["User-Agent"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_wdqs_retry(mocker: pytest_mock.plugin.MockerFixture) -> None:
|
def test_wdqs_retry(mocker: pytest_mock.plugin.MockerFixture) -> None:
|
||||||
"""Test retry for WDQS API calls."""
|
"""Test retry for WDQS API calls."""
|
||||||
# Patch 'time.sleep' to instantly return, effectively skipping the sleep
|
# Patch 'time.sleep' to instantly return, effectively skipping the sleep
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue