diff --git a/geocode/mail.py b/geocode/mail.py index 136a373..f9586fa 100644 --- a/geocode/mail.py +++ b/geocode/mail.py @@ -15,8 +15,7 @@ def send_to_admin(subject: str, body: str) -> None: msg["Subject"] = subject msg["To"] = ", ".join(app.config["ADMINS"]) - mail_from_name = app.config.get("MAIL_FROM_NAME", mail_from) - msg["From"] = f"{mail_from_name} <{mail_from}>" + msg["From"] = f'{app.config["MAIL_FROM_NAME"]} <{app.config["MAIL_FROM"]}>' msg["Date"] = formatdate() msg["Message-ID"] = make_msgid() diff --git a/geocode/wikidata.py b/geocode/wikidata.py index 5f7c736..14ea217 100644 --- a/geocode/wikidata.py +++ b/geocode/wikidata.py @@ -1,7 +1,5 @@ """Wikidata API functions.""" -import os -from pathlib import Path import typing import urllib.parse @@ -12,37 +10,10 @@ from flask import render_template from requests.exceptions import JSONDecodeError, RequestException from . import headers, mail -from .wikimedia_api_logging import WikimediaApiLogConfig, WikimediaRequestTimer wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql" -wikidata_api_url = "https://www.wikidata.org/w/api.php" wd_entity = "http://www.wikidata.org/entity/Q" commons_cat_start = "https://commons.wikimedia.org/wiki/Category:" -wikimedia_log_config = WikimediaApiLogConfig( - tool="geocode", - log_path=Path( - os.environ.get( - "GEOCODE_WIKIMEDIA_API_LOG", "/var/log/geocode/wikimedia-api.jsonl" - ) - ), - user_agent=headers["User-Agent"], -) - - -def logged_get(url: str, **kwargs: typing.Any) -> requests.Response: - """Make a Wikimedia API request and log one JSONL metric line.""" - with WikimediaRequestTimer(wikimedia_log_config, "GET", url) as timer: - r = requests.get(url, **kwargs) - timer.log_response(r.status_code, r.url) - return r - - -def logged_post(url: str, **kwargs: typing.Any) -> requests.Response: - """Make a Wikimedia API request and log one JSONL metric line.""" - with WikimediaRequestTimer(wikimedia_log_config, "POST", url) as timer: - r = requests.post(url, **kwargs) - timer.log_response(r.status_code, r.url) - return r def giveup(details: backoff.types.Details) -> None: @@ -50,10 +21,7 @@ def giveup(details: backoff.types.Details) -> None: last_exception = details["exception"] # type: ignore if last_exception and isinstance(last_exception, APIResponseError): body = f"Error making Wikidata API call\n\n{last_exception.response.text}" - try: - mail.send_to_admin("Geocode error", body) - except Exception: - pass + mail.send_to_admin("Geocode error", body) class QueryError(Exception): @@ -61,7 +29,6 @@ class QueryError(Exception): def __init__(self, query: str, r: requests.Response): """Init.""" - super().__init__(query, r) self.query = query self.r = r @@ -69,33 +36,12 @@ class QueryError(Exception): class APIResponseError(Exception): """Custom exception for API errors with response text.""" - def __init__( - self, message: str, response: requests.Response, detail: str | None = None - ): + def __init__(self, message: str, response: requests.Response): """Init.""" super().__init__(message) - self.message = message - self.detail = detail or message self.response = response -def mediawiki_error_message( - response: requests.Response, json_data: dict[str, typing.Any] | None = None -) -> str: - """Extract the useful error message from a MediaWiki API response.""" - if json_data and isinstance(json_data.get("error"), dict): - error = json_data["error"] - if isinstance(error.get("info"), str): - return typing.cast(str, error["info"]) - if isinstance(error.get("code"), str): - return typing.cast(str, error["code"]) - - if response.text: - return response.text.strip() - - return response.reason or f"HTTP {response.status_code}" - - @backoff.on_exception( backoff.expo, (RequestException, APIResponseError), @@ -106,23 +52,12 @@ def api_call(params: dict[str, str | int]) -> dict[str, typing.Any]: """Wikidata API call.""" api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params} try: - r = logged_get( - wikidata_api_url, - params=api_params, - headers=headers, + r = requests.get( + "https://www.wikidata.org/w/api.php", params=api_params, headers=headers ) - json_data = typing.cast(dict[str, typing.Any], r.json()) + return typing.cast(dict[str, typing.Any], r.json()) except JSONDecodeError: - detail = mediawiki_error_message(r) - message = f"Wikidata API error (HTTP {r.status_code}): {detail}" - raise APIResponseError(message, r, detail) - - if not r.ok or "error" in json_data: - detail = mediawiki_error_message(r, json_data) - message = f"Wikidata API error (HTTP {r.status_code}): {detail}" - raise APIResponseError(message, r, detail) - - return json_data + raise APIResponseError("Failed to decode JSON", r) def get_entity(qid: str) -> dict[str, typing.Any] | None: @@ -172,16 +107,13 @@ Row = dict[str, dict[str, typing.Any]] Hit = dict[str, str | int | None] -@backoff.on_exception(backoff.expo, (QueryError, RequestException), max_tries=5) +@backoff.on_exception(backoff.expo, QueryError, max_tries=5) def wdqs(query: str) -> list[Row]: """Pass query to the Wikidata Query Service.""" - r = logged_post( + r = requests.post( wikidata_query_api_url, data={"query": query, "format": "json"}, headers=headers ) - if not r.ok: - raise QueryError(query, r) - try: return typing.cast(list[Row], r.json()["results"]["bindings"]) except requests.exceptions.JSONDecodeError: diff --git a/geocode/wikimedia_api_logging.py b/geocode/wikimedia_api_logging.py deleted file mode 100644 index e4ce9f4..0000000 --- a/geocode/wikimedia_api_logging.py +++ /dev/null @@ -1,169 +0,0 @@ -"""JSONL logging helpers for Wikimedia API request metrics.""" - -import json -import logging -import os -import socket -import time -from dataclasses import dataclass -from datetime import UTC, datetime -from pathlib import Path -from types import TracebackType -from urllib.parse import parse_qs, urlparse - - -@dataclass(frozen=True) -class WikimediaApiLogConfig: - """Configuration for Wikimedia API request logging.""" - - tool: str - log_path: Path - user_agent: str - - -@dataclass(frozen=True) -class WikimediaApiRequestMetric: - """Details of one Wikimedia API request.""" - - tool: str - url: str - method: str - status_code: int | None - elapsed_ms: int - user_agent: str - error: str | None = None - - -_logger_cache: dict[Path, logging.Logger] = {} - - -def setup_wikimedia_api_logger(log_path: Path) -> logging.Logger: - """Create a JSONL logger for Wikimedia API request metrics.""" - if log_path in _logger_cache: - return _logger_cache[log_path] - - logger_name = f"wikimedia_api_metrics.{log_path}" - logger = logging.getLogger(logger_name) - logger.setLevel(logging.INFO) - logger.propagate = False - - if not logger.handlers: - try: - log_path.parent.mkdir(parents=True, exist_ok=True) - handler: logging.Handler = logging.FileHandler(log_path) - except OSError: - handler = logging.NullHandler() - handler.setFormatter(logging.Formatter("%(message)s")) - logger.addHandler(handler) - - _logger_cache[log_path] = logger - return logger - - -def get_mediawiki_action(url: str) -> str | None: - """Extract the MediaWiki API action from a URL, if present.""" - parsed = urlparse(url) - query = parse_qs(parsed.query) - values = query.get("action") - - if not values: - return None - - return values[0] - - -def build_log_record(metric: WikimediaApiRequestMetric) -> dict[str, object]: - """Build a JSON-serialisable log record for one API request.""" - parsed = urlparse(metric.url) - - record: dict[str, object] = { - "ts": datetime.now(UTC).isoformat(), - "tool": metric.tool, - "host": socket.gethostname(), - "pid": os.getpid(), - "method": metric.method, - "api_host": parsed.netloc, - "path": parsed.path, - "action": get_mediawiki_action(metric.url), - "status_code": metric.status_code, - "elapsed_ms": metric.elapsed_ms, - "user_agent": metric.user_agent, - } - - if metric.error is not None: - record["error"] = metric.error - - return record - - -def log_wikimedia_api_request( - logger: logging.Logger, - metric: WikimediaApiRequestMetric, -) -> None: - """Write one Wikimedia API request metric as a JSONL log line.""" - record = build_log_record(metric) - logger.info(json.dumps(record, separators=(",", ":"), sort_keys=True)) - - -class WikimediaRequestTimer: - """Context manager for timing and logging a Wikimedia API request.""" - - def __init__( - self, - config: WikimediaApiLogConfig, - method: str, - url: str, - ) -> None: - self.config = config - self.method = method - self.url = url - self.started = 0.0 - self.logger = setup_wikimedia_api_logger(config.log_path) - - def __enter__(self) -> "WikimediaRequestTimer": - """Start timing a request.""" - self.started = time.monotonic() - return self - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - traceback: TracebackType | None, - ) -> bool: - """Log failed requests when an exception escapes.""" - if exc is None: - return False - - elapsed_ms = int((time.monotonic() - self.started) * 1000) - - log_wikimedia_api_request( - self.logger, - WikimediaApiRequestMetric( - tool=self.config.tool, - url=self.url, - method=self.method, - status_code=None, - elapsed_ms=elapsed_ms, - user_agent=self.config.user_agent, - error=type(exc).__name__, - ), - ) - - return False - - def log_response(self, status_code: int, final_url: str | None = None) -> None: - """Log a completed request.""" - elapsed_ms = int((time.monotonic() - self.started) * 1000) - - log_wikimedia_api_request( - self.logger, - WikimediaApiRequestMetric( - tool=self.config.tool, - url=final_url or self.url, - method=self.method, - status_code=status_code, - elapsed_ms=elapsed_ms, - user_agent=self.config.user_agent, - ), - ) diff --git a/lookup.py b/lookup.py index 9531063..76c1745 100755 --- a/lookup.py +++ b/lookup.py @@ -5,12 +5,12 @@ import inspect import json import random import re +import socket import sys import traceback import typing from time import time -import requests.exceptions import sqlalchemy.exc import werkzeug.debug.tbtools from flask import Flask, jsonify, redirect, render_template, request, url_for @@ -376,33 +376,7 @@ def index() -> str | Response: return jsonify(coords={"lat": lat, "lon": lon}, error=error_msg) needs_commons = request.args.get("needs_commons", "true").lower() != "false" - try: - result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"] - except (wikidata.QueryError, wikidata.APIResponseError) as e: - r = e.r if isinstance(e, wikidata.QueryError) else e.response - if r.status_code == 429: - extra = ( - {"Retry-After": r.headers["Retry-After"]} - if "Retry-After" in r.headers - else {} - ) - error = ( - e.detail - if isinstance(e, wikidata.APIResponseError) - else wikidata.mediawiki_error_message(r) - ) - return jsonify(error=error), 429, extra - if isinstance(e, wikidata.APIResponseError): - return jsonify(error=e.message), 503 - return ( - jsonify( - error=f"Wikidata query failed (HTTP {r.status_code}), " - + "please try again later" - ), - 503, - ) - except requests.exceptions.RequestException: - return jsonify(error="Could not connect to Wikidata, please try again later"), 503 + result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"] result.pop("element", None) result.pop("geojson", None) if logging_enabled: @@ -411,7 +385,7 @@ def index() -> str | Response: lat=lat, lon=lon, remote_addr=remote_addr, - fqdn=None, + fqdn=socket.getfqdn(remote_addr) if remote_addr else None, result=result, response_time_ms=int((time() - t0) * 1000), ) @@ -474,10 +448,6 @@ def build_detail_page(lat: float, lon: float, needs_commons: bool = True) -> str except wikidata.QueryError as e: query, r = e.args return render_template("query_error.html", lat=lat, lon=lon, query=query, r=r) - except wikidata.APIResponseError as e: - return render_template( - "query_error.html", lat=lat, lon=lon, error=e.message, r=e.response - ) element = reply["result"].pop("element", None) geojson = reply["result"].pop("geojson", None) diff --git a/templates/index.html b/templates/index.html index 05d2187..c65c911 100644 --- a/templates/index.html +++ b/templates/index.html @@ -40,7 +40,7 @@
Convert latitude/longitude to a Wikidata item and Wikimedia Commons category.
-