From 413a6e485153ee1d354a3528aa58f53dd5c2ad26 Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Tue, 13 Feb 2024 11:04:32 +0000
Subject: [PATCH] Catch Wikidata API errors and retry

Retry API error calls with exponential backoff. Send mail to admin if
errors continue after retries.

Includes a test.
---
Review notes (this revision was edited by hand, so the commit id in the
mbox header and the post-image blob ids on the index lines are stale):

* mail.py: open the SMTP connection with a context manager so that it is
  closed even when sendmail() raises; read MAIL_FROM and ADMINS once.
* wikidata.py: giveup() now mails the admin for any final exception, not
  only APIResponseError, so exhausted network errors are reported too.
* wikidata.py: api_call() passes a timeout so that a stalled connection
  fails and is retried, keeps the try body down to the JSON decoding and
  chains the original decode error.
* tests: assertions run after the pytest.raises block; added a test for
  the network-failure mail path.

 geocode/mail.py            | 33 +++++++++++++++++++++++
 geocode/wikidata.py        | 54 +++++++++++++++++++++++++++++++++++---
 tests/test_wikidata_api.py | 53 +++++++++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+), 4 deletions(-)
 create mode 100644 geocode/mail.py
 create mode 100644 tests/test_wikidata_api.py

diff --git a/geocode/mail.py b/geocode/mail.py
new file mode 100644
index 0000000..f9586fa
--- /dev/null
+++ b/geocode/mail.py
@@ -0,0 +1,33 @@
+"""Send mail to admin."""
+
+import smtplib
+from email.mime.text import MIMEText
+from email.utils import formatdate, make_msgid
+
+import flask
+
+
+def send_to_admin(subject: str, body: str) -> None:
+    """Send a plain-text e-mail to the configured admins.
+
+    Sender, recipients and SMTP host are read from the current Flask app
+    config (MAIL_FROM, MAIL_FROM_NAME, ADMINS, SMTP_HOST, MAIL_HEADERS).
+    """
+    app = flask.current_app
+    mail_from = app.config["MAIL_FROM"]
+    admins = app.config["ADMINS"]
+    msg = MIMEText(body, "plain", "UTF-8")
+
+    msg["Subject"] = subject
+    msg["To"] = ", ".join(admins)
+    msg["From"] = f'{app.config["MAIL_FROM_NAME"]} <{mail_from}>'
+    msg["Date"] = formatdate()
+    msg["Message-ID"] = make_msgid()
+
+    # extra mail headers from config
+    for header_name, value in app.config.get("MAIL_HEADERS", {}).items():
+        msg[header_name] = value
+
+    # The context manager closes the connection even if sendmail() raises.
+    with smtplib.SMTP(app.config["SMTP_HOST"]) as smtp:
+        smtp.sendmail(mail_from, admins, msg.as_string())
diff --git a/geocode/wikidata.py b/geocode/wikidata.py
index 16edc7f..441d460 100644
--- a/geocode/wikidata.py
+++ b/geocode/wikidata.py
@@ -3,16 +3,36 @@
 import typing
 import urllib.parse
 
+import backoff
+import backoff.types
 import requests
 from flask import render_template
+from requests.exceptions import JSONDecodeError, RequestException
 
-from . import headers
+from . import headers, mail
 
 wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
 wd_entity = "http://www.wikidata.org/entity/Q"
 commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
 
 
+def giveup(details: backoff.types.Details) -> None:
+    """Mail the admin when api_call gives up after its final retry.
+
+    The body carries the response text for an APIResponseError and the repr
+    of the exception for anything else, such as a connection error.
+    """
+    last_exception = details["exception"]  # type: ignore
+    if not last_exception:
+        return
+    if isinstance(last_exception, APIResponseError):
+        detail = last_exception.response.text
+    else:
+        # Other failures (e.g. a refused connection) may have no response.
+        detail = repr(last_exception)
+    mail.send_to_admin("Geocode error", f"Error making Wikidata API call\n\n{detail}")
+
+
 class QueryError(Exception):
     """Query error."""
 
@@ -22,13 +42,39 @@ class QueryError(Exception):
         self.r = r
 
 
+class APIResponseError(Exception):
+    """Wikidata API reply that could not be decoded as JSON."""
+
+    def __init__(self, message: str, response: requests.Response):
+        """Keep the offending response so handlers can report its text."""
+        super().__init__(message)
+        self.response = response
+
+
+@backoff.on_exception(
+    backoff.expo,
+    (RequestException, APIResponseError),
+    max_tries=5,
+    on_giveup=giveup,
+)
 def api_call(params: dict[str, str | int]) -> dict[str, typing.Any]:
-    """Wikidata API call."""
+    """Call the Wikidata API and return the decoded JSON reply.
+
+    Request failures and undecodable replies are retried with exponential
+    backoff for up to five tries; after that giveup() mails the admin and
+    the last exception propagates to the caller.
+    """
     api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params}
     r = requests.get(
-        "https://www.wikidata.org/w/api.php", params=api_params, headers=headers
+        "https://www.wikidata.org/w/api.php",
+        params=api_params,
+        headers=headers,
+        timeout=10,  # seconds; without it a stalled request is never retried
     )
-    return typing.cast(dict[str, typing.Any], r.json())
+    try:
+        return typing.cast(dict[str, typing.Any], r.json())
+    except JSONDecodeError as exc:
+        raise APIResponseError("Failed to decode JSON", r) from exc
 
 
 def get_entity(qid: str) -> dict[str, typing.Any] | None:
diff --git a/tests/test_wikidata_api.py b/tests/test_wikidata_api.py
new file mode 100644
index 0000000..ae14cbc
--- /dev/null
+++ b/tests/test_wikidata_api.py
@@ -0,0 +1,53 @@
+import pytest
+import pytest_mock
+import requests
+import responses
+from geocode.wikidata import APIResponseError, api_call
+
+
+@responses.activate
+def test_api_call_retries_on_failure(mocker: pytest_mock.plugin.MockerFixture) -> None:
+    """Test retry for API calls."""
+    # Patch 'time.sleep' to instantly return, effectively skipping the sleep
+    mocked_sleep = mocker.patch("time.sleep", return_value=None)
+
+    mock_send_mail = mocker.patch("geocode.mail.send_to_admin")
+
+    responses.add(
+        responses.GET,
+        "https://www.wikidata.org/w/api.php",
+        body="bad request",
+        status=400,
+    )
+    with pytest.raises(APIResponseError):
+        api_call({"action": "wbgetentities", "ids": "Q42"})
+
+    # Checked outside the raises block so that the assertions actually run.
+    assert len(responses.calls) == 5  # max_tries on api_call
+    assert mocked_sleep.call_count == 4  # one sleep between consecutive tries
+
+    mock_send_mail.assert_called_once_with(
+        "Geocode error",
+        "Error making Wikidata API call\n\nbad request",
+    )
+
+
+@responses.activate
+def test_api_call_mails_admin_on_network_failure(
+    mocker: pytest_mock.plugin.MockerFixture,
+) -> None:
+    """Test that a connection error is retried and then mailed to the admin."""
+    mocked_sleep = mocker.patch("time.sleep", return_value=None)
+    mock_send_mail = mocker.patch("geocode.mail.send_to_admin")
+
+    responses.add(
+        responses.GET,
+        "https://www.wikidata.org/w/api.php",
+        body=requests.exceptions.ConnectionError("connection refused"),
+    )
+    with pytest.raises(requests.exceptions.ConnectionError):
+        api_call({"action": "wbgetentities", "ids": "Q42"})
+
+    assert mocked_sleep.call_count == 4
+    mock_send_mail.assert_called_once()
+    assert mock_send_mail.call_args[0][0] == "Geocode error"