From 413a6e485153ee1d354a3528aa58f53dd5c2ad26 Mon Sep 17 00:00:00 2001
From: Edward Betts <edward@4angle.com>
Date: Tue, 13 Feb 2024 11:04:32 +0000
Subject: [PATCH] Catch Wikidata API errors and retry

Retry failed Wikidata API calls with exponential backoff.

Send mail to admin if the API response still cannot be decoded after the final retry.

Includes a test.
---
 geocode/mail.py            | 28 +++++++++++++++++++++++++++
 geocode/wikidata.py        | 39 +++++++++++++++++++++++++++++++++-----
 tests/test_wikidata_api.py | 33 ++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+), 5 deletions(-)
 create mode 100644 geocode/mail.py
 create mode 100644 tests/test_wikidata_api.py

diff --git a/geocode/mail.py b/geocode/mail.py
new file mode 100644
index 0000000..f9586fa
--- /dev/null
+++ b/geocode/mail.py
@@ -0,0 +1,28 @@
+"""Send mail to admin."""
+
+import smtplib
+from email.mime.text import MIMEText
+from email.utils import formatdate, make_msgid
+
+import flask
+
+
+def send_to_admin(subject: str, body: str) -> None:
+    """Send a plain-text e-mail to the configured ADMINS via SMTP_HOST."""
+    app = flask.current_app
+    mail_from = app.config["MAIL_FROM"]
+    msg = MIMEText(body, "plain", "UTF-8")
+
+    msg["Subject"] = subject
+    msg["To"] = ", ".join(app.config["ADMINS"])
+    msg["From"] = f'{app.config["MAIL_FROM_NAME"]} <{mail_from}>'
+    msg["Date"] = formatdate()
+    msg["Message-ID"] = make_msgid()
+
+    # extra mail headers from config
+    for header_name, value in app.config.get("MAIL_HEADERS", {}).items():
+        msg[header_name] = value
+
+    # The context manager closes the connection even if sendmail() raises.
+    with smtplib.SMTP(app.config["SMTP_HOST"]) as s:
+        s.sendmail(mail_from, app.config["ADMINS"], msg.as_string())
diff --git a/geocode/wikidata.py b/geocode/wikidata.py
index 16edc7f..441d460 100644
--- a/geocode/wikidata.py
+++ b/geocode/wikidata.py
@@ -3,16 +3,27 @@
 import typing
 import urllib.parse
 
+import backoff
+import backoff.types
 import requests
 from flask import render_template
+from requests.exceptions import JSONDecodeError, RequestException
 
-from . import headers
+from . import headers, mail
 
 wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
 wd_entity = "http://www.wikidata.org/entity/Q"
 commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
 
 
+def giveup(details: backoff.types.Details) -> None:
+    """Mail the admin the response text when retries end in APIResponseError."""
+    failure = details["exception"]  # type: ignore
+    if isinstance(failure, APIResponseError):
+        message = f"Error making Wikidata API call\n\n{failure.response.text}"
+        mail.send_to_admin("Geocode error", message)
+
+
 class QueryError(Exception):
     """Query error."""
 
@@ -22,13 +33,31 @@ class QueryError(Exception):
         self.r = r
 
 
+class APIResponseError(Exception):
+    """API error that keeps hold of the HTTP response that caused it."""
+
+    def __init__(self, message: str, response: requests.Response):
+        """Store the response alongside the usual exception message."""
+        self.response = response
+        super().__init__(message)
+
+
+@backoff.on_exception(
+    backoff.expo,
+    (RequestException, APIResponseError),
+    max_tries=5,
+    on_giveup=giveup,
+)
 def api_call(params: dict[str, str | int]) -> dict[str, typing.Any]:
     """Wikidata API call."""
     api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params}
-    r = requests.get(
-        "https://www.wikidata.org/w/api.php", params=api_params, headers=headers
-    )
-    return typing.cast(dict[str, typing.Any], r.json())
+    url = "https://www.wikidata.org/w/api.php"
+    # Without a timeout a stalled connection would hang and never be retried.
+    r = requests.get(url, params=api_params, headers=headers, timeout=30)
+    try:
+        return typing.cast(dict[str, typing.Any], r.json())
+    except JSONDecodeError as e:
+        raise APIResponseError("Failed to decode JSON", r) from e
 
 
 def get_entity(qid: str) -> dict[str, typing.Any] | None:
diff --git a/tests/test_wikidata_api.py b/tests/test_wikidata_api.py
new file mode 100644
index 0000000..ae14cbc
--- /dev/null
+++ b/tests/test_wikidata_api.py
@@ -0,0 +1,33 @@
+import pytest
+import pytest_mock
+import responses
+from geocode.wikidata import APIResponseError, api_call
+
+
+@responses.activate
+def test_api_call_retries_on_failure(mocker: pytest_mock.plugin.MockerFixture) -> None:
+    """Check api_call retries a bad response, then mails the admin and raises."""
+    # Patch 'time.sleep' so the backoff waits between retries return instantly.
+    mocked_sleep = mocker.patch("time.sleep", return_value=None)
+
+    mock_send_mail = mocker.patch("geocode.mail.send_to_admin")
+
+    # A non-JSON body makes every attempt fail with APIResponseError.
+    responses.add(
+        responses.GET,
+        "https://www.wikidata.org/w/api.php",
+        body="bad request",
+        status=400,
+    )
+    with pytest.raises(APIResponseError):
+        api_call({"action": "wbgetentities", "ids": "Q42"})
+
+    # max_tries=5 on api_call: five requests with a sleep between each pair.
+    assert len(responses.calls) == 5
+    assert mocked_sleep.call_count == 4
+
+    # The giveup handler must notify the admin exactly once.
+    mock_send_mail.assert_called_once_with(
+        "Geocode error",
+        "Error making Wikidata API call\n\nbad request",
+    )