Catch Wikidata API errors and retry
Retry failed API calls with exponential backoff. Send mail to the admin if the errors continue after all retries. Includes a test.
This commit is contained in:
		
							parent
							
								
									747e9dec48
								
							
						
					
					
						commit
						413a6e4851
					
				
							
								
								
									
										28
									
								
								geocode/mail.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								geocode/mail.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,28 @@
 | 
				
			||||||
 | 
					"""Send mail to admin."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import smtplib
 | 
				
			||||||
 | 
					from email.mime.text import MIMEText
 | 
				
			||||||
 | 
					from email.utils import formatdate, make_msgid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import flask
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def send_to_admin(subject: str, body: str) -> None:
					    """Send a plain-text e-mail to the configured administrators.

					    Settings are read from the current Flask application's config:
					    ``MAIL_FROM``, ``MAIL_FROM_NAME``, ``ADMINS`` and ``SMTP_HOST`` are
					    required; ``MAIL_HEADERS`` is an optional mapping of extra headers.

					    Args:
					        subject: Value for the ``Subject`` header.
					        body: Message text, sent as UTF-8 ``text/plain``.

					    Raises:
					        KeyError: If a required config key is missing.
					        Any exception raised by ``smtplib`` while connecting or sending
					        is propagated to the caller.
					    """
					    config = flask.current_app.config
					    mail_from = config["MAIL_FROM"]
					    admins = config["ADMINS"]

					    msg = MIMEText(body, "plain", "UTF-8")
					    msg["Subject"] = subject
					    msg["To"] = ", ".join(admins)
					    msg["From"] = f'{config["MAIL_FROM_NAME"]} <{mail_from}>'
					    msg["Date"] = formatdate()
					    msg["Message-ID"] = make_msgid()

					    # extra mail headers from config
					    for header_name, value in config.get("MAIL_HEADERS", {}).items():
					        msg[header_name] = value

					    # The context manager issues QUIT and closes the socket even when
					    # sendmail() raises, so a failed delivery does not leak the connection.
					    with smtplib.SMTP(config["SMTP_HOST"]) as smtp:
					        smtp.sendmail(mail_from, admins, msg.as_string())
 | 
				
			||||||
| 
						 | 
					@ -3,16 +3,27 @@
 | 
				
			||||||
import typing
 | 
					import typing
 | 
				
			||||||
import urllib.parse
 | 
					import urllib.parse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import backoff
 | 
				
			||||||
 | 
					import backoff.types
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
from flask import render_template
 | 
					from flask import render_template
 | 
				
			||||||
 | 
					from requests.exceptions import JSONDecodeError, RequestException
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from . import headers
 | 
					from . import headers, mail
 | 
				
			||||||
 | 
					
 | 
				
			||||||
wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
 | 
					wikidata_query_api_url = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
 | 
				
			||||||
wd_entity = "http://www.wikidata.org/entity/Q"
 | 
					wd_entity = "http://www.wikidata.org/entity/Q"
 | 
				
			||||||
commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
 | 
					commons_cat_start = "https://commons.wikimedia.org/wiki/Category:"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def giveup(details: backoff.types.Details) -> None:
					    """Mail the admin about the final API response error.

					    Registered as the ``on_giveup`` handler for ``api_call``. Only an
					    ``APIResponseError`` triggers a mail; any other exception is ignored.
					    """
					    error = details["exception"]  # type: ignore
					    if not (error and isinstance(error, APIResponseError)):
					        return

					    # Include the raw response text so the admin can see what the API sent.
					    response_text = error.response.text
					    mail.send_to_admin(
					        "Geocode error",
					        f"Error making Wikidata API call\n\n{response_text}",
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class QueryError(Exception):
 | 
					class QueryError(Exception):
 | 
				
			||||||
    """Query error."""
 | 
					    """Query error."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -22,13 +33,31 @@ class QueryError(Exception):
 | 
				
			||||||
        self.r = r
 | 
					        self.r = r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class APIResponseError(Exception):
					    """API error that keeps hold of the HTTP response that caused it."""

					    def __init__(self, message: str, response: requests.Response):
					        """Record the response and set the exception message."""
					        # Kept so handlers can inspect the response, e.g. its text.
					        self.response = response
					        super().__init__(message)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@backoff.on_exception(
					    backoff.expo,
					    (RequestException, APIResponseError),
					    max_tries=5,
					    on_giveup=giveup,
					)
					def api_call(
					    params: dict[str, str | int], *, timeout: float = 30.0
					) -> dict[str, typing.Any]:
					    """Call the Wikidata API and return the decoded JSON reply.

					    The call is retried with exponential backoff (up to 5 tries) on any
					    ``RequestException`` or ``APIResponseError``; ``giveup`` runs once the
					    retries are exhausted.

					    Args:
					        params: Query parameters for the API. ``format=json`` and
					            ``formatversion=2`` are supplied as defaults and may be
					            overridden by entries in ``params``.
					        timeout: Seconds passed to ``requests.get`` as its timeout. Without
					            one a stalled connection would block forever and never reach
					            the retry logic.

					    Returns:
					        The JSON body of the response.

					    Raises:
					        APIResponseError: If the response body is not valid JSON; the
					            response is attached to the exception.
					        requests.exceptions.RequestException: If the HTTP request fails.
					    """
					    api_params: dict[str, str | int] = {"format": "json", "formatversion": 2, **params}
					    r = requests.get(
					        "https://www.wikidata.org/w/api.php",
					        params=api_params,
					        headers=headers,
					        timeout=timeout,
					    )
					    # Only the decode step is guarded: that is the one place the caught
					    # JSONDecodeError can come from.
					    try:
					        return typing.cast(dict[str, typing.Any], r.json())
					    except JSONDecodeError as exc:
					        raise APIResponseError("Failed to decode JSON", r) from exc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_entity(qid: str) -> dict[str, typing.Any] | None:
 | 
					def get_entity(qid: str) -> dict[str, typing.Any] | None:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										33
									
								
								tests/test_wikidata_api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								tests/test_wikidata_api.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,33 @@
 | 
				
			||||||
 | 
					import pytest
 | 
				
			||||||
 | 
					import pytest_mock
 | 
				
			||||||
 | 
					import responses
 | 
				
			||||||
 | 
					from geocode.wikidata import APIResponseError, api_call
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@responses.activate
					def test_api_call_retries_on_failure(mocker: pytest_mock.plugin.MockerFixture) -> None:
					    """Check that a failing API call is retried and then reported by mail."""
					    # Make 'time.sleep' a no-op so the backoff waits take no real time.
					    sleep_mock = mocker.patch("time.sleep", return_value=None)
					    admin_mail_mock = mocker.patch("geocode.mail.send_to_admin")

					    # Every request gets a non-JSON 400 reply, so each attempt fails.
					    responses.add(
					        method=responses.GET,
					        url="https://www.wikidata.org/w/api.php",
					        body="bad request",
					        status=400,
					    )

					    with pytest.raises(APIResponseError):
					        api_call({"action": "wbgetentities", "ids": "Q42"})

					    assert len(responses.calls) == 5  # Assuming max_tries is 5
					    assert sleep_mock.call_count == 4  # four waits between the five attempts

					    admin_mail_mock.assert_called()

					    # Positional arguments of the first mail: (subject, body).
					    first_call_args = admin_mail_mock.call_args_list[0][0]
					    expected_args = (
					        "Geocode error",
					        "Error making Wikidata API call\n\nbad request",
					    )
					    assert first_call_args == expected_args
 | 
				
			||||||
		Loading…
	
		Reference in a new issue