# geocode/tests/test_wikidata_api.py
#
# 149 lines
# 4.5 KiB
# Python

import json
from pathlib import Path
import pytest
import pytest_mock
import requests
import responses
from geocode import headers
from geocode.wikimedia_api_logging import WikimediaApiLogConfig
from geocode.wikidata import (
APIResponseError,
QueryError,
api_call,
mediawiki_error_message,
wdqs,
)
# Number of attempts the tests below expect a failing call to make before the
# error propagates; the retry tests assert one sleep between consecutive
# attempts (max_tries - 1).
# NOTE(review): presumably mirrors the retry limit in geocode.wikidata — confirm.
max_tries = 5
@responses.activate
def test_api_call_retries_on_failure(
    mocker: pytest_mock.plugin.MockerFixture,
) -> None:
    """Check a failing Wikidata API call is retried and then reported by mail."""
    # Swap time.sleep for a no-op mock so the waits between attempts are skipped.
    sleep_mock = mocker.patch("time.sleep", return_value=None)
    admin_mail_mock = mocker.patch("geocode.mail.send_to_admin")

    # Every GET to the API endpoint answers with a plain-text HTTP 400.
    responses.add(
        responses.GET,
        "https://www.wikidata.org/w/api.php",
        body="bad request",
        status=400,
    )

    request_params = {"action": "wbgetentities", "ids": "Q42"}
    with pytest.raises(APIResponseError):
        api_call(request_params)

    # One HTTP request per attempt, with a sleep between consecutive attempts.
    assert len(responses.calls) == max_tries
    assert sleep_mock.call_count == max_tries - 1

    # The first admin mail carries the response body in its message text.
    admin_mail_mock.assert_called()
    first_mail = admin_mail_mock.call_args_list[0]
    expected_args = (
        "Geocode error",
        "Error making Wikidata API call\n\nbad request",
    )
    assert first_mail.args == expected_args
def test_api_call_retries_on_connection_error(
    mocker: pytest_mock.plugin.MockerFixture,
) -> None:
    """Check API calls are retried when the connection itself fails."""
    connection_error = requests.exceptions.ConnectionError

    # Swap time.sleep for a no-op mock so the waits between attempts are skipped.
    sleep_mock = mocker.patch("time.sleep", return_value=None)
    # Make every 'requests.get' call raise the connection error.
    mocker.patch("requests.get", side_effect=connection_error)
    # Stub out the admin mail helper so nothing is sent.
    mocker.patch("geocode.mail.send_to_admin")

    request_params = {"action": "wbgetentities", "ids": "Q42"}
    with pytest.raises(connection_error):
        api_call(request_params)

    # A sleep happens between consecutive attempts, not after the last one.
    assert sleep_mock.call_count == max_tries - 1
@responses.activate
def test_api_call_uses_mediawiki_error_message(
    mocker: pytest_mock.plugin.MockerFixture,
) -> None:
    """Check the MediaWiki error ``info`` text ends up on the raised exception."""
    # Stub the admin mail helper and skip the sleeps between attempts.
    mocker.patch("geocode.mail.send_to_admin")
    mocker.patch("time.sleep", return_value=None)

    # A JSON error body in MediaWiki's {"error": {"code", "info"}} shape.
    error_payload = {"error": {"code": "ratelimited", "info": "Too many requests"}}
    responses.add(
        responses.GET,
        "https://www.wikidata.org/w/api.php",
        json=error_payload,
        status=429,
        headers={"Retry-After": "10"},
    )

    request_params = {"action": "wbgetentities", "ids": "Q42"}
    with pytest.raises(APIResponseError) as raised:
        api_call(request_params)

    error = raised.value
    assert error.detail == "Too many requests"
    assert str(error) == "Wikidata API error (HTTP 429): Too many requests"
def test_mediawiki_error_message_falls_back_to_response_text() -> None:
    """Check a non-JSON error body is returned verbatim as the message."""
    # Build a response by hand; the private _content attribute is populated
    # directly to give it a body without performing a request.
    plain_text_reply = requests.Response()
    plain_text_reply._content = b"Please slow down"
    plain_text_reply.status_code = 429
    plain_text_reply.reason = "Too Many Requests"

    message = mediawiki_error_message(plain_text_reply)

    assert message == "Please slow down"
@responses.activate
def test_api_call_logs_wikimedia_request(
    mocker: pytest_mock.plugin.MockerFixture, tmp_path: Path
) -> None:
    """Check a successful API call writes a JSON metrics record to the log file."""
    user_agent = headers["User-Agent"]
    log_path = tmp_path / "wikimedia-api.jsonl"

    # Point the module's logging configuration at a file inside tmp_path.
    log_config = WikimediaApiLogConfig(
        tool="geocode",
        log_path=log_path,
        user_agent=user_agent,
    )
    mocker.patch("geocode.wikidata.wikimedia_log_config", log_config)

    responses.add(
        responses.GET,
        "https://www.wikidata.org/w/api.php",
        json={"entities": {"Q42": {"id": "Q42"}}},
        status=200,
    )

    api_call({"action": "wbgetentities", "ids": "Q42"})

    # The whole file is parsed as one JSON document, so exactly one record
    # is expected after a single call.
    record = json.loads(log_path.read_text().strip())

    expected_fields = {
        "tool": "geocode",
        "method": "GET",
        "api_host": "www.wikidata.org",
        "path": "/w/api.php",
        "action": "wbgetentities",
        "status_code": 200,
        "user_agent": user_agent,
    }
    # Only the fields listed above are checked; any extra keys are ignored.
    logged_fields = {name: record[name] for name in expected_fields}
    assert logged_fields == expected_fields
@responses.activate
def test_wdqs_retry(mocker: pytest_mock.plugin.MockerFixture) -> None:
    """Test retry for WDQS API calls.

    The SPARQL endpoint is mocked to always answer HTTP 400, so ``wdqs`` is
    expected to give up with ``QueryError`` after sleeping between attempts.
    """
    # Patch 'time.sleep' to instantly return, effectively skipping the sleep
    mocked_sleep = mocker.patch("time.sleep", return_value=None)

    # The registration below only takes effect because of @responses.activate;
    # without the decorator the request would go to the real endpoint.
    responses.add(
        responses.POST,
        "https://query.wikidata.org/bigdata/namespace/wdq/sparql",
        body="bad request",
        status=400,
    )

    with pytest.raises(QueryError):
        wdqs("test query")

    # Uses the module-level max_tries rather than a local copy of the value.
    assert mocked_sleep.call_count == max_tries - 1