Fix User-Agent header, timeouts, and JSON error handling
mediawiki_oauth: set User-Agent on all OAuth1Session instances so Wikimedia doesn't reject token and API requests with 403; add a timeout parameter to api_post_request (default 4s).

mediawiki_api: add the APIError exception; wrap .json() in call() to raise APIError with the HTTP status code and the first 200 characters of the response body on decode failure; raise the timeout to 30s for edit POSTs.

api: wrap the .json() call in call_get_diff with the same JSONDecodeError guard, raising MediawikiError with the HTTP status code and the first 200 characters of the response body on failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
da83f0791d
commit
95ca5f755d
3 changed files with 24 additions and 7 deletions
|
|
@ -272,6 +272,10 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str:
|
||||||
}
|
}
|
||||||
|
|
||||||
s = get_session()
|
s = get_session()
|
||||||
ret = s.post(get_query_url(), data=data).json()
|
r = s.post(get_query_url(), data=data)
|
||||||
|
try:
|
||||||
|
ret = r.json()
|
||||||
|
except JSONDecodeError:
|
||||||
|
raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}")
|
||||||
check_for_error(ret)
|
check_for_error(ret)
|
||||||
return typing.cast(str, ret["query"]["pages"][0]["revisions"][0]["diff"]["body"])
|
return typing.cast(str, ret["query"]["pages"][0]["revisions"][0]["diff"]["body"])
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,14 @@ import typing
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
from . import mediawiki_oauth
|
from . import mediawiki_oauth
|
||||||
|
|
||||||
|
|
||||||
|
class APIError(Exception):
|
||||||
|
"""Unexpected response from the MediaWiki API."""
|
||||||
|
|
||||||
wiki_hostname = "en.wikipedia.org"
|
wiki_hostname = "en.wikipedia.org"
|
||||||
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
||||||
user_agent = "add-links/0.1"
|
user_agent = "add-links/0.1"
|
||||||
|
|
@ -27,10 +33,13 @@ def parse_page(enwiki: str) -> dict[str, Any]:
|
||||||
return parse
|
return parse
|
||||||
|
|
||||||
|
|
||||||
def call(params: dict[str, str | int]) -> dict[str, typing.Any]:
|
def call(params: dict[str, str | int], timeout: int = 4) -> dict[str, typing.Any]:
|
||||||
"""Make GET request to mediawiki API."""
|
"""Make GET request to mediawiki API."""
|
||||||
data = mediawiki_oauth.api_post_request(params)
|
r = mediawiki_oauth.api_post_request(params, timeout=timeout)
|
||||||
return cast(dict[str, Any], data.json())
|
try:
|
||||||
|
return cast(dict[str, Any], r.json())
|
||||||
|
except requests.exceptions.JSONDecodeError:
|
||||||
|
raise APIError(f"HTTP {r.status_code}: {r.text[:200]!r}")
|
||||||
|
|
||||||
|
|
||||||
def article_exists(title: str) -> bool:
|
def article_exists(title: str) -> bool:
|
||||||
|
|
@ -92,7 +101,7 @@ def edit_page(
|
||||||
"summary": summary,
|
"summary": summary,
|
||||||
"section": section,
|
"section": section,
|
||||||
}
|
}
|
||||||
ret = call(params)
|
ret = call(params, timeout=30)
|
||||||
if "edit" not in ret:
|
if "edit" not in ret:
|
||||||
print("params")
|
print("params")
|
||||||
pprint(params)
|
pprint(params)
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,8 @@ import requests
|
||||||
from flask import current_app, session
|
from flask import current_app, session
|
||||||
from requests_oauthlib import OAuth1Session
|
from requests_oauthlib import OAuth1Session
|
||||||
|
|
||||||
|
from .api import ua
|
||||||
|
|
||||||
wiki_hostname = "en.wikipedia.org"
|
wiki_hostname = "en.wikipedia.org"
|
||||||
api_url = f"https://{wiki_hostname}/w/api.php"
|
api_url = f"https://{wiki_hostname}/w/api.php"
|
||||||
|
|
||||||
|
|
@ -27,7 +29,7 @@ def get_edit_proxy() -> dict[str, str]:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def api_post_request(params: dict[str, str | int]) -> requests.Response:
|
def api_post_request(params: dict[str, str | int], timeout: int = 4) -> requests.Response:
|
||||||
"""HTTP Post using Oauth."""
|
"""HTTP Post using Oauth."""
|
||||||
app = current_app
|
app = current_app
|
||||||
# url = "https://www.wikidata.org/w/api.php"
|
# url = "https://www.wikidata.org/w/api.php"
|
||||||
|
|
@ -39,8 +41,9 @@ def api_post_request(params: dict[str, str | int]) -> requests.Response:
|
||||||
resource_owner_key=session["owner_key"],
|
resource_owner_key=session["owner_key"],
|
||||||
resource_owner_secret=session["owner_secret"],
|
resource_owner_secret=session["owner_secret"],
|
||||||
)
|
)
|
||||||
|
oauth.headers.update({"User-Agent": ua})
|
||||||
proxies = get_edit_proxy()
|
proxies = get_edit_proxy()
|
||||||
return oauth.post(api_url, data=params, timeout=4, proxies=proxies)
|
return oauth.post(api_url, data=params, timeout=timeout, proxies=proxies)
|
||||||
|
|
||||||
|
|
||||||
def raw_request(params: typing.Mapping[str, str | int]) -> requests.Response:
|
def raw_request(params: typing.Mapping[str, str | int]) -> requests.Response:
|
||||||
|
|
@ -57,6 +60,7 @@ def raw_request(params: typing.Mapping[str, str | int]) -> requests.Response:
|
||||||
resource_owner_key=session["owner_key"],
|
resource_owner_key=session["owner_key"],
|
||||||
resource_owner_secret=session["owner_secret"],
|
resource_owner_secret=session["owner_secret"],
|
||||||
)
|
)
|
||||||
|
oauth.headers.update({"User-Agent": ua})
|
||||||
proxies = get_edit_proxy()
|
proxies = get_edit_proxy()
|
||||||
return oauth.get(
|
return oauth.get(
|
||||||
api_url + "?" + urllib.parse.urlencode(params), timeout=4, proxies=proxies
|
api_url + "?" + urllib.parse.urlencode(params), timeout=4, proxies=proxies
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue