add-links/add_links/mediawiki_oauth.py
Edward Betts 95ca5f755d Fix User-Agent header, timeouts, and JSON error handling
mediawiki_oauth: set User-Agent on all OAuth1Session instances so
Wikimedia doesn't reject token and API requests with 403; add timeout
parameter to api_post_request (default 4s).

mediawiki_api: add APIError exception; wrap .json() in call() to raise
APIError with status code and response body on decode failure; raise
timeout to 30s for edit POSTs.

api: wrap call_get_diff .json() with the same JSONDecodeError guard,
raising MediawikiError with HTTP status and body on failure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-09 18:11:23 +01:00

113 lines
3.2 KiB
Python

"""Wikipedia OAuth."""
import typing
import urllib
from typing import cast
import requests
from flask import current_app, session
from requests_oauthlib import OAuth1Session
from .api import ua
# Hostname of the wiki that the API requests in this module are sent to.
wiki_hostname = "en.wikipedia.org"
# Full URL of that wiki's MediaWiki API endpoint, derived from the hostname.
api_url = f"https://{wiki_hostname}/w/api.php"
class LoginNeeded(Exception):
    """Raised when the session holds no OAuth credentials for the user."""
def get_edit_proxy() -> dict[str, str]:
    """Build the proxy mapping for outgoing requests from the app config.

    Reads ``EDIT_PROXY`` from the current Flask app's config. When it holds a
    truthy value, that value is used for both the ``http`` and ``https``
    schemes; otherwise an empty mapping is returned.
    """
    proxy_url = current_app.config.get("EDIT_PROXY")
    if not proxy_url:
        return {}
    return dict.fromkeys(("http", "https"), proxy_url)
def api_post_request(
    params: dict[str, str | int], timeout: int = 4
) -> requests.Response:
    """Send an OAuth-signed POST request to the MediaWiki API.

    Args:
        params: Form fields sent as the body of the POST.
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        The raw HTTP response from the API endpoint.

    Raises:
        LoginNeeded: If the session lacks the OAuth owner key or secret.
    """
    config = current_app.config
    client_key = config["CLIENT_KEY"]
    client_secret = config["CLIENT_SECRET"]

    # Same check as raw_request(): a missing credential means "not logged
    # in", which callers should see as LoginNeeded rather than a KeyError.
    if "owner_key" not in session or "owner_secret" not in session:
        raise LoginNeeded

    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    # Identify the tool on every request made through this session.
    oauth.headers.update({"User-Agent": ua})

    proxies = get_edit_proxy()
    return oauth.post(api_url, data=params, timeout=timeout, proxies=proxies)
def raw_request(
    params: typing.Mapping[str, str | int], timeout: int = 4
) -> requests.Response:
    """Send an OAuth-signed GET request to the MediaWiki API.

    Args:
        params: Query parameters; they are URL-encoded onto the API URL.
        timeout: Timeout in seconds handed to the HTTP client.

    Returns:
        The raw HTTP response from the API endpoint.

    Raises:
        LoginNeeded: If the session lacks the OAuth owner key or secret.
    """
    # Imported explicitly: a bare ``import urllib`` only exposes
    # ``urllib.parse`` when some other module has already loaded it.
    from urllib.parse import urlencode

    config = current_app.config
    client_key = config["CLIENT_KEY"]
    client_secret = config["CLIENT_SECRET"]

    if "owner_key" not in session or "owner_secret" not in session:
        raise LoginNeeded

    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    # Identify the tool on every request made through this session.
    oauth.headers.update({"User-Agent": ua})

    proxies = get_edit_proxy()
    url = api_url + "?" + urlencode(params)
    return oauth.get(url, timeout=timeout, proxies=proxies)
def api_request(params: typing.Mapping[str, str | int]) -> dict[str, typing.Any]:
    """Make an OAuth-signed API request and return the decoded JSON reply.

    Args:
        params: Query parameters forwarded to ``raw_request``.

    Returns:
        The JSON body of the response.

    Raises:
        ValueError: If the response body is not valid JSON. The HTTP status
            and body are logged before the exception is re-raised.
    """
    r = raw_request(params)
    try:
        reply = r.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses; record what the
        # server actually sent so the failure can be diagnosed from the logs.
        current_app.logger.error(
            "MediaWiki API reply is not JSON (HTTP %s): %s", r.status_code, r.text
        )
        raise
    return cast(dict[str, typing.Any], reply)
def get_token() -> str:
    """Fetch a CSRF token from the MediaWiki API.

    Issues an ``action=query&meta=tokens`` request and returns the
    ``csrftoken`` field of the reply.
    """
    reply = api_request(
        {
            "action": "query",
            "meta": "tokens",
            "format": "json",
            "formatversion": 2,
        }
    )
    return cast(str, reply["query"]["tokens"]["csrftoken"])
def userinfo_call() -> typing.Mapping[str, typing.Any]:
    """Ask the MediaWiki API for the current user's info via OAuth."""
    return api_request({"action": "query", "meta": "userinfo", "format": "json"})
def get_username() -> None | str:
    """Return the logged-in user's name, or None if not logged in.

    The name is cached in the session under ``username`` after the first
    successful lookup, so later calls do not hit the API.

    Returns:
        The username, or None when the session has no OAuth credentials or
        the userinfo reply contains no ``query`` section.
    """
    # Both credentials are needed for the API call below; with only one of
    # them present the user is effectively not logged in.
    if "owner_key" not in session or "owner_secret" not in session:
        return None  # not authorized

    if "username" not in session:
        reply = userinfo_call()
        if "query" not in reply:
            return None
        session["username"] = reply["query"]["userinfo"]["name"]

    return cast(str, session["username"])