Track processed candidates in session and use OAuth for API calls

- Record skips, saves, and no-match results in session["skipped"] so
  revisiting an article resumes past already-checked candidates
- Filter self-links (case-insensitive first letter) from hit list
- Use OAuth session for all API reads when logged in for higher rate limits
- Add "for" template to exclusion list to avoid bad edits
- Improve API error handling with HTTP status codes logged to stderr

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-05-11 12:46:13 +01:00
parent 2c197f5c43
commit 0239b83555
4 changed files with 73 additions and 12 deletions

View file

@ -1,4 +1,5 @@
import re
import sys
import typing
import requests
@ -72,18 +73,30 @@ webpage_error = (
)
def _get_active_session() -> requests.sessions.Session:
    """Choose the HTTP session used for API requests.

    Returns:
        ``g.oauth_session`` when Flask's ``g`` carries a non-None
        ``oauth_session`` attribute, otherwise the result of
        ``get_session()``.
    """
    try:
        from flask import g

        oauth = getattr(g, "oauth_session", None)
    except RuntimeError:
        # NOTE(review): presumably raised when `g` is touched outside a
        # Flask application context -- fall back to the plain session.
        oauth = None

    if oauth is None:
        return get_session()
    return oauth  # type: ignore[return-value]
def api_get(params: StrDict) -> StrDict:
    """Make a GET call to the Wikipedia API and return the decoded JSON.

    The request is sent through the session chosen by
    ``_get_active_session()``.

    Args:
        params: Query parameters sent with the request.

    Returns:
        The decoded JSON body, after it has been passed to
        ``check_for_error``.

    Raises:
        MediawikiError: If the response body is not valid JSON. The message
            is ``webpage_error`` when the body contains that text, otherwise
            it reports the HTTP status code of the response.
    """
    s = _get_active_session()
    r = s.get(get_query_url(), params=params)
    try:
        ret: StrDict = r.json()
    except JSONDecodeError:
        # Write the status and raw body to stderr before raising, so a
        # non-JSON reply can be diagnosed from the logs.
        print(f"API request failed: HTTP {r.status_code}", file=sys.stderr)
        print(f"Response body: {r.text!r}", file=sys.stderr)
        if webpage_error in r.text:
            raise MediawikiError(webpage_error)
        raise MediawikiError(
            f"HTTP {r.status_code}: unexpected response from Wikipedia API"
        )
    check_for_error(ret)
    return ret
@ -271,7 +284,7 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str:
"rvdifftotext": section_text.strip(),
}
s = get_session()
s = _get_active_session()
r = s.post(get_query_url(), data=data)
try:
ret = r.json()