Track processed candidates in session and use OAuth for API calls
- Record skips, saves, and no-match results in session["skipped"] so revisiting an article resumes past already-checked candidates
- Filter self-links (case-insensitive first letter) from hit list
- Use OAuth session for all API reads when logged in for higher rate limits
- Add "for" template to exclusion list to avoid bad edits
- Improve API error handling with HTTP status codes logged to stderr

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2c197f5c43
commit
0239b83555
4 changed files with 73 additions and 12 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import sys
|
||||
import typing
|
||||
|
||||
import requests
|
||||
|
|
@ -72,18 +73,30 @@ webpage_error = (
|
|||
)
|
||||
|
||||
|
||||
def _get_active_session() -> requests.sessions.Session:
    """Choose the HTTP session to use for a Wikipedia API request.

    Returns ``flask.g.oauth_session`` when that attribute exists and is not
    ``None``; otherwise returns the plain session from ``get_session()``.
    """
    oauth_session: requests.sessions.Session | None
    try:
        # Imported lazily, inside the guarded region, as in the original.
        from flask import g

        oauth_session = getattr(g, "oauth_session", None)
    except RuntimeError:
        # NOTE(review): presumably raised by Flask when ``g`` is touched
        # without an active application context -- confirm.
        oauth_session = None

    if oauth_session is not None:
        return oauth_session
    return get_session()
|
||||
|
||||
|
||||
def api_get(params: StrDict) -> StrDict:
    """Make call to Wikipedia API.

    Sends a GET request to the URL from ``get_query_url()`` using the session
    chosen by ``_get_active_session()``, decodes the JSON response, passes it
    through ``check_for_error()`` and returns it.

    Args:
        params: Query parameters for the request.

    Returns:
        The decoded JSON response.

    Raises:
        MediawikiError: If the response body is not valid JSON. The message
            is ``webpage_error`` when that text appears in the body,
            otherwise it reports the HTTP status code.
            NOTE(review): ``check_for_error()`` presumably raises for
            API-level errors as well -- confirm against its definition.
    """
    s = _get_active_session()

    r = s.get(get_query_url(), params=params)
    try:
        ret: StrDict = r.json()
    except JSONDecodeError:
        # Log the status and raw body so a non-JSON reply can be diagnosed.
        print(f"API request failed: HTTP {r.status_code}", file=sys.stderr)
        print(f"Response body: {r.text!r}", file=sys.stderr)
        if webpage_error in r.text:
            raise MediawikiError(webpage_error)
        raise MediawikiError(
            f"HTTP {r.status_code}: unexpected response from Wikipedia API"
        )
    check_for_error(ret)
    return ret
|
||||
|
||||
|
|
@ -271,7 +284,7 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str:
|
|||
"rvdifftotext": section_text.strip(),
|
||||
}
|
||||
|
||||
s = get_session()
|
||||
s = _get_active_session()
|
||||
r = s.post(get_query_url(), data=data)
|
||||
try:
|
||||
ret = r.json()
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ re_cite = re.compile(
|
|||
re.I | re.S,
|
||||
)
|
||||
|
||||
# Matches the opening of a template whose name starts with one of the
# excluded words (case-insensitive). The trailing \b means the word must end
# there, so "{{cite web" matches but "{{format" does not.
re_cite_template_start = re.compile(
    r"\{\{(?:cite|citation|short description|gli|defn|annotated link"
    r"|excerpt|main|see|for)\b",
    re.I,
)
|
||||
re_no_param_template = re.compile(r"\{\{[^|{}]+\}\}")
|
||||
re_external_link = re.compile(r"\[https?://[^\]]+\]")
|
||||
# Italic text (work titles in bibliographies). Handles apostrophes in content
|
||||
|
|
|
|||
|
|
@ -99,6 +99,27 @@ def userinfo_call() -> typing.Mapping[str, typing.Any]:
|
|||
return api_request(params)
|
||||
|
||||
|
||||
def get_oauth_session() -> OAuth1Session | None:
    """Build an OAuth1Session for the current user.

    Returns ``None`` unless both ``owner_key`` and ``owner_secret`` are
    present in the Flask session. Otherwise returns a session signed with the
    app's ``CLIENT_KEY`` / ``CLIENT_SECRET`` and the user's stored tokens,
    carrying the module's ``ua`` User-Agent and default query parameters.
    """
    if not ("owner_key" in session and "owner_secret" in session):
        return None

    config = current_app.config
    oauth_session = OAuth1Session(
        config["CLIENT_KEY"],
        client_secret=config["CLIENT_SECRET"],
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    oauth_session.headers.update({"User-Agent": ua})

    # Default parameters sent with every request made through this session.
    default_params = {"format": "json", "action": "query", "formatversion": 2}
    oauth_session.params = typing.cast(dict[str, str | int], default_params)
    return oauth_session
|
||||
|
||||
|
||||
def get_username() -> None | str:
|
||||
"""Get the username or None if not logged in."""
|
||||
if "owner_key" not in session:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue