Track processed candidates in session and use OAuth for API calls
- Record skips, saves, and no-match results in session["skipped"] so that revisiting an article resumes past already-checked candidates
- Filter self-links (case-insensitive first letter) out of the hit list
- Use the OAuth session for all API reads when logged in, for higher rate limits
- Add the "for" template to the exclusion list to avoid bad edits
- Improve API error handling, logging HTTP status codes to stderr

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
2c197f5c43
commit
0239b83555
4 changed files with 73 additions and 12 deletions
|
|
@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
@ -72,18 +73,30 @@ webpage_error = (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_active_session() -> requests.sessions.Session:
    """Pick the HTTP session to use for a Wikipedia API call.

    The OAuth session stored on ``flask.g`` is preferred whenever it is set
    to something other than None. In every other case (attribute missing,
    attribute None, or the lookup raising RuntimeError) the result of
    ``get_session()`` is returned instead.
    """
    try:
        # Imported lazily, as in the original, so the lookup happens per call.
        from flask import g

        oauth_session = getattr(g, "oauth_session", None)
    except RuntimeError:
        # Presumably raised when there is no active Flask application
        # context (e.g. command-line use) -- fall back to the plain session.
        oauth_session = None

    if oauth_session is None:
        return get_session()
    return oauth_session  # type: ignore[return-value]
|
||||||
|
|
||||||
|
|
||||||
def api_get(params: StrDict) -> StrDict:
|
def api_get(params: StrDict) -> StrDict:
|
||||||
"""Make call to Wikipedia API."""
|
"""Make call to Wikipedia API."""
|
||||||
s = get_session()
|
s = _get_active_session()
|
||||||
|
|
||||||
r = s.get(get_query_url(), params=params)
|
r = s.get(get_query_url(), params=params)
|
||||||
try:
|
try:
|
||||||
ret: StrDict = r.json()
|
ret: StrDict = r.json()
|
||||||
except JSONDecodeError:
|
except JSONDecodeError:
|
||||||
|
print(f"API request failed: HTTP {r.status_code}", file=sys.stderr)
|
||||||
|
print(f"Response body: {r.text!r}", file=sys.stderr)
|
||||||
if webpage_error in r.text:
|
if webpage_error in r.text:
|
||||||
raise MediawikiError(webpage_error)
|
raise MediawikiError(webpage_error)
|
||||||
else:
|
raise MediawikiError(f"HTTP {r.status_code}: unexpected response from Wikipedia API")
|
||||||
raise MediawikiError("unknown error")
|
|
||||||
check_for_error(ret)
|
check_for_error(ret)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
|
@ -271,7 +284,7 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str:
|
||||||
"rvdifftotext": section_text.strip(),
|
"rvdifftotext": section_text.strip(),
|
||||||
}
|
}
|
||||||
|
|
||||||
s = get_session()
|
s = _get_active_session()
|
||||||
r = s.post(get_query_url(), data=data)
|
r = s.post(get_query_url(), data=data)
|
||||||
try:
|
try:
|
||||||
ret = r.json()
|
ret = r.json()
|
||||||
|
|
|
||||||
|
|
@ -78,7 +78,7 @@ re_cite = re.compile(
|
||||||
re.I | re.S,
|
re.I | re.S,
|
||||||
)
|
)
|
||||||
|
|
||||||
re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see)\b", re.I)
|
re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see|for)\b", re.I)
|
||||||
re_no_param_template = re.compile(r"\{\{[^|{}]+\}\}")
|
re_no_param_template = re.compile(r"\{\{[^|{}]+\}\}")
|
||||||
re_external_link = re.compile(r"\[https?://[^\]]+\]")
|
re_external_link = re.compile(r"\[https?://[^\]]+\]")
|
||||||
# Italic text (work titles in bibliographies). Handles apostrophes in content
|
# Italic text (work titles in bibliographies). Handles apostrophes in content
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,27 @@ def userinfo_call() -> typing.Mapping[str, typing.Any]:
|
||||||
return api_request(params)
|
return api_request(params)
|
||||||
|
|
||||||
|
|
||||||
|
def get_oauth_session() -> OAuth1Session | None:
    """Build an OAuth1Session signed with the current user's stored tokens.

    Returns None unless both "owner_key" and "owner_secret" are present in
    the session. Otherwise the returned session carries the module's
    User-Agent header and default query parameters (JSON output,
    ``action=query``, ``formatversion=2``).
    """
    if not ("owner_key" in session and "owner_secret" in session):
        return None

    config = current_app.config
    oauth_session = OAuth1Session(
        config["CLIENT_KEY"],
        client_secret=config["CLIENT_SECRET"],
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    oauth_session.headers.update({"User-Agent": ua})

    # Default parameters attached to the session for every request it makes.
    default_params = {"format": "json", "action": "query", "formatversion": 2}
    oauth_session.params = typing.cast(dict[str, str | int], default_params)
    return oauth_session
|
||||||
|
|
||||||
|
|
||||||
def get_username() -> None | str:
|
def get_username() -> None | str:
|
||||||
"""Get the username or None if not logged in."""
|
"""Get the username or None if not logged in."""
|
||||||
if "owner_key" not in session:
|
if "owner_key" not in session:
|
||||||
|
|
|
||||||
41
web_view.py
41
web_view.py
|
|
@ -111,6 +111,7 @@ def search_no_link(q: str) -> tuple[int, list[Hit]]:
|
||||||
def global_user() -> None:
|
def global_user() -> None:
|
||||||
"""Make username available everywhere."""
|
"""Make username available everywhere."""
|
||||||
flask.g.user = mediawiki_oauth.get_username()
|
flask.g.user = mediawiki_oauth.get_username()
|
||||||
|
flask.g.oauth_session = mediawiki_oauth.get_oauth_session()
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
|
|
@ -294,6 +295,16 @@ def match_type(q: str, snippet: str) -> str | None:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _record_skip(from_title: str, hit_title: str) -> None:
    """Remember that *hit_title* has been dealt with for article *from_title*.

    The record lives in ``flask.session["skipped"]``, a mapping from article
    title to the list of candidate titles already handled. A title that is
    already listed is left alone and the session is not touched.
    """
    skipped: dict[str, list[str]] = flask.session.get("skipped", {})
    already_recorded = skipped.get(from_title, [])
    if hit_title in already_recorded:
        return

    # Build a new list rather than appending in place, then reassign the
    # top-level key and flag the session as modified so the change is saved.
    skipped[from_title] = already_recorded + [hit_title]
    flask.session["skipped"] = skipped
    flask.session.modified = True
|
||||||
|
|
||||||
|
|
||||||
def handle_post(url_title: str) -> Response:
|
def handle_post(url_title: str) -> Response:
|
||||||
"""Handle POST request."""
|
"""Handle POST request."""
|
||||||
from_title = url_title.replace("_", " ").strip()
|
from_title = url_title.replace("_", " ").strip()
|
||||||
|
|
@ -305,6 +316,7 @@ def handle_post(url_title: str) -> Response:
|
||||||
except mediawiki_api.APIError as e:
|
except mediawiki_api.APIError as e:
|
||||||
return flask.make_response(f"Save failed: {e}", 502)
|
return flask.make_response(f"Save failed: {e}", 502)
|
||||||
flask.session["saves"] = flask.session.get("saves", 0) + 1
|
flask.session["saves"] = flask.session.get("saves", 0) + 1
|
||||||
|
_record_skip(from_title, hit_title)
|
||||||
return flask.redirect(
|
return flask.redirect(
|
||||||
flask.url_for("article_page", url_title=url_title, after=hit_title)
|
flask.url_for("article_page", url_title=url_title, after=hit_title)
|
||||||
)
|
)
|
||||||
|
|
@ -318,9 +330,19 @@ def article_page(url_title: str) -> str | Response:
|
||||||
|
|
||||||
from_title = url_title.replace("_", " ").strip()
|
from_title = url_title.replace("_", " ").strip()
|
||||||
|
|
||||||
total = search_count(from_title)
|
try:
|
||||||
with_link = search_count_with_link(from_title)
|
total = search_count(from_title)
|
||||||
_no_link_count, hits = search_no_link(from_title)
|
with_link = search_count_with_link(from_title)
|
||||||
|
_no_link_count, hits = search_no_link(from_title)
|
||||||
|
except api.MediawikiError as e:
|
||||||
|
return flask.make_response(
|
||||||
|
flask.render_template("error.html", message=str(e)), 502
|
||||||
|
)
|
||||||
|
|
||||||
|
# Filter out candidates already processed this session
|
||||||
|
session_skipped: set[str] = set(
|
||||||
|
flask.session.get("skipped", {}).get(from_title, [])
|
||||||
|
)
|
||||||
|
|
||||||
# If a specific candidate was requested, move it to the front
|
# If a specific candidate was requested, move it to the front
|
||||||
title_param = flask.request.args.get("title")
|
title_param = flask.request.args.get("title")
|
||||||
|
|
@ -328,12 +350,14 @@ def article_page(url_title: str) -> str | Response:
|
||||||
hits = [h for h in hits if h["title"] == title_param] + \
|
hits = [h for h in hits if h["title"] == title_param] + \
|
||||||
[h for h in hits if h["title"] != title_param]
|
[h for h in hits if h["title"] != title_param]
|
||||||
|
|
||||||
# Skip past already-processed candidates
|
# Record and apply explicit skip-past
|
||||||
after = flask.request.args.get("after")
|
after = flask.request.args.get("after")
|
||||||
if after:
|
if after:
|
||||||
hits_iter = itertools.dropwhile(lambda h: h["title"] != after, hits)
|
_record_skip(from_title, after)
|
||||||
next(hits_iter, None) # consume the "after" entry itself
|
session_skipped.add(after)
|
||||||
hits = list(hits_iter)
|
|
||||||
|
hits = [h for h in hits if h["title"] not in session_skipped
|
||||||
|
and h["title"] != from_title and h["title"] != case_flip_first(from_title)]
|
||||||
|
|
||||||
if not hits:
|
if not hits:
|
||||||
return flask.render_template("all_done.html")
|
return flask.render_template("all_done.html")
|
||||||
|
|
@ -397,7 +421,10 @@ def api_valid_hit() -> werkzeug.wrappers.response.Response:
|
||||||
try:
|
try:
|
||||||
found = get_diff(link_to, link_from, None)
|
found = get_diff(link_to, link_from, None)
|
||||||
except NoMatch:
|
except NoMatch:
|
||||||
|
_record_skip(link_to, link_from)
|
||||||
return flask.jsonify(valid=False)
|
return flask.jsonify(valid=False)
|
||||||
|
except api.MediawikiError as e:
|
||||||
|
return flask.jsonify(valid=False, error=str(e))
|
||||||
|
|
||||||
return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"])
|
return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"])
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue