Track processed candidates in session and use OAuth for API calls

- Record skips, saves, and no-match results in session["skipped"] so
  revisiting an article resumes past already-checked candidates
- Filter self-links (case-insensitive first letter) from hit list
- Use OAuth session for all API reads when logged in for higher rate limits
- Add "for" template to exclusion list to avoid bad edits
- Improve API error handling with HTTP status codes logged to stderr

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-05-11 12:46:13 +01:00
parent 2c197f5c43
commit 0239b83555
4 changed files with 73 additions and 12 deletions

View file

@ -111,6 +111,7 @@ def search_no_link(q: str) -> tuple[int, list[Hit]]:
def global_user() -> None:
    """Store the current username and OAuth session on ``flask.g``.

    Both values come from ``mediawiki_oauth`` and are attached as
    ``flask.g.user`` and ``flask.g.oauth_session`` so other code can read
    them from the application globals object.
    """
    app_globals = flask.g
    # Same lookup order as before: username first, then the OAuth session.
    app_globals.user = mediawiki_oauth.get_username()
    app_globals.oauth_session = mediawiki_oauth.get_oauth_session()
@app.route("/")
@ -294,6 +295,16 @@ def match_type(q: str, snippet: str) -> str | None:
def _record_skip(from_title: str, hit_title: str) -> None:
"""Record that a candidate was skipped or saved for this article.

Adds ``hit_title`` to the list kept under ``from_title`` in
``flask.session["skipped"]`` unless it is already present.

Args:
    from_title: Title of the article whose candidates are being tracked;
        used as the key in the ``"skipped"`` mapping.
    hit_title: Title of the candidate to remember.
"""
# Mapping of article title -> candidate titles already recorded; starts
# out as an empty dict when the session has no "skipped" entry yet.
skipped: dict[str, list[str]] = flask.session.get("skipped", {})
article_skipped = skipped.get(from_title, [])
# Only record each candidate once per article.
if hit_title not in article_skipped:
# Build a new list instead of appending to the stored one in place.
skipped[from_title] = article_skipped + [hit_title]
# Write the mapping back so a freshly created dict is actually stored.
flask.session["skipped"] = skipped
# NOTE(review): assigning the key above should already mark the session as
# changed; the explicit flag looks like a safeguard - confirm it is needed.
flask.session.modified = True
def handle_post(url_title: str) -> Response:
"""Handle POST request."""
from_title = url_title.replace("_", " ").strip()
@ -305,6 +316,7 @@ def handle_post(url_title: str) -> Response:
except mediawiki_api.APIError as e:
return flask.make_response(f"Save failed: {e}", 502)
flask.session["saves"] = flask.session.get("saves", 0) + 1
_record_skip(from_title, hit_title)
return flask.redirect(
flask.url_for("article_page", url_title=url_title, after=hit_title)
)
@ -318,9 +330,19 @@ def article_page(url_title: str) -> str | Response:
from_title = url_title.replace("_", " ").strip()
total = search_count(from_title)
with_link = search_count_with_link(from_title)
_no_link_count, hits = search_no_link(from_title)
try:
total = search_count(from_title)
with_link = search_count_with_link(from_title)
_no_link_count, hits = search_no_link(from_title)
except api.MediawikiError as e:
return flask.make_response(
flask.render_template("error.html", message=str(e)), 502
)
# Filter out candidates already processed this session
session_skipped: set[str] = set(
flask.session.get("skipped", {}).get(from_title, [])
)
# If a specific candidate was requested, move it to the front
title_param = flask.request.args.get("title")
@ -328,12 +350,14 @@ def article_page(url_title: str) -> str | Response:
hits = [h for h in hits if h["title"] == title_param] + \
[h for h in hits if h["title"] != title_param]
# Skip past already-processed candidates
# Record and apply explicit skip-past
after = flask.request.args.get("after")
if after:
hits_iter = itertools.dropwhile(lambda h: h["title"] != after, hits)
next(hits_iter, None) # consume the "after" entry itself
hits = list(hits_iter)
_record_skip(from_title, after)
session_skipped.add(after)
hits = [h for h in hits if h["title"] not in session_skipped
and h["title"] != from_title and h["title"] != case_flip_first(from_title)]
if not hits:
return flask.render_template("all_done.html")
@ -397,7 +421,10 @@ def api_valid_hit() -> werkzeug.wrappers.response.Response:
try:
found = get_diff(link_to, link_from, None)
except NoMatch:
_record_skip(link_to, link_from)
return flask.jsonify(valid=False)
except api.MediawikiError as e:
return flask.jsonify(valid=False, error=str(e))
return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"])