def _get_active_session() -> requests.sessions.Session:
    """Pick the HTTP session used for Wikipedia API calls.

    The value of ``flask.g.oauth_session`` is returned when that attribute
    exists and is not None.  A ``RuntimeError`` raised while importing or
    consulting ``flask.g`` is swallowed (presumably the "no application
    context" case -- confirm against callers), and in every other situation
    the result of ``get_session()`` is returned.
    """
    try:
        # Function-scope import, as in the patch: flask is only touched here.
        from flask import g

        oauth_session = getattr(g, "oauth_session", None)
    except RuntimeError:
        oauth_session = None

    if oauth_session is not None:
        return oauth_session  # type: ignore[return-value]
    return get_session()


def api_get(params: StrDict) -> StrDict:
    """Send a GET request to the query URL and return the decoded JSON reply.

    The request goes through the session chosen by ``_get_active_session``.
    The decoded reply is handed to ``check_for_error`` before it is returned.

    Raises:
        MediawikiError: the response body could not be decoded as JSON.  The
            message is ``webpage_error`` when that text occurs in the body,
            otherwise a message carrying the HTTP status code.  The status
            code and the raw body are written to stderr before raising.
    """
    response = _get_active_session().get(get_query_url(), params=params)
    try:
        reply: StrDict = response.json()
    except JSONDecodeError:
        print(f"API request failed: HTTP {response.status_code}", file=sys.stderr)
        print(f"Response body: {response.text!r}", file=sys.stderr)
        message = (
            webpage_error
            if webpage_error in response.text
            else f"HTTP {response.status_code}: unexpected response from Wikipedia API"
        )
        raise MediawikiError(message)

    check_for_error(reply)
    return reply
# add_links/mediawiki_oauth.py
def get_oauth_session() -> OAuth1Session | None:
    """Build an OAuth1Session from the credentials stored in the session.

    Returns None unless both "owner_key" and "owner_secret" are present in
    ``session``.  Otherwise the client key and secret are read from the
    current app's config, the User-Agent header is set to ``ua``, and the
    session's default params are set to format=json, action=query,
    formatversion=2.
    """
    if not ("owner_key" in session and "owner_secret" in session):
        return None

    config = current_app.config
    oauth = OAuth1Session(
        config["CLIENT_KEY"],
        client_secret=config["CLIENT_SECRET"],
        resource_owner_key=session["owner_key"],
        resource_owner_secret=session["owner_secret"],
    )
    oauth.headers.update({"User-Agent": ua})

    default_params = {"format": "json", "action": "query", "formatversion": 2}
    oauth.params = typing.cast(dict[str, str | int], default_params)
    return oauth


# web_view.py
def _record_skip(from_title: str, hit_title: str) -> None:
    """Remember in the session that hit_title was handled for from_title.

    Titles are kept under ``flask.session["skipped"]`` as a mapping from
    article title to a list of candidate titles.  Nothing is written when
    hit_title is already listed for from_title.
    """
    skipped: dict[str, list[str]] = flask.session.get("skipped", {})
    already_seen = skipped.get(from_title, [])
    if hit_title in already_seen:
        return

    skipped[from_title] = already_seen + [hit_title]
    flask.session["skipped"] = skipped
    # Set explicitly, as in the patch, so the nested change is persisted.
    flask.session.modified = True
Response: """Handle POST request.""" from_title = url_title.replace("_", " ").strip() @@ -305,6 +316,7 @@ def handle_post(url_title: str) -> Response: except mediawiki_api.APIError as e: return flask.make_response(f"Save failed: {e}", 502) flask.session["saves"] = flask.session.get("saves", 0) + 1 + _record_skip(from_title, hit_title) return flask.redirect( flask.url_for("article_page", url_title=url_title, after=hit_title) ) @@ -318,9 +330,19 @@ def article_page(url_title: str) -> str | Response: from_title = url_title.replace("_", " ").strip() - total = search_count(from_title) - with_link = search_count_with_link(from_title) - _no_link_count, hits = search_no_link(from_title) + try: + total = search_count(from_title) + with_link = search_count_with_link(from_title) + _no_link_count, hits = search_no_link(from_title) + except api.MediawikiError as e: + return flask.make_response( + flask.render_template("error.html", message=str(e)), 502 + ) + + # Filter out candidates already processed this session + session_skipped: set[str] = set( + flask.session.get("skipped", {}).get(from_title, []) + ) # If a specific candidate was requested, move it to the front title_param = flask.request.args.get("title") @@ -328,12 +350,14 @@ def article_page(url_title: str) -> str | Response: hits = [h for h in hits if h["title"] == title_param] + \ [h for h in hits if h["title"] != title_param] - # Skip past already-processed candidates + # Record and apply explicit skip-past after = flask.request.args.get("after") if after: - hits_iter = itertools.dropwhile(lambda h: h["title"] != after, hits) - next(hits_iter, None) # consume the "after" entry itself - hits = list(hits_iter) + _record_skip(from_title, after) + session_skipped.add(after) + + hits = [h for h in hits if h["title"] not in session_skipped + and h["title"] != from_title and h["title"] != case_flip_first(from_title)] if not hits: return flask.render_template("all_done.html") @@ -397,7 +421,10 @@ def api_valid_hit() -> 
werkzeug.wrappers.response.Response: try: found = get_diff(link_to, link_from, None) except NoMatch: + _record_skip(link_to, link_from) return flask.jsonify(valid=False) + except api.MediawikiError as e: + return flask.jsonify(valid=False, error=str(e)) return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"])