From fad1ef9e49dcf7a4e576f64441bea7252b5c2116 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 10:50:38 +0100 Subject: [PATCH 1/8] Using bootstrap 5 --- templates/base.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/base.html b/templates/base.html index 3804a16..d29dec1 100644 --- a/templates/base.html +++ b/templates/base.html @@ -2,7 +2,7 @@ - + @@ -15,7 +15,7 @@ <body> {% block content %}{% endblock %} - <script src="{{ url_for("static", filename="bootstrap/js/bootstrap.bundle.min.js")}}></script> + <script src="{{ url_for("static", filename="bootstrap5/js/bootstrap.bundle.min.js")}}></script> {% block script %}{% endblock %} </body> From 7867122326beb97f65e81a242f16fa1a68e91bb7 Mon Sep 17 00:00:00 2001 From: Edward Betts <edward@4angle.com> Date: Mon, 11 May 2026 10:51:11 +0100 Subject: [PATCH 2/8] Fix for match starts inside one link and continues into the next opening link. --- add_links/match.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/add_links/match.py b/add_links/match.py index 9d00404..6c3a12e 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -252,6 +252,14 @@ def add_link(m: re.Match[str], replacement: str, text: str) -> str: if matched_text.startswith("[[") and matched_text.endswith("|"): return m.re.sub(lambda m: f"[[{replacement}|", text, count=1) + split_links = matched_text.find("]] [[") + if split_links > 0 and m.start() >= 2 and text[m.start() - 2 : m.start()] == "[[": + # Match starts inside one link and continues into the next opening link. + # Link only the text from the first link span and leave the second link as-is. + link_dest = replacement.split("|")[0] if "|" in replacement else replacement + visible = matched_text[:split_links] + return text[: m.start() - 2] + f"[[{link_dest}|{visible}]]" + text[m.start() + split_links + 2 :] + inner_bracket = matched_text.find("[[") if inner_bracket > 0: prefix = matched_text[:inner_bracket].rstrip() From bc6265d4cdd7553bd16a83048776bd769ed6df03 Mon Sep 17 00:00:00 2001 From: Edward Betts <edward@4angle.com> Date: Mon, 11 May 2026 11:30:12 +0100 Subject: [PATCH 3/8] Redesign UI and fix several bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add navbar with login/logout, search form, and Find Link branding - Clean up index page: search-only, examples behind ?debug=1 - Improve article page: remove debug clutter, named Wikipedia links, collapsible candidates - Add SVG favicon (🔗 emoji) - Fix diff CSS: compact layout, auto table layout to eliminate wide marker column gap - Catch TokenRequestDenied in OAuth start and show error page - Store username in session at login; clear bad session on API failure - Raise NoMatch when diff is empty (edit already applied) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --- add_links/match.py | 2 ++ add_links/mediawiki_oauth.py | 14 +++++--- static/css/diff.css | 19 +++++----- static/favicon.svg | 3 ++ templates/all_done.html | 11 +++--- templates/article.html | 68 ++++++++++++++++++++---------------- templates/base.html | 28 +++++++++++---- templates/error.html | 17 +++++++++ templates/index.html | 57 ++++++++++++++++++++---------- templates/save_done.html | 11 +++--- web_view.py | 27 +++++++++----- 11 files changed, 167 insertions(+), 90 deletions(-) create mode 100644 static/favicon.svg create mode 100644 templates/error.html diff --git a/add_links/match.py b/add_links/match.py index 6c3a12e..eaac7f9 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -559,4 +559,6 @@ def get_diff(q: str, title: str, linkto: str | None) -> dict[str, typing.Any]: ) found["diff"] = call_get_diff(title, found["section_num"], section_text) + if not found["diff"]: + raise NoMatch return found diff --git a/add_links/mediawiki_oauth.py b/add_links/mediawiki_oauth.py index 39aecb0..4da3388 100644 --- a/add_links/mediawiki_oauth.py +++ b/add_links/mediawiki_oauth.py @@ -1,5 +1,6 @@ """Wikipedia OAuth.""" +import sys import typing import urllib from typing import cast @@ -73,9 +74,8 @@ def api_request(params: typing.Mapping[str, str | int]) -> dict[str, typing.Any] try: return cast(dict[str, typing.Any], r.json()) except Exception: - print("text") - print(r.text) - print("---") + print(f"API request failed: HTTP {r.status_code}", file=sys.stderr) + print(f"Response body: {r.text!r}", file=sys.stderr) raise @@ -105,7 +105,13 @@ def get_username() -> None | str: return None # not authorized if "username" not in session: - reply = userinfo_call() + try: + reply = userinfo_call() + except Exception as e: + print(f"get_username failed, clearing session: {e}", file=sys.stderr) + session.pop("owner_key", None) + session.pop("owner_secret", None) + return None if "query" not in reply: return None session["username"] = reply["query"]["userinfo"]["name"] diff --git a/static/css/diff.css b/static/css/diff.css index 65d5ef9..c7009f2 100644 --- a/static/css/diff.css +++ b/static/css/diff.css @@ -5,19 +5,16 @@ span.searchmatch { font-weight: bold; } table.diff,td.diff-otitle,td.diff-ntitle{background-color:white} td.diff-otitle,td.diff-ntitle{text-align:center} -td.diff-marker{text-align:right;font-weight:bold;font-size:1.25em} +td.diff-marker{width:1.5em;text-align:center;font-weight:bold;font-size:1.25em;padding:0 0.3em} td.diff-lineno{font-weight:bold} td.diff-addedline,td.diff-deletedline,td.diff-context{font-size:88%;vertical-align:top;white-space:-moz-pre-wrap;white-space:pre-wrap} -td.diff-addedline,td.diff-deletedline{border-style:solid;border-width:1px 1px 1px 4px;border-radius:0.33em} -td.diff-addedline{border-color:#a3d3ff} -td.diff-deletedline{border-color:#ffe49c} -td.diff-context{background:#f3f3f3;color:#333333;border-style:solid;border-width:1px 1px 1px 4px;border-color:#e6e6e6;border-radius:0.33em} +td.diff-addedline,td.diff-deletedline{border-left:3px solid} +td.diff-addedline{border-color:#a3d3ff;background:#f0f8ff} +td.diff-deletedline{border-color:#ffe49c;background:#fffaf0} +td.diff-context{color:#555} .diffchange{font-weight:bold;text-decoration:none} -table.diff{border:none;width:98%;border-spacing:4px; table-layout:fixed} -td.diff-addedline .diffchange,td.diff-deletedline .diffchange{border-radius:0.33em;padding:0.25em 0} +table.diff{border:none;width:100%;border-spacing:0;border-collapse:collapse;table-layout:auto} td.diff-addedline .diffchange{background:#d8ecff} td.diff-deletedline .diffchange{background:#feeec8} -table.diff td{padding:0.33em 0.66em} -table.diff col.diff-marker{width:2%} -table.diff col.diff-content{width:48%} -table.diff td div{ word-wrap:break-word; overflow:auto} +table.diff td{padding:0.2em 0.5em} +table.diff td div{word-wrap:break-word;overflow:auto} diff --git a/static/favicon.svg b/static/favicon.svg new file mode 100644 index 0000000..181c415 --- /dev/null +++ b/static/favicon.svg @@ -0,0 +1,3 @@ +<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"> + <text y=".9em" font-size="90">🔗</text> +</svg> diff --git a/templates/all_done.html b/templates/all_done.html index c346482..e3135f7 100644 --- a/templates/all_done.html +++ b/templates/all_done.html @@ -1,10 +1,11 @@ {% extends "base.html" %} -{% block title %}Index{% endblock %} +{% block title %}All done{% endblock %} {% block content %} - <div class="container"> - <h1>All done</h1> - <div><a href="{{ url_for('index') }}">back to index </a></div> - </div> +<div class="container text-center mt-5"> + <h1 class="mb-3">All done</h1> + <p class="text-muted mb-4">No more candidates found for this article.</p> + <a href="{{ url_for('index') }}" class="btn btn-primary">Search another article</a> +</div> {% endblock %} diff --git a/templates/article.html b/templates/article.html index 55d05b0..4d5c97f 100644 --- a/templates/article.html +++ b/templates/article.html @@ -1,48 +1,54 @@ {% extends "base.html" %} -{% block title %}Link '{{ title }}' in '{{ hit_title }}'{% endblock %} +{% block title %}{{ title }} in {{ hit_title }}{% endblock %} {% block style %} <link href="{{ url_for("static", filename="css/diff.css") }}" rel="stylesheet"/> {% endblock %} {% block content %} - <div class="container"> - <h1>Link '{{ title }}' in '{{ hit_title }}'</h1> - <form action="{{ url_for("index") }}"> - <input name="q"> - <input type="submit" value="search"> - </form> +<div class="container"> + <nav aria-label="breadcrumb" class="mb-3"> + <ol class="breadcrumb"> + <li class="breadcrumb-item"><a href="{{ url_for('index') }}">Home</a></li> + <li class="breadcrumb-item active">{{ title }}</li> + </ol> + </nav> - <div>Username: {{ g.user }}</div> + <div class="d-flex flex-wrap align-items-baseline gap-3 mb-1"> + <h1 class="h4 mb-0">Link "{{ title }}" in "{{ hit_title }}"</h1> + <a href="https://en.wikipedia.org/wiki/{{ title }}" target="_blank" class="text-muted small">{{ title }} ↗</a> + <a href="https://en.wikipedia.org/wiki/{{ hit_title }}" target="_blank" class="text-muted small">{{ hit_title }} ↗</a> + </div> - <div><a href="https://en.wikipedia.org/wiki/{{ title }}" target="_blank">view article</a></div> + <div class="d-flex gap-3 mb-4 text-muted small"> + <span>{{ total }} mentions total</span> + <span>{{ with_link }} already linked ({{ "{:.0%}".format(with_link / total) }})</span> + </div> - <div><a href="{{ url_for('index') }}">back to index </a></div> + <div class="mb-4"> + <table class="diff">{{ diff | safe }}</table> + </div> - <div>total: {{ total }}</div> - <div>with link: {{ with_link }}</div> - <div>ratio: {{ "{:.1%}".format(with_link / total) }}</div> - {# <div>hit: {{ hit }}</div> #} - <div>replacement: {{ found.replacement }}</div> - <div>section: {{ found.section }}</div> - <table> - {{ diff | safe }} - </table> - <form method="POST"> - <input type="hidden" name="hit" value="{{ hit_title }}"> - <div class="my-3"> - <input type="submit" class="btn btn-primary" value="save"/> - <a href="{{url_for("article_page", url_title=url_title, after=hit_title)}}" class="btn btn-primary">skip</a> - </div> - </form> + <form method="POST" class="mb-4"> + <input type="hidden" name="hit" value="{{ hit_title }}"> + <div class="d-flex gap-2"> + <button type="submit" class="btn btn-success">Save edit</button> + <a href="{{ url_for("article_page", url_title=url_title, after=hit_title) }}" class="btn btn-outline-secondary">Skip</a> + </div> + </form> - <ol> + {% if hits %} + <details class="border rounded p-3"> + <summary class="text-muted small" style="cursor:pointer">{{ hits | length }} other candidates</summary> + <ol class="mt-3 mb-0 small"> {% for hit in hits %} - {% set url = url_for("article_page", url_title=url_title, title=hit.title) %} - <li><a href="{{ url }}">{{ hit.title }}</a> – {{ hit.snippet | safe }}</li> + <li class="mb-1"> + <a href="{{ url_for("article_page", url_title=url_title, title=hit.title) }}">{{ hit.title }}</a> + </li> {% endfor %} </ol> - </div> + </details> + {% endif %} +</div> {% endblock %} - diff --git a/templates/base.html b/templates/base.html index d29dec1..7043811 100644 --- a/templates/base.html +++ b/templates/base.html @@ -4,19 +4,33 @@ <meta charset="utf-8"> <link href="{{ url_for("static", filename="bootstrap5/css/bootstrap.min.css") }}" rel="stylesheet"> <meta name="viewport" content="width=device-width, initial-scale=1.0" /> - - <title> - {% block title %}{% endblock %} - - + + {% block title %}{% endblock %} – Find Link {% block style %}{% endblock %} + + {% block content %}{% endblock %} - - + {% block script %}{% endblock %} diff --git a/templates/error.html b/templates/error.html new file mode 100644 index 0000000..c1a5018 --- /dev/null +++ b/templates/error.html @@ -0,0 +1,17 @@ +{% extends "base.html" %} + +{% block title %}Error{% endblock %} + +{% block content %} +
+
+
+
+

Something went wrong

+

{{ message }}

+
+ Back to home +
+
+
+{% endblock %} diff --git a/templates/index.html b/templates/index.html index feab172..af5314b 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,25 +1,44 @@ {% extends "base.html" %} -{% block title %}Index{% endblock %} +{% block title %}Find Link{% endblock %} {% block content %} -
-

Index

-
- - -
- -
Username: {{ g.user }}
- - - {% for item in examples %} - - - - - - {% endfor %} -
{{ item.title }}{{ item.total }}{{ "{:.1%}".format(item.with_links / item.total) }}
+
+
+
+

Find Link

+

Find unlinked mentions of a Wikipedia article and add the links.

+
+ + +
+
+ + {% if debug %} +
+
+

Examples

+ + + + + + + + + + {% for item in examples %} + + + + + + {% endfor %} + +
ArticleTotal% linked
{{ item.title }}{{ item.total }}{{ "{:.0%}".format(item.with_links / item.total) }}
+
+
+ {% endif %} +
{% endblock %} diff --git a/templates/save_done.html b/templates/save_done.html index c96cc66..ffca063 100644 --- a/templates/save_done.html +++ b/templates/save_done.html @@ -1,10 +1,11 @@ {% extends "base.html" %} -{% block title %}Index{% endblock %} +{% block title %}Edit saved{% endblock %} {% block content %} -
-

Save done

-
Save is complete.
-
+
+

Edit saved

+

Your edit has been saved to Wikipedia.

+ Search another article +
{% endblock %} diff --git a/web_view.py b/web_view.py index 32fb1da..c90865e 100755 --- a/web_view.py +++ b/web_view.py @@ -4,11 +4,13 @@ import html import itertools import json import re +import sys import typing import flask import werkzeug from requests_oauthlib import OAuth1Session +from requests_oauthlib.oauth1_session import TokenRequestDenied from werkzeug.wrappers.response import Response from add_links import api, core, mediawiki_api, mediawiki_oauth @@ -118,17 +120,20 @@ def index() -> str | Response: url = flask.url_for("oauth_callback", **flask.request.args) # type: ignore return flask.redirect(url) - examples = load_examples() - examples.sort( - key=lambda i: float(i["with_links"]) / float(i["total"]), reverse=True - ) - if q := flask.request.args.get("q"): if q_trimmed := q.strip(): return flask.redirect(article_url(q_trimmed)) + debug = flask.request.args.get("debug") + examples: list[dict[str, str | int]] = [] + if debug: + examples = load_examples() + examples.sort( + key=lambda i: float(i["with_links"]) / float(i["total"]), reverse=True + ) + return flask.render_template( - "index.html", examples=examples, article_url=article_url + "index.html", examples=examples, article_url=article_url, debug=debug ) @@ -187,7 +192,12 @@ def start_oauth() -> Response: oauth = OAuth1Session(client_key, client_secret=client_secret, callback_uri="oob") oauth.headers.update({"User-Agent": api.ua}) - fetch_response = oauth.fetch_request_token(request_token_url) + try: + fetch_response = oauth.fetch_request_token(request_token_url) + except TokenRequestDenied as e: + return flask.make_response( + flask.render_template("error.html", message=str(e)), 502 + ) flask.session["owner_key"] = fetch_response.get("oauth_token") flask.session["owner_secret"] = fetch_response.get("oauth_token_secret") @@ -229,7 +239,8 @@ def oauth_callback() -> werkzeug.wrappers.response.Response: flask.session["owner_key"] = oauth_tokens.get("oauth_token") flask.session["owner_secret"] = oauth_tokens.get("oauth_token_secret") - print("login successful") + username = mediawiki_oauth.get_username() + print(f"login successful: {username}", file=sys.stderr) next_page = flask.session.get("after_login") return flask.redirect(next_page if next_page else flask.url_for("index")) From 2c197f5c43f8f8b80b880ae5b93e9a38777be1f5 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 12:24:15 +0100 Subject: [PATCH 4/8] Add live search progress, session counter, and fix URLs - Search candidates client-side with JS, showing "Checking X..." spinner instead of leaving user waiting on a blank page - Fix broken api_valid_hit endpoint (get_diff returns dict, not tuple) - Remove server-side get_best_hit; article_page now returns candidate list immediately and JS iterates via /api/1/valid_hit - URL now reflects current article via history.replaceState (?title=X), Skip navigates to ?after=X to advance past it - Track saves in session; show count as green badge in navbar - Add session counter incremented on each successful save Co-Authored-By: Claude Sonnet 4.6 --- templates/article.html | 106 +++++++++++++++++++++++++++++++++++------ templates/base.html | 3 ++ web_view.py | 76 ++++++++--------------------- 3 files changed, 114 insertions(+), 71 deletions(-) diff --git a/templates/article.html b/templates/article.html index 4d5c97f..5b11226 100644 --- a/templates/article.html +++ b/templates/article.html @@ -1,6 +1,6 @@ {% extends "base.html" %} -{% block title %}{{ title }} in {{ hit_title }}{% endblock %} +{% block title %}{{ title }}{% endblock %} {% block style %} @@ -16,31 +16,49 @@
-

Link "{{ title }}" in "{{ hit_title }}"

+

Find links to "{{ title }}"

{{ title }} ↗ - {{ hit_title }} ↗
-
+
{{ total }} mentions total {{ with_link }} already linked ({{ "{:.0%}".format(with_link / total) }})
-
- {{ diff | safe }}
+
+
+
+ Searching… +
+ Searching… +
-
- -
- - Skip + + + {% if hits %} -
- {{ hits | length }} other candidates +
+ {{ hits | length }} candidates
    {% for hit in hits %}
  1. @@ -52,3 +70,63 @@ {% endif %}
{% endblock %} + +{% block script %} + +{% endblock %} diff --git a/templates/base.html b/templates/base.html index 7043811..324d9fc 100644 --- a/templates/base.html +++ b/templates/base.html @@ -20,6 +20,9 @@ {% if g.user %} {{ g.user }} + {% if session.get("saves") %} + {{ session["saves"] }} saved + {% endif %} Log out {% else %} Log in with Wikipedia diff --git a/web_view.py b/web_view.py index c90865e..92f7bc8 100755 --- a/web_view.py +++ b/web_view.py @@ -292,31 +292,6 @@ def match_type(q: str, snippet: str) -> str | None: return match -class NoGoodHit(Exception): - """No good hit.""" - - -def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any]]: - """Find the best hit within the search results.""" - for hit in hits: - if hit["title"].lower() == title.lower(): - continue - # if match_type(title, hit["snippet"]) != "exact": - # continue - - try: - print(f'get diff: {hit["title"]}, {title}') - found = get_diff(title, hit["title"], None) - except NoMatch: - print("no match") - continue - except api.MediawikiError as e: - print(f"MediawikiError for {hit['title']!r}: {e}") - continue - - return (hit, found) - - raise NoGoodHit def handle_post(url_title: str) -> Response: @@ -329,6 +304,7 @@ def handle_post(url_title: str) -> Response: return flask.redirect(flask.url_for("start_oauth")) except mediawiki_api.APIError as e: return flask.make_response(f"Save failed: {e}", 502) + flask.session["saves"] = flask.session.get("saves", 0) + 1 return flask.redirect( flask.url_for("article_page", url_title=url_title, after=hit_title) ) @@ -341,47 +317,33 @@ def article_page(url_title: str) -> str | Response: return handle_post(url_title) from_title = url_title.replace("_", " ").strip() - article_title = flask.request.args.get("title") total = search_count(from_title) with_link = search_count_with_link(from_title) + _no_link_count, hits = search_no_link(from_title) - no_link_count, hits = search_no_link(from_title) + # If a specific candidate was requested, move it to the front + title_param = flask.request.args.get("title") + if title_param: + hits = [h for h in hits if h["title"] == title_param] + \ + [h for h in hits if h["title"] != title_param] - by_title = {hit["title"]: hit for hit in hits} + # Skip past already-processed candidates + after = flask.request.args.get("after") + if after: + hits_iter = itertools.dropwhile(lambda h: h["title"] != after, hits) + next(hits_iter, None) # consume the "after" entry itself + hits = list(hits_iter) - found = None - if article_title in by_title: - hit = by_title[article_title] - try: - found = get_diff(from_title, hit["title"], None) - except NoMatch: - pass - - if not found: - after = flask.request.args.get("after") - if after: - print(after) - hits_iter = itertools.dropwhile(lambda hit: hit["title"] != after, hits) - skip = next(hits_iter, None) - if skip: - hits = list(hits_iter) - - try: - hit, found = get_best_hit(from_title, hits) - except NoGoodHit: - return flask.render_template("all_done.html") + if not hits: + return flask.render_template("all_done.html") return flask.render_template( "article.html", title=from_title, total=total, with_link=with_link, - hit_title=hit["title"], hits=hits, - replacement=found["replacement"], - diff=found["diff"], - found=found, url_title=url_title, ) @@ -428,16 +390,16 @@ def api_hits() -> werkzeug.wrappers.response.Response: @app.route("/api/1/valid_hit") def api_valid_hit() -> werkzeug.wrappers.response.Response: - """Return candidates for the given article title.""" - link_from = flask.request.args["link_from"] + """Check if a candidate article has a valid unlinked mention.""" link_to = flask.request.args["link_to"] + link_from = flask.request.args["link_from"] try: - diff, replacement = get_diff(link_to, link_from, None) + found = get_diff(link_to, link_from, None) except NoMatch: return flask.jsonify(valid=False) - return flask.jsonify(valid=True, diff=diff, replacement=replacement) + return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"]) @app.route("/favicon.ico") From 0239b835555312e5fa8c956a1a0eaabdddf3ac12 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 12:46:13 +0100 Subject: [PATCH 5/8] Track processed candidates in session and use OAuth for API calls - Record skips, saves, and no-match results in session["skipped"] so revisiting an article resumes past already-checked candidates - Filter self-links (case-insensitive first letter) from hit list - Use OAuth session for all API reads when logged in for higher rate limits - Add "for" template to exclusion list to avoid bad edits - Improve API error handling with HTTP status codes logged to stderr Co-Authored-By: Claude Sonnet 4.6 --- add_links/api.py | 21 ++++++++++++++---- add_links/match.py | 2 +- add_links/mediawiki_oauth.py | 21 ++++++++++++++++++ web_view.py | 41 ++++++++++++++++++++++++++++++------ 4 files changed, 73 insertions(+), 12 deletions(-) diff --git a/add_links/api.py b/add_links/api.py index 05fc7b1..5d8de19 100644 --- a/add_links/api.py +++ b/add_links/api.py @@ -1,4 +1,5 @@ import re +import sys import typing import requests @@ -72,18 +73,30 @@ webpage_error = ( ) +def _get_active_session() -> requests.sessions.Session: + """Return OAuth session if one is available in Flask context, else plain session.""" + try: + from flask import g + if hasattr(g, "oauth_session") and g.oauth_session is not None: + return g.oauth_session # type: ignore[return-value] + except RuntimeError: + pass + return get_session() + + def api_get(params: StrDict) -> StrDict: """Make call to Wikipedia API.""" - s = get_session() + s = _get_active_session() r = s.get(get_query_url(), params=params) try: ret: StrDict = r.json() except JSONDecodeError: + print(f"API request failed: HTTP {r.status_code}", file=sys.stderr) + print(f"Response body: {r.text!r}", file=sys.stderr) if webpage_error in r.text: raise MediawikiError(webpage_error) - else: - raise MediawikiError("unknown error") + raise MediawikiError(f"HTTP {r.status_code}: unexpected response from Wikipedia API") check_for_error(ret) return ret @@ -271,7 +284,7 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str: "rvdifftotext": section_text.strip(), } - s = get_session() + s = _get_active_session() r = s.post(get_query_url(), data=data) try: ret = r.json() diff --git a/add_links/match.py b/add_links/match.py index eaac7f9..a3440e7 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -78,7 +78,7 @@ re_cite = re.compile( re.I | re.S, ) -re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see)\b", re.I) +re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see|for)\b", re.I) re_no_param_template = re.compile(r"\{\{[^|{}]+\}\}") re_external_link = re.compile(r"\[https?://[^\]]+\]") # Italic text (work titles in bibliographies). Handles apostrophes in content diff --git a/add_links/mediawiki_oauth.py b/add_links/mediawiki_oauth.py index 4da3388..12ebd58 100644 --- a/add_links/mediawiki_oauth.py +++ b/add_links/mediawiki_oauth.py @@ -99,6 +99,27 @@ def userinfo_call() -> typing.Mapping[str, typing.Any]: return api_request(params) +def get_oauth_session() -> OAuth1Session | None: + """Return an OAuth1Session for the current user, or None if not logged in.""" + if "owner_key" not in session or "owner_secret" not in session: + return None + app = current_app + client_key = app.config["CLIENT_KEY"] + client_secret = app.config["CLIENT_SECRET"] + oauth = OAuth1Session( + client_key, + client_secret=client_secret, + resource_owner_key=session["owner_key"], + resource_owner_secret=session["owner_secret"], + ) + oauth.headers.update({"User-Agent": ua}) + oauth.params = typing.cast( + dict[str, str | int], + {"format": "json", "action": "query", "formatversion": 2}, + ) + return oauth + + def get_username() -> None | str: """Get the username or None if not logged in.""" if "owner_key" not in session: diff --git a/web_view.py b/web_view.py index 92f7bc8..bc412c0 100755 --- a/web_view.py +++ b/web_view.py @@ -111,6 +111,7 @@ def search_no_link(q: str) -> tuple[int, list[Hit]]: def global_user() -> None: """Make username available everywhere.""" flask.g.user = mediawiki_oauth.get_username() + flask.g.oauth_session = mediawiki_oauth.get_oauth_session() @app.route("/") @@ -294,6 +295,16 @@ def match_type(q: str, snippet: str) -> str | None: +def _record_skip(from_title: str, hit_title: str) -> None: + """Record that a candidate was skipped or saved for this article.""" + skipped: dict[str, list[str]] = flask.session.get("skipped", {}) + article_skipped = skipped.get(from_title, []) + if hit_title not in article_skipped: + skipped[from_title] = article_skipped + [hit_title] + flask.session["skipped"] = skipped + flask.session.modified = True + + def handle_post(url_title: str) -> Response: """Handle POST request.""" from_title = url_title.replace("_", " ").strip() @@ -305,6 +316,7 @@ def handle_post(url_title: str) -> Response: except mediawiki_api.APIError as e: return flask.make_response(f"Save failed: {e}", 502) flask.session["saves"] = flask.session.get("saves", 0) + 1 + _record_skip(from_title, hit_title) return flask.redirect( flask.url_for("article_page", url_title=url_title, after=hit_title) ) @@ -318,9 +330,19 @@ def article_page(url_title: str) -> str | Response: from_title = url_title.replace("_", " ").strip() - total = search_count(from_title) - with_link = search_count_with_link(from_title) - _no_link_count, hits = search_no_link(from_title) + try: + total = search_count(from_title) + with_link = search_count_with_link(from_title) + _no_link_count, hits = search_no_link(from_title) + except api.MediawikiError as e: + return flask.make_response( + flask.render_template("error.html", message=str(e)), 502 + ) + + # Filter out candidates already processed this session + session_skipped: set[str] = set( + flask.session.get("skipped", {}).get(from_title, []) + ) # If a specific candidate was requested, move it to the front title_param = flask.request.args.get("title") @@ -328,12 +350,14 @@ def article_page(url_title: str) -> str | Response: hits = [h for h in hits if h["title"] == title_param] + \ [h for h in hits if h["title"] != title_param] - # Skip past already-processed candidates + # Record and apply explicit skip-past after = flask.request.args.get("after") if after: - hits_iter = itertools.dropwhile(lambda h: h["title"] != after, hits) - next(hits_iter, None) # consume the "after" entry itself - hits = list(hits_iter) + _record_skip(from_title, after) + session_skipped.add(after) + + hits = [h for h in hits if h["title"] not in session_skipped + and h["title"] != from_title and h["title"] != case_flip_first(from_title)] if not hits: return flask.render_template("all_done.html") @@ -397,7 +421,10 @@ def api_valid_hit() -> werkzeug.wrappers.response.Response: try: found = get_diff(link_to, link_from, None) except NoMatch: + _record_skip(link_to, link_from) return flask.jsonify(valid=False) + except api.MediawikiError as e: + return flask.jsonify(valid=False, error=str(e)) return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"]) From c9b4e2face0ea380d63cd752f0bb2ce5acd28285 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 13:53:35 +0100 Subject: [PATCH 6/8] Add redirect support, live candidate list, per-article save count, and error pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Detect redirect targets (e.g. "handling stolen goods" → "Possession of stolen goods") and use piped links [[target|title]] in edits; exclude articles already linking to the redirect target from candidates - Remove candidates from the list in real time as they are checked and found invalid, with live count update in the summary - Track and display per-article save count in the stats line - Rename "Find Link" to "Missing Link" throughout - Show redirect target in the article heading - Report save errors to the user via error page instead of crashing - Filter self-links using case-insensitive first-letter comparison Co-Authored-By: Claude Sonnet 4.6 --- templates/article.html | 30 ++++++++++++++++++++++----- templates/base.html | 4 ++-- web_view.py | 47 +++++++++++++++++++++++++++++++++--------- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/templates/article.html b/templates/article.html index 5b11226..05a45a7 100644 --- a/templates/article.html +++ b/templates/article.html @@ -18,11 +18,17 @@

Find links to "{{ title }}"

{{ title }} ↗ + {% if redirect_to %} + → redirects to {{ redirect_to }} ↗ + {% endif %}
{{ total }} mentions total {{ with_link }} already linked ({{ "{:.0%}".format(with_link / total) }}) + {% if saves_this_session %} + {{ saves_this_session }} added this session + {% endif %}
@@ -57,11 +63,11 @@
{% if hits %} -
- {{ hits | length }} candidates -
    +
    + {{ hits | length }} candidates +
      {% for hit in hits %} -
    1. +
    2. {{ hit.title }}
    3. {% endfor %} @@ -76,6 +82,7 @@ (function () { const hits = {{ hits | map(attribute='title') | list | tojson }}; const linkTo = {{ title | tojson }}; + const redirectTo = {{ redirect_to | tojson }}; const apiUrl = {{ url_for('api_valid_hit') | tojson }}; const pageUrl = new URL(window.location.href); @@ -83,6 +90,16 @@ const elStatus = document.getElementById('search-status'); const elResult = document.getElementById('result'); const elAllDone = document.getElementById('all-done'); + const elList = document.getElementById('candidates-list'); + + function removeCandidate(title) { + if (!elList) return; + const li = elList.querySelector(`li[data-title="${CSS.escape(title)}"]`); + if (!li) return; + li.remove(); + const elCount = document.getElementById('candidates-count'); + if (elCount) elCount.textContent = elList.children.length; + } async function search() { for (const hitTitle of hits) { @@ -91,6 +108,7 @@ let data; try { const params = new URLSearchParams({ link_to: linkTo, link_from: hitTitle }); + if (redirectTo) params.append('redirect_to', redirectTo); const resp = await fetch(apiUrl + '?' + params); if (!resp.ok) continue; data = await resp.json(); @@ -98,7 +116,7 @@ continue; } - if (!data.valid) continue; + if (!data.valid) { removeCandidate(hitTitle); continue; } elProgress.hidden = true; @@ -123,6 +141,8 @@ } elProgress.hidden = true; + const elCandidates = document.getElementById('candidates-section'); + if (elCandidates) elCandidates.hidden = true; elAllDone.hidden = false; } diff --git a/templates/base.html b/templates/base.html index 324d9fc..0328311 100644 --- a/templates/base.html +++ b/templates/base.html @@ -5,14 +5,14 @@ - {% block title %}{% endblock %} – Find Link + {% block title %}{% endblock %} – Missing Link {% block style %}{% endblock %}