From 9289cd1450140d18d981420a99ae1528eb98614c Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 15:22:53 +0100 Subject: [PATCH 1/6] Add README.md Co-Authored-By: Claude Sonnet 4.6 --- README.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..73cb212 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# Missing Link + +A Wikipedia tool that finds articles which mention a topic by name but don't +link to it, then lets you add the missing wikilink directly from your browser. + +Live at: https://edwardbetts.com/missinglink/ + +## How it works + +1. Enter a Wikipedia article title. +2. The tool searches Wikipedia for articles that mention the title but don't + include a wikilink to it. +3. For each candidate, a diff shows the proposed edit in context. +4. Save the edit to Wikipedia with one click (requires a Wikipedia account), + or skip to the next candidate. + +Redirect targets are handled automatically — if the title is a redirect, +the tool produces a piped link (e.g. `[[Possession of stolen goods|handling stolen goods]]`) +and excludes articles that already link to the redirect target. + +## Setup + +### Dependencies + +``` +pip install flask requests requests-oauthlib simplejson flipflop +``` + +### Configuration + +Copy `config/default.py` and add your Wikipedia OAuth credentials: + +```python +SECRET_KEY = 'your-flask-secret-key' +CLIENT_KEY = 'your-oauth-consumer-key' +CLIENT_SECRET = 'your-oauth-consumer-secret' +``` + +Register an OAuth 1.0a consumer at +https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose + +### Running locally + +``` +python web_view.py +``` + +The app listens on `http://0.0.0.0:8000`. + +### Running under Apache mod_fcgid + +The `run.fcgi` entry point is used by Apache. 
Touch it to reload the app +after code changes: + +``` +touch run.fcgi +``` + +## Project layout + +``` +web_view.py Flask application and routes +add_links/ + api.py Wikipedia API client (read, search, diff) + match.py Link-finding and wikitext editing logic + core.py Search and candidate ranking + mediawiki_oauth.py OAuth session management + mediawiki_api.py Authenticated write API calls +templates/ Jinja2 HTML templates +static/ CSS and assets +``` From 626afe487c8bcd575314c44ca6cd3bd47021211f Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 11 May 2026 17:04:08 +0100 Subject: [PATCH 2/6] Render newlines in error messages as
<br> instead of literal \n Co-Authored-By: Claude Sonnet 4.6 --- templates/error.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/error.html b/templates/error.html index c1a5018..60246fb 100644 --- a/templates/error.html +++ b/templates/error.html @@ -8,7 +8,7 @@

Something went wrong

-

{{ message }}

+

{{ message | replace('\n', '<br>') | safe }}

Back to home
From fd59e2a9a42b787a8803ccf18a5d1e56d4dbcbe7 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 14 May 2026 10:14:59 +0100 Subject: [PATCH 3/6] Guard oauth callback against missing session tokens --- web_view.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/web_view.py b/web_view.py index ae2564d..7895c8d 100755 --- a/web_view.py +++ b/web_view.py @@ -222,13 +222,16 @@ def start_oauth() -> Response: def oauth_callback() -> werkzeug.wrappers.response.Response: """Oauth callback.""" client_key = app.config["CLIENT_KEY"] + if "owner_key" not in flask.session or "owner_secret" not in flask.session: + return flask.redirect(flask.url_for("start_oauth")) + client_secret = app.config["CLIENT_SECRET"] oauth = OAuth1Session( client_key, client_secret=client_secret, - resource_owner_key=flask.session.get("owner_key"), - resource_owner_secret=flask.session.get("owner_secret"), + resource_owner_key=flask.session["owner_key"], + resource_owner_secret=flask.session["owner_secret"], ) oauth_response = oauth.parse_authorization_response(flask.request.url) From 3ba7eaefd08f008846e9ffa74310104cafe66393 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 14 May 2026 10:15:19 +0100 Subject: [PATCH 4/6] Show full Wikipedia 429 error message --- add_links/api.py | 2 +- test_api.py | 30 ++++++++++++++++++++++++++++++ web_view.py | 17 ++++++++++++++--- 3 files changed, 45 insertions(+), 4 deletions(-) create mode 100644 test_api.py diff --git a/add_links/api.py b/add_links/api.py index 6f57def..0742fe2 100644 --- a/add_links/api.py +++ b/add_links/api.py @@ -98,7 +98,7 @@ def api_get(params: StrDict) -> StrDict: if webpage_error in r.text: raise MediawikiError(webpage_error) if r.status_code == 429: - raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.") + raise MediawikiError(r.text) raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}") check_for_error(ret) return ret diff --git a/test_api.py b/test_api.py new 
file mode 100644 index 0000000..14968ca --- /dev/null +++ b/test_api.py @@ -0,0 +1,30 @@ +import unittest +from unittest.mock import Mock, patch + +from simplejson.scanner import JSONDecodeError + +from add_links import api + + +class ApiGetTests(unittest.TestCase): + def test_429_error_preserves_full_message(self) -> None: + response = Mock() + response.status_code = 429 + response.text = ( + "Too many requests. If you are a tool operator, contact " + "noc@example.org for help." + ) + response.json.side_effect = JSONDecodeError("bad json", "", 0) + + session = Mock() + session.get.return_value = response + + with patch("add_links.api._get_active_session", return_value=session): + with self.assertRaises(api.MediawikiError) as ctx: + api.api_get({"action": "query"}) + + self.assertEqual(str(ctx.exception), response.text) + + +if __name__ == "__main__": + unittest.main() diff --git a/web_view.py b/web_view.py index 7895c8d..6f7c9c5 100755 --- a/web_view.py +++ b/web_view.py @@ -37,6 +37,17 @@ class Hit(typing.TypedDict): timestamp: str +def render_error(message: str) -> str: + """Render shared error page.""" + return flask.render_template("error.html", message=message) + + +def render_mediawiki_error(error: Exception, *, prefix: str | None = None) -> str: + """Render MediaWiki errors.""" + message = f"{prefix}: {error}" if prefix else str(error) + return render_error(message) + + def load_examples() -> list[dict[str, str | int]]: """Load examples.""" return [json.loads(line) for line in open("examples")] @@ -203,7 +214,7 @@ def start_oauth() -> Response: fetch_response = oauth.fetch_request_token(request_token_url) except TokenRequestDenied as e: return flask.make_response( - flask.render_template("error.html", message=str(e)), 502 + render_error(str(e)), 502 ) flask.session["owner_key"] = fetch_response.get("oauth_token") @@ -325,7 +336,7 @@ def handle_post(url_title: str) -> Response: return flask.redirect(flask.url_for("start_oauth", next=next_url)) except 
(mediawiki_api.APIError, api.MediawikiError) as e: return flask.make_response( - flask.render_template("error.html", message=f"Save failed: {e}"), 502 + render_mediawiki_error(e, prefix="Save failed"), 502 ) flask.session["saves"] = flask.session.get("saves", 0) + 1 saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {}) @@ -357,7 +368,7 @@ def article_page(url_title: str) -> str | Response: _no_link_count, hits = search_no_link(from_title, redirect_to) except api.MediawikiError as e: return flask.make_response( - flask.render_template("error.html", message=str(e)), 502 + render_mediawiki_error(e), 502 ) # Filter out candidates already processed this session From 728020d342f778ece7c086873a527d2420af1078 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 14 May 2026 10:18:43 +0100 Subject: [PATCH 5/6] Lowercase redirect targets for sentence-case link text --- add_links/match.py | 6 +++- test_match.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 test_match.py diff --git a/add_links/match.py b/add_links/match.py index a3440e7..732e3ba 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -205,7 +205,11 @@ def match_found(m: re.Match[str], q: str, linkto: str | None) -> str: if pos == 0 or m.string[pos - 1] == "\n": replacement = replacement[0].upper() + replacement[1:] if linkto: - if linkto[0].isupper() and replacement[0] == linkto[0].lower(): + if ( + linkto[0].isupper() + and replacement[0].islower() + and not is_title_case(replacement) + ): linkto = linkto[0].lower() + linkto[1:] elif replacement[0].isupper(): linkto = linkto[0].upper() + linkto[1:] diff --git a/test_match.py b/test_match.py new file mode 100644 index 0000000..c50470e --- /dev/null +++ b/test_match.py @@ -0,0 +1,75 @@ +import unittest + +from add_links.match import NoMatch, find_link_in_content + + +class FindLinkInContentTests(unittest.TestCase): + def 
test_links_first_of_two_adjacent_existing_links(self) -> None: + content = ( + "'''''North Star''''' is a 1974 British [[thriller novel]] by " + "[[Hammond Innes]].Vinson & Kirkpatrick p.455 " + "A man tries to prevent a plot to blow up a [[North Sea]] [[oil rig]]." + ) + + new_content, replacement, replaced_text = find_link_in_content( + "North Sea oil", content + ) + + self.assertEqual( + new_content, + "'''''North Star''''' is a 1974 British [[thriller novel]] by " + "[[Hammond Innes]].Vinson & Kirkpatrick p.455 " + "A man tries to prevent a plot to blow up a " + "[[North Sea oil|North Sea]] [[oil rig]].", + ) + self.assertEqual(replacement, "North Sea oil") + self.assertEqual(replaced_text, "North Sea]] [[oil") + + def test_merges_existing_link_with_following_plain_text(self) -> None: + content = "[[anti-globalization]] movement" + + new_content, replacement, replaced_text = find_link_in_content( + "anti-globalization movement", content + ) + + self.assertEqual(new_content, "[[anti-globalization movement]]") + self.assertEqual(replacement, "anti-globalization movement") + self.assertEqual(replaced_text, "anti-globalization]] movement") + + def test_links_prefix_before_existing_link(self) -> None: + content = "cross-platform [[interchange station]]" + + new_content, replacement, replaced_text = find_link_in_content( + "cross-platform interchange", content + ) + + self.assertEqual( + new_content, + "[[cross-platform interchange|cross-platform]] [[interchange station]]", + ) + self.assertEqual(replacement, "cross-platform interchange") + self.assertEqual(replaced_text, "cross-platform [[interchange") + + def test_lowercases_redirect_target_for_sentence_case_display_text(self) -> None: + content = ( + "The absence of a voters' roll requirement was controversial, with " + "the NCA alleging potential rigging through ballot stuffing and " + "coercion in rural areas." 
+ ) + + new_content, replacement, replaced_text = find_link_in_content( + "ballot stuffing", content, "Electoral fraud" + ) + + self.assertEqual( + new_content, + "The absence of a voters' roll requirement was controversial, with " + "the NCA alleging potential rigging through " + "[[electoral fraud|ballot stuffing]] and coercion in rural areas.", + ) + self.assertEqual(replacement, "electoral fraud|ballot stuffing") + self.assertEqual(replaced_text, "ballot stuffing") + + +if __name__ == "__main__": + unittest.main() From eecbd3cfb829083e9c2b4b4fe40251e9941b97bd Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 14 May 2026 10:34:19 +0100 Subject: [PATCH 6/6] Escape error messages on error page --- templates/error.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/error.html b/templates/error.html index 60246fb..0120bec 100644 --- a/templates/error.html +++ b/templates/error.html @@ -8,7 +8,7 @@

Something went wrong

-

{{ message | replace('\n', '<br>') | safe }}

+

{{ message | e | replace('\n', '<br>\n') | safe }}

Back to home