diff --git a/README.md b/README.md
new file mode 100644
index 0000000..73cb212
--- /dev/null
+++ b/README.md
@@ -0,0 +1,71 @@
+# Missing Link
+
+A Wikipedia tool that finds articles which mention a topic by name but don't
+link to it, then lets you add the missing wikilink directly from your browser.
+
+Live at: https://edwardbetts.com/missinglink/
+
+## How it works
+
+1. Enter a Wikipedia article title.
+2. The tool searches Wikipedia for articles that mention the title but don't
+ include a wikilink to it.
+3. For each candidate, a diff shows the proposed edit in context.
+4. Save the edit to Wikipedia with one click (requires a Wikipedia account),
+ or skip to the next candidate.
+
+Redirects are handled automatically — if the title is a redirect,
+the tool produces a piped link (e.g. `[[Possession of stolen goods|handling stolen goods]]`)
+and excludes articles that already link to the redirect target.
+
+## Setup
+
+### Dependencies
+
+```
+pip install flask requests requests-oauthlib simplejson flipflop
+```
+
+### Configuration
+
+Copy `config/default.py` and add your Wikipedia OAuth credentials:
+
+```python
+SECRET_KEY = 'your-flask-secret-key'
+CLIENT_KEY = 'your-oauth-consumer-key'
+CLIENT_SECRET = 'your-oauth-consumer-secret'
+```
+
+Register an OAuth 1.0a consumer at
+https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose
+
+### Running locally
+
+```
+python web_view.py
+```
+
+The app listens on all interfaces (`0.0.0.0`) on port 8000; open `http://localhost:8000/` in a browser.
+
+### Running under Apache mod_fcgid
+
+The `run.fcgi` entry point is used by Apache. Touch it to reload the app
+after code changes:
+
+```
+touch run.fcgi
+```
+
+## Project layout
+
+```
+web_view.py              Flask application and routes
+add_links/
+  api.py                 Wikipedia API client (read, search, diff)
+  match.py               Link-finding and wikitext editing logic
+  core.py                Search and candidate ranking
+  mediawiki_oauth.py     OAuth session management
+  mediawiki_api.py       Authenticated write API calls
+templates/               Jinja2 HTML templates
+static/                  CSS and assets
+```
diff --git a/add_links/api.py b/add_links/api.py
index 6f57def..0742fe2 100644
--- a/add_links/api.py
+++ b/add_links/api.py
@@ -98,7 +98,7 @@ def api_get(params: StrDict) -> StrDict:
if webpage_error in r.text:
raise MediawikiError(webpage_error)
if r.status_code == 429:
- raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.")
+ raise MediawikiError(r.text)
raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}")
check_for_error(ret)
return ret
diff --git a/add_links/match.py b/add_links/match.py
index a3440e7..732e3ba 100644
--- a/add_links/match.py
+++ b/add_links/match.py
@@ -205,7 +205,11 @@ def match_found(m: re.Match[str], q: str, linkto: str | None) -> str:
if pos == 0 or m.string[pos - 1] == "\n":
replacement = replacement[0].upper() + replacement[1:]
if linkto:
- if linkto[0].isupper() and replacement[0] == linkto[0].lower():
+ if (
+ linkto[0].isupper()
+ and replacement[0].islower()
+ and not is_title_case(replacement)
+ ):
linkto = linkto[0].lower() + linkto[1:]
elif replacement[0].isupper():
linkto = linkto[0].upper() + linkto[1:]
diff --git a/templates/error.html b/templates/error.html
index c1a5018..0120bec 100644
--- a/templates/error.html
+++ b/templates/error.html
@@ -8,7 +8,7 @@
Something went wrong
-
{{ message }}
+
{{ message | e | replace('\n', '
\n') | safe }}
Back to home
diff --git a/test_api.py b/test_api.py
new file mode 100644
index 0000000..14968ca
--- /dev/null
+++ b/test_api.py
@@ -0,0 +1,30 @@
+import unittest
+from unittest.mock import Mock, patch
+
+from simplejson.scanner import JSONDecodeError
+
+from add_links import api
+
+
+class ApiGetTests(unittest.TestCase):
+ def test_429_error_preserves_full_message(self) -> None:  # the HTTP 429 body must reach the exception text unchanged
+ response = Mock()
+ response.status_code = 429
+ response.text = (
+ "Too many requests. If you are a tool operator, contact "
+ "noc@example.org for help."
+ )
+ response.json.side_effect = JSONDecodeError("bad json", "", 0)  # response.json() raises, as for a non-JSON body
+
+ session = Mock()
+ session.get.return_value = response
+
+ with patch("add_links.api._get_active_session", return_value=session):  # stub the session factory; presumably api_get fetches its session here
+ with self.assertRaises(api.MediawikiError) as ctx:
+ api.api_get({"action": "query"})
+
+ self.assertEqual(str(ctx.exception), response.text)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/test_match.py b/test_match.py
new file mode 100644
index 0000000..c50470e
--- /dev/null
+++ b/test_match.py
@@ -0,0 +1,75 @@
+import unittest
+
+from add_links.match import NoMatch, find_link_in_content
+
+
+class FindLinkInContentTests(unittest.TestCase):
+ def test_links_first_of_two_adjacent_existing_links(self) -> None:  # title straddles two adjacent links; only the first is re-targeted
+ content = (
+ "'''''North Star''''' is a 1974 British [[thriller novel]] by "
+ "[[Hammond Innes]].[Vinson & Kirkpatrick p.455] "
+ "A man tries to prevent a plot to blow up a [[North Sea]] [[oil rig]]."
+ )
+
+ new_content, replacement, replaced_text = find_link_in_content(
+ "North Sea oil", content
+ )
+
+ self.assertEqual(
+ new_content,
+ "'''''North Star''''' is a 1974 British [[thriller novel]] by "
+ "[[Hammond Innes]].[Vinson & Kirkpatrick p.455] "
+ "A man tries to prevent a plot to blow up a "
+ "[[North Sea oil|North Sea]] [[oil rig]].",
+ )
+ self.assertEqual(replacement, "North Sea oil")
+ self.assertEqual(replaced_text, "North Sea]] [[oil")  # the matched span crosses the boundary between the two links
+
+ def test_merges_existing_link_with_following_plain_text(self) -> None:  # a link plus a trailing plain word becomes one wider link
+ content = "[[anti-globalization]] movement"
+
+ new_content, replacement, replaced_text = find_link_in_content(
+ "anti-globalization movement", content
+ )
+
+ self.assertEqual(new_content, "[[anti-globalization movement]]")
+ self.assertEqual(replacement, "anti-globalization movement")
+ self.assertEqual(replaced_text, "anti-globalization]] movement")
+
+ def test_links_prefix_before_existing_link(self) -> None:  # plain word before a link: the plain prefix becomes a piped link
+ content = "cross-platform [[interchange station]]"
+
+ new_content, replacement, replaced_text = find_link_in_content(
+ "cross-platform interchange", content
+ )
+
+ self.assertEqual(
+ new_content,
+ "[[cross-platform interchange|cross-platform]] [[interchange station]]",
+ )
+ self.assertEqual(replacement, "cross-platform interchange")
+ self.assertEqual(replaced_text, "cross-platform [[interchange")
+
+ def test_lowercases_redirect_target_for_sentence_case_display_text(self) -> None:  # third argument is the link target; expected output lowercases its first letter
+ content = (
+ "The absence of a voters' roll requirement was controversial, with "
+ "the NCA alleging potential rigging through ballot stuffing and "
+ "coercion in rural areas."
+ )
+
+ new_content, replacement, replaced_text = find_link_in_content(
+ "ballot stuffing", content, "Electoral fraud"
+ )
+
+ self.assertEqual(
+ new_content,
+ "The absence of a voters' roll requirement was controversial, with "
+ "the NCA alleging potential rigging through "
+ "[[electoral fraud|ballot stuffing]] and coercion in rural areas.",
+ )
+ self.assertEqual(replacement, "electoral fraud|ballot stuffing")  # replacement is the link body without the [[ ]] brackets
+ self.assertEqual(replaced_text, "ballot stuffing")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/web_view.py b/web_view.py
index ae2564d..6f7c9c5 100755
--- a/web_view.py
+++ b/web_view.py
@@ -37,6 +37,17 @@ class Hit(typing.TypedDict):
timestamp: str
+def render_error(message: str) -> str:
+ """Render the shared error.html template with message and return the result."""
+ return flask.render_template("error.html", message=message)
+
+
+def render_mediawiki_error(error: Exception, *, prefix: str | None = None) -> str:
+ """Render str(error) via render_error, formatted as `prefix: error` when prefix is non-empty."""
+ message = f"{prefix}: {error}" if prefix else str(error)
+ return render_error(message)
+
+
def load_examples() -> list[dict[str, str | int]]:
"""Load examples."""
return [json.loads(line) for line in open("examples")]
@@ -203,7 +214,7 @@ def start_oauth() -> Response:
fetch_response = oauth.fetch_request_token(request_token_url)
except TokenRequestDenied as e:
return flask.make_response(
- flask.render_template("error.html", message=str(e)), 502
+ render_error(str(e)), 502
)
flask.session["owner_key"] = fetch_response.get("oauth_token")
@@ -222,13 +233,16 @@ def start_oauth() -> Response:
def oauth_callback() -> werkzeug.wrappers.response.Response:
"""Oauth callback."""
client_key = app.config["CLIENT_KEY"]
+ if "owner_key" not in flask.session or "owner_secret" not in flask.session:
+ return flask.redirect(flask.url_for("start_oauth"))
+
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
- resource_owner_key=flask.session.get("owner_key"),
- resource_owner_secret=flask.session.get("owner_secret"),
+ resource_owner_key=flask.session["owner_key"],
+ resource_owner_secret=flask.session["owner_secret"],
)
oauth_response = oauth.parse_authorization_response(flask.request.url)
@@ -322,7 +336,7 @@ def handle_post(url_title: str) -> Response:
return flask.redirect(flask.url_for("start_oauth", next=next_url))
except (mediawiki_api.APIError, api.MediawikiError) as e:
return flask.make_response(
- flask.render_template("error.html", message=f"Save failed: {e}"), 502
+ render_mediawiki_error(e, prefix="Save failed"), 502
)
flask.session["saves"] = flask.session.get("saves", 0) + 1
saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {})
@@ -354,7 +368,7 @@ def article_page(url_title: str) -> str | Response:
_no_link_count, hits = search_no_link(from_title, redirect_to)
except api.MediawikiError as e:
return flask.make_response(
- flask.render_template("error.html", message=str(e)), 502
+ render_mediawiki_error(e), 502
)
# Filter out candidates already processed this session