Compare commits

...

6 commits

Author SHA1 Message Date
eecbd3cfb8 Escape error messages on error page 2026-05-14 10:34:19 +01:00
728020d342 Lowercase redirect targets for sentence-case link text 2026-05-14 10:18:43 +01:00
3ba7eaefd0 Show full Wikipedia 429 error message 2026-05-14 10:15:19 +01:00
fd59e2a9a4 Guard oauth callback against missing session tokens 2026-05-14 10:14:59 +01:00
626afe487c Render newlines in error messages as <br> instead of literal \n
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 17:04:08 +01:00
9289cd1450 Add README.md
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 15:22:53 +01:00
7 changed files with 202 additions and 8 deletions

71
README.md Normal file
View file

@ -0,0 +1,71 @@
# Missing Link
A Wikipedia tool that finds articles which mention a topic by name but don't
link to it, then lets you add the missing wikilink directly from your browser.
Live at: https://edwardbetts.com/missinglink/
## How it works
1. Enter a Wikipedia article title.
2. The tool searches Wikipedia for articles that mention the title but don't
include a wikilink to it.
3. For each candidate, a diff shows the proposed edit in context.
4. Save the edit to Wikipedia with one click (requires a Wikipedia account),
or skip to the next candidate.
Redirect targets are handled automatically — if the title is a redirect,
the tool produces a piped link (e.g. `[[Possession of stolen goods|handling stolen goods]]`)
and excludes articles that already link to the redirect target.
## Setup
### Dependencies
```
pip install flask requests requests-oauthlib simplejson flipflop
```
### Configuration
Copy `config/default.py` and add your Wikipedia OAuth credentials:
```python
SECRET_KEY = 'your-flask-secret-key'
CLIENT_KEY = 'your-oauth-consumer-key'
CLIENT_SECRET = 'your-oauth-consumer-secret'
```
Register an OAuth 1.0a consumer at
https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose
### Running locally
```
python web_view.py
```
The app listens on `http://0.0.0.0:8000`.
### Running under Apache mod_fcgid
The `run.fcgi` entry point is used by Apache. Touch it to reload the app
after code changes:
```
touch run.fcgi
```
## Project layout
```
web_view.py Flask application and routes
add_links/
api.py Wikipedia API client (read, search, diff)
match.py Link-finding and wikitext editing logic
core.py Search and candidate ranking
mediawiki_oauth.py OAuth session management
mediawiki_api.py Authenticated write API calls
templates/ Jinja2 HTML templates
static/ CSS and assets
```

View file

@ -98,7 +98,7 @@ def api_get(params: StrDict) -> StrDict:
if webpage_error in r.text:
raise MediawikiError(webpage_error)
if r.status_code == 429:
raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.")
raise MediawikiError(r.text)
raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}")
check_for_error(ret)
return ret

View file

@ -205,7 +205,11 @@ def match_found(m: re.Match[str], q: str, linkto: str | None) -> str:
if pos == 0 or m.string[pos - 1] == "\n":
replacement = replacement[0].upper() + replacement[1:]
if linkto:
if linkto[0].isupper() and replacement[0] == linkto[0].lower():
if (
linkto[0].isupper()
and replacement[0].islower()
and not is_title_case(replacement)
):
linkto = linkto[0].lower() + linkto[1:]
elif replacement[0].isupper():
linkto = linkto[0].upper() + linkto[1:]

View file

@ -8,7 +8,7 @@
<div class="col-md-7">
<div class="alert alert-danger">
<h4 class="alert-heading">Something went wrong</h4>
<p class="mb-0"><code>{{ message }}</code></p>
{# Escape the message first, then turn newlines into line breaks. Only the <br> replacement is marked safe: replace on an already-escaped value escapes its arguments, so an unmarked '<br>' would be shown as literal text. #}
<p class="mb-0"><code>{{ message | e | replace('\n', '<br>\n' | safe) }}</code></p>
</div>
<a href="{{ url_for('index') }}" class="btn btn-outline-secondary btn-sm">Back to home</a>
</div>

30
test_api.py Normal file
View file

@ -0,0 +1,30 @@
import unittest
from unittest.mock import Mock, patch
from simplejson.scanner import JSONDecodeError
from add_links import api
class ApiGetTests(unittest.TestCase):
    """Tests for error handling in ``api.api_get``."""

    def test_429_error_preserves_full_message(self) -> None:
        """A 429 reply with a non-JSON body surfaces the whole body text."""
        body = (
            "Too many requests. If you are a tool operator, contact "
            "noc@example.org for help."
        )

        # Fake HTTP response: rate-limited, and its body does not parse as JSON.
        fake_response = Mock()
        fake_response.status_code = 429
        fake_response.text = body
        fake_response.json.side_effect = JSONDecodeError("bad json", "", 0)

        fake_session = Mock()
        fake_session.get.return_value = fake_response

        session_patch = patch(
            "add_links.api._get_active_session", return_value=fake_session
        )
        with session_patch, self.assertRaises(api.MediawikiError) as raised:
            api.api_get({"action": "query"})

        self.assertEqual(str(raised.exception), body)
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

75
test_match.py Normal file
View file

@ -0,0 +1,75 @@
import unittest
from add_links.match import NoMatch, find_link_in_content
class FindLinkInContentTests(unittest.TestCase):
    """Tests for ``find_link_in_content`` covering matches that touch existing links."""

    def test_links_first_of_two_adjacent_existing_links(self) -> None:
        """The phrase spans two adjacent links; only the first one is retargeted."""
        wikitext = (
            "'''''North Star''''' is a 1974 British [[thriller novel]] by "
            "[[Hammond Innes]].<ref>Vinson & Kirkpatrick p.455</ref> "
            "A man tries to prevent a plot to blow up a [[North Sea]] [[oil rig]]."
        )
        expected = (
            "'''''North Star''''' is a 1974 British [[thriller novel]] by "
            "[[Hammond Innes]].<ref>Vinson & Kirkpatrick p.455</ref> "
            "A man tries to prevent a plot to blow up a "
            "[[North Sea oil|North Sea]] [[oil rig]]."
        )

        updated, link_body, matched = find_link_in_content("North Sea oil", wikitext)

        self.assertEqual(updated, expected)
        self.assertEqual(link_body, "North Sea oil")
        self.assertEqual(matched, "North Sea]] [[oil")

    def test_merges_existing_link_with_following_plain_text(self) -> None:
        """An existing link followed by plain text is widened to cover the phrase."""
        wikitext = "[[anti-globalization]] movement"

        updated, link_body, matched = find_link_in_content(
            "anti-globalization movement", wikitext
        )

        self.assertEqual(updated, "[[anti-globalization movement]]")
        self.assertEqual(link_body, "anti-globalization movement")
        self.assertEqual(matched, "anti-globalization]] movement")

    def test_links_prefix_before_existing_link(self) -> None:
        """Plain text before an existing link becomes a piped link of its own."""
        wikitext = "cross-platform [[interchange station]]"
        expected = (
            "[[cross-platform interchange|cross-platform]] [[interchange station]]"
        )

        updated, link_body, matched = find_link_in_content(
            "cross-platform interchange", wikitext
        )

        self.assertEqual(updated, expected)
        self.assertEqual(link_body, "cross-platform interchange")
        self.assertEqual(matched, "cross-platform [[interchange")

    def test_lowercases_redirect_target_for_sentence_case_display_text(self) -> None:
        """A capitalised redirect target is lowercased to match lowercase link text."""
        wikitext = (
            "The absence of a voters' roll requirement was controversial, with "
            "the NCA alleging potential rigging through ballot stuffing and "
            "coercion in rural areas."
        )
        expected = (
            "The absence of a voters' roll requirement was controversial, with "
            "the NCA alleging potential rigging through "
            "[[electoral fraud|ballot stuffing]] and coercion in rural areas."
        )

        updated, link_body, matched = find_link_in_content(
            "ballot stuffing", wikitext, "Electoral fraud"
        )

        self.assertEqual(updated, expected)
        self.assertEqual(link_body, "electoral fraud|ballot stuffing")
        self.assertEqual(matched, "ballot stuffing")
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

View file

@ -37,6 +37,17 @@ class Hit(typing.TypedDict):
timestamp: str
def render_error(message: str) -> str:
    """Render the shared ``error.html`` template with *message* and return it."""
    page = flask.render_template("error.html", message=message)
    return page
def render_mediawiki_error(error: Exception, *, prefix: str | None = None) -> str:
    """Render the shared error page for *error*.

    A truthy *prefix* is shown before the error text as ``"<prefix>: <error>"``;
    with no prefix (or an empty one) the page shows ``str(error)`` alone.
    """
    if not prefix:
        return render_error(str(error))
    return render_error(f"{prefix}: {error}")
def load_examples() -> list[dict[str, str | int]]:
"""Load examples."""
return [json.loads(line) for line in open("examples")]
@ -203,7 +214,7 @@ def start_oauth() -> Response:
fetch_response = oauth.fetch_request_token(request_token_url)
except TokenRequestDenied as e:
return flask.make_response(
flask.render_template("error.html", message=str(e)), 502
render_error(str(e)), 502
)
flask.session["owner_key"] = fetch_response.get("oauth_token")
@ -222,13 +233,16 @@ def start_oauth() -> Response:
def oauth_callback() -> werkzeug.wrappers.response.Response:
"""Oauth callback."""
client_key = app.config["CLIENT_KEY"]
if "owner_key" not in flask.session or "owner_secret" not in flask.session:
return flask.redirect(flask.url_for("start_oauth"))
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
client_key,
client_secret=client_secret,
resource_owner_key=flask.session.get("owner_key"),
resource_owner_secret=flask.session.get("owner_secret"),
resource_owner_key=flask.session["owner_key"],
resource_owner_secret=flask.session["owner_secret"],
)
oauth_response = oauth.parse_authorization_response(flask.request.url)
@ -322,7 +336,7 @@ def handle_post(url_title: str) -> Response:
return flask.redirect(flask.url_for("start_oauth", next=next_url))
except (mediawiki_api.APIError, api.MediawikiError) as e:
return flask.make_response(
flask.render_template("error.html", message=f"Save failed: {e}"), 502
render_mediawiki_error(e, prefix="Save failed"), 502
)
flask.session["saves"] = flask.session.get("saves", 0) + 1
saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {})
@ -354,7 +368,7 @@ def article_page(url_title: str) -> str | Response:
_no_link_count, hits = search_no_link(from_title, redirect_to)
except api.MediawikiError as e:
return flask.make_response(
flask.render_template("error.html", message=str(e)), 502
render_mediawiki_error(e), 502
)
# Filter out candidates already processed this session