Compare commits
6 commits
5ba380d590
...
eecbd3cfb8
| Author | SHA1 | Date | |
|---|---|---|---|
| eecbd3cfb8 | |||
| 728020d342 | |||
| 3ba7eaefd0 | |||
| fd59e2a9a4 | |||
| 626afe487c | |||
| 9289cd1450 |
7 changed files with 202 additions and 8 deletions
71
README.md
Normal file
71
README.md
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
# Missing Link
|
||||
|
||||
A Wikipedia tool that finds articles which mention a topic by name but don't
|
||||
link to it, then lets you add the missing wikilink directly from your browser.
|
||||
|
||||
Live at: https://edwardbetts.com/missinglink/
|
||||
|
||||
## How it works
|
||||
|
||||
1. Enter a Wikipedia article title.
|
||||
2. The tool searches Wikipedia for articles that mention the title but don't
|
||||
include a wikilink to it.
|
||||
3. For each candidate, a diff shows the proposed edit in context.
|
||||
4. Save the edit to Wikipedia with one click (requires a Wikipedia account),
|
||||
or skip to the next candidate.
|
||||
|
||||
Redirect targets are handled automatically — if the title is a redirect,
|
||||
the tool produces a piped link (e.g. `[[Possession of stolen goods|handling stolen goods]]`)
|
||||
and excludes articles that already link to the redirect target.
|
||||
|
||||
## Setup
|
||||
|
||||
### Dependencies
|
||||
|
||||
```
|
||||
pip install flask requests requests-oauthlib simplejson flipflop
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
Copy `config/default.py` and add your Wikipedia OAuth credentials:
|
||||
|
||||
```python
|
||||
SECRET_KEY = 'your-flask-secret-key'
|
||||
CLIENT_KEY = 'your-oauth-consumer-key'
|
||||
CLIENT_SECRET = 'your-oauth-consumer-secret'
|
||||
```
|
||||
|
||||
Register an OAuth 1.0a consumer at
|
||||
https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose
|
||||
|
||||
### Running locally
|
||||
|
||||
```
|
||||
python web_view.py
|
||||
```
|
||||
|
||||
The app listens on `http://0.0.0.0:8000`.
|
||||
|
||||
### Running under Apache mod_fcgid
|
||||
|
||||
The `run.fcgi` entry point is used by Apache. Touch it to reload the app
|
||||
after code changes:
|
||||
|
||||
```
|
||||
touch run.fcgi
|
||||
```
|
||||
|
||||
## Project layout
|
||||
|
||||
```
|
||||
web_view.py Flask application and routes
|
||||
add_links/
|
||||
api.py Wikipedia API client (read, search, diff)
|
||||
match.py Link-finding and wikitext editing logic
|
||||
core.py Search and candidate ranking
|
||||
mediawiki_oauth.py OAuth session management
|
||||
mediawiki_api.py Authenticated write API calls
|
||||
templates/ Jinja2 HTML templates
|
||||
static/ CSS and assets
|
||||
```
|
||||
|
|
@ -98,7 +98,7 @@ def api_get(params: StrDict) -> StrDict:
|
|||
if webpage_error in r.text:
|
||||
raise MediawikiError(webpage_error)
|
||||
if r.status_code == 429:
|
||||
raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.")
|
||||
raise MediawikiError(r.text)
|
||||
raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}")
|
||||
check_for_error(ret)
|
||||
return ret
|
||||
|
|
|
|||
|
|
@ -205,7 +205,11 @@ def match_found(m: re.Match[str], q: str, linkto: str | None) -> str:
|
|||
if pos == 0 or m.string[pos - 1] == "\n":
|
||||
replacement = replacement[0].upper() + replacement[1:]
|
||||
if linkto:
|
||||
if linkto[0].isupper() and replacement[0] == linkto[0].lower():
|
||||
if (
|
||||
linkto[0].isupper()
|
||||
and replacement[0].islower()
|
||||
and not is_title_case(replacement)
|
||||
):
|
||||
linkto = linkto[0].lower() + linkto[1:]
|
||||
elif replacement[0].isupper():
|
||||
linkto = linkto[0].upper() + linkto[1:]
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
<div class="col-md-7">
|
||||
<div class="alert alert-danger">
|
||||
<h4 class="alert-heading">Something went wrong</h4>
|
||||
<p class="mb-0"><code>{{ message }}</code></p>
|
||||
<p class="mb-0"><code>{{ message | e | replace('\n', '<br>\n') | safe }}</code></p>
|
||||
</div>
|
||||
<a href="{{ url_for('index') }}" class="btn btn-outline-secondary btn-sm">Back to home</a>
|
||||
</div>
|
||||
|
|
|
|||
30
test_api.py
Normal file
30
test_api.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import unittest
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
from simplejson.scanner import JSONDecodeError
|
||||
|
||||
from add_links import api
|
||||
|
||||
|
||||
class ApiGetTests(unittest.TestCase):
    """Exercise ``api.api_get`` against a stubbed HTTP session."""

    def test_429_error_preserves_full_message(self) -> None:
        """A 429 reply whose body is not JSON is raised with its full text."""
        fake_response = Mock(
            status_code=429,
            text=(
                "Too many requests. If you are a tool operator, contact "
                "noc@example.org for help."
            ),
        )
        # Force the JSON-decoding path to fail so the error branch is taken.
        fake_response.json.side_effect = JSONDecodeError("bad json", "", 0)

        fake_session = Mock()
        fake_session.get.return_value = fake_response

        session_patch = patch(
            "add_links.api._get_active_session", return_value=fake_session
        )
        with session_patch, self.assertRaises(api.MediawikiError) as ctx:
            api.api_get({"action": "query"})

        self.assertEqual(str(ctx.exception), fake_response.text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
75
test_match.py
Normal file
75
test_match.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import unittest
|
||||
|
||||
from add_links.match import NoMatch, find_link_in_content
|
||||
|
||||
|
||||
class FindLinkInContentTests(unittest.TestCase):
    """Check the wikitext edits produced by ``find_link_in_content``."""

    def test_links_first_of_two_adjacent_existing_links(self) -> None:
        """The phrase spans two neighbouring links; only the first is piped."""
        wikitext = (
            "'''''North Star''''' is a 1974 British [[thriller novel]] by "
            "[[Hammond Innes]].<ref>Vinson & Kirkpatrick p.455</ref> "
            "A man tries to prevent a plot to blow up a [[North Sea]] [[oil rig]]."
        )
        expected_wikitext = (
            "'''''North Star''''' is a 1974 British [[thriller novel]] by "
            "[[Hammond Innes]].<ref>Vinson & Kirkpatrick p.455</ref> "
            "A man tries to prevent a plot to blow up a "
            "[[North Sea oil|North Sea]] [[oil rig]]."
        )

        result = find_link_in_content("North Sea oil", wikitext)
        updated, link_text, matched_text = result

        self.assertEqual(updated, expected_wikitext)
        self.assertEqual(link_text, "North Sea oil")
        self.assertEqual(matched_text, "North Sea]] [[oil")

    def test_merges_existing_link_with_following_plain_text(self) -> None:
        """An existing link followed by plain text becomes one wider link."""
        wikitext = "[[anti-globalization]] movement"

        result = find_link_in_content("anti-globalization movement", wikitext)
        updated, link_text, matched_text = result

        self.assertEqual(updated, "[[anti-globalization movement]]")
        self.assertEqual(link_text, "anti-globalization movement")
        self.assertEqual(matched_text, "anti-globalization]] movement")

    def test_links_prefix_before_existing_link(self) -> None:
        """Plain text ahead of an existing link gets its own piped link."""
        wikitext = "cross-platform [[interchange station]]"
        expected_wikitext = (
            "[[cross-platform interchange|cross-platform]] [[interchange station]]"
        )

        result = find_link_in_content("cross-platform interchange", wikitext)
        updated, link_text, matched_text = result

        self.assertEqual(updated, expected_wikitext)
        self.assertEqual(link_text, "cross-platform interchange")
        self.assertEqual(matched_text, "cross-platform [[interchange")

    def test_lowercases_redirect_target_for_sentence_case_display_text(self) -> None:
        """A capitalised link target is lower-cased for mid-sentence text."""
        wikitext = (
            "The absence of a voters' roll requirement was controversial, with "
            "the NCA alleging potential rigging through ballot stuffing and "
            "coercion in rural areas."
        )
        expected_wikitext = (
            "The absence of a voters' roll requirement was controversial, with "
            "the NCA alleging potential rigging through "
            "[[electoral fraud|ballot stuffing]] and coercion in rural areas."
        )

        result = find_link_in_content("ballot stuffing", wikitext, "Electoral fraud")
        updated, link_text, matched_text = result

        self.assertEqual(updated, expected_wikitext)
        self.assertEqual(link_text, "electoral fraud|ballot stuffing")
        self.assertEqual(matched_text, "ballot stuffing")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
24
web_view.py
24
web_view.py
|
|
@ -37,6 +37,17 @@ class Hit(typing.TypedDict):
|
|||
timestamp: str
|
||||
|
||||
|
||||
def render_error(message: str) -> str:
    """Return the shared ``error.html`` page rendered with *message*."""
    template_context = {"message": message}
    return flask.render_template("error.html", **template_context)
|
||||
|
||||
|
||||
def render_mediawiki_error(error: Exception, *, prefix: str | None = None) -> str:
    """Render *error* on the shared error page.

    When *prefix* is truthy the message reads ``"<prefix>: <error>"``;
    otherwise it is just ``str(error)``.
    """
    if prefix:
        text = f"{prefix}: {error}"
    else:
        text = str(error)
    return render_error(text)
|
||||
|
||||
|
||||
def load_examples() -> list[dict[str, str | int]]:
|
||||
"""Load examples."""
|
||||
return [json.loads(line) for line in open("examples")]
|
||||
|
|
@ -203,7 +214,7 @@ def start_oauth() -> Response:
|
|||
fetch_response = oauth.fetch_request_token(request_token_url)
|
||||
except TokenRequestDenied as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=str(e)), 502
|
||||
render_error(str(e)), 502
|
||||
)
|
||||
|
||||
flask.session["owner_key"] = fetch_response.get("oauth_token")
|
||||
|
|
@ -222,13 +233,16 @@ def start_oauth() -> Response:
|
|||
def oauth_callback() -> werkzeug.wrappers.response.Response:
|
||||
"""Oauth callback."""
|
||||
client_key = app.config["CLIENT_KEY"]
|
||||
if "owner_key" not in flask.session or "owner_secret" not in flask.session:
|
||||
return flask.redirect(flask.url_for("start_oauth"))
|
||||
|
||||
client_secret = app.config["CLIENT_SECRET"]
|
||||
|
||||
oauth = OAuth1Session(
|
||||
client_key,
|
||||
client_secret=client_secret,
|
||||
resource_owner_key=flask.session.get("owner_key"),
|
||||
resource_owner_secret=flask.session.get("owner_secret"),
|
||||
resource_owner_key=flask.session["owner_key"],
|
||||
resource_owner_secret=flask.session["owner_secret"],
|
||||
)
|
||||
|
||||
oauth_response = oauth.parse_authorization_response(flask.request.url)
|
||||
|
|
@ -322,7 +336,7 @@ def handle_post(url_title: str) -> Response:
|
|||
return flask.redirect(flask.url_for("start_oauth", next=next_url))
|
||||
except (mediawiki_api.APIError, api.MediawikiError) as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=f"Save failed: {e}"), 502
|
||||
render_mediawiki_error(e, prefix="Save failed"), 502
|
||||
)
|
||||
flask.session["saves"] = flask.session.get("saves", 0) + 1
|
||||
saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {})
|
||||
|
|
@ -354,7 +368,7 @@ def article_page(url_title: str) -> str | Response:
|
|||
_no_link_count, hits = search_no_link(from_title, redirect_to)
|
||||
except api.MediawikiError as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=str(e)), 502
|
||||
render_mediawiki_error(e), 502
|
||||
)
|
||||
|
||||
# Filter out candidates already processed this session
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue