diff --git a/README.md b/README.md deleted file mode 100644 index 73cb212..0000000 --- a/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# Missing Link - -A Wikipedia tool that finds articles which mention a topic by name but don't -link to it, then lets you add the missing wikilink directly from your browser. - -Live at: https://edwardbetts.com/missinglink/ - -## How it works - -1. Enter a Wikipedia article title. -2. The tool searches Wikipedia for articles that mention the title but don't - include a wikilink to it. -3. For each candidate, a diff shows the proposed edit in context. -4. Save the edit to Wikipedia with one click (requires a Wikipedia account), - or skip to the next candidate. - -Redirect targets are handled automatically — if the title is a redirect, -the tool produces a piped link (e.g. `[[Possession of stolen goods|handling stolen goods]]`) -and excludes articles that already link to the redirect target. - -## Setup - -### Dependencies - -``` -pip install flask requests requests-oauthlib simplejson flipflop -``` - -### Configuration - -Copy `config/default.py` and add your Wikipedia OAuth credentials: - -```python -SECRET_KEY = 'your-flask-secret-key' -CLIENT_KEY = 'your-oauth-consumer-key' -CLIENT_SECRET = 'your-oauth-consumer-secret' -``` - -Register an OAuth 1.0a consumer at -https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration/propose - -### Running locally - -``` -python web_view.py -``` - -The app listens on `http://0.0.0.0:8000`. - -### Running under Apache mod_fcgid - -The `run.fcgi` entry point is used by Apache. 
Touch it to reload the app -after code changes: - -``` -touch run.fcgi -``` - -## Project layout - -``` -web_view.py Flask application and routes -add_links/ - api.py Wikipedia API client (read, search, diff) - match.py Link-finding and wikitext editing logic - core.py Search and candidate ranking - mediawiki_oauth.py OAuth session management - mediawiki_api.py Authenticated write API calls -templates/ Jinja2 HTML templates -static/ CSS and assets -``` diff --git a/add_links/api.py b/add_links/api.py index 0742fe2..6f57def 100644 --- a/add_links/api.py +++ b/add_links/api.py @@ -98,7 +98,7 @@ def api_get(params: StrDict) -> StrDict: if webpage_error in r.text: raise MediawikiError(webpage_error) if r.status_code == 429: - raise MediawikiError(r.text) + raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.") raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}") check_for_error(ret) return ret diff --git a/add_links/match.py b/add_links/match.py index 732e3ba..a3440e7 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -205,11 +205,7 @@ def match_found(m: re.Match[str], q: str, linkto: str | None) -> str: if pos == 0 or m.string[pos - 1] == "\n": replacement = replacement[0].upper() + replacement[1:] if linkto: - if ( - linkto[0].isupper() - and replacement[0].islower() - and not is_title_case(replacement) - ): + if linkto[0].isupper() and replacement[0] == linkto[0].lower(): linkto = linkto[0].lower() + linkto[1:] elif replacement[0].isupper(): linkto = linkto[0].upper() + linkto[1:] diff --git a/templates/error.html b/templates/error.html index 0120bec..c1a5018 100644 --- a/templates/error.html +++ b/templates/error.html @@ -8,7 +8,7 @@

Something went wrong

-

{{ message | e | replace('\n', '<br>\n') | safe }}

+

{{ message }}

Back to home
diff --git a/test_api.py b/test_api.py deleted file mode 100644 index 14968ca..0000000 --- a/test_api.py +++ /dev/null @@ -1,30 +0,0 @@ -import unittest -from unittest.mock import Mock, patch - -from simplejson.scanner import JSONDecodeError - -from add_links import api - - -class ApiGetTests(unittest.TestCase): - def test_429_error_preserves_full_message(self) -> None: - response = Mock() - response.status_code = 429 - response.text = ( - "Too many requests. If you are a tool operator, contact " - "noc@example.org for help." - ) - response.json.side_effect = JSONDecodeError("bad json", "", 0) - - session = Mock() - session.get.return_value = response - - with patch("add_links.api._get_active_session", return_value=session): - with self.assertRaises(api.MediawikiError) as ctx: - api.api_get({"action": "query"}) - - self.assertEqual(str(ctx.exception), response.text) - - -if __name__ == "__main__": - unittest.main() diff --git a/test_match.py b/test_match.py deleted file mode 100644 index c50470e..0000000 --- a/test_match.py +++ /dev/null @@ -1,75 +0,0 @@ -import unittest - -from add_links.match import NoMatch, find_link_in_content - - -class FindLinkInContentTests(unittest.TestCase): - def test_links_first_of_two_adjacent_existing_links(self) -> None: - content = ( - "'''''North Star''''' is a 1974 British [[thriller novel]] by " - "[[Hammond Innes]].Vinson & Kirkpatrick p.455 " - "A man tries to prevent a plot to blow up a [[North Sea]] [[oil rig]]." 
- ) - - new_content, replacement, replaced_text = find_link_in_content( - "North Sea oil", content - ) - - self.assertEqual( - new_content, - "'''''North Star''''' is a 1974 British [[thriller novel]] by " - "[[Hammond Innes]].Vinson & Kirkpatrick p.455 " - "A man tries to prevent a plot to blow up a " - "[[North Sea oil|North Sea]] [[oil rig]].", - ) - self.assertEqual(replacement, "North Sea oil") - self.assertEqual(replaced_text, "North Sea]] [[oil") - - def test_merges_existing_link_with_following_plain_text(self) -> None: - content = "[[anti-globalization]] movement" - - new_content, replacement, replaced_text = find_link_in_content( - "anti-globalization movement", content - ) - - self.assertEqual(new_content, "[[anti-globalization movement]]") - self.assertEqual(replacement, "anti-globalization movement") - self.assertEqual(replaced_text, "anti-globalization]] movement") - - def test_links_prefix_before_existing_link(self) -> None: - content = "cross-platform [[interchange station]]" - - new_content, replacement, replaced_text = find_link_in_content( - "cross-platform interchange", content - ) - - self.assertEqual( - new_content, - "[[cross-platform interchange|cross-platform]] [[interchange station]]", - ) - self.assertEqual(replacement, "cross-platform interchange") - self.assertEqual(replaced_text, "cross-platform [[interchange") - - def test_lowercases_redirect_target_for_sentence_case_display_text(self) -> None: - content = ( - "The absence of a voters' roll requirement was controversial, with " - "the NCA alleging potential rigging through ballot stuffing and " - "coercion in rural areas." 
- ) - - new_content, replacement, replaced_text = find_link_in_content( - "ballot stuffing", content, "Electoral fraud" - ) - - self.assertEqual( - new_content, - "The absence of a voters' roll requirement was controversial, with " - "the NCA alleging potential rigging through " - "[[electoral fraud|ballot stuffing]] and coercion in rural areas.", - ) - self.assertEqual(replacement, "electoral fraud|ballot stuffing") - self.assertEqual(replaced_text, "ballot stuffing") - - -if __name__ == "__main__": - unittest.main() diff --git a/web_view.py b/web_view.py index 6f7c9c5..ae2564d 100755 --- a/web_view.py +++ b/web_view.py @@ -37,17 +37,6 @@ class Hit(typing.TypedDict): timestamp: str -def render_error(message: str) -> str: - """Render shared error page.""" - return flask.render_template("error.html", message=message) - - -def render_mediawiki_error(error: Exception, *, prefix: str | None = None) -> str: - """Render MediaWiki errors.""" - message = f"{prefix}: {error}" if prefix else str(error) - return render_error(message) - - def load_examples() -> list[dict[str, str | int]]: """Load examples.""" return [json.loads(line) for line in open("examples")] @@ -214,7 +203,7 @@ def start_oauth() -> Response: fetch_response = oauth.fetch_request_token(request_token_url) except TokenRequestDenied as e: return flask.make_response( - render_error(str(e)), 502 + flask.render_template("error.html", message=str(e)), 502 ) flask.session["owner_key"] = fetch_response.get("oauth_token") @@ -233,16 +222,13 @@ def start_oauth() -> Response: def oauth_callback() -> werkzeug.wrappers.response.Response: """Oauth callback.""" client_key = app.config["CLIENT_KEY"] - if "owner_key" not in flask.session or "owner_secret" not in flask.session: - return flask.redirect(flask.url_for("start_oauth")) - client_secret = app.config["CLIENT_SECRET"] oauth = OAuth1Session( client_key, client_secret=client_secret, - resource_owner_key=flask.session["owner_key"], - 
resource_owner_secret=flask.session["owner_secret"], + resource_owner_key=flask.session.get("owner_key"), + resource_owner_secret=flask.session.get("owner_secret"), ) oauth_response = oauth.parse_authorization_response(flask.request.url) @@ -336,7 +322,7 @@ def handle_post(url_title: str) -> Response: return flask.redirect(flask.url_for("start_oauth", next=next_url)) except (mediawiki_api.APIError, api.MediawikiError) as e: return flask.make_response( - render_mediawiki_error(e, prefix="Save failed"), 502 + flask.render_template("error.html", message=f"Save failed: {e}"), 502 ) flask.session["saves"] = flask.session.get("saves", 0) + 1 saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {}) @@ -368,7 +354,7 @@ def article_page(url_title: str) -> str | Response: _no_link_count, hits = search_no_link(from_title, redirect_to) except api.MediawikiError as e: return flask.make_response( - render_mediawiki_error(e), 502 + flask.render_template("error.html", message=str(e)), 502 ) # Filter out candidates already processed this session