Compare commits
8 commits
fe89db11bd
...
25056aaf33
| Author | SHA1 | Date | |
|---|---|---|---|
| 25056aaf33 | |||
| 2eef8f480f | |||
| c9b4e2face | |||
| 0239b83555 | |||
| 2c197f5c43 | |||
| bc6265d4cd | |||
| 7867122326 | |||
| fad1ef9e49 |
12 changed files with 394 additions and 155 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import sys
|
||||
import typing
|
||||
|
||||
import requests
|
||||
|
|
@ -72,18 +73,33 @@ webpage_error = (
|
|||
)
|
||||
|
||||
|
||||
def _get_active_session() -> requests.sessions.Session:
    """Pick the HTTP session to use for a Wikipedia API call.

    The OAuth session stored on Flask's ``g`` (attribute ``oauth_session``)
    is returned when it exists and is not None.  In every other case a
    warning is written to stderr and the result of ``get_session()`` is
    returned instead.
    """
    try:
        from flask import g

        # Only a missing attribute is turned into None here; a RuntimeError
        # from touching ``g`` (presumably: no Flask application context is
        # active, e.g. when run outside a request) is handled below.
        oauth_session = getattr(g, "oauth_session", None)
    except RuntimeError:
        oauth_session = None

    if oauth_session is not None:
        return oauth_session  # type: ignore[return-value]

    print("WARNING: using unauthenticated session", file=sys.stderr)
    return get_session()
|
||||
|
||||
|
||||
def api_get(params: StrDict) -> StrDict:
|
||||
"""Make call to Wikipedia API."""
|
||||
s = get_session()
|
||||
s = _get_active_session()
|
||||
|
||||
r = s.get(get_query_url(), params=params)
|
||||
try:
|
||||
ret: StrDict = r.json()
|
||||
except JSONDecodeError:
|
||||
print(f"API request failed: HTTP {r.status_code}", file=sys.stderr)
|
||||
print(f"Response body: {r.text!r}", file=sys.stderr)
|
||||
if webpage_error in r.text:
|
||||
raise MediawikiError(webpage_error)
|
||||
else:
|
||||
raise MediawikiError("unknown error")
|
||||
if r.status_code == 429:
|
||||
raise MediawikiError("Wikipedia rate limit exceeded — wait a moment and try again.")
|
||||
raise MediawikiError(f"HTTP {r.status_code}: {r.text[:200]!r}")
|
||||
check_for_error(ret)
|
||||
return ret
|
||||
|
||||
|
|
@ -271,7 +287,7 @@ def call_get_diff(title: str, section_num: int, section_text: str) -> str:
|
|||
"rvdifftotext": section_text.strip(),
|
||||
}
|
||||
|
||||
s = get_session()
|
||||
s = _get_active_session()
|
||||
r = s.post(get_query_url(), data=data)
|
||||
try:
|
||||
ret = r.json()
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ re_cite = re.compile(
|
|||
re.I | re.S,
|
||||
)
|
||||
|
||||
re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see)\b", re.I)
|
||||
re_cite_template_start = re.compile(r"\{\{(?:cite|citation|short description|gli|defn|annotated link|excerpt|main|see|for)\b", re.I)
|
||||
re_no_param_template = re.compile(r"\{\{[^|{}]+\}\}")
|
||||
re_external_link = re.compile(r"\[https?://[^\]]+\]")
|
||||
# Italic text (work titles in bibliographies). Handles apostrophes in content
|
||||
|
|
@ -252,6 +252,14 @@ def add_link(m: re.Match[str], replacement: str, text: str) -> str:
|
|||
if matched_text.startswith("[[") and matched_text.endswith("|"):
|
||||
return m.re.sub(lambda m: f"[[{replacement}|", text, count=1)
|
||||
|
||||
split_links = matched_text.find("]] [[")
|
||||
if split_links > 0 and m.start() >= 2 and text[m.start() - 2 : m.start()] == "[[":
|
||||
# Match starts inside one link and continues into the next opening link.
|
||||
# Link only the text from the first link span and leave the second link as-is.
|
||||
link_dest = replacement.split("|")[0] if "|" in replacement else replacement
|
||||
visible = matched_text[:split_links]
|
||||
return text[: m.start() - 2] + f"[[{link_dest}|{visible}]]" + text[m.start() + split_links + 2 :]
|
||||
|
||||
inner_bracket = matched_text.find("[[")
|
||||
if inner_bracket > 0:
|
||||
prefix = matched_text[:inner_bracket].rstrip()
|
||||
|
|
@ -551,4 +559,6 @@ def get_diff(q: str, title: str, linkto: str | None) -> dict[str, typing.Any]:
|
|||
)
|
||||
|
||||
found["diff"] = call_get_diff(title, found["section_num"], section_text)
|
||||
if not found["diff"]:
|
||||
raise NoMatch
|
||||
return found
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
"""Wikipedia OAuth."""
|
||||
|
||||
import sys
|
||||
import typing
|
||||
import urllib
|
||||
from typing import cast
|
||||
|
|
@ -73,9 +74,8 @@ def api_request(params: typing.Mapping[str, str | int]) -> dict[str, typing.Any]
|
|||
try:
|
||||
return cast(dict[str, typing.Any], r.json())
|
||||
except Exception:
|
||||
print("text")
|
||||
print(r.text)
|
||||
print("---")
|
||||
print(f"API request failed: HTTP {r.status_code}", file=sys.stderr)
|
||||
print(f"Response body: {r.text!r}", file=sys.stderr)
|
||||
raise
|
||||
|
||||
|
||||
|
|
@ -99,13 +99,40 @@ def userinfo_call() -> typing.Mapping[str, typing.Any]:
|
|||
return api_request(params)
|
||||
|
||||
|
||||
def get_oauth_session() -> OAuth1Session | None:
    """Return an OAuth1Session for the current user, or None if not logged in.

    The resource-owner key and secret are read from the Flask session
    (``owner_key`` / ``owner_secret``); the consumer key and secret come from
    the application config (``CLIENT_KEY`` / ``CLIENT_SECRET``).

    The returned session carries the module's User-Agent header and default
    query parameters (``format=json``, ``action=query``, ``formatversion=2``).

    Raises:
        KeyError: if ``CLIENT_KEY`` or ``CLIENT_SECRET`` is missing from the
            application config.
    """
    owner_key = session.get("owner_key")
    owner_secret = session.get("owner_secret")
    # The login views store the result of ``.get("oauth_token")`` /
    # ``.get("oauth_token_secret")`` in the session, so a key can be present
    # with a None value.  Treat that like a missing key rather than building
    # a session that would sign requests with empty credentials.
    if not owner_key or not owner_secret:
        return None

    config = current_app.config
    oauth = OAuth1Session(
        config["CLIENT_KEY"],
        client_secret=config["CLIENT_SECRET"],
        resource_owner_key=owner_key,
        resource_owner_secret=owner_secret,
    )
    oauth.headers.update({"User-Agent": ua})
    oauth.params = typing.cast(
        dict[str, str | int],
        {"format": "json", "action": "query", "formatversion": 2},
    )
    return oauth
|
||||
|
||||
|
||||
def get_username() -> None | str:
|
||||
"""Get the username or None if not logged in."""
|
||||
if "owner_key" not in session:
|
||||
return None # not authorized
|
||||
|
||||
if "username" not in session:
|
||||
reply = userinfo_call()
|
||||
try:
|
||||
reply = userinfo_call()
|
||||
except Exception as e:
|
||||
print(f"get_username failed, clearing session: {e}", file=sys.stderr)
|
||||
session.pop("owner_key", None)
|
||||
session.pop("owner_secret", None)
|
||||
return None
|
||||
if "query" not in reply:
|
||||
return None
|
||||
session["username"] = reply["query"]["userinfo"]["name"]
|
||||
|
|
|
|||
|
|
@ -5,19 +5,16 @@ span.searchmatch { font-weight: bold; }
|
|||
|
||||
table.diff,td.diff-otitle,td.diff-ntitle{background-color:white}
|
||||
td.diff-otitle,td.diff-ntitle{text-align:center}
|
||||
td.diff-marker{text-align:right;font-weight:bold;font-size:1.25em}
|
||||
td.diff-marker{width:1.5em;text-align:center;font-weight:bold;font-size:1.25em;padding:0 0.3em}
|
||||
td.diff-lineno{font-weight:bold}
|
||||
td.diff-addedline,td.diff-deletedline,td.diff-context{font-size:88%;vertical-align:top;white-space:-moz-pre-wrap;white-space:pre-wrap}
|
||||
td.diff-addedline,td.diff-deletedline{border-style:solid;border-width:1px 1px 1px 4px;border-radius:0.33em}
|
||||
td.diff-addedline{border-color:#a3d3ff}
|
||||
td.diff-deletedline{border-color:#ffe49c}
|
||||
td.diff-context{background:#f3f3f3;color:#333333;border-style:solid;border-width:1px 1px 1px 4px;border-color:#e6e6e6;border-radius:0.33em}
|
||||
td.diff-addedline,td.diff-deletedline{border-left:3px solid}
|
||||
td.diff-addedline{border-color:#a3d3ff;background:#f0f8ff}
|
||||
td.diff-deletedline{border-color:#ffe49c;background:#fffaf0}
|
||||
td.diff-context{color:#555}
|
||||
.diffchange{font-weight:bold;text-decoration:none}
|
||||
table.diff{border:none;width:98%;border-spacing:4px; table-layout:fixed}
|
||||
td.diff-addedline .diffchange,td.diff-deletedline .diffchange{border-radius:0.33em;padding:0.25em 0}
|
||||
table.diff{border:none;width:100%;border-spacing:0;border-collapse:collapse;table-layout:auto}
|
||||
td.diff-addedline .diffchange{background:#d8ecff}
|
||||
td.diff-deletedline .diffchange{background:#feeec8}
|
||||
table.diff td{padding:0.33em 0.66em}
|
||||
table.diff col.diff-marker{width:2%}
|
||||
table.diff col.diff-content{width:48%}
|
||||
table.diff td div{ word-wrap:break-word; overflow:auto}
|
||||
table.diff td{padding:0.2em 0.5em}
|
||||
table.diff td div{word-wrap:break-word;overflow:auto}
|
||||
|
|
|
|||
3
static/favicon.svg
Normal file
3
static/favicon.svg
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
|
||||
<text y=".9em" font-size="90">🔗</text>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 114 B |
|
|
@ -1,10 +1,11 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Index{% endblock %}
|
||||
{% block title %}All done{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<h1>All done</h1>
|
||||
<div><a href="{{ url_for('index') }}">back to index </a></div>
|
||||
</div>
|
||||
<div class="container text-center mt-5">
|
||||
<h1 class="mb-3">All done</h1>
|
||||
<p class="text-muted mb-4">No more candidates found for this article.</p>
|
||||
<a href="{{ url_for('index') }}" class="btn btn-primary">Search another article</a>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
|
|
|||
|
|
@ -1,48 +1,152 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Link '{{ title }}' in '{{ hit_title }}'{% endblock %}
|
||||
{% block title %}{{ title }}{% endblock %}
|
||||
|
||||
{% block style %}
|
||||
<link href="{{ url_for("static", filename="css/diff.css") }}" rel="stylesheet"/>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<h1>Link '{{ title }}' in '{{ hit_title }}'</h1>
|
||||
<form action="{{ url_for("index") }}">
|
||||
<input name="q">
|
||||
<input type="submit" value="search">
|
||||
</form>
|
||||
<div class="container">
|
||||
<nav aria-label="breadcrumb" class="mb-3">
|
||||
<ol class="breadcrumb">
|
||||
<li class="breadcrumb-item"><a href="{{ url_for('index') }}">Home</a></li>
|
||||
<li class="breadcrumb-item active">{{ title }}</li>
|
||||
</ol>
|
||||
</nav>
|
||||
|
||||
<div>Username: {{ g.user }}</div>
|
||||
<div class="d-flex flex-wrap align-items-baseline gap-3 mb-1">
|
||||
<h1 class="h4 mb-0">Find links to "{{ title }}"</h1>
|
||||
<a href="https://en.wikipedia.org/wiki/{{ title }}" target="_blank" class="text-muted small">{{ title }} ↗</a>
|
||||
{% if redirect_to %}
|
||||
<span class="text-muted small">→ redirects to <a href="https://en.wikipedia.org/wiki/{{ redirect_to }}" target="_blank">{{ redirect_to }} ↗</a></span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div><a href="https://en.wikipedia.org/wiki/{{ title }}" target="_blank">view article</a></div>
|
||||
<div class="d-flex gap-3 mb-3 text-muted small">
|
||||
<span>{{ total }} mentions total</span>
|
||||
<span>{{ with_link }} already linked{% if total > 0 %} ({{ "{:.0%}".format(with_link / total) }}){% endif %}</span>
|
||||
{% if saves_this_session %}
|
||||
<span class="text-success">{{ saves_this_session }} added this session</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div><a href="{{ url_for('index') }}">back to index </a></div>
|
||||
<div id="search-progress" class="my-4">
|
||||
<div class="d-flex align-items-center gap-2 text-muted">
|
||||
<div class="spinner-border spinner-border-sm" role="status">
|
||||
<span class="visually-hidden">Searching…</span>
|
||||
</div>
|
||||
<span id="search-status">Searching…</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>total: {{ total }}</div>
|
||||
<div>with link: {{ with_link }}</div>
|
||||
<div>ratio: {{ "{:.1%}".format(with_link / total) }}</div>
|
||||
{# <div>hit: {{ hit }}</div> #}
|
||||
<div>replacement: {{ found.replacement }}</div>
|
||||
<div>section: {{ found.section }}</div>
|
||||
<table>
|
||||
{{ diff | safe }}
|
||||
</table>
|
||||
<form method="POST">
|
||||
<input type="hidden" name="hit" value="{{ hit_title }}">
|
||||
<div class="my-3">
|
||||
<input type="submit" class="btn btn-primary" value="save"/>
|
||||
<a href="{{url_for("article_page", url_title=url_title, after=hit_title)}}" class="btn btn-primary">skip</a>
|
||||
<div id="result" hidden>
|
||||
<div class="d-flex flex-wrap align-items-baseline gap-2 mb-3">
|
||||
<span class="text-muted small">Adding link in</span>
|
||||
<a id="result-hit-link" href="#" target="_blank" class="small"><span id="result-hit-title"></span> ↗</a>
|
||||
</div>
|
||||
<div class="mb-4">
|
||||
<table class="diff" id="diff-table"></table>
|
||||
</div>
|
||||
<form method="POST" class="mb-4">
|
||||
<input type="hidden" name="hit" id="hit-input">
|
||||
<div class="d-flex gap-2">
|
||||
<button type="submit" class="btn btn-success">Save edit</button>
|
||||
<a id="skip-link" href="#" class="btn btn-outline-secondary">Skip</a>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
<ol>
|
||||
<div id="all-done" hidden class="text-center mt-4">
|
||||
<p class="text-muted mb-4">No more candidates found for this article.</p>
|
||||
<a href="{{ url_for('index') }}" class="btn btn-primary">Search another article</a>
|
||||
</div>
|
||||
|
||||
{% if hits %}
|
||||
<details id="candidates-section" class="border rounded p-3 mt-2">
|
||||
<summary class="text-muted small" style="cursor:pointer"><span id="candidates-count">{{ hits | length }}</span> candidates</summary>
|
||||
<ol class="mt-3 mb-0 small" id="candidates-list">
|
||||
{% for hit in hits %}
|
||||
{% set url = url_for("article_page", url_title=url_title, title=hit.title) %}
|
||||
<li><a href="{{ url }}">{{ hit.title }}</a> – {{ hit.snippet | safe }}</li>
|
||||
<li class="mb-1" data-title="{{ hit.title }}">
|
||||
<a href="{{ url_for("article_page", url_title=url_title, title=hit.title) }}">{{ hit.title }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
</ol>
|
||||
</div>
|
||||
</details>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block script %}
|
||||
<script>
|
||||
(function () {
|
||||
const hits = {{ hits | map(attribute='title') | list | tojson }};
|
||||
const linkTo = {{ title | tojson }};
|
||||
const redirectTo = {{ redirect_to | tojson }};
|
||||
const apiUrl = {{ url_for('api_valid_hit') | tojson }};
|
||||
const pageUrl = new URL(window.location.href);
|
||||
|
||||
const elProgress = document.getElementById('search-progress');
|
||||
const elStatus = document.getElementById('search-status');
|
||||
const elResult = document.getElementById('result');
|
||||
const elAllDone = document.getElementById('all-done');
|
||||
const elList = document.getElementById('candidates-list');
|
||||
|
||||
// Remove the list entry whose data-title equals `title` and refresh the
// visible candidate counter. Does nothing when the list is not rendered or
// contains no entry for that title.
function removeCandidate(title) {
  const entry = elList
    ? elList.querySelector(`li[data-title="${CSS.escape(title)}"]`)
    : null;
  if (!entry) return;

  entry.remove();

  // Keep the counter in sync with the number of remaining list items.
  const counter = document.getElementById('candidates-count');
  if (counter) counter.textContent = elList.children.length;
}
|
||||
|
||||
// Walk the candidate titles in order, asking the validation endpoint about
// each one. The first candidate reported as valid is shown (diff, save form,
// skip link, address bar updated) and the search stops. When no candidate is
// valid, the "all done" panel is shown instead.
async function search() {
  for (const hitTitle of hits) {
    elStatus.textContent = `Checking "${hitTitle}"…`;

    let data;
    try {
      const params = new URLSearchParams({ link_to: linkTo, link_from: hitTitle });
      if (redirectTo) params.append('redirect_to', redirectTo);
      const resp = await fetch(apiUrl + '?' + params);
      if (!resp.ok) continue;
      data = await resp.json();
    } catch (e) {
      // Network or JSON failure: leave the candidate listed and move on.
      continue;
    }

    if (!data || !data.valid) {
      // The endpoint answers valid=false both when the article has no usable
      // mention and, with an `error` field, when the upstream API call failed.
      // Only the first case means the candidate is unusable, so keep the list
      // entry when an error was reported.
      if (data && !data.error) removeCandidate(hitTitle);
      continue;
    }

    elProgress.hidden = true;

    document.getElementById('result-hit-title').textContent = hitTitle;
    document.getElementById('result-hit-link').href =
      'https://en.wikipedia.org/wiki/' + encodeURIComponent(hitTitle.replace(/ /g, '_'));
    // data.diff is HTML table rows produced server-side and is inserted as markup.
    document.getElementById('diff-table').innerHTML = data.diff;
    document.getElementById('hit-input').value = hitTitle;

    // "Skip" reloads this page asking for whatever comes after this candidate.
    const skipUrl = new URL(pageUrl);
    skipUrl.searchParams.delete('title');
    skipUrl.searchParams.set('after', hitTitle);
    document.getElementById('skip-link').href = skipUrl.toString();

    // Make the address bar point at the candidate being shown so a reload
    // returns to the same one.
    const currentUrl = new URL(pageUrl);
    currentUrl.searchParams.delete('after');
    currentUrl.searchParams.set('title', hitTitle);
    history.replaceState(null, '', currentUrl.toString());

    elResult.hidden = false;
    return;
  }

  // Ran out of candidates without finding a valid one.
  elProgress.hidden = true;
  const elCandidates = document.getElementById('candidates-section');
  if (elCandidates) elCandidates.hidden = true;
  elAllDone.hidden = false;
}
|
||||
|
||||
search();
|
||||
}());
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
|
|
|||
|
|
@ -2,21 +2,38 @@
|
|||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<link href="{{ url_for("static", filename="bootstrap/css/bootstrap.min.css") }}" rel="stylesheet">
|
||||
<link href="{{ url_for("static", filename="bootstrap5/css/bootstrap.min.css") }}" rel="stylesheet">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
|
||||
<title>
|
||||
{% block title %}{% endblock %}
|
||||
</title>
|
||||
|
||||
<link rel="icon" type="image/svg+xml" href="{{ url_for('static', filename='favicon.svg') }}">
|
||||
<title>{% block title %}{% endblock %} – Missing Link</title>
|
||||
{% block style %}{% endblock %}
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<nav class="navbar navbar-expand-md navbar-dark bg-dark mb-4">
|
||||
<div class="container">
|
||||
<a class="navbar-brand fw-semibold" href="{{ url_for('index') }}">Missing Link</a>
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
<form class="d-flex" action="{{ url_for('index') }}">
|
||||
<input class="form-control form-control-sm me-2" name="q" placeholder="Article title…" style="width:240px">
|
||||
<button class="btn btn-outline-light btn-sm" type="submit">Go</button>
|
||||
</form>
|
||||
{% if g.user %}
|
||||
<span class="text-light small opacity-75">{{ g.user }}</span>
|
||||
{% if session.get("saves") %}
|
||||
<span class="badge bg-success">{{ session["saves"] }} saved</span>
|
||||
{% endif %}
|
||||
<a class="btn btn-outline-light btn-sm" href="{{ url_for('oauth_disconnect') }}">Log out</a>
|
||||
{% else %}
|
||||
<a class="btn btn-outline-light btn-sm" href="{{ url_for('start_oauth') }}">Log in with Wikipedia</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
|
||||
{% block content %}{% endblock %}
|
||||
|
||||
<script src="{{ url_for("static", filename="bootstrap/js/bootstrap.bundle.min.js")}}></script>
|
||||
|
||||
<script src="{{ url_for("static", filename="bootstrap5/js/bootstrap.bundle.min.js") }}"></script>
|
||||
{% block script %}{% endblock %}
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
17
templates/error.html
Normal file
17
templates/error.html
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Error{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container mt-5">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-7">
|
||||
<div class="alert alert-danger">
|
||||
<h4 class="alert-heading">Something went wrong</h4>
|
||||
<p class="mb-0"><code>{{ message }}</code></p>
|
||||
</div>
|
||||
<a href="{{ url_for('index') }}" class="btn btn-outline-secondary btn-sm">Back to home</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
|
@ -1,25 +1,44 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Index{% endblock %}
|
||||
{% block title %}Missing Link{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<h1>Index</h1>
|
||||
<form>
|
||||
<input name="q">
|
||||
<input type="submit" value="search">
|
||||
</form>
|
||||
|
||||
<div>Username: {{ g.user }}</div>
|
||||
|
||||
<table class="table w-auto">
|
||||
{% for item in examples %}
|
||||
<tr>
|
||||
<td><a href="{{ article_url(item.title) }}">{{ item.title }}</a></td>
|
||||
<td>{{ item.total }}</td>
|
||||
<td>{{ "{:.1%}".format(item.with_links / item.total) }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center mt-5">
|
||||
<div class="col-md-6 text-center">
|
||||
<h1 class="mb-2">Missing Link</h1>
|
||||
<p class="text-muted mb-4">Find unlinked mentions of a Wikipedia article and add the links.</p>
|
||||
<form class="d-flex gap-2 justify-content-center" action="{{ url_for('index') }}">
|
||||
<input class="form-control" name="q" placeholder="Article title…" style="max-width:360px" autofocus>
|
||||
<button class="btn btn-primary" type="submit">Search</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if debug %}
|
||||
<div class="row mt-5">
|
||||
<div class="col">
|
||||
<h2 class="h6 text-muted text-uppercase mb-3">Examples</h2>
|
||||
<table class="table table-sm table-hover w-auto">
|
||||
<thead class="table-light">
|
||||
<tr>
|
||||
<th>Article</th>
|
||||
<th class="text-end">Total</th>
|
||||
<th class="text-end">% linked</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for item in examples %}
|
||||
<tr>
|
||||
<td><a href="{{ article_url(item.title) }}">{{ item.title }}</a></td>
|
||||
<td class="text-end text-muted">{{ item.total }}</td>
|
||||
<td class="text-end text-muted">{{ "{:.0%}".format(item.with_links / item.total) }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Index{% endblock %}
|
||||
{% block title %}Edit saved{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<h1>Save done</h1>
|
||||
<div>Save is complete.</div>
|
||||
</div>
|
||||
<div class="container text-center mt-5">
|
||||
<h1 class="mb-3">Edit saved</h1>
|
||||
<p class="text-muted mb-4">Your edit has been saved to Wikipedia.</p>
|
||||
<a href="{{ url_for('index') }}" class="btn btn-primary">Search another article</a>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
|
|
|||
167
web_view.py
167
web_view.py
|
|
@ -4,11 +4,13 @@ import html
|
|||
import itertools
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import typing
|
||||
|
||||
import flask
|
||||
import werkzeug
|
||||
from requests_oauthlib import OAuth1Session
|
||||
from requests_oauthlib.oauth1_session import TokenRequestDenied
|
||||
from werkzeug.wrappers.response import Response
|
||||
|
||||
from add_links import api, core, mediawiki_api, mediawiki_oauth
|
||||
|
|
@ -94,14 +96,20 @@ def search_count(q: str) -> int:
|
|||
return get_hit_count(article_title_to_search_query(q)) - 1
|
||||
|
||||
|
||||
def search_count_with_link(q: str) -> int:
|
||||
def search_count_with_link(q: str, redirect_to: str | None = None) -> int:
|
||||
"""Articles in Wikipedia that include this search term and a link."""
|
||||
return get_hit_count(article_title_to_search_query(q) + f' linksto:"{q}"')
|
||||
count = get_hit_count(article_title_to_search_query(q) + f' linksto:"{q}"')
|
||||
if redirect_to:
|
||||
count += get_hit_count(article_title_to_search_query(q) + f' linksto:"{redirect_to}"')
|
||||
return count
|
||||
|
||||
|
||||
def search_no_link(q: str) -> tuple[int, list[Hit]]:
|
||||
def search_no_link(q: str, redirect_to: str | None = None) -> tuple[int, list[Hit]]:
|
||||
"""Search for mentions of article title with no link included."""
|
||||
query = run_search(article_title_to_search_query(q) + f' -linksto:"{q}"', "max")
|
||||
exclude = f' -linksto:"{q}"'
|
||||
if redirect_to:
|
||||
exclude += f' -linksto:"{redirect_to}"'
|
||||
query = run_search(article_title_to_search_query(q) + exclude, "max")
|
||||
return (query["searchinfo"]["totalhits"], query["search"])
|
||||
|
||||
|
||||
|
|
@ -109,6 +117,7 @@ def search_no_link(q: str) -> tuple[int, list[Hit]]:
|
|||
def global_user() -> None:
|
||||
"""Make username available everywhere."""
|
||||
flask.g.user = mediawiki_oauth.get_username()
|
||||
flask.g.oauth_session = mediawiki_oauth.get_oauth_session()
|
||||
|
||||
|
||||
@app.route("/")
|
||||
|
|
@ -118,17 +127,20 @@ def index() -> str | Response:
|
|||
url = flask.url_for("oauth_callback", **flask.request.args) # type: ignore
|
||||
return flask.redirect(url)
|
||||
|
||||
examples = load_examples()
|
||||
examples.sort(
|
||||
key=lambda i: float(i["with_links"]) / float(i["total"]), reverse=True
|
||||
)
|
||||
|
||||
if q := flask.request.args.get("q"):
|
||||
if q_trimmed := q.strip():
|
||||
return flask.redirect(article_url(q_trimmed))
|
||||
|
||||
debug = flask.request.args.get("debug")
|
||||
examples: list[dict[str, str | int]] = []
|
||||
if debug:
|
||||
examples = load_examples()
|
||||
examples.sort(
|
||||
key=lambda i: float(i["with_links"]) / float(i["total"]), reverse=True
|
||||
)
|
||||
|
||||
return flask.render_template(
|
||||
"index.html", examples=examples, article_url=article_url
|
||||
"index.html", examples=examples, article_url=article_url, debug=debug
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -187,7 +199,12 @@ def start_oauth() -> Response:
|
|||
|
||||
oauth = OAuth1Session(client_key, client_secret=client_secret, callback_uri="oob")
|
||||
oauth.headers.update({"User-Agent": api.ua})
|
||||
fetch_response = oauth.fetch_request_token(request_token_url)
|
||||
try:
|
||||
fetch_response = oauth.fetch_request_token(request_token_url)
|
||||
except TokenRequestDenied as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=str(e)), 502
|
||||
)
|
||||
|
||||
flask.session["owner_key"] = fetch_response.get("oauth_token")
|
||||
flask.session["owner_secret"] = fetch_response.get("oauth_token_secret")
|
||||
|
|
@ -229,7 +246,8 @@ def oauth_callback() -> werkzeug.wrappers.response.Response:
|
|||
flask.session["owner_key"] = oauth_tokens.get("oauth_token")
|
||||
flask.session["owner_secret"] = oauth_tokens.get("oauth_token_secret")
|
||||
|
||||
print("login successful")
|
||||
username = mediawiki_oauth.get_username()
|
||||
print(f"login successful: {username}", file=sys.stderr)
|
||||
|
||||
next_page = flask.session.get("after_login")
|
||||
return flask.redirect(next_page if next_page else flask.url_for("index"))
|
||||
|
|
@ -281,31 +299,16 @@ def match_type(q: str, snippet: str) -> str | None:
|
|||
return match
|
||||
|
||||
|
||||
class NoGoodHit(Exception):
|
||||
"""No good hit."""
|
||||
|
||||
|
||||
def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any]]:
|
||||
"""Find the best hit within the search results."""
|
||||
for hit in hits:
|
||||
if hit["title"].lower() == title.lower():
|
||||
continue
|
||||
# if match_type(title, hit["snippet"]) != "exact":
|
||||
# continue
|
||||
|
||||
try:
|
||||
print(f'get diff: {hit["title"]}, {title}')
|
||||
found = get_diff(title, hit["title"], None)
|
||||
except NoMatch:
|
||||
print("no match")
|
||||
continue
|
||||
except api.MediawikiError as e:
|
||||
print(f"MediawikiError for {hit['title']!r}: {e}")
|
||||
continue
|
||||
|
||||
return (hit, found)
|
||||
|
||||
raise NoGoodHit
|
||||
def _record_skip(from_title: str, hit_title: str) -> None:
    """Remember in the session that a candidate has been dealt with.

    The session key ``"skipped"`` maps an article title to the list of
    candidate titles already skipped or saved for it.  ``hit_title`` is
    appended to the list for ``from_title`` unless it is already there, in
    which case the session is left untouched.
    """
    skipped_by_article: dict[str, list[str]] = flask.session.get("skipped", {})
    already_recorded = skipped_by_article.get(from_title, [])
    if hit_title in already_recorded:
        return

    # Build a new list rather than appending in place.
    skipped_by_article[from_title] = already_recorded + [hit_title]
    flask.session["skipped"] = skipped_by_article
    # Explicitly flag the session as changed.
    flask.session.modified = True
|
||||
|
||||
|
||||
def handle_post(url_title: str) -> Response:
|
||||
|
|
@ -316,8 +319,16 @@ def handle_post(url_title: str) -> Response:
|
|||
do_save(from_title, hit_title)
|
||||
except mediawiki_oauth.LoginNeeded:
|
||||
return flask.redirect(flask.url_for("start_oauth"))
|
||||
except mediawiki_api.APIError as e:
|
||||
return flask.make_response(f"Save failed: {e}", 502)
|
||||
except (mediawiki_api.APIError, api.MediawikiError) as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=f"Save failed: {e}"), 502
|
||||
)
|
||||
flask.session["saves"] = flask.session.get("saves", 0) + 1
|
||||
saves_by_title: dict[str, int] = flask.session.get("saves_by_title", {})
|
||||
saves_by_title[from_title] = saves_by_title.get(from_title, 0) + 1
|
||||
flask.session["saves_by_title"] = saves_by_title
|
||||
flask.session.modified = True
|
||||
_record_skip(from_title, hit_title)
|
||||
return flask.redirect(
|
||||
flask.url_for("article_page", url_title=url_title, after=hit_title)
|
||||
)
|
||||
|
|
@ -330,48 +341,55 @@ def article_page(url_title: str) -> str | Response:
|
|||
return handle_post(url_title)
|
||||
|
||||
from_title = url_title.replace("_", " ").strip()
|
||||
article_title = flask.request.args.get("title")
|
||||
|
||||
total = search_count(from_title)
|
||||
with_link = search_count_with_link(from_title)
|
||||
try:
|
||||
redirect_to = api.get_wiki_info(from_title)
|
||||
except (api.MissingPage, api.MultipleRedirects, api.MediawikiError):
|
||||
redirect_to = None
|
||||
|
||||
no_link_count, hits = search_no_link(from_title)
|
||||
try:
|
||||
total = search_count(from_title)
|
||||
with_link = search_count_with_link(from_title, redirect_to)
|
||||
_no_link_count, hits = search_no_link(from_title, redirect_to)
|
||||
except api.MediawikiError as e:
|
||||
return flask.make_response(
|
||||
flask.render_template("error.html", message=str(e)), 502
|
||||
)
|
||||
|
||||
by_title = {hit["title"]: hit for hit in hits}
|
||||
# Filter out candidates already processed this session
|
||||
session_skipped: set[str] = set(
|
||||
flask.session.get("skipped", {}).get(from_title, [])
|
||||
)
|
||||
|
||||
found = None
|
||||
if article_title in by_title:
|
||||
hit = by_title[article_title]
|
||||
try:
|
||||
found = get_diff(from_title, hit["title"], None)
|
||||
except NoMatch:
|
||||
pass
|
||||
# If a specific candidate was requested, move it to the front
|
||||
title_param = flask.request.args.get("title")
|
||||
if title_param:
|
||||
hits = [h for h in hits if h["title"] == title_param] + \
|
||||
[h for h in hits if h["title"] != title_param]
|
||||
|
||||
if not found:
|
||||
after = flask.request.args.get("after")
|
||||
if after:
|
||||
print(after)
|
||||
hits_iter = itertools.dropwhile(lambda hit: hit["title"] != after, hits)
|
||||
skip = next(hits_iter, None)
|
||||
if skip:
|
||||
hits = list(hits_iter)
|
||||
# Record and apply explicit skip-past
|
||||
after = flask.request.args.get("after")
|
||||
if after:
|
||||
_record_skip(from_title, after)
|
||||
session_skipped.add(after)
|
||||
|
||||
try:
|
||||
hit, found = get_best_hit(from_title, hits)
|
||||
except NoGoodHit:
|
||||
return flask.render_template("all_done.html")
|
||||
hits = [h for h in hits if h["title"] not in session_skipped
|
||||
and h["title"] != from_title and h["title"] != case_flip_first(from_title)]
|
||||
|
||||
if not hits:
|
||||
return flask.render_template("all_done.html")
|
||||
|
||||
saves_this_session = flask.session.get("saves_by_title", {}).get(from_title, 0)
|
||||
|
||||
return flask.render_template(
|
||||
"article.html",
|
||||
title=from_title,
|
||||
redirect_to=redirect_to,
|
||||
total=total,
|
||||
with_link=with_link,
|
||||
hit_title=hit["title"],
|
||||
hits=hits,
|
||||
replacement=found["replacement"],
|
||||
diff=found["diff"],
|
||||
found=found,
|
||||
url_title=url_title,
|
||||
saves_this_session=saves_this_session,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -379,7 +397,12 @@ def do_save(title: str, hit_title: str) -> str:
|
|||
"""Update page on Wikipedia."""
|
||||
token = mediawiki_oauth.get_token()
|
||||
|
||||
found = get_match(title, hit_title, None)
|
||||
try:
|
||||
redirect_to = api.get_wiki_info(title)
|
||||
except (api.MissingPage, api.MultipleRedirects, api.MediawikiError):
|
||||
redirect_to = None
|
||||
|
||||
found = get_match(title, hit_title, redirect_to)
|
||||
|
||||
summary = (
|
||||
f"link [[{found['replacement']}]] using [[:en:User:Edward/Find link|Find link]]"
|
||||
|
|
@ -417,16 +440,20 @@ def api_hits() -> werkzeug.wrappers.response.Response:
|
|||
|
||||
@app.route("/api/1/valid_hit")
|
||||
def api_valid_hit() -> werkzeug.wrappers.response.Response:
|
||||
"""Return candidates for the given article title."""
|
||||
link_from = flask.request.args["link_from"]
|
||||
"""Check if a candidate article has a valid unlinked mention."""
|
||||
link_to = flask.request.args["link_to"]
|
||||
link_from = flask.request.args["link_from"]
|
||||
redirect_to = flask.request.args.get("redirect_to") or None
|
||||
|
||||
try:
|
||||
diff, replacement = get_diff(link_to, link_from, None)
|
||||
found = get_diff(link_to, link_from, redirect_to)
|
||||
except NoMatch:
|
||||
_record_skip(link_to, link_from)
|
||||
return flask.jsonify(valid=False)
|
||||
except api.MediawikiError as e:
|
||||
return flask.jsonify(valid=False, error=str(e))
|
||||
|
||||
return flask.jsonify(valid=True, diff=diff, replacement=replacement)
|
||||
return flask.jsonify(valid=True, diff=found["diff"], replacement=found["replacement"])
|
||||
|
||||
|
||||
@app.route("/favicon.ico")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue