Compare commits

...

5 commits

6 changed files with 131 additions and 100 deletions

1
.gitignore vendored
View file

@@ -2,3 +2,4 @@ __pycache__
.mypy_cache/ .mypy_cache/
node_modules node_modules
package-lock.json package-lock.json
static/bootstrap

View file

@@ -1,7 +1,10 @@
"""Wikipedia OAuth."""
import typing import typing
import urllib import urllib
from typing import cast from typing import cast
import requests
from flask import current_app, session from flask import current_app, session
from requests_oauthlib import OAuth1Session from requests_oauthlib import OAuth1Session
@@ -9,6 +12,12 @@ wiki_hostname = "en.wikipedia.org"
api_url = f"https://{wiki_hostname}/w/api.php" api_url = f"https://{wiki_hostname}/w/api.php"
class LoginNeeded(Exception):
    """Not logged in: the session has no OAuth owner key/secret."""
def get_edit_proxy() -> dict[str, str]: def get_edit_proxy() -> dict[str, str]:
"""Retrieve proxy information from config.""" """Retrieve proxy information from config."""
edit_proxy = current_app.config.get("EDIT_PROXY") edit_proxy = current_app.config.get("EDIT_PROXY")
@@ -18,7 +27,7 @@ def get_edit_proxy() -> dict[str, str]:
return {} return {}
def api_post_request(params: dict[str, str | int]): def api_post_request(params: dict[str, str | int]) -> requests.Response:
"""HTTP Post using Oauth.""" """HTTP Post using Oauth."""
app = current_app app = current_app
# url = "https://www.wikidata.org/w/api.php" # url = "https://www.wikidata.org/w/api.php"
@@ -34,12 +43,14 @@ def api_post_request(params: dict[str, str | int]):
return oauth.post(api_url, data=params, timeout=4, proxies=proxies) return oauth.post(api_url, data=params, timeout=4, proxies=proxies)
def raw_request(params: typing.Mapping[str, str | int]): def raw_request(params: typing.Mapping[str, str | int]) -> requests.Response:
"""Low-level API request.""" """Low-level API request."""
app = current_app app = current_app
# url = "https://www.wikidata.org/w/api.php?" + urlencode(params) # url = "https://www.wikidata.org/w/api.php?" + urlencode(params)
client_key = app.config["CLIENT_KEY"] client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"] client_secret = app.config["CLIENT_SECRET"]
if "owner_key" not in session or "owner_secret" not in session:
raise LoginNeeded
oauth = OAuth1Session( oauth = OAuth1Session(
client_key, client_key,
client_secret=client_secret, client_secret=client_secret,

View file

@@ -1,56 +1,48 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}{{ title }}{% endblock %} {% block title %}Link '{{ title }}' in '{{ hit_title }}'{% endblock %}
{% block style %} {% block style %}
<style> <link href="{{ url_for("static", filename="css/diff.css") }}" rel="stylesheet"/>
span.exact { padding: 2px; background: green; color: white; font-weight: bold; }
span.nomatch { padding: 2px; background: red; color: white; font-weight: bold; }
span.case_mismatch { padding: 2px; background: orange; color: white; font-weight: bold; }
span.searchmatch { font-weight: bold; }
table.diff,td.diff-otitle,td.diff-ntitle{background-color:white}
td.diff-otitle,td.diff-ntitle{text-align:center}
td.diff-marker{text-align:right;font-weight:bold;font-size:1.25em}
td.diff-lineno{font-weight:bold}
td.diff-addedline,td.diff-deletedline,td.diff-context{font-size:88%;vertical-align:top;white-space:-moz-pre-wrap;white-space:pre-wrap}
td.diff-addedline,td.diff-deletedline{border-style:solid;border-width:1px 1px 1px 4px;border-radius:0.33em}
td.diff-addedline{border-color:#a3d3ff}
td.diff-deletedline{border-color:#ffe49c}
td.diff-context{background:#f3f3f3;color:#333333;border-style:solid;border-width:1px 1px 1px 4px;border-color:#e6e6e6;border-radius:0.33em}
.diffchange{font-weight:bold;text-decoration:none}
table.diff{border:none;width:98%;border-spacing:4px; table-layout:fixed}
td.diff-addedline .diffchange,td.diff-deletedline .diffchange{border-radius:0.33em;padding:0.25em 0}
td.diff-addedline .diffchange{background:#d8ecff}
td.diff-deletedline .diffchange{background:#feeec8}
table.diff td{padding:0.33em 0.66em}
table.diff col.diff-marker{width:2%}
table.diff col.diff-content{width:48%}
table.diff td div{ word-wrap:break-word; overflow:auto}
</style>
{% endblock %} {% endblock %}
{% block content %} {% block content %}
<div class="container"> <div class="container">
<h1>{{ self.title() }}</h1> <h1>Link '{{ title }}' in '{{ hit_title }}'</h1>
<form> <form action="{{ url_for("index") }}">
<input name="q"> <input name="q">
<input type="submit" value="search"> <input type="submit" value="search">
</form> </form>
<div id="app"></div>
{# Context for the reviewer: logged-in user plus links out to the article and back to the index. #}
<div>Username: {{ g.user }}</div>
{# urlencode keeps the link valid for titles containing spaces, "?", "#" or "%". #}
<div><a href="https://en.wikipedia.org/wiki/{{ title | urlencode }}" target="_blank" rel="noopener">view article</a></div>
<div><a href="{{ url_for('index') }}">back to index </a></div>
<div>total: {{ total }}</div>
<div>with link: {{ with_link }}</div>
{# A zero total would raise ZeroDivisionError while rendering, so only format the ratio when it is defined. #}
<div>ratio: {% if total %}{{ "{:.1%}".format(with_link / total) }}{% else %}n/a{% endif %}</div>
<div>replacement: {{ found.replacement }}</div>
<div>section: {{ found.section }}</div>
{# diff is marked safe, i.e. emitted as raw HTML; it must already be trusted markup. #}
<table>
{{ diff | safe }}
</table>
{# Submitting posts hit_title back to this page as "hit"; "skip" reloads the page with after=hit_title. #}
<form method="POST">
<input type="hidden" name="hit" value="{{ hit_title }}">
<div class="my-3">
<input type="submit" class="btn btn-primary" value="save"/>
<a href="{{url_for("article_page", url_title=url_title, after=hit_title)}}" class="btn btn-primary">skip</a>
</div>
</form>
{# One entry per item in hits, each linking to this page with title=<hit title>. #}
{# NOTE(review): hit.snippet is emitted unescaped; confirm it only ever contains trusted markup. #}
<ol>
{% for hit in hits %}
{% set url = url_for("article_page", url_title=url_title, title=hit.title) %}
<li><a href="{{ url }}">{{ hit.title }}</a> &ndash; {{ hit.snippet | safe }}</li>
{% endfor %}
</ol>
</div> </div>
<script type="module">
import main from {{ url_for('static', filename='add_links.es.js') | tojson }};
const props = {
title: {{ title | tojson }},
api_base_url: "/api/1"
}
main(props);
</script>
{% endblock %} {% endblock %}

View file

@@ -1,41 +0,0 @@
{% extends "base.html" %}
{% block title %}Link '{{ title }}' in '{{ hit.title }}'{% endblock %}
{% block style %}
<link href="{{ url_for("static", filename="css/diff.css") }}" rel="stylesheet"/>
{% endblock %}
{% block content %}
<div class="container">
<h1>Link '{{ title }}' in '{{ hit.title }}'</h1>
<form action="{{ url_for("index") }}">
<input name="q">
<input type="submit" value="search">
</form>
<div>Username: {{ g.user }}</div>
<div><a href="https://en.wikipedia.org/wiki/{{ title }}" target="_blank">view article</a></div>
<div><a href="{{ url_for('index') }}">back to index </a></div>
<div>total: {{ total }}</div>
<div>with link: {{ with_link }}</div>
<div>ratio: {{ "{:.1%}".format(with_link / total) }}</div>
{# <div>hit: {{ hit }}</div> #}
<div>replacement: {{ found.replacement }}</div>
<div>section: {{ found.section }}</div>
<table>
{{ diff | safe }}
</table>
<form method="POST">
<input type="hidden" name="hit" value="{{ hit.title }}">
<div class="my-3">
<input type="submit" class="btn btn-primary" value="save"/>
<a href="{{url_for("article_page", url_title=url_title, after=hit["title"])}}" class="btn btn-primary">skip</a>
</div>
</form>
</div>
{% endblock %}

View file

@@ -0,0 +1,56 @@
{% extends "base.html" %}
{# Child template of base.html: fills the title, style and content blocks. The content block renders a search form and an empty #app container, then loads a JavaScript module that is called with the article title. #}
{% block title %}{{ title }}{% endblock %}
{% block style %}
{# Inline styles: the span.* rules style the exact / nomatch / case_mismatch / searchmatch classes; the remaining rules style table.diff and its td.diff-* cells. #}
<style>
span.exact { padding: 2px; background: green; color: white; font-weight: bold; }
span.nomatch { padding: 2px; background: red; color: white; font-weight: bold; }
span.case_mismatch { padding: 2px; background: orange; color: white; font-weight: bold; }
span.searchmatch { font-weight: bold; }
table.diff,td.diff-otitle,td.diff-ntitle{background-color:white}
td.diff-otitle,td.diff-ntitle{text-align:center}
td.diff-marker{text-align:right;font-weight:bold;font-size:1.25em}
td.diff-lineno{font-weight:bold}
td.diff-addedline,td.diff-deletedline,td.diff-context{font-size:88%;vertical-align:top;white-space:-moz-pre-wrap;white-space:pre-wrap}
td.diff-addedline,td.diff-deletedline{border-style:solid;border-width:1px 1px 1px 4px;border-radius:0.33em}
td.diff-addedline{border-color:#a3d3ff}
td.diff-deletedline{border-color:#ffe49c}
td.diff-context{background:#f3f3f3;color:#333333;border-style:solid;border-width:1px 1px 1px 4px;border-color:#e6e6e6;border-radius:0.33em}
.diffchange{font-weight:bold;text-decoration:none}
table.diff{border:none;width:98%;border-spacing:4px; table-layout:fixed}
td.diff-addedline .diffchange,td.diff-deletedline .diffchange{border-radius:0.33em;padding:0.25em 0}
td.diff-addedline .diffchange{background:#d8ecff}
td.diff-deletedline .diffchange{background:#feeec8}
table.diff td{padding:0.33em 0.66em}
table.diff col.diff-marker{width:2%}
table.diff col.diff-content{width:48%}
table.diff td div{ word-wrap:break-word; overflow:auto}
</style>
{% endblock %}
{% block content %}
<div class="container">
{# self.title() re-renders the title block above, so the heading always matches the page title. #}
<h1>{{ self.title() }}</h1>
{# The form has no action attribute, so the "q" field is submitted to the current URL. #}
<form>
<input name="q">
<input type="submit" value="search">
</form>
{# Empty container; presumably the mount point for the script below - confirm in add_links.es.js. #}
<div id="app"></div>
</div>
<script type="module">
{# tojson turns the static URL and the title into JavaScript string literals. #}
import main from {{ url_for('static', filename='add_links.es.js') | tojson }};
const props = {
title: {{ title | tojson }},
api_base_url: "/api/1"
}
main(props);
</script>
{% endblock %}

View file

@@ -236,7 +236,7 @@ def match_type(q: str, snippet: str) -> str | None:
class NoGoodHit(Exception): class NoGoodHit(Exception):
pass """No good hit."""
def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any]]: def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any]]:
@@ -266,35 +266,47 @@ def article_page(url_title: str) -> str | Response:
if flask.request.method == "POST": if flask.request.method == "POST":
hit_title = flask.request.form["hit"] hit_title = flask.request.form["hit"]
do_save(from_title, hit_title) try:
do_save(from_title, hit_title)
except wikidata_oauth.LoginNeeded:
return flask.redirect(flask.url_for("start_oauth"))
return flask.redirect( return flask.redirect(
flask.url_for("article_page", url_title=url_title, after=hit_title) flask.url_for("article_page", url_title=url_title, after=hit_title)
) )
article_title = flask.request.args.get("title")
total = search_count(from_title) total = search_count(from_title)
with_link = search_count_with_link(from_title) with_link = search_count_with_link(from_title)
no_link_count, hits = search_no_link(from_title) no_link_count, hits = search_no_link(from_title)
after = flask.request.args.get("after") by_title = {hit["title"]: hit for hit in hits}
if after:
print(after)
hits_iter = itertools.dropwhile(lambda hit: hit["title"] != after, hits)
skip = next(hits_iter, None)
if skip:
hits = list(hits_iter)
try: if article_title in by_title:
hit, found = get_best_hit(from_title, hits) hit = by_title[article_title]
except NoGoodHit: found = get_diff(from_title, hit["title"], None)
return flask.render_template("all_done.html") else:
after = flask.request.args.get("after")
if after:
print(after)
hits_iter = itertools.dropwhile(lambda hit: hit["title"] != after, hits)
skip = next(hits_iter, None)
if skip:
hits = list(hits_iter)
try:
hit, found = get_best_hit(from_title, hits)
except NoGoodHit:
return flask.render_template("all_done.html")
return flask.render_template( return flask.render_template(
"article2.html", "article.html",
title=from_title, title=from_title,
total=total, total=total,
with_link=with_link, with_link=with_link,
hit=hit, hit_title=hit["title"],
hits=hits,
replacement=found["replacement"], replacement=found["replacement"],
diff=found["diff"], diff=found["diff"],
found=found, found=found,