Initial commit
This commit is contained in:
commit
f07b407e7a
25 changed files with 2383 additions and 0 deletions
362
web_view.py
Executable file
362
web_view.py
Executable file
|
|
@ -0,0 +1,362 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import html
|
||||
import itertools
|
||||
import json
|
||||
import re
|
||||
import typing
|
||||
|
||||
import flask
|
||||
import werkzeug
|
||||
from requests_oauthlib import OAuth1Session
|
||||
from werkzeug.wrappers.response import Response
|
||||
|
||||
from add_links import api, core, mediawiki_api, wikidata_oauth
|
||||
from add_links.match import NoMatch, get_diff, get_match
|
||||
|
||||
# Flask application object; settings are read from the "config.default" object.
app = flask.Flask(__name__)
app.config.from_object("config.default")
# NOTE(review): debug mode is enabled unconditionally here - confirm this is
# not what a production deployment runs with.
app.debug = True

# Wikipedia site this tool talks to, plus the two script endpoints built from it.
wiki_hostname = "en.wikipedia.org"
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
wiki_index_php = f"https://{wiki_hostname}/w/index.php"
|
||||
|
||||
|
||||
class Hit(typing.TypedDict):
    """A single search result describing a candidate article.

    This is the element type of the result list returned by
    ``search_no_link`` and consumed by ``get_best_hit``.
    """

    # Only ``title`` and ``snippet`` are read by the code in this module; the
    # remaining keys are presumably the other fields of a search API result
    # entry - confirm against the API response.
    ns: int
    title: str
    pageid: int
    size: int
    wordcount: int
    snippet: str
    timestamp: str
|
||||
|
||||
|
||||
# Splits a title of the form "Name (qualifier)" into its name and the
# parenthesised qualifier at the end.
re_disambig = re.compile(r"^(.*) \((.*)\)$")
|
||||
|
||||
|
||||
def load_examples() -> list[dict[str, str | int]]:
|
||||
"""Load examples."""
|
||||
return [json.loads(line) for line in open("examples")]
|
||||
|
||||
|
||||
def article_title_to_search_query(title: str) -> str:
    """Turn an article title into a quoted search query.

    A title shaped like ``Name (qualifier)`` becomes two quoted phrases joined
    with ``AND``; any other title becomes one quoted phrase.
    """
    m = re_disambig.match(title)
    if m is None:
        return f'"{title}"'
    return f'"{m.group(1)}" AND "{m.group(2)}"'
|
||||
|
||||
|
||||
def run_search(q: str, limit: int | str = "max") -> dict[str, typing.Any]:
    """Run a text search and return the ``query`` part of the API reply.

    Args:
        q: the search string, sent as ``srsearch``.
        limit: value sent as ``srlimit``.
    """
    reply = api.api_get(
        {"list": "search", "srwhat": "text", "srlimit": limit, "srsearch": q}
    )
    return typing.cast(dict[str, typing.Any], reply["query"])
|
||||
|
||||
|
||||
def article_url(title: str) -> str:
    """Return this app's URL for the ``article_page`` view of *title*.

    Spaces in the title are written as underscores in the URL.
    """
    underscored = title.replace(" ", "_")
    return flask.url_for("article_page", url_title=underscored)
|
||||
|
||||
|
||||
def search_count(q: str) -> int:
    """Count how often this article title appears in Wikipedia.

    Returns the search's ``totalhits`` minus one.
    """
    search_terms = article_title_to_search_query(q)
    info = run_search(search_terms, limit=0)["searchinfo"]
    # NOTE(review): the -1 presumably discounts the article's own page - confirm.
    return typing.cast(int, info["totalhits"]) - 1
|
||||
|
||||
|
||||
def search_count_with_link(q: str) -> int:
    """Count search hits for this title restricted by a ``linksto:`` filter.

    The query is the same one ``search_count`` uses with ``linksto:"<title>"``
    appended; the search's ``totalhits`` is returned unchanged.
    """
    search_terms = article_title_to_search_query(q) + f' linksto:"{q}"'
    info = run_search(search_terms, limit=0)["searchinfo"]
    return typing.cast(int, info["totalhits"])
|
||||
|
||||
|
||||
def search_no_link(q: str) -> tuple[int, list[Hit]]:
    """Search for mentions of the article title, excluding ``linksto:`` matches.

    Returns:
        ``(totalhits, results)`` taken from the search reply's ``searchinfo``
        and ``search`` entries.
    """
    search_terms = article_title_to_search_query(q) + f' -linksto:"{q}"'
    reply = run_search(search_terms, "max")
    return (reply["searchinfo"]["totalhits"], reply["search"])
|
||||
|
||||
|
||||
@app.before_request
def global_user() -> None:
    """Store the current username on ``flask.g`` before each request."""
    username = wikidata_oauth.get_username()
    flask.g.user = username
|
||||
|
||||
|
||||
@app.route("/")
def index() -> str | Response:
    """Index page.

    Three outcomes, checked in this order:

    * the request carries both ``oauth_verifier`` and ``oauth_token``:
      redirect to the ``oauth_callback`` view, forwarding the query arguments;
    * the request carries a non-blank ``q``: redirect to that article's page;
    * otherwise render ``index.html`` with the examples sorted by their
      ``with_links`` / ``total`` ratio, highest first.
    """
    if "oauth_verifier" in flask.request.args and "oauth_token" in flask.request.args:
        return flask.redirect(flask.url_for("oauth_callback", **flask.request.args))

    # Handle the search box before touching the examples file, so a redirect
    # does not pay for loading and sorting data it never shows.
    if (q := flask.request.args.get("q")) and (q_trimmed := q.strip()):
        return flask.redirect(article_url(q_trimmed))

    def linked_ratio(example: dict[str, str | int]) -> float:
        """Share of mentions that are already linked; 0.0 when total is 0."""
        total = float(example["total"])
        # Guard against a zero total, which would otherwise raise
        # ZeroDivisionError and break the whole index page.
        return float(example["with_links"]) / total if total else 0.0

    examples = load_examples()
    examples.sort(key=linked_ratio, reverse=True)

    return flask.render_template(
        "index.html", examples=examples, article_url=article_url
    )
|
||||
|
||||
|
||||
def case_flip(s: str) -> str:
    """Return *s* with its case switched.

    An all-lowercase string is upper-cased and an all-uppercase string is
    lower-cased; anything else (mixed case, no cased characters, empty) is
    returned unchanged.
    """
    if s.islower():
        return s.upper()
    return s.lower() if s.isupper() else s
|
||||
|
||||
|
||||
def case_flip_first(s: str) -> str:
    """Switch the case of the first character of *s*.

    The rest of the string is left untouched. An empty string is returned
    as-is.
    """
    # Slicing (rather than indexing s[0]) keeps the empty string from raising
    # IndexError; for non-empty input the result is the same.
    return case_flip(s[:1]) + s[1:]
|
||||
|
||||
|
||||
def tidy_snippet(snippet: str) -> str:
    """Reduce a search snippet to plain text.

    En dashes become hyphens, the ``searchmatch`` highlight markup is removed
    and HTML entities are decoded.
    """
    # Applied in this order, one after the other.
    substitutions = (
        ("\u2013", "-"),
        ("</span>", ""),
        ('<span class="searchmatch">', ""),
    )
    for old, new in substitutions:
        snippet = snippet.replace(old, new)
    return html.unescape(snippet)
|
||||
|
||||
|
||||
@app.route("/oauth/start")
def start_oauth() -> Response:
    """Start the OAuth login: fetch a request token and redirect to authorize.

    An optional ``next`` query argument is remembered in the session as
    ``after_login``. The request token and secret are stored in the session
    as ``owner_key`` / ``owner_secret``.
    """
    # NOTE(review): ``next`` is stored unvalidated and oauth_callback later
    # redirects to it - consider restricting it to local URLs.
    if next_page := flask.request.args.get("next"):
        flask.session["after_login"] = next_page

    client_key = app.config["CLIENT_KEY"]
    client_secret = app.config["CLIENT_SECRET"]

    oauth = OAuth1Session(client_key, client_secret=client_secret, callback_uri="oob")
    request_token = oauth.fetch_request_token(
        wiki_index_php + "?title=Special%3aOAuth%2finitiate"
    )

    flask.session["owner_key"] = request_token.get("oauth_token")
    flask.session["owner_secret"] = request_token.get("oauth_token_secret")

    authorization_url = oauth.authorization_url(
        f"https://{wiki_hostname}/wiki/Special:OAuth/authorize",
        oauth_consumer_key=client_key,
    )
    return flask.redirect(authorization_url)
|
||||
|
||||
|
||||
@app.route("/oauth/callback", methods=["GET"])
def oauth_callback() -> werkzeug.wrappers.response.Response:
    """Finish the OAuth login: exchange the request token for an access token.

    The access token and secret replace ``owner_key`` / ``owner_secret`` in
    the session. The user is then redirected to the session's
    ``after_login`` URL when one is set, otherwise to the index page.
    """
    client_key = app.config["CLIENT_KEY"]
    client_secret = app.config["CLIENT_SECRET"]

    def new_session(**extra: typing.Any) -> OAuth1Session:
        """Build a session from the app credentials and the stored token."""
        return OAuth1Session(
            client_key,
            client_secret=client_secret,
            resource_owner_key=flask.session["owner_key"],
            resource_owner_secret=flask.session["owner_secret"],
            **extra,
        )

    # First session: only used to pull the verifier out of the callback URL.
    parsed = new_session().parse_authorization_response(flask.request.url)
    verifier = parsed.get("oauth_verifier")

    # Second session: carries the verifier for the access-token request.
    oauth_tokens = new_session(verifier=verifier).fetch_access_token(
        wiki_index_php + "?title=Special%3aOAuth%2ftoken"
    )
    flask.session["owner_key"] = oauth_tokens.get("oauth_token")
    flask.session["owner_secret"] = oauth_tokens.get("oauth_token_secret")

    print("login successful")

    return flask.redirect(flask.session.get("after_login") or flask.url_for("index"))
|
||||
|
||||
|
||||
@app.route("/oauth/disconnect")
def oauth_disconnect() -> werkzeug.wrappers.response.Response:
    """Remove the OAuth-related keys from the session and go to the index page."""
    oauth_keys = ("owner_key", "owner_secret", "username", "after_login")
    for key in oauth_keys:
        if key not in flask.session:
            continue
        del flask.session[key]
    return flask.redirect(flask.url_for("index"))
|
||||
|
||||
|
||||
def match_type(q: str, snippet: str) -> str | None:
    """Discover match type: 'exact', 'case_mismatch' or None.

    The snippet is tidied first and en dashes in *q* become hyphens. *q*
    counts as an exact match when it occurs in the snippet either as given or
    with the case of its first character switched. When *q* ends in "y" the
    same test is repeated without that final "y". A match that only succeeds
    when case is ignored is reported as 'case_mismatch'.

    >>> match_type('foo', 'foo')
    'exact'
    >>> match_type('foo', 'bar') is None
    True
    >>> match_type('bar', 'foo bar baz')
    'exact'
    >>> match_type('clean coal technology', 'foo clean coal technologies baz')
    'exact'
    >>> match_type('bar', 'foo Bar baz')
    'exact'
    >>> match_type('bar', 'foo BAR baz')
    'case_mismatch'
    >>> match_type('foo-bar', 'aa foo-bar cc')
    'exact'
    >>> match_type(u'foo\u2013bar', 'aa foo-bar cc')
    'exact'
    """
    q = q.replace("\u2013", "-")
    text = tidy_snippet(snippet)

    if q in text or case_flip_first(q) in text:
        return "exact"

    result = "case_mismatch" if q.lower() in text.lower() else None
    if not q.endswith("y"):
        return result

    # Retry without the trailing "y" (see the "technologies" example above).
    stem = q[:-1]
    if stem in text or case_flip_first(stem) in text:
        return "exact"
    if result is None and stem.lower() in text.lower():
        result = "case_mismatch"
    return result
|
||||
|
||||
|
||||
class NoGoodHit(Exception):
    """Raised by get_best_hit when none of the hits is usable."""
|
||||
|
||||
|
||||
def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any]]:
    """Return the first usable hit together with its ``get_diff`` result.

    A hit is skipped when its title equals *title* ignoring case, when its
    snippet is not an 'exact' match for *title*, or when ``get_diff`` raises
    ``NoMatch`` for it.

    Raises:
        NoGoodHit: if every hit was skipped.
    """
    wanted = title.lower()
    for hit in hits:
        if hit["title"].lower() == wanted or match_type(title, hit["snippet"]) != "exact":
            continue

        try:
            print(f'get diff: {hit["title"]}, {title}')
            found = get_diff(title, hit["title"], None)
        except NoMatch:
            print("no match")
        else:
            return (hit, found)

    raise NoGoodHit
|
||||
|
||||
|
||||
@app.route("/<path:url_title>", methods=["GET", "POST"])
def article_page(url_title: str) -> str | Response:
    """Article page.

    POST: save the link for the submitted ``hit`` and redirect back to this
    page with ``after`` set to that hit's title.

    GET: search for mentions of the article, optionally skip every hit up to
    and including the one named by ``after``, and render the first usable hit
    with ``article2.html``. Renders ``all_done.html`` when no hit is usable.
    """
    from_title = url_title.replace("_", " ").strip()

    if flask.request.method == "POST":
        hit_title = flask.request.form["hit"]
        do_save(from_title, hit_title)
        next_url = flask.url_for("article_page", url_title=url_title, after=hit_title)
        return flask.redirect(next_url)

    total = search_count(from_title)
    with_link = search_count_with_link(from_title)
    _, hits = search_no_link(from_title)

    if after := flask.request.args.get("after"):
        print(after)
        remaining = itertools.dropwhile(lambda hit: hit["title"] != after, hits)
        # Consume the ``after`` hit itself; if it was not found at all the
        # iterator is empty and the full hit list is kept.
        if next(remaining, None):
            hits = list(remaining)

    try:
        hit, found = get_best_hit(from_title, hits)
    except NoGoodHit:
        return flask.render_template("all_done.html")

    context = {
        "title": from_title,
        "total": total,
        "with_link": with_link,
        "hit": hit,
        "replacement": found["replacement"],
        "diff": found["diff"],
        "found": found,
        "url_title": url_title,
    }
    return flask.render_template("article2.html", **context)
|
||||
|
||||
|
||||
def do_save(title: str, hit_title: str) -> str:
    """Update page on Wikipedia.

    Fetches an edit token, recomputes the match for (*title*, *hit_title*)
    and submits the changed section through ``mediawiki_api.edit_page``.

    Returns:
        Whatever ``mediawiki_api.edit_page`` returns.
    """
    token = wikidata_oauth.get_token()
    found = get_match(title, hit_title, None)
    summary = (
        f"link [[{found['replacement']}]] using [[:en:User:Edward/Find link|Find link]]"
    )

    return mediawiki_api.edit_page(
        pageid=found["pageid"],
        section=found["section_num"],
        text=found["section_text"],
        summary=summary,
        baserevid=found["revid"],
        token=token,
    )
|
||||
|
||||
|
||||
@app.route("/saved")
def save_done() -> str:
    """Render the ``save_done.html`` confirmation page."""
    page = flask.render_template("save_done.html")
    return page
|
||||
|
||||
|
||||
@app.route("/api/1/hits")
def api_hits() -> werkzeug.wrappers.response.Response:
    """Return candidates for the given article title as JSON.

    Query arguments:
        title: the article title to search for (required).

    Responds with ``{"title": ..., "hits": ...}`` where ``hits`` is the
    ``results`` entry of ``core.do_search``. Aborts with HTTP 400 when
    ``title`` is missing or empty.
    """
    title = flask.request.args.get("title")
    if not title:
        # An explicit 400 replaces the former ``assert``, which is stripped
        # under ``python -O`` and otherwise surfaced as a server error.
        flask.abort(400)
    ret = core.do_search(title)
    return flask.jsonify(title=title, hits=ret["results"])
|
||||
|
||||
# mock_hits: list[Hit] = json.load(open("sample.json"))
|
||||
# return flask.jsonify(title=title, hits=mock_hits)
|
||||
|
||||
|
||||
@app.route("/api/1/valid_hit")
def api_valid_hit() -> werkzeug.wrappers.response.Response:
    """Report as JSON whether a link can be added from one article to another.

    Query arguments:
        link_from: title of the article that would receive the link (required).
        link_to: title of the article to link to (required).

    Responds with ``{"valid": false}`` when ``get_diff`` raises ``NoMatch``,
    otherwise with ``valid`` true plus the ``diff`` and ``replacement``
    entries of the ``get_diff`` result. Aborts with HTTP 400 when either
    argument is missing or empty.
    """
    link_from = flask.request.args.get("link_from")
    link_to = flask.request.args.get("link_to")
    if not link_from or not link_to:
        # Without this check ``None`` would be handed on to get_diff.
        flask.abort(400)

    try:
        # get_diff's result is used as a mapping with "diff" and "replacement"
        # keys by get_best_hit / article_page; read it the same way here
        # instead of unpacking it as a 2-tuple.
        found = get_diff(link_to, link_from, None)
    except NoMatch:
        return flask.jsonify(valid=False)

    return flask.jsonify(
        valid=True, diff=found["diff"], replacement=found["replacement"]
    )
|
||||
|
||||
|
||||
@app.route("/favicon.ico")
def favicon() -> None:
    """Answer favicon requests with HTTP 404.

    Registering this route keeps ``/favicon.ico`` from being handled by the
    catch-all ``/<path:url_title>`` article route.
    """
    flask.abort(404)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the development server on port 8000, listening on all interfaces.
    # NOTE(review): together with the unconditional ``app.debug = True`` this
    # makes the interactive debugger reachable from other hosts - confirm this
    # entry point is only used on a trusted network.
    app.run(host="0.0.0.0", port=8000)
|
||||
Loading…
Add table
Add a link
Reference in a new issue