dab-mechanic/web_view.py

235 lines
6.9 KiB
Python
Raw Permalink Normal View History

2022-08-13 13:16:49 +01:00
#!/usr/bin/python3
2022-08-15 17:56:21 +01:00
import inspect
2022-08-13 16:25:07 +01:00
import json
2022-08-14 17:44:07 +01:00
import re
import sys
import traceback
2022-08-17 14:48:20 +01:00
from typing import Optional
2022-08-13 13:16:49 +01:00
import flask
import lxml.html
import requests
2022-08-15 17:56:21 +01:00
import werkzeug.exceptions
2022-08-15 11:43:22 +01:00
from requests_oauthlib import OAuth1Session
from werkzeug.debug.tbtools import DebugTraceback
2022-08-13 13:16:49 +01:00
from werkzeug.wrappers import Response
2022-08-17 14:38:30 +01:00
from dab_mechanic import mediawiki_api, wikidata_oauth, wikipedia
2022-08-13 13:16:49 +01:00
# Flask application object; the route, before-request and error-handler
# decorators in this module all register against it.
app = flask.Flask(__name__)
2022-08-15 13:11:29 +01:00
# Load settings from the "config.default" object. The OAuth views read
# CLIENT_KEY and CLIENT_SECRET from app.config.
app.config.from_object("config.default")
2022-08-13 13:16:49 +01:00
2022-08-16 12:43:03 +01:00
# Hostname of the wiki this tool talks to, and the api.php / index.php URLs
# derived from it.
wiki_hostname = "en.wikipedia.org"
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
wiki_index_php = f"https://{wiki_hostname}/w/index.php"
2022-08-17 13:34:55 +01:00
# Page fetched by the index view; its first table is parsed into
# (article title, link count) pairs.
awdl_url = "https://dplbot.toolforge.org/articles_with_dab_links.php"
2022-08-16 12:43:03 +01:00
@app.before_request
def global_user() -> None:
    """Store the username from ``wikidata_oauth`` on ``flask.g`` before each request."""
    username = wikidata_oauth.get_username()
    flask.g.user = username
2022-08-13 13:16:49 +01:00
2022-08-14 17:44:07 +01:00
2022-08-15 17:56:21 +01:00
@app.errorhandler(werkzeug.exceptions.InternalServerError)
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
    """Render ``show_error.html`` for the exception currently being handled.

    The exception and its traceback are taken from ``sys.exc_info()`` rather
    than from *e*.  Returns the rendered page together with status code 500.
    """
    _, exc_value, current_traceback = sys.exc_info()
    assert exc_value

    debug_tb = DebugTraceback(exc_value)
    html_summary = debug_tb.render_traceback_html(include_title=False)
    exception_only = "".join(debug_tb._te.format_exception_only())

    # The last entry yielded by walk_tb is the innermost frame, i.e. the one
    # in which the exception was raised.
    frames = list(traceback.walk_tb(current_traceback))
    innermost_frame, _ = frames[-1]
    innermost_args = inspect.getargs(innermost_frame.f_code)

    context = {
        "plaintext": debug_tb.render_traceback_text(),
        "exception": exception_only,
        "exception_type": debug_tb._te.exc_type.__name__,
        "summary": html_summary,
        "last_frame": innermost_frame,
        "last_frame_args": innermost_args,
    }
    return flask.render_template("show_error.html", **context), 500
2023-09-29 17:52:06 +01:00
def parse_articles_with_dab_links(root: lxml.html.HtmlElement) -> list[tuple[str, int]]:
    """Extract ``(title, link count)`` pairs from the first table under *root*.

    Every child of the table is treated as a row.  The title is the text of
    the first element inside the row's first cell, and the count comes from
    the first element inside the second cell, whose text must read
    ``"<number> links"``.
    """
    suffix = " links"

    table = root.find(".//table")
    assert table is not None

    def parse_row(row: lxml.html.HtmlElement) -> tuple[str, int]:
        """Turn one table row into a ``(title, count)`` pair."""
        title = row[0][0].text
        count_text = row[1][0].text
        assert title and count_text and count_text.endswith(suffix)
        return title, int(count_text[: -len(suffix)])

    return [parse_row(tr) for tr in table]
2022-08-13 13:16:49 +01:00
@app.route("/")
def index() -> str:
    """Index page.

    Fetch the page at ``awdl_url`` (limited to 100 entries), parse its table
    into ``(title, link count)`` pairs and render them with ``index.html``.

    Raises ``requests.RequestException`` if the upstream request times out
    or returns an HTTP error status.
    """
    # requests applies no timeout unless one is given, so a stalled upstream
    # would otherwise block this worker indefinitely.
    r = requests.get(awdl_url, params={"limit": 100}, timeout=30)
    # Fail on an HTTP error status instead of trying to parse an error page.
    r.raise_for_status()

    root = lxml.html.fromstring(r.content)
    articles = parse_articles_with_dab_links(root)

    return flask.render_template("index.html", articles=articles)
2022-08-14 17:44:07 +01:00
def make_disamb_link(edit: tuple[str, str]) -> str:
    """Build the piped wikilink for *edit*.

    The first item of *edit* becomes the link label and the second the link
    target, giving ``[[target|label]]``.
    """
    label, target = edit[0], edit[1]
    return "[[{}|{}]]".format(target, label)
def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
    """Apply edits to article text.

    Each edit is a ``(link_from, link_to)`` pair.  Every ``[[link_from]]`` in
    *article_text* is rewritten to ``[[link_to|link_from]]``.  Spaces and
    underscores in *link_from* are interchangeable when matching.  Edits are
    applied in order and the resulting text is returned.
    """

    def escape(s: str) -> str:
        """Regex-escape *s*, letting each space or underscore match either."""
        return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")

    for link_from, link_to in edits:
        replacement = f"[[{link_to}|{link_from}]]"
        article_text = re.sub(
            rf"\[\[{escape(link_from)}\]\]",
            # A callable's result is inserted verbatim, whereas a string
            # replacement is parsed as a template, so a backslash in a title
            # (e.g. "\1" or "\D") would raise re.error or be rewritten.
            lambda _match, text=replacement: text,
            article_text,
        )

    return article_text
2022-08-13 16:25:07 +01:00
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
    """Save edits to article.

    Reads the ``edits`` form field, a JSON list of ``[link_from, link_to]``
    pairs, applies the edits to the article's current content and renders
    ``save.html`` with the new text and a generated edit summary.

    Responds with 400 if the list of edits is empty.
    """
    # Each pair is (link text as it appears in the article, link target),
    # the order expected by make_disamb_link() and apply_edits().
    edits = [
        (link_from, link_to)
        for link_from, link_to in json.loads(flask.request.form["edits"])
    ]
    if not edits:
        # Without this the summary code below would fail with IndexError.
        flask.abort(400, description="No edits supplied.")

    enwiki = enwiki.replace("_", " ")

    # Build "A", "A and B" or "A, B and C" depending on the number of edits.
    links = [make_disamb_link(edit) for edit in edits]
    if len(links) == 1:
        titles = links[0]
    else:
        titles = ", ".join(links[:-1]) + " and " + links[-1]

    edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"

    article_text = apply_edits(mediawiki_api.get_content(enwiki), edits)

    return flask.render_template(
        "save.html",
        edit_summary=edit_summary,
        title=enwiki,
        edits=edits,
        text=article_text,
    )
2022-08-17 14:48:20 +01:00
def redirect_if_needed(enwiki: str) -> Optional[Response]:
    """Redirect to the underscore form of *enwiki* when it contains spaces.

    Returns a redirect to the current endpoint with every space replaced by
    an underscore, or ``None`` when the name contains no spaces.
    """
    endpoint = flask.request.endpoint
    assert endpoint

    if " " not in enwiki:
        return None

    canonical = enwiki.replace(" ", "_")
    return flask.redirect(flask.url_for(endpoint, enwiki=canonical))
2022-08-13 13:16:49 +01:00
@app.route("/enwiki/<path:enwiki>")
def article_page(enwiki: str) -> Response | str:
    """Article Page.

    Redirects to the underscore form of the title when it contains spaces.
    Otherwise loads the article, processes its links and renders
    ``article.html``.
    """
    if response := redirect_if_needed(enwiki):
        return response

    article = wikipedia.Article(enwiki)
    article.load()
    article.process_links()
    assert article.parse

    return flask.render_template("article.html", article=article)
2022-08-13 13:16:49 +01:00
2022-08-15 11:43:22 +01:00
@app.route("/oauth/start")
def start_oauth() -> Response:
    """Start OAuth.

    Remembers the optional ``next`` query argument in the session, fetches a
    request token from the wiki, stores it in the session and redirects the
    browser to the wiki's authorization page.
    """
    if after_login := flask.request.args.get("next"):
        flask.session["after_login"] = after_login

    consumer_key = app.config["CLIENT_KEY"]
    consumer_secret = app.config["CLIENT_SECRET"]

    initiate_url = f"{wiki_index_php}?title=Special%3aOAuth%2finitiate"
    oauth_session = OAuth1Session(
        consumer_key,
        client_secret=consumer_secret,
        callback_uri="oob",
    )
    request_token = oauth_session.fetch_request_token(initiate_url)

    # Keep the request token so the callback view can finish the exchange.
    flask.session["owner_key"] = request_token.get("oauth_token")
    flask.session["owner_secret"] = request_token.get("oauth_token_secret")

    authorize_base = f"https://{wiki_hostname}/wiki/Special:OAuth/authorize"
    return flask.redirect(
        oauth_session.authorization_url(
            authorize_base, oauth_consumer_key=consumer_key
        )
    )
def _is_safe_redirect_target(target: str) -> bool:
    """Return True if *target* points back at this site.

    Accepts URLs without scheme and host, and http(s) or scheme-relative URLs
    whose host equals the host of the current request.
    """
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(target)
    except ValueError:
        return False

    if parts.netloc:
        return (
            parts.scheme in ("", "http", "https")
            and parts.netloc == flask.request.host
        )
    # No host: refuse anything with a scheme ("javascript:", "http:/x") and
    # backslashes, which some browsers treat like "/" ("/\host" -> "//host").
    return not parts.scheme and "\\" not in target


@app.route("/oauth/callback", methods=["GET"])
def oauth_callback() -> Response:
    """Authentication callback.

    Exchanges the request token stored in the session plus the verifier from
    the callback URL for an access token, stores the access token in the
    session and redirects to the page remembered at login start, or to the
    index page when there is none or it points at another site.
    """
    client_key = app.config["CLIENT_KEY"]
    client_secret = app.config["CLIENT_SECRET"]

    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=flask.session["owner_key"],
        resource_owner_secret=flask.session["owner_secret"],
    )

    oauth_response = oauth.parse_authorization_response(flask.request.url)
    verifier = oauth_response.get("oauth_verifier")
    access_token_url = wiki_index_php + "?title=Special%3aOAuth%2ftoken"
    oauth = OAuth1Session(
        client_key,
        client_secret=client_secret,
        resource_owner_key=flask.session["owner_key"],
        resource_owner_secret=flask.session["owner_secret"],
        verifier=verifier,
    )

    oauth_tokens = oauth.fetch_access_token(access_token_url)
    flask.session["owner_key"] = oauth_tokens.get("oauth_token")
    flask.session["owner_secret"] = oauth_tokens.get("oauth_token_secret")

    # Pop rather than get so a stale target is not reused by a later login.
    next_page = flask.session.pop("after_login", None)
    # The target came from the "next" query argument, which is user
    # controlled, so only follow it when it stays on this site.
    if next_page and not _is_safe_redirect_target(next_page):
        next_page = None

    return flask.redirect(next_page if next_page else flask.url_for("index"))
2022-08-15 11:43:22 +01:00
@app.route("/oauth/disconnect")
def oauth_disconnect() -> Response:
    """Disconnect OAuth.

    Removes the OAuth token, username and post-login target from the session
    and redirects to the index page.
    """
    for session_key in ("owner_key", "owner_secret", "username", "after_login"):
        flask.session.pop(session_key, None)

    return flask.redirect(flask.url_for("index"))
2022-08-13 13:16:49 +01:00
if __name__ == "__main__":
    # Start the app's built-in server when the module is run as a script.
    # Host "0.0.0.0" listens on all network interfaces, not just localhost.
    app.run(host="0.0.0.0")