Compare commits

..

No commits in common. "b6953cf52f1186770b7b03eacf07eb9d780edd81" and "4d175c8733b043a51f35d09cf11a4d92de34b498" have entirely different histories.

9 changed files with 85 additions and 291 deletions

View file

@ -30,62 +30,16 @@ def call(params: dict[str, str | int]) -> dict[str, Any]:
return data.json()
def article_exists(title: str) -> bool:
    """Check whether an article with the given title exists.

    Sends an ``action=query`` request for ``title`` and inspects the first
    page object in the response.

    Args:
        title: Article title to look up.

    Returns:
        ``False`` when the API flags the page as ``missing`` or the title as
        ``invalid``; ``True`` otherwise.
    """
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": title,
    }
    page = call(params)["query"]["pages"][0]
    # A malformed title (for example one containing "[") is reported with an
    # "invalid" flag instead of "missing"; neither case is an existing article.
    return not (page.get("missing") or page.get("invalid"))
def get_content(title: str) -> tuple[str, int]:
def get_content(title: str) -> str:
"""Get article text."""
params: dict[str, str | int] = {
"action": "query",
"format": "json",
"formatversion": 2,
"prop": "revisions|info",
"rvprop": "content|timestamp|ids",
"rvprop": "content|timestamp",
"titles": title,
}
data = call(params)
rev = data["query"]["pages"][0]["revisions"][0]
content: str = rev["content"]
revid: int = int(rev["revid"])
return content, revid
def compare(title: str, new_text: str) -> str:
    """Return the diff between the current article and ``new_text``.

    Issues an ``action=compare`` request with the page named ``title`` as the
    "from" side and ``new_text`` supplied as the ``main`` slot of the "to"
    side, then returns the ``body`` field of the ``compare`` object in the
    reply.
    """
    request: dict[str, str | int] = {
        "format": "json",
        "formatversion": 2,
        "action": "compare",
        "fromtitle": title,
        "toslots": "main",
        "totext-main": new_text,
        "prop": "diff",
    }
    reply = call(request)
    comparison = reply["compare"]
    body: str = comparison["body"]
    return body
def edit_page(
    title: str, text: str, summary: str, baserevid: str | int, token: str
) -> dict[str, str | int]:
    """Edit a page on Wikipedia.

    Args:
        title: Title of the page to edit.
        text: Full replacement text for the page.
        summary: Edit summary to record with the change.
        baserevid: ID of the revision the new text is based on. Accepts an
            int as well as a str because callers pass the integer revision
            ID obtained when reading the page.
        token: Token sent as the ``token`` parameter of the edit request.

    Returns:
        The ``edit`` object from the API response.

    Raises:
        KeyError: If the response contains no ``edit`` key.
    """
    params: dict[str, str | int] = {
        "format": "json",
        "formatversion": 2,
        "action": "edit",
        "title": title,
        "text": text,
        "baserevid": baserevid,
        "token": token,
        "summary": summary,
    }
    # The "edit" member is an object, not a string, matching the declared
    # return type of this function.
    edit: dict[str, str | int] = call(params)["edit"]
    return edit
rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
return rev

View file

@ -3,10 +3,8 @@ from urllib.parse import urlencode
from flask import current_app, session
from requests_oauthlib import OAuth1Session
WIKI_HOSTNAME = "en.wikipedia.org"
API_URL = f"https://{WIKI_HOSTNAME}/w/api.php"
TIMEOUT = 20
wiki_hostname = "en.wikipedia.org"
api_url = f"https://{wiki_hostname}/w/api.php"
def get_edit_proxy() -> dict[str, str]:
@ -30,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
return oauth.post(API_URL, data=params, timeout=TIMEOUT, proxies=proxies)
return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
def raw_request(params):
app = current_app
url = API_URL + "?" + urlencode(params)
url = api_url + "?" + urlencode(params)
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
@ -45,7 +43,7 @@ def raw_request(params):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
return oauth.get(url, timeout=TIMEOUT, proxies=proxies)
return oauth.get(url, timeout=10, proxies=proxies)
def api_request(params):

View file

@ -68,7 +68,7 @@ def needs_disambig(link: dict[str, Any]) -> bool:
)
def get_article_links(enwiki: str) -> dict[str, str]:
def get_article_links(enwiki: str) -> list[str]:
"""Get links that appear in this article."""
params: dict[str, str | int] = link_params(enwiki)
@ -92,13 +92,11 @@ def get_article_links(enwiki: str) -> dict[str, str]:
params["gplcontinue"] = data["continue"]["gplcontinue"]
sleep(0.1)
ret_links = {}
for link in set(links):
ret_links[link] = link
for r in redirects.get(link, []):
ret_links[r] = link
if link in redirects:
links.update(redirects[link])
return ret_links
return list(links)
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
@ -123,9 +121,10 @@ def delete_toc(root: lxml.html.HtmlElement) -> None:
toc.getparent().remove(toc)
def get_dab_html(dab_num: int, html: str) -> str:
def get_dab_html(dab_num: int, title: str) -> str:
"""Parse dab page and rewrite links."""
root = lxml.html.fromstring(html)
dab_html = get_article_html(title)
root = lxml.html.fromstring(dab_html)
delete_toc(root)
element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}
@ -161,11 +160,10 @@ class Article:
self.dab_lookup: dict[int, str] = {}
self.dab_order: list[str] = []
self.parse: Optional[dict[str, Any]] = None
self.dab_html: dict[str, str] = {}
def preview_endpoint(self) -> str:
def save_endpoint(self) -> str:
"""Endpoint for saving changes."""
href: str = flask.url_for("preview", enwiki=self.enwiki.replace(" ", "_"))
href: str = flask.url_for("save", enwiki=self.enwiki.replace(" ", "_"))
return href
def load(self) -> None:
@ -175,34 +173,28 @@ class Article:
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
"""Disambiguation links that need fixing."""
seen = set()
for a in self.root.findall(".//a[@href]"):
title = a.get("title")
if title is not None and title in self.links:
yield a, title, self.links[title]
href = a.get("href")
if not href.startswith("/wiki/"):
if title is None or title not in self.links:
continue
a.set("href", "https://en.wikipedia.org" + href)
a.set("target", "_blank")
a.set("class", "disambig")
def dab_link_to(self):
    """Return the ``link_to`` value of each entry in ``self.dab_list``, in order."""
    targets = []
    for dab in self.dab_list:
        targets.append(dab["link_to"])
    return targets
if title in seen:
continue
seen.add(title)
yield a, title
def process_links(self) -> None:
"""Process links in parsed wikitext."""
for dab_num, (a, link_to, title) in enumerate(self.iter_links()):
a.set("class", "disambig")
for dab_num, (a, title) in enumerate(self.iter_links()):
a.set("id", f"dab-{dab_num}")
if title not in self.dab_html:
self.dab_html[title] = get_article_html(title)
dab: DabItem = {
"num": dab_num,
"title": title,
"link_to": link_to,
"html": get_dab_html(dab_num, self.dab_html[title]),
"html": get_dab_html(dab_num, title),
}
self.dab_list.append(dab)
self.dab_order.append(title)

View file

@ -53,8 +53,8 @@ a.new { color: red; }
<div id="dabs" class="p-3">
<h1>{{ article.enwiki }}</h1>
<div id="save-panel" class="d-none">
<form method="POST" action="{{ article.preview_endpoint() }}">
<button class="btn btn-primary" id="save-btn">Preview before save</button>
<form method="POST" action="{{ article.save_endpoint() }}">
<button class="btn btn-primary" id="save-btn">Save</button>
<span id="edit-count"></span>
<input type="hidden" value="{}" id="save-edits" name="edits">
</form>
@ -62,9 +62,7 @@ a.new { color: red; }
<div>There are {{ article.dab_list | count }} links in the article that need disambiguating.</div>
{% for dab in article.dab_list %}
<div class="card p-1 m-2">
<div class="card-body">
<h3 class="card-title" id="dab-card-title-{{ dab.num }}" onclick="return jump_to({{ dab.num }})">{{ dab.title }}</h3>
{% if dab.title != dab.link_to %}<div>redirect from {{ dab.link_to }}</div>{% endif %}
<div>
<a href="#" onclick="return jump_to({{ dab.num }})">highlight link</a>
<span class="d-none" id="cancel-{{ dab.num }}">
@ -72,8 +70,7 @@ a.new { color: red; }
<a href="#" onclick="return cancel_selection({{ dab.num }})">cancel selection</a>
</span>
</div>
<div class="dab-article d-none" id="dab-article-{{ dab.num }}">{{ dab.html | safe }}</div>
</div>
<div class="dab-article" id="dab-article-{{ dab.num }}">{{ dab.html | safe }}</div>
</div>
{% endfor %}
</div>
@ -87,38 +84,12 @@ a.new { color: red; }
var edit_set = new Set();
var edits = {};
var dab_lookup = {{ article.dab_lookup | tojson }};
var dab_order = {{ article.dab_order | tojson }};
var dab_link_to = {{ article.dab_link_to() | tojson }};
var dab_links = document.getElementsByClassName("disambig");
for(var i=0; i<dab_links.length; i++) {
dab_links[i].addEventListener("click", (event) => {
event.preventDefault();
var dab_num = event.target.id.substring(4);
open_dab(dab_num);
});
}
function jump_to(dab_num) {
open_dab(dab_num);
var link = document.getElementById("dab-" + dab_num);
link.scrollIntoView();
link.classList.add("disambig-highlight")
return false;
}
function open_dab(dab_num) {
var highlight_title = "text-bg-primary";
var dab_articles = document.getElementsByClassName("dab-article");
for(var i=0; i<dab_articles.length; i++) {
dab_articles[i].classList.add("d-none");
}
var dab_article = document.getElementById("dab-article-" + dab_num);
dab_article.classList.remove("d-none");
var links = document.getElementsByTagName("a");
for(var i=0; i<links.length; i++) {
links[i].classList.remove("disambig-highlight");
@ -133,7 +104,9 @@ a.new { color: red; }
card_title.classList.add(highlight_title);
var link = document.getElementById("dab-" + dab_num);
link.scrollIntoView();
link.classList.add("disambig-highlight")
return false;
}
function clear_dab_highlight(dab_num) {
@ -158,8 +131,7 @@ a.new { color: red; }
}
function update_edits() {
var saves = dab_link_to.map((link_to, num) => (
{"num": num, "link_to": link_to, "title": edits[num]}));
var saves = dab_order.filter(t => edits[t]).map(t => [t, edits[t]]);
var save_edits = document.getElementById("save-edits");
save_edits.value = JSON.stringify(saves);
}
@ -169,7 +141,7 @@ a.new { color: red; }
document.getElementById("cancel-" + dab_num).classList.remove("d-none");
var title = element.getAttribute("title");
edits[dab_num] = title;
edits[dab_lookup[dab_num]] = title;
edit_set.add(dab_num);
update_edits();
@ -191,7 +163,7 @@ a.new { color: red; }
}
function cancel_selection(dab_num) {
delete edits[dab_num];
delete edits[dab_lookup[dab_num]];
document.getElementById("cancel-" + dab_num).classList.add("d-none");
clear_dab_highlight(dab_num);
edit_set.delete(dab_num);

View file

@ -1,21 +1,7 @@
{% extends "base.html" %}
{% block title %}DAB Mechanic{% endblock %}
{% block content %}
<div class="m-3">
<form>
article title:
<input name="title" value="{{ request.args.get("title", "") }}">
<button class="btn btn-sm btn-primary">go</button>
</form>
{% if title and not exists %}
<p>No article titled "{{ title }}" found in Wikipedia.</p>
{% endif %}
<ol>
{% for enwiki, count in articles %}
<li>

View file

@ -15,7 +15,13 @@
<a class="navbar-brand" href="{{ url_for('index') }}">Dab Mechanic</a>
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li class="nav-item">
<a class="nav-link active" aria-current="page" href="/">Home</a>
<a class="nav-link active" aria-current="page" href="#">Home</a>
</li>
<li class="nav-item">
<a class="nav-link" href="#">Link</a>
</li>
<li class="nav-item">
<a class="nav-link disabled">Disabled</a>
</li>
</ul>

View file

@ -1,39 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{ title }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<link rel="stylesheet" href="https://www.mediawiki.org/w/load.php?modules=mediawiki.diff.styles&only=styles">
</head>
<body>
<div class="m-3">
<h2>Preview of changes: {{ title }}</h2>
<div class="card">
<div class="card-body">
<h5 class="card-title">Edit summary</h5>
<p class="card-text">{{ edit_summary }}</p>
</div>
</div>
{# <pre>{{ text }}</pre> #}
<table class="diff my-3">
<colgroup>
<col class="diff-marker">
<col class="diff-content">
<col class="diff-marker">
<col class="diff-content">
</colgroup>
<tbody>
{{ diff | safe }}
</tbody>
</table>
<form method="POST" action="{{ url_for("save", enwiki=title) }}">
<button class="btn btn-primary" id="save-btn">Save changes</button>
<input type="hidden" value="{{ request.form.edits }}" id="save-edits" name="edits">
</form>
</div>
</body>
</html>

18
templates/save.html Normal file
View file

@ -0,0 +1,18 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{ title }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
</head>
<body>
<div class="m-3">
<h2>Save edits: {{ title }}</h2>
<p>Edit summary: {{ edit_summary }}</p>
</div>
<div class="m-3">
<pre>{{ text }}</pre>
</div>
</body>
</html>

View file

@ -3,9 +3,7 @@
import inspect
import json
import re
from typing import Optional, TypedDict
import mwparserfromhell
from pprint import pprint
from typing import Optional
import flask
import lxml.html
@ -66,43 +64,28 @@ def parse_articles_with_dab_links(root: lxml.html.Element) -> list[tuple[str, in
@app.route("/")
def index():
title = flask.request.args.get("title")
exists = None
if title:
title = title.strip()
exists = mediawiki_api.article_exists(title)
if exists:
return flask.redirect(
flask.url_for("article_page", enwiki=title.replace(" ", "_"))
)
r = requests.get(awdl_url, params={"limit": 100})
root = lxml.html.fromstring(r.content)
articles = parse_articles_with_dab_links(root)
# articles = [line[:-1] for line in open("article_list")]
return flask.render_template(
"index.html", title=title, exists=exists, articles=articles,
)
return flask.render_template("index.html", articles=articles)
class _EditRequired(TypedDict):
    """Keys that every submitted edit carries."""

    # Position of the disambiguation link within the article.
    num: int
    # Title the link currently points at.
    link_to: str


class Edit(_EditRequired, total=False):
    """Edit to an article.

    ``title`` is optional: the client leaves it out for links where no
    replacement was chosen, which is why consumers probe it with
    ``.get("title")`` / ``"title" in edit`` before using it.
    """

    # Replacement title picked by the user, when one was picked.
    title: str
def make_disamb_link(edit: tuple[str, str]) -> str:
    """Build the piped wikilink for one edit.

    The second element of ``edit`` becomes the link target and the first
    element becomes the displayed text.
    """
    target = edit[1]
    label = edit[0]
    return "[[{}|{}]]".format(target, label)
def old_apply_edits(article_text: str, edits: list[Edit]) -> str:
def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
"""Apply edits to article text."""
def escape(s: str) -> str:
return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")
for edit in edits:
# print(rf"\[\[{escape(link_from)}\]\]")
for link_from, link_to in edits:
print(rf"\[\[{escape(link_from)}\]\]")
article_text = re.sub(
rf"\[\[{escape(link_from)}\]\]",
f"[[{link_to}|{link_from}]]",
@ -112,107 +95,34 @@ def old_apply_edits(article_text: str, edits: list[Edit]) -> str:
return article_text
def make_disamb_link(edit: Edit) -> str:
    """Build the piped wikilink for one edit.

    The edit's ``title`` becomes the link target and its ``link_to`` value
    becomes the displayed text.
    """
    target = edit["title"]
    label = edit["link_to"]
    return "[[{}|{}]]".format(target, label)
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
"""Save edits to article."""
edits = [
(link_to, link_from)
for link_to, link_from in json.loads(flask.request.form["edits"])
]
def build_edit_summary(edits: list[Edit]) -> str:
"""Given a list of edits return an edit summary."""
enwiki = enwiki.replace("_", " ")
titles = ", ".join(make_disamb_link(edit) for edit in edits[:-1])
if len(titles) > 1:
titles += " and "
titles += make_disamb_link(edits[-1])
return f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
def get_links(wikicode, dab_links):
    """Return the wikilinks in ``wikicode`` that point at an edited dab title.

    Only entries of ``dab_links`` with a truthy ``"title"`` are considered;
    their ``"link_to"`` values form the set of wanted targets. Every wikilink
    whose title, converted with ``str``, is in that set is returned, in the
    order ``wikicode.filter_wikilinks()`` yields them.
    """
    wanted = set()
    for dab in dab_links:
        if dab.get("title"):
            wanted.add(dab["link_to"])

    matches = []
    for link in wikicode.filter_wikilinks():
        if str(link.title) in wanted:
            matches.append(link)
    return matches
def apply_edits(text, dab_links):
    """Rewrite disambiguation links in article wikitext.

    The wikitext is parsed with ``mwparserfromhell`` and the wikilinks found
    by ``get_links`` are paired, in order, with the entries of ``dab_links``.
    For each entry that has a truthy ``"title"`` the paired wikilink is
    retargeted to that title; the original target is kept as the link text
    when the link had no text of its own.

    Args:
        text: Current wikitext of the article.
        dab_links: Edit dicts; entries without a truthy ``"title"`` are left
            unchanged.

    Returns:
        The updated wikitext.

    Raises:
        ValueError: If the number of matching wikilinks differs from the
            number of entries in ``dab_links``, since pairing them up would
            apply edits to the wrong links.
    """
    wikicode = mwparserfromhell.parse(text)
    links = get_links(wikicode, dab_links)
    # An explicit check rather than ``assert``: assertions are stripped under
    # ``python -O`` and ``zip`` would then silently truncate and mis-pair.
    if len(links) != len(dab_links):
        raise ValueError(
            f"found {len(links)} matching wikilinks in the article "
            f"but {len(dab_links)} edits were supplied"
        )

    for wikilink, edit in zip(links, dab_links):
        new_target = edit.get("title")
        if not new_target:
            continue
        if not wikilink.text:
            # Keep the visible text unchanged once the target is replaced.
            wikilink.text = wikilink.title
        wikilink.title = new_target

    return str(wikicode)
@app.route("/preview/<path:enwiki>", methods=["POST"])
def preview(enwiki: str) -> Response | str:
"""Preview article edits."""
enwiki = enwiki.replace("_", " ")
dab_links = json.loads(flask.request.form["edits"])
dab_links = [link for link in dab_links if "title" in link]
cur_text, baserevid = mediawiki_api.get_content(enwiki)
text = apply_edits(cur_text, dab_links)
diff = mediawiki_api.compare(enwiki, text)
article_text = apply_edits(mediawiki_api.get_content(enwiki), edits)
return flask.render_template(
"preview.html",
edit_summary=build_edit_summary(dab_links),
"save.html",
edit_summary=edit_summary,
title=enwiki,
edits=dab_links,
diff=diff,
edits=edits,
text=article_text,
)
def do_save(enwiki: str):
    """Update page on Wikipedia.

    Reads the ``edits`` form field of the current request as JSON, keeps the
    entries that contain a ``"title"`` key, applies them to the current text
    of ``enwiki`` and submits the result through ``mediawiki_api.edit_page``,
    passing the revision ID returned alongside the current text as
    ``baserevid``. The edit summary is printed before the request is sent.

    Returns:
        Whatever ``mediawiki_api.edit_page`` returns.
    """
    submitted = json.loads(flask.request.form["edits"])
    chosen = [entry for entry in submitted if "title" in entry]

    current_text, base_revision = mediawiki_api.get_content(enwiki)
    updated_text = apply_edits(current_text, chosen)

    edit_token = wikidata_oauth.get_token()
    edit_summary = build_edit_summary(chosen)
    print(edit_summary)

    return mediawiki_api.edit_page(
        title=enwiki,
        text=updated_text,
        summary=edit_summary,
        baserevid=base_revision,
        token=edit_token,
    )
@app.route("/save/<path:enwiki>", methods=["GET", "POST"])
def save(enwiki: str) -> Response | str:
    """Save edits to article.

    Any non-GET request performs the save and then redirects to this same
    endpoint; a GET request renders the "edit saved" page. The article name
    passed to ``do_save`` and to the template has underscores replaced with
    spaces, while the redirect keeps ``enwiki`` as received.
    """
    if flask.request.method != "GET":
        do_save(enwiki.replace("_", " "))
        target = flask.url_for(flask.request.endpoint, enwiki=enwiki)
        return flask.redirect(target)

    return flask.render_template("edit_saved.html", title=enwiki.replace("_", " "))
def redirect_if_needed(enwiki: str) -> Optional[Response]:
"""Check if there are spaces in the article name and redirect."""
return (
@ -231,9 +141,6 @@ def article_page(enwiki: str) -> Response:
if redirect:
return redirect
if "owner_key" not in flask.session:
return flask.render_template("login_needed.html")
article = wikipedia.Article(enwiki)
article.load()
article.process_links()