diff --git a/dab_mechanic/mediawiki_api.py b/dab_mechanic/mediawiki_api.py
index 26d7a20..265cb73 100644
--- a/dab_mechanic/mediawiki_api.py
+++ b/dab_mechanic/mediawiki_api.py
@@ -30,16 +30,62 @@ def call(params: dict[str, str | int]) -> dict[str, Any]:
return data.json()
-def get_content(title: str) -> str:
+def article_exists(title: str) -> bool:
+ """Check whether an article with the given title exists."""
+ params: dict[str, str | int] = {
+ "action": "query",
+ "format": "json",
+ "formatversion": 2,
+ "titles": title,
+ }
+ return not call(params)["query"]["pages"][0].get("missing")
+
+
+def get_content(title: str) -> tuple[str, int]:
- """Get article text."""
+ """Get article text and revision ID."""
params: dict[str, str | int] = {
"action": "query",
"format": "json",
"formatversion": 2,
"prop": "revisions|info",
- "rvprop": "content|timestamp",
+ "rvprop": "content|timestamp|ids",
"titles": title,
}
data = call(params)
- rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
- return rev
+ rev = data["query"]["pages"][0]["revisions"][0]
+ content: str = rev["content"]
+ revid: int = int(rev["revid"])
+ return content, revid
+
+
+def compare(title: str, new_text: str) -> str:
+ """Generate a diff for the new article text."""
+ params: dict[str, str | int] = {
+ "format": "json",
+ "formatversion": 2,
+ "action": "compare",
+ "fromtitle": title,
+ "toslots": "main",
+ "totext-main": new_text,
+ "prop": "diff",
+ }
+ diff: str = call(params)["compare"]["body"]
+ return diff
+
+
+def edit_page(
+ title: str, text: str, summary: str, baserevid: str, token: str
+) -> dict[str, str | int]:
+ """Edit a page on Wikipedia."""
+ params: dict[str, str | int] = {
+ "format": "json",
+ "formatversion": 2,
+ "action": "edit",
+ "title": title,
+ "text": text,
+ "baserevid": baserevid,
+ "token": token,
+ "summary": summary,
+ }
+ edit: str = call(params)["edit"]
+ return edit
diff --git a/dab_mechanic/wikidata_oauth.py b/dab_mechanic/wikidata_oauth.py
index 5af0976..5b048b2 100644
--- a/dab_mechanic/wikidata_oauth.py
+++ b/dab_mechanic/wikidata_oauth.py
@@ -3,8 +3,10 @@ from urllib.parse import urlencode
from flask import current_app, session
from requests_oauthlib import OAuth1Session
-wiki_hostname = "en.wikipedia.org"
-api_url = f"https://{wiki_hostname}/w/api.php"
+WIKI_HOSTNAME = "en.wikipedia.org"
+API_URL = f"https://{WIKI_HOSTNAME}/w/api.php"
+
+TIMEOUT = 20
def get_edit_proxy() -> dict[str, str]:
@@ -28,12 +30,12 @@ def api_post_request(params: dict[str, str | int]):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
- return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
+ return oauth.post(API_URL, data=params, timeout=TIMEOUT, proxies=proxies)
def raw_request(params):
app = current_app
- url = api_url + "?" + urlencode(params)
+ url = API_URL + "?" + urlencode(params)
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
@@ -43,7 +45,7 @@ def raw_request(params):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
- return oauth.get(url, timeout=10, proxies=proxies)
+ return oauth.get(url, timeout=TIMEOUT, proxies=proxies)
def api_request(params):
diff --git a/dab_mechanic/wikipedia.py b/dab_mechanic/wikipedia.py
index 57c03c4..fecf7f2 100644
--- a/dab_mechanic/wikipedia.py
+++ b/dab_mechanic/wikipedia.py
@@ -68,7 +68,7 @@ def needs_disambig(link: dict[str, Any]) -> bool:
)
-def get_article_links(enwiki: str) -> list[str]:
+def get_article_links(enwiki: str) -> dict[str, str]:
"""Get links that appear in this article."""
params: dict[str, str | int] = link_params(enwiki)
@@ -92,11 +92,13 @@ def get_article_links(enwiki: str) -> list[str]:
params["gplcontinue"] = data["continue"]["gplcontinue"]
sleep(0.1)
+ ret_links = {}
for link in set(links):
- if link in redirects:
- links.update(redirects[link])
+ ret_links[link] = link
+ for r in redirects.get(link, []):
+ ret_links[r] = link
- return list(links)
+ return ret_links
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
@@ -121,10 +123,9 @@ def delete_toc(root: lxml.html.HtmlElement) -> None:
toc.getparent().remove(toc)
-def get_dab_html(dab_num: int, title: str) -> str:
+def get_dab_html(dab_num: int, html: str) -> str:
"""Parse dab page and rewrite links."""
- dab_html = get_article_html(title)
- root = lxml.html.fromstring(dab_html)
+ root = lxml.html.fromstring(html)
delete_toc(root)
element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}
@@ -160,10 +161,11 @@ class Article:
self.dab_lookup: dict[int, str] = {}
self.dab_order: list[str] = []
self.parse: Optional[dict[str, Any]] = None
+ self.dab_html: dict[str, str] = {}
- def save_endpoint(self) -> str:
+ def preview_endpoint(self) -> str:
- """Endpoint for saving changes."""
+ """Endpoint for previewing changes."""
- href: str = flask.url_for("save", enwiki=self.enwiki.replace(" ", "_"))
+ href: str = flask.url_for("preview", enwiki=self.enwiki.replace(" ", "_"))
return href
def load(self) -> None:
@@ -173,28 +175,34 @@ class Article:
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
"""Disambiguation links that need fixing."""
- seen = set()
for a in self.root.findall(".//a[@href]"):
title = a.get("title")
- if title is None or title not in self.links:
- continue
- a.set("class", "disambig")
+ if title is not None and title in self.links:
+ yield a, title, self.links[title]
- if title in seen:
+ href = a.get("href")
+ if not href.startswith("/wiki/"):
continue
- seen.add(title)
+ a.set("href", "https://en.wikipedia.org" + href)
+ a.set("target", "_blank")
- yield a, title
+ def dab_link_to(self):
+ return [dab["link_to"] for dab in self.dab_list]
def process_links(self) -> None:
"""Process links in parsed wikitext."""
- for dab_num, (a, title) in enumerate(self.iter_links()):
+ for dab_num, (a, link_to, title) in enumerate(self.iter_links()):
+ a.set("class", "disambig")
a.set("id", f"dab-{dab_num}")
+ if title not in self.dab_html:
+ self.dab_html[title] = get_article_html(title)
+
dab: DabItem = {
"num": dab_num,
"title": title,
- "html": get_dab_html(dab_num, title),
+ "link_to": link_to,
+ "html": get_dab_html(dab_num, self.dab_html[title]),
}
self.dab_list.append(dab)
self.dab_order.append(title)
diff --git a/templates/article.html b/templates/article.html
index 7c9afbf..eb1e62d 100644
--- a/templates/article.html
+++ b/templates/article.html
@@ -53,8 +53,8 @@ a.new { color: red; }
{{ article.enwiki }}
-
@@ -62,7 +62,9 @@ a.new { color: red; }
There are {{ article.dab_list | count }} links in the article that need disambiguating.
{% for dab in article.dab_list %}
-
{{ dab.title }}
+
+
{{ dab.title }}
+ {% if dab.title != dab.link_to %}
redirect from {{ dab.link_to }}
{% endif %}
-
{{ dab.html | safe }}
+
{{ dab.html | safe }}
+
{% endfor %}
@@ -84,12 +87,38 @@ a.new { color: red; }
var edit_set = new Set();
var edits = {};
- var dab_lookup = {{ article.dab_lookup | tojson }};
var dab_order = {{ article.dab_order | tojson }};
+ var dab_link_to = {{ article.dab_link_to() | tojson }};
+
+ var dab_links = document.getElementsByClassName("disambig");
+ for(var i=0; i<dab_links.length; i++) {
+ dab_links[i].addEventListener("click", (event) => {
+ event.preventDefault();
+ var dab_num = event.target.id.substring(4);
+ open_dab(dab_num);
+ });
+ }
function jump_to(dab_num) {
+ open_dab(dab_num);
+
+ var link = document.getElementById("dab-" + dab_num);
+ link.scrollIntoView();
+ link.classList.add("disambig-highlight")
+ return false;
+ }
+
+ function open_dab(dab_num) {
var highlight_title = "text-bg-primary";
+ var dab_articles = document.getElementsByClassName("dab-article");
+ for(var i=0; i edits[t]).map(t => [t, edits[t]]);
+ var saves = dab_link_to.map((link_to, num) => (
+ {"num": num, "link_to": link_to, "title": edits[num]}));
var save_edits = document.getElementById("save-edits");
save_edits.value = JSON.stringify(saves);
}
@@ -141,7 +169,7 @@ a.new { color: red; }
document.getElementById("cancel-" + dab_num).classList.remove("d-none");
var title = element.getAttribute("title");
- edits[dab_lookup[dab_num]] = title;
+ edits[dab_num] = title;
edit_set.add(dab_num);
update_edits();
@@ -163,7 +191,7 @@ a.new { color: red; }
}
function cancel_selection(dab_num) {
- delete edits[dab_lookup[dab_num]];
+ delete edits[dab_num];
document.getElementById("cancel-" + dab_num).classList.add("d-none");
clear_dab_highlight(dab_num);
edit_set.delete(dab_num);
diff --git a/templates/index.html b/templates/index.html
index 1758d8e..d6d6ca3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -1,7 +1,21 @@
{% extends "base.html" %}
+{% block title %}DAB Mechanic{% endblock %}
+
{% block content %}
+
+
+
+ {% if title and not exists %}
+
No article titled "{{ title }}" found in Wikipedia.
+ {% endif %}
+
+
{% for enwiki, count in articles %}
-
diff --git a/templates/navbar.html b/templates/navbar.html
index 9ce6f07..0b52925 100644
--- a/templates/navbar.html
+++ b/templates/navbar.html
@@ -15,13 +15,7 @@
Dab Mechanic
diff --git a/templates/preview.html b/templates/preview.html
new file mode 100644
index 0000000..cd4bb3a
--- /dev/null
+++ b/templates/preview.html
@@ -0,0 +1,39 @@
+
+
+
+
+ {{ title }} – dab mechanic
+
+
+
+
+
+
+
Preview of changes: {{ title }}
+
+
+
Edit summary
+
{{ edit_summary }}
+
+
+ {#
{{ text }} #}
+
+
+
+
+
+
+
+
+
+ {{ diff | safe }}
+
+
+
+
+
+
+
diff --git a/templates/save.html b/templates/save.html
deleted file mode 100644
index 7bd28a5..0000000
--- a/templates/save.html
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-
{{ title }} – dab mechanic
-
-
-
-
-
-
Save edits: {{ title }}
-
Edit summary: {{ edit_summary }}
-
-
-
-
diff --git a/web_view.py b/web_view.py
index 2730f23..aa962bc 100755
--- a/web_view.py
+++ b/web_view.py
@@ -3,7 +3,9 @@
import inspect
import json
import re
-from typing import Optional
+from typing import Optional, TypedDict
+import mwparserfromhell
+from pprint import pprint
import flask
import lxml.html
@@ -64,28 +66,43 @@ def parse_articles_with_dab_links(root: lxml.html.Element) -> list[tuple[str, in
@app.route("/")
def index():
+ title = flask.request.args.get("title")
+ exists = None
+ if title:
+ title = title.strip()
+ exists = mediawiki_api.article_exists(title)
+ if exists:
+ return flask.redirect(
+ flask.url_for("article_page", enwiki=title.replace(" ", "_"))
+ )
+
r = requests.get(awdl_url, params={"limit": 100})
root = lxml.html.fromstring(r.content)
articles = parse_articles_with_dab_links(root)
# articles = [line[:-1] for line in open("article_list")]
- return flask.render_template("index.html", articles=articles)
+ return flask.render_template(
+ "index.html", title=title, exists=exists, articles=articles,
+ )
-def make_disamb_link(edit: tuple[str, str]) -> str:
- """Given an edit return the appropriate link."""
- return f"[[{edit[1]}|{edit[0]}]]"
+class Edit(TypedDict):
+ """Edit to an article."""
+
+ num: int
+ link_to: str
+ title: str
-def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
+def old_apply_edits(article_text: str, edits: list[Edit]) -> str:
"""Apply edits to article text."""
def escape(s: str) -> str:
return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")
- for link_from, link_to in edits:
- print(rf"\[\[{escape(link_from)}\]\]")
+ for edit in edits:
+ # print(rf"\[\[{escape(link_from)}\]\]")
article_text = re.sub(
rf"\[\[{escape(link_from)}\]\]",
f"[[{link_to}|{link_from}]]",
@@ -95,34 +112,107 @@ def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
return article_text
-@app.route("/save/<enwiki>", methods=["POST"])
-def save(enwiki: str) -> Response | str:
- """Save edits to article."""
- edits = [
- (link_to, link_from)
- for link_to, link_from in json.loads(flask.request.form["edits"])
- ]
+def make_disamb_link(edit: Edit) -> str:
+ """Given an edit return the appropriate link."""
+ return f"[[{edit['title']}|{edit['link_to']}]]"
- enwiki = enwiki.replace("_", " ")
+
+def build_edit_summary(edits: list[Edit]) -> str:
+ """Given a list of edits return an edit summary."""
titles = ", ".join(make_disamb_link(edit) for edit in edits[:-1])
if len(titles) > 1:
titles += " and "
titles += make_disamb_link(edits[-1])
- edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
+ return f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
- article_text = apply_edits(mediawiki_api.get_content(enwiki), edits)
+
+def get_links(wikicode, dab_links):
+ edits = [edit for edit in dab_links if edit.get("title")]
+
+ dab_titles = {dab["link_to"] for dab in edits}
+ return [
+ link for link in wikicode.filter_wikilinks() if str(link.title) in dab_titles
+ ]
+
+
+def apply_edits(text, dab_links):
+ wikicode = mwparserfromhell.parse(text)
+ links = get_links(wikicode, dab_links)
+ if len(links) != len(dab_links):
+ print("links:", len(links))
+ print("dab_links:", len(dab_links))
+ print("dab_links:", dab_links)
+ assert len(links) == len(dab_links)
+
+ for wikilink, edit in zip(links, dab_links):
+ if not edit.get("title"):
+ continue
+ if not wikilink.text:
+ wikilink.text = wikilink.title
+ wikilink.title = edit["title"]
+
+ return str(wikicode)
+
+
+@app.route("/preview/<enwiki>", methods=["POST"])
+def preview(enwiki: str) -> Response | str:
+ """Preview article edits."""
+ enwiki = enwiki.replace("_", " ")
+
+ dab_links = json.loads(flask.request.form["edits"])
+ dab_links = [link for link in dab_links if "title" in link]
+ cur_text, baserevid = mediawiki_api.get_content(enwiki)
+
+ text = apply_edits(cur_text, dab_links)
+ diff = mediawiki_api.compare(enwiki, text)
return flask.render_template(
- "save.html",
- edit_summary=edit_summary,
+ "preview.html",
+ edit_summary=build_edit_summary(dab_links),
title=enwiki,
- edits=edits,
- text=article_text,
+ edits=dab_links,
+ diff=diff,
)
+def do_save(enwiki: str):
+ """Update page on Wikipedia."""
+ dab_links = json.loads(flask.request.form["edits"])
+ dab_links = [link for link in dab_links if "title" in link]
+
+ cur_text, baserevid = mediawiki_api.get_content(enwiki)
+
+ new_text = apply_edits(cur_text, dab_links)
+ token = wikidata_oauth.get_token()
+
+ summary = build_edit_summary(dab_links)
+ print(summary)
+
+ edit = mediawiki_api.edit_page(
+ title=enwiki,
+ text=new_text,
+ summary=summary,
+ baserevid=baserevid,
+ token=token,
+ )
+
+ return edit
+
+
+@app.route("/save/<enwiki>", methods=["GET", "POST"])
+def save(enwiki: str) -> Response | str:
+ """Save edits to article."""
+ enwiki_norm = enwiki.replace("_", " ")
+
+ if flask.request.method == "GET":
+ return flask.render_template("edit_saved.html", title=enwiki_norm)
+
+ do_save(enwiki_norm)
+ return flask.redirect(flask.url_for(flask.request.endpoint, enwiki=enwiki))
+
+
def redirect_if_needed(enwiki: str) -> Optional[Response]:
"""Check if there are spaces in the article name and redirect."""
return (
@@ -141,6 +231,9 @@ def article_page(enwiki: str) -> Response:
if redirect:
return redirect
+ if "owner_key" not in flask.session:
+ return flask.render_template("login_needed.html")
+
article = wikipedia.Article(enwiki)
article.load()
article.process_links()