diff --git a/dab_mechanic/mediawiki_api.py b/dab_mechanic/mediawiki_api.py
index 265cb73..26d7a20 100644
--- a/dab_mechanic/mediawiki_api.py
+++ b/dab_mechanic/mediawiki_api.py
@@ -30,62 +30,16 @@ def call(params: dict[str, str | int]) -> dict[str, Any]:
return data.json()
-def article_exists(title: str) -> bool:
- """Get article text."""
- params: dict[str, str | int] = {
- "action": "query",
- "format": "json",
- "formatversion": 2,
- "titles": title,
- }
- return not call(params)["query"]["pages"][0].get("missing")
-
-
-def get_content(title: str) -> tuple[str, int]:
+def get_content(title: str) -> str:
"""Get article text."""
params: dict[str, str | int] = {
"action": "query",
"format": "json",
"formatversion": 2,
"prop": "revisions|info",
- "rvprop": "content|timestamp|ids",
+ "rvprop": "content|timestamp",
"titles": title,
}
data = call(params)
- rev = data["query"]["pages"][0]["revisions"][0]
- content: str = rev["content"]
- revid: int = int(rev["revid"])
- return content, revid
-
-
-def compare(title: str, new_text: str) -> str:
- """Generate a diff for the new article text."""
- params: dict[str, str | int] = {
- "format": "json",
- "formatversion": 2,
- "action": "compare",
- "fromtitle": title,
- "toslots": "main",
- "totext-main": new_text,
- "prop": "diff",
- }
- diff: str = call(params)["compare"]["body"]
- return diff
-
-
-def edit_page(
- title: str, text: str, summary: str, baserevid: str, token: str
-) -> dict[str, str | int]:
- """Edit a page on Wikipedia."""
- params: dict[str, str | int] = {
- "format": "json",
- "formatversion": 2,
- "action": "edit",
- "title": title,
- "text": text,
- "baserevid": baserevid,
- "token": token,
- "summary": summary,
- }
- edit: str = call(params)["edit"]
- return edit
+ rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
+ return rev
diff --git a/dab_mechanic/wikidata_oauth.py b/dab_mechanic/wikidata_oauth.py
index 5b048b2..5af0976 100644
--- a/dab_mechanic/wikidata_oauth.py
+++ b/dab_mechanic/wikidata_oauth.py
@@ -3,10 +3,8 @@ from urllib.parse import urlencode
from flask import current_app, session
from requests_oauthlib import OAuth1Session
-WIKI_HOSTNAME = "en.wikipedia.org"
-API_URL = f"https://{WIKI_HOSTNAME}/w/api.php"
-
-TIMEOUT = 20
+wiki_hostname = "en.wikipedia.org"
+api_url = f"https://{wiki_hostname}/w/api.php"
def get_edit_proxy() -> dict[str, str]:
@@ -30,12 +28,12 @@ def api_post_request(params: dict[str, str | int]):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
- return oauth.post(API_URL, data=params, timeout=TIMEOUT, proxies=proxies)
+ return oauth.post(api_url, data=params, timeout=10, proxies=proxies)
def raw_request(params):
app = current_app
- url = API_URL + "?" + urlencode(params)
+ url = api_url + "?" + urlencode(params)
client_key = app.config["CLIENT_KEY"]
client_secret = app.config["CLIENT_SECRET"]
oauth = OAuth1Session(
@@ -45,7 +43,7 @@ def raw_request(params):
resource_owner_secret=session["owner_secret"],
)
proxies = get_edit_proxy()
- return oauth.get(url, timeout=TIMEOUT, proxies=proxies)
+ return oauth.get(url, timeout=10, proxies=proxies)
def api_request(params):
diff --git a/dab_mechanic/wikipedia.py b/dab_mechanic/wikipedia.py
index fecf7f2..57c03c4 100644
--- a/dab_mechanic/wikipedia.py
+++ b/dab_mechanic/wikipedia.py
@@ -68,7 +68,7 @@ def needs_disambig(link: dict[str, Any]) -> bool:
)
-def get_article_links(enwiki: str) -> dict[str, str]:
+def get_article_links(enwiki: str) -> list[str]:
"""Get links that appear in this article."""
params: dict[str, str | int] = link_params(enwiki)
@@ -92,13 +92,11 @@ def get_article_links(enwiki: str) -> dict[str, str]:
params["gplcontinue"] = data["continue"]["gplcontinue"]
sleep(0.1)
- ret_links = {}
for link in set(links):
- ret_links[link] = link
- for r in redirects.get(link, []):
- ret_links[r] = link
+ if link in redirects:
+ links.update(redirects[link])
- return ret_links
+ return list(links)
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
@@ -123,9 +121,10 @@ def delete_toc(root: lxml.html.HtmlElement) -> None:
toc.getparent().remove(toc)
-def get_dab_html(dab_num: int, html: str) -> str:
+def get_dab_html(dab_num: int, title: str) -> str:
"""Parse dab page and rewrite links."""
- root = lxml.html.fromstring(html)
+ dab_html = get_article_html(title)
+ root = lxml.html.fromstring(dab_html)
delete_toc(root)
element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}
@@ -161,11 +160,10 @@ class Article:
self.dab_lookup: dict[int, str] = {}
self.dab_order: list[str] = []
self.parse: Optional[dict[str, Any]] = None
- self.dab_html: dict[str, str] = {}
- def preview_endpoint(self) -> str:
+ def save_endpoint(self) -> str:
"""Endpoint for saving changes."""
- href: str = flask.url_for("preview", enwiki=self.enwiki.replace(" ", "_"))
+ href: str = flask.url_for("save", enwiki=self.enwiki.replace(" ", "_"))
return href
def load(self) -> None:
@@ -175,34 +173,28 @@ class Article:
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
"""Disambiguation links that need fixing."""
+ seen = set()
for a in self.root.findall(".//a[@href]"):
title = a.get("title")
- if title is not None and title in self.links:
- yield a, title, self.links[title]
-
- href = a.get("href")
- if not href.startswith("/wiki/"):
+ if title is None or title not in self.links:
continue
- a.set("href", "https://en.wikipedia.org" + href)
- a.set("target", "_blank")
+ a.set("class", "disambig")
- def dab_link_to(self):
- return [dab["link_to"] for dab in self.dab_list]
+ if title in seen:
+ continue
+ seen.add(title)
+
+ yield a, title
def process_links(self) -> None:
"""Process links in parsed wikitext."""
- for dab_num, (a, link_to, title) in enumerate(self.iter_links()):
- a.set("class", "disambig")
+ for dab_num, (a, title) in enumerate(self.iter_links()):
a.set("id", f"dab-{dab_num}")
- if title not in self.dab_html:
- self.dab_html[title] = get_article_html(title)
-
dab: DabItem = {
"num": dab_num,
"title": title,
- "link_to": link_to,
- "html": get_dab_html(dab_num, self.dab_html[title]),
+ "html": get_dab_html(dab_num, title),
}
self.dab_list.append(dab)
self.dab_order.append(title)
diff --git a/templates/article.html b/templates/article.html
index eb1e62d..7c9afbf 100644
--- a/templates/article.html
+++ b/templates/article.html
@@ -53,8 +53,8 @@ a.new { color: red; }
{{ article.enwiki }}
-
@@ -62,9 +62,7 @@ a.new { color: red; }
There are {{ article.dab_list | count }} links in the article that need disambiguating.
{% for dab in article.dab_list %}
-
-
{{ dab.title }}
- {% if dab.title != dab.link_to %}
redirect from {{ dab.link_to }}
{% endif %}
+
{{ dab.title }}
-
{{ dab.html | safe }}
-
+
{{ dab.html | safe }}
{% endfor %}
@@ -87,38 +84,12 @@ a.new { color: red; }
var edit_set = new Set();
var edits = {};
+ var dab_lookup = {{ article.dab_lookup | tojson }};
var dab_order = {{ article.dab_order | tojson }};
- var dab_link_to = {{ article.dab_link_to() | tojson }};
-
- var dab_links = document.getElementsByClassName("disambig");
- for(var i=0; i
{
- event.preventDefault();
- var dab_num = event.target.id.substring(4);
- open_dab(dab_num);
- });
- }
function jump_to(dab_num) {
- open_dab(dab_num);
-
- var link = document.getElementById("dab-" + dab_num);
- link.scrollIntoView();
- link.classList.add("disambig-highlight")
- return false;
- }
-
- function open_dab(dab_num) {
var highlight_title = "text-bg-primary";
- var dab_articles = document.getElementsByClassName("dab-article");
- for(var i=0; i (
- {"num": num, "link_to": link_to, "title": edits[num]}));
+ var saves = dab_order.filter(t => edits[t]).map(t => [t, edits[t]]);
var save_edits = document.getElementById("save-edits");
save_edits.value = JSON.stringify(saves);
}
@@ -169,7 +141,7 @@ a.new { color: red; }
document.getElementById("cancel-" + dab_num).classList.remove("d-none");
var title = element.getAttribute("title");
- edits[dab_num] = title;
+ edits[dab_lookup[dab_num]] = title;
edit_set.add(dab_num);
update_edits();
@@ -191,7 +163,7 @@ a.new { color: red; }
}
function cancel_selection(dab_num) {
- delete edits[dab_num];
+ delete edits[dab_lookup[dab_num]];
document.getElementById("cancel-" + dab_num).classList.add("d-none");
clear_dab_highlight(dab_num);
edit_set.delete(dab_num);
diff --git a/templates/index.html b/templates/index.html
index d6d6ca3..1758d8e 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -1,21 +1,7 @@
{% extends "base.html" %}
-{% block title %}DAB Mechanic{% endblock %}
-
{% block content %}
-
-
-
- {% if title and not exists %}
-
No article titled "{{ title }}" found in Wikipedia.
- {% endif %}
-
-
{% for enwiki, count in articles %}
-
diff --git a/templates/navbar.html b/templates/navbar.html
index 0b52925..9ce6f07 100644
--- a/templates/navbar.html
+++ b/templates/navbar.html
@@ -15,7 +15,13 @@
Dab Mechanic
diff --git a/templates/preview.html b/templates/preview.html
deleted file mode 100644
index cd4bb3a..0000000
--- a/templates/preview.html
+++ /dev/null
@@ -1,39 +0,0 @@
-
-
-
-
- {{ title }} – dab mechanic
-
-
-
-
-
-
-
Preview of changes: {{ title }}
-
-
-
Edit summary
-
{{ edit_summary }}
-
-
- {#
{{ text }} #}
-
-
-
-
-
-
-
-
-
- {{ diff | safe }}
-
-
-
-
-
-
-
diff --git a/templates/save.html b/templates/save.html
new file mode 100644
index 0000000..7bd28a5
--- /dev/null
+++ b/templates/save.html
@@ -0,0 +1,18 @@
+
+
+
+
+
{{ title }} – dab mechanic
+
+
+
+
+
+
Save edits: {{ title }}
+
Edit summary: {{ edit_summary }}
+
+
+
+
diff --git a/web_view.py b/web_view.py
index aa962bc..2730f23 100755
--- a/web_view.py
+++ b/web_view.py
@@ -3,9 +3,7 @@
import inspect
import json
import re
-from typing import Optional, TypedDict
-import mwparserfromhell
-from pprint import pprint
+from typing import Optional
import flask
import lxml.html
@@ -66,43 +64,28 @@ def parse_articles_with_dab_links(root: lxml.html.Element) -> list[tuple[str, in
@app.route("/")
def index():
- title = flask.request.args.get("title")
- exists = None
- if title:
- title = title.strip()
- exists = mediawiki_api.article_exists(title)
- if exists:
- return flask.redirect(
- flask.url_for("article_page", enwiki=title.replace(" ", "_"))
- )
-
r = requests.get(awdl_url, params={"limit": 100})
root = lxml.html.fromstring(r.content)
articles = parse_articles_with_dab_links(root)
# articles = [line[:-1] for line in open("article_list")]
- return flask.render_template(
- "index.html", title=title, exists=exists, articles=articles,
- )
+ return flask.render_template("index.html", articles=articles)
-class Edit(TypedDict):
- """Edit to an article."""
-
- num: int
- link_to: str
- title: str
+def make_disamb_link(edit: tuple[str, str]) -> str:
+ """Given an edit return the appropriate link."""
+ return f"[[{edit[1]}|{edit[0]}]]"
-def old_apply_edits(article_text: str, edits: list[Edit]) -> str:
+def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
"""Apply edits to article text."""
def escape(s: str) -> str:
return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")
- for edit in edits:
- # print(rf"\[\[{escape(link_from)}\]\]")
+ for link_from, link_to in edits:
+ print(rf"\[\[{escape(link_from)}\]\]")
article_text = re.sub(
rf"\[\[{escape(link_from)}\]\]",
f"[[{link_to}|{link_from}]]",
@@ -112,107 +95,34 @@ def old_apply_edits(article_text: str, edits: list[Edit]) -> str:
return article_text
-def make_disamb_link(edit: Edit) -> str:
- """Given an edit return the appropriate link."""
- return f"[[{edit['title']}|{edit['link_to']}]]"
+@app.route("/save/
", methods=["POST"])
+def save(enwiki: str) -> Response | str:
+ """Save edits to article."""
+ edits = [
+ (link_to, link_from)
+ for link_to, link_from in json.loads(flask.request.form["edits"])
+ ]
-
-def build_edit_summary(edits: list[Edit]) -> str:
- """Given a list of edits return an edit summary."""
+ enwiki = enwiki.replace("_", " ")
titles = ", ".join(make_disamb_link(edit) for edit in edits[:-1])
if len(titles) > 1:
titles += " and "
titles += make_disamb_link(edits[-1])
- return f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
+ edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
-
-def get_links(wikicode, dab_links):
- edits = [edit for edit in dab_links if edit.get("title")]
-
- dab_titles = {dab["link_to"] for dab in edits}
- return [
- link for link in wikicode.filter_wikilinks() if str(link.title) in dab_titles
- ]
-
-
-def apply_edits(text, dab_links):
- wikicode = mwparserfromhell.parse(text)
- links = get_links(wikicode, dab_links)
- if len(links) != len(dab_links):
- print("links:", len(links))
- print("dab_links:", len(dab_links))
- print("dab_links:", dab_links)
- assert len(links) == len(dab_links)
-
- for wikilink, edit in zip(links, dab_links):
- if not edit.get("title"):
- continue
- if not wikilink.text:
- wikilink.text = wikilink.title
- wikilink.title = edit["title"]
-
- return str(wikicode)
-
-
-@app.route("/preview/", methods=["POST"])
-def preview(enwiki: str) -> Response | str:
- """Preview article edits."""
- enwiki = enwiki.replace("_", " ")
-
- dab_links = json.loads(flask.request.form["edits"])
- dab_links = [link for link in dab_links if "title" in link]
- cur_text, baserevid = mediawiki_api.get_content(enwiki)
-
- text = apply_edits(cur_text, dab_links)
- diff = mediawiki_api.compare(enwiki, text)
+ article_text = apply_edits(mediawiki_api.get_content(enwiki), edits)
return flask.render_template(
- "preview.html",
- edit_summary=build_edit_summary(dab_links),
+ "save.html",
+ edit_summary=edit_summary,
title=enwiki,
- edits=dab_links,
- diff=diff,
+ edits=edits,
+ text=article_text,
)
-def do_save(enwiki: str):
- """Update page on Wikipedia."""
- dab_links = json.loads(flask.request.form["edits"])
- dab_links = [link for link in dab_links if "title" in link]
-
- cur_text, baserevid = mediawiki_api.get_content(enwiki)
-
- new_text = apply_edits(cur_text, dab_links)
- token = wikidata_oauth.get_token()
-
- summary = build_edit_summary(dab_links)
- print(summary)
-
- edit = mediawiki_api.edit_page(
- title=enwiki,
- text=new_text,
- summary=summary,
- baserevid=baserevid,
- token=token,
- )
-
- return edit
-
-
-@app.route("/save/", methods=["GET", "POST"])
-def save(enwiki: str) -> Response | str:
- """Save edits to article."""
- enwiki_norm = enwiki.replace("_", " ")
-
- if flask.request.method == "GET":
- return flask.render_template("edit_saved.html", title=enwiki_norm)
-
- do_save(enwiki_norm)
- return flask.redirect(flask.url_for(flask.request.endpoint, enwiki=enwiki))
-
-
def redirect_if_needed(enwiki: str) -> Optional[Response]:
"""Check if there are spaces in the article name and redirect."""
return (
@@ -231,9 +141,6 @@ def article_page(enwiki: str) -> Response:
if redirect:
return redirect
- if "owner_key" not in flask.session:
- return flask.render_template("login_needed.html")
-
article = wikipedia.Article(enwiki)
article.load()
article.process_links()