diff --git a/article_list b/article_list index 2c37b8d..878bf75 100644 --- a/article_list +++ b/article_list @@ -1,16 +1,15 @@ Rail transport in Indonesia -Canadian Alpine Ski Championships +Media franchise Orwell Prize SchleFaZ List of fatal victims of the September 11 attacks -List of Parkruns in the United Kingdom -Beitar Jerusalem F.C. -List of Hindi songs recorded by Asha Bhosle +Canadian Alpine Ski Championships Arabic exonyms Popular Union The Cantos Unisex name -2021 Intercontinental GT Challenge +Urban Hymns +47th Saturn Awards AS Kaloum Star Akademi Fantasia (season 1) Athletics at the 2022 Bolivarian Games @@ -21,16 +20,15 @@ Education in the Republic of Ireland Healthcare in the Republic of Ireland I Love the 2000s Kununokuni -List of Belgian football transfers summer 2022 -List of Ultimate Marvel characters List of Wisin & Yandel collaborations List of comics based on films List of programs broadcast by Asianet -List of tributaries of the Missouri River -Music of South Africa Neuruppin +1973–74 Kilmarnock F.C. 
season 1979 Sydney City FC season 2007 in Spanish television +2016 Colorado House of Representatives election +2021 Intercontinental GT Challenge 2022 Washington House of Representatives election 2022 World Athletics U20 Championships – Men's 4 × 100 metres relay A2 autostrada (Poland) @@ -41,10 +39,12 @@ Dwayne McDuffie Award for Diversity in Comics FTSE Italia Mid Cap Globoplay Index of Armenia-related articles -List of Denmark national football team hat-tricks List of Equinox episodes List of Indian monarchs List of Italian exonyms in Dalmatia +List of Ultimate Marvel characters List of cities with historical German exonyms List of jötnar in Norse mythology List of language families +List of people with surname Davis +List of political parties in Venezuela diff --git a/templates/article.html b/templates/article.html index f31fc91..5e28052 100644 --- a/templates/article.html +++ b/templates/article.html @@ -2,7 +2,7 @@ - {{ title }} – dab mecanic + {{ article.enwiki }} – dab mechanic @@ -47,9 +47,9 @@ a.new { color: red; }
-

{{ title }}

+

{{ article.enwiki }}

-
+ @@ -80,7 +80,8 @@ a.new { color: red; } var edit_set = new Set(); var edits = {}; - var dab_lookup = {{ dab_lookup | tojson }}; + var dab_lookup = {{ article.dab_lookup | tojson }}; + var dab_order = {{ article.dab_order | tojson }}; function jump_to(dab_num) { var highlight_title = "text-bg-primary"; @@ -126,8 +127,9 @@ a.new { color: red; } } function update_edits() { + var saves = dab_order.filter(t => edits[t]).map(t => [t, edits[t]]); var save_edits = document.getElementById("save-edits"); - save_edits.value = JSON.stringify(edits); + save_edits.value = JSON.stringify(saves); } function select_dab(element, dab_num) { diff --git a/templates/index.html b/templates/index.html index 94c44d9..65236a2 100644 --- a/templates/index.html +++ b/templates/index.html @@ -8,7 +8,7 @@ diff --git a/templates/save.html b/templates/save.html index c94970d..7bd28a5 100644 --- a/templates/save.html +++ b/templates/save.html @@ -2,11 +2,17 @@ - + {{ title }} – dab mechanic + +

Save edits: {{ title }}

- {{ edits | pprint }} +

Edit summary: {{ edit_summary }}

+
+
+
{{ text }}
+
diff --git a/web_view.py b/web_view.py index e0f03f9..341a7b0 100755 --- a/web_view.py +++ b/web_view.py @@ -1,8 +1,8 @@ #!/usr/bin/python3 import json -from collections import defaultdict -from typing import Any +import re +from typing import Any, TypedDict import flask import lxml.html @@ -10,10 +10,25 @@ import requests from werkzeug.wrappers import Response app = flask.Flask(__name__) - - app.debug = True +api_url = "https://en.wikipedia.org/w/api.php" + + +def get_content(title: str) -> str: + """Get article text.""" + params: dict[str, str | int] = { + "action": "query", + "format": "json", + "formatversion": 2, + "prop": "revisions|info", + "rvprop": "content|timestamp", + "titles": title, + } + data = requests.get(api_url, params=params).json() + rev: str = data["query"]["pages"][0]["revisions"][0]["content"] + return rev + @app.route("/") def index(): @@ -153,15 +168,120 @@ def get_dab_html(dab_num: int, title: str) -> str: return html +def make_disamb_link(edit: tuple[str, str]) -> str: + """Given an edit return the appropriate link.""" + return f"[[{edit[1]}|{edit[0]}]]" + + +def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str: + """Apply edits to article text.""" + + def escape(s: str) -> str: + return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]") + + for link_from, link_to in edits: + print(rf"\[\[{escape(link_from)}\]\]") + article_text = re.sub( + rf"\[\[{escape(link_from)}\]\]", + f"[[{link_to}|{link_from}]]", + article_text, + ) + + return article_text + + @app.route("/save/", methods=["POST"]) def save(enwiki: str) -> Response | str: """Save edits to article.""" - edits = json.loads(flask.request.form["edits"]) - return flask.render_template("save.html", title=enwiki, edits=edits) + edits = [ + (link_to, link_from) + for link_to, link_from in json.loads(flask.request.form["edits"]) + ] + + enwiki = enwiki.replace("_", " ") + titles = ", ".join(make_disamb_link(edit) for edit in edits[:-1]) + if len(titles) > 1: + 
titles += " and " + + titles += make_disamb_link(edits[-1]) + + edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]" + + article_text = apply_edits(get_content(enwiki), edits) + + return flask.render_template( + "save.html", + edit_summary=edit_summary, + title=enwiki, + edits=edits, + text=article_text, + ) + + +class DabItem(TypedDict): + """Represent a disambiguation page.""" + + num: int + title: str + html: str + + +class Article: + """Current article we're working on.""" + + def __init__(self, enwiki: str) -> None: + """Make a new Article object.""" + self.enwiki = enwiki + + self.links = get_article_links(enwiki) + + self.dab_list: list[DabItem] = [] + self.dab_lookup: dict[int, str] = {} + self.dab_order: list[str] = [] + # self.html_links: defaultdict[str, lxml.html.Element] = defaultdict(list) + + def save_endpoint(self) -> str: + """Endpoint for saving changes.""" + href: str = flask.url_for("save", enwiki=self.enwiki.replace(" ", "_")) + return href + + def load(self) -> None: + """Load parsed article HTML.""" + html = get_article_html(self.enwiki) + self.root = lxml.html.fromstring(html) + + def process_links(self) -> None: + """Process links in parsed wikitext.""" + dab_num = 0 + seen = set() + + for a in self.root.findall(".//a[@href]"): + title = a.get("title") + if title is None: + continue + if title not in self.links: + continue + a.set("class", "disambig") + if title not in seen: + dab_num += 1 + a.set("id", f"dab-{dab_num}") + seen.add(title) + dab_html = get_dab_html(dab_num, title) + dab: DabItem = {"num": dab_num, "title": title, "html": dab_html} + self.dab_list.append(dab) + self.dab_order.append(title) + self.dab_lookup[dab_num] = title + + # self.html_links[title].append(a) + + def article_html(self) -> str: + """Return the processed article HTML.""" + html: str = lxml.html.tostring(self.root, encoding=str) + return html @app.route("/enwiki/") -def article(enwiki: str) -> Response: +def article_page(enwiki: str) -> 
Response: """Article Page.""" enwiki_orig = enwiki enwiki = enwiki.replace("_", " ") @@ -170,43 +290,18 @@ def article(enwiki: str) -> Response: return flask.redirect( flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore) ) - html = get_article_html(enwiki) - links = get_article_links(enwiki) - root = lxml.html.fromstring(html) - html_links = defaultdict(list) - seen = set() - - dab_list = [] - dab_lookup = {} - dab_num = 0 - - for a in root.findall(".//a[@href]"): - title = a.get("title") - if title is None: - continue - if title not in links: - continue - a.set("class", "disambig") - if title not in seen: - dab_num += 1 - a.set("id", f"dab-{dab_num}") - seen.add(title) - dab_html = get_dab_html(dab_num, title) - dab_list.append({"num": dab_num, "title": title, "html": dab_html}) - dab_lookup[dab_num] = title - - html_links[title].append(a) + article = Article(enwiki) + article.load() + article.process_links() return flask.render_template( "article.html", - title=enwiki, - enwiki_underscore=enwiki_underscore, - text=lxml.html.tostring(root, encoding=str), - links=links, - html_links=html_links, - dab_list=dab_list, - dab_lookup=dab_lookup, + article=article, + text=article.article_html(), + links=article.links, + # html_links=article.html_links, + dab_list=article.dab_list, )