This commit is contained in:
Edward Betts 2022-08-14 17:44:07 +01:00
parent a4c533c626
commit 7afe78c1ae
5 changed files with 161 additions and 58 deletions

View file

@ -1,16 +1,15 @@
Rail transport in Indonesia Rail transport in Indonesia
Canadian Alpine Ski Championships Media franchise
Orwell Prize Orwell Prize
SchleFaZ SchleFaZ
List of fatal victims of the September 11 attacks List of fatal victims of the September 11 attacks
List of Parkruns in the United Kingdom Canadian Alpine Ski Championships
Beitar Jerusalem F.C.
List of Hindi songs recorded by Asha Bhosle
Arabic exonyms Arabic exonyms
Popular Union Popular Union
The Cantos The Cantos
Unisex name Unisex name
2021 Intercontinental GT Challenge Urban Hymns
47th Saturn Awards
AS Kaloum Star AS Kaloum Star
Akademi Fantasia (season 1) Akademi Fantasia (season 1)
Athletics at the 2022 Bolivarian Games Athletics at the 2022 Bolivarian Games
@ -21,16 +20,15 @@ Education in the Republic of Ireland
Healthcare in the Republic of Ireland Healthcare in the Republic of Ireland
I Love the 2000s I Love the 2000s
Kununokuni Kununokuni
List of Belgian football transfers summer 2022
List of Ultimate Marvel characters
List of Wisin & Yandel collaborations List of Wisin & Yandel collaborations
List of comics based on films List of comics based on films
List of programs broadcast by Asianet List of programs broadcast by Asianet
List of tributaries of the Missouri River
Music of South Africa
Neuruppin Neuruppin
1973–74 Kilmarnock F.C. season
1979 Sydney City FC season 1979 Sydney City FC season
2007 in Spanish television 2007 in Spanish television
2016 Colorado House of Representatives election
2021 Intercontinental GT Challenge
2022 Washington House of Representatives election 2022 Washington House of Representatives election
2022 World Athletics U20 Championships – Men's 4 × 100 metres relay 2022 World Athletics U20 Championships – Men's 4 × 100 metres relay
A2 autostrada (Poland) A2 autostrada (Poland)
@ -41,10 +39,12 @@ Dwayne McDuffie Award for Diversity in Comics
FTSE Italia Mid Cap FTSE Italia Mid Cap
Globoplay Globoplay
Index of Armenia-related articles Index of Armenia-related articles
List of Denmark national football team hat-tricks
List of Equinox episodes List of Equinox episodes
List of Indian monarchs List of Indian monarchs
List of Italian exonyms in Dalmatia List of Italian exonyms in Dalmatia
List of Ultimate Marvel characters
List of cities with historical German exonyms List of cities with historical German exonyms
List of jötnar in Norse mythology List of jötnar in Norse mythology
List of language families List of language families
List of people with surname Davis
List of political parties in Venezuela

View file

@ -2,7 +2,7 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title>{{ title }} &ndash; dab mecanic </title> <title>{{ article.enwiki }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous"> <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<link rel="stylesheet" <link rel="stylesheet"
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*"> href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
@ -47,9 +47,9 @@ a.new { color: red; }
<body> <body>
<div id="dabs" class="p-3"> <div id="dabs" class="p-3">
<h1>{{ title }}</h1> <h1>{{ article.enwiki }}</h1>
<div id="save-panel" class="d-none"> <div id="save-panel" class="d-none">
<form method="POST" action="{{ url_for("save", enwiki=enwiki_underscore) }}"> <form method="POST" action="{{ article.save_endpoint() }}">
<button class="btn btn-primary" id="save-btn">Save</button> <button class="btn btn-primary" id="save-btn">Save</button>
<span id="edit-count"></span> <span id="edit-count"></span>
<input type="hidden" value="{}" id="save-edits" name="edits"> <input type="hidden" value="{}" id="save-edits" name="edits">
@ -80,7 +80,8 @@ a.new { color: red; }
var edit_set = new Set(); var edit_set = new Set();
var edits = {}; var edits = {};
var dab_lookup = {{ dab_lookup | tojson }}; var dab_lookup = {{ article.dab_lookup | tojson }};
var dab_order = {{ article.dab_order | tojson }};
function jump_to(dab_num) { function jump_to(dab_num) {
var highlight_title = "text-bg-primary"; var highlight_title = "text-bg-primary";
@ -126,8 +127,9 @@ a.new { color: red; }
} }
function update_edits() {
  // Collect [title, chosen value] pairs for every title in dab_order that
  // has a truthy entry in edits, keeping dab_order's ordering, and store the
  // JSON in the hidden form field that is posted on save.
  var ordered = [];
  dab_order.forEach(function (title) {
    if (edits[title]) {
      ordered.push([title, edits[title]]);
    }
  });
  document.getElementById("save-edits").value = JSON.stringify(ordered);
}
function select_dab(element, dab_num) { function select_dab(element, dab_num) {

View file

@ -8,7 +8,7 @@
<body> <body>
<ul> <ul>
{% for enwiki in articles %} {% for enwiki in articles %}
<li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</a></li> <li><a href="{{ url_for("article_page", enwiki=enwiki) }}">{{ enwiki }}</a></li>
{% endfor %} {% endfor %}
</ul> </ul>
</body> </body>

View file

@ -2,11 +2,17 @@
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="utf-8"> <meta charset="utf-8">
<title></title> <title>{{ title }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
</head> </head>
<body> <body>
<div class="m-3">
<h2>Save edits: {{ title }}</h2> <h2>Save edits: {{ title }}</h2>
{{ edits | pprint }} <p>Edit summary: {{ edit_summary }}</p>
</div>
<div class="m-3">
<pre>{{ text }}</pre>
</div>
</body> </body>
</html> </html>

View file

@ -1,8 +1,8 @@
#!/usr/bin/python3 #!/usr/bin/python3
import json import json
from collections import defaultdict import re
from typing import Any from typing import Any, TypedDict
import flask import flask
import lxml.html import lxml.html
@ -10,10 +10,25 @@ import requests
from werkzeug.wrappers import Response from werkzeug.wrappers import Response
app = flask.Flask(__name__) app = flask.Flask(__name__)
app.debug = True app.debug = True
api_url = "https://en.wikipedia.org/w/api.php"
def get_content(title: str, timeout: float = 30.0) -> str:
    """Get article text.

    Sends a ``query`` request to the MediaWiki API at ``api_url`` asking for
    the revision content of *title* and returns the content of the first
    revision of the first page in the response.

    *timeout* is the number of seconds to wait for the API before giving up.

    Raises ``KeyError`` or ``IndexError`` when the response does not contain
    the expected ``query`` / ``pages`` / ``revisions`` structure
    (presumably the case for a missing page -- confirm against the API).
    """
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "prop": "revisions|info",
        "rvprop": "content|timestamp",
        "titles": title,
    }
    # requests waits indefinitely unless a timeout is given, which would
    # leave the calling request handler hanging on a stalled connection.
    data = requests.get(api_url, params=params, timeout=timeout).json()
    rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
    return rev
@app.route("/") @app.route("/")
def index(): def index():
@ -153,15 +168,120 @@ def get_dab_html(dab_num: int, title: str) -> str:
return html return html
def make_disamb_link(edit: tuple[str, str]) -> str:
    """Build the piped wikilink for a single edit.

    The first item of *edit* becomes the visible label and the second item
    becomes the link target, giving ``[[target|label]]``.
    """
    label = edit[0]
    target = edit[1]
    return f"[[{target}|{label}]]"
def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
    """Apply edits to article text.

    Each edit is a ``(link_from, link_to)`` pair.  Every unpiped wikilink
    ``[[link_from]]`` in *article_text* is rewritten to
    ``[[link_to|link_from]]``.  Spaces and underscores in *link_from* are
    treated as interchangeable when matching.  Links that already carry a
    ``|label`` part do not match the pattern and are left untouched.

    Returns the rewritten text; *article_text* is returned unchanged when
    *edits* is empty.
    """

    def escape(s: str) -> str:
        # re.escape turns a space into "\ "; map both that and "_" to a
        # character class so either spelling of the title matches.
        return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")

    for link_from, link_to in edits:
        replacement = f"[[{link_to}|{link_from}]]"
        # Use a function replacement so backslashes in a title are inserted
        # literally instead of being read as regex template escapes.
        article_text = re.sub(
            rf"\[\[{escape(link_from)}\]\]",
            lambda _match, text=replacement: text,
            article_text,
        )

    return article_text
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
    """Preview the result of saving edits to an article.

    Reads the ``edits`` form field, a JSON list of ``[link_from, link_to]``
    pairs, fetches the article text with ``get_content``, applies the edits
    and renders ``save.html`` with the edit summary and the edited text.

    Responds with HTTP 400 when the list of edits is empty.
    """
    edits = [
        (link_from, link_to)
        for link_from, link_to in json.loads(flask.request.form["edits"])
    ]
    if not edits:
        # The summary below needs at least one edit; an empty list used to
        # fail with IndexError and surface as a server error.
        flask.abort(400, "No edits supplied")

    enwiki = enwiki.replace("_", " ")

    # Summary reads "A, B and C": comma-separate all but the last link.
    disamb_links = [make_disamb_link(edit) for edit in edits]
    titles = ", ".join(disamb_links[:-1])
    if titles:
        titles += " and "
    titles += disamb_links[-1]

    edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"

    article_text = apply_edits(get_content(enwiki), edits)

    return flask.render_template(
        "save.html",
        edit_summary=edit_summary,
        title=enwiki,
        edits=edits,
        text=article_text,
    )
class DabItem(TypedDict):
    """Represent a disambiguation page."""

    # Sequence number given to the link by Article.process_links (starts at 1).
    num: int
    # Value of the link's "title" attribute in the article HTML.
    title: str
    # Markup returned by get_dab_html(num, title).
    html: str
class Article:
    """Current article we're working on.

    Intended call order: construct, ``load()``, ``process_links()``; after
    that ``dab_list``, ``dab_lookup``, ``dab_order`` and ``article_html()``
    describe the annotated page.
    """

    def __init__(self, enwiki: str) -> None:
        """Make a new Article object.

        Calls ``get_article_links`` immediately; the article HTML is only
        fetched by ``load()``.
        """
        self.enwiki = enwiki
        # NOTE(review): presumably the titles of the disambiguation pages
        # this article links to -- confirm against get_article_links.
        self.links = get_article_links(enwiki)
        # Parsed HTML tree; stays None until load() has run.
        self.root: lxml.html.HtmlElement | None = None

        self.dab_list: list[DabItem] = []
        self.dab_lookup: dict[int, str] = {}
        self.dab_order: list[str] = []

    def save_endpoint(self) -> str:
        """Endpoint for saving changes."""
        # URLs use underscores where the title has spaces.
        href: str = flask.url_for("save", enwiki=self.enwiki.replace(" ", "_"))
        return href

    def load(self) -> None:
        """Load parsed article HTML."""
        html = get_article_html(self.enwiki)
        self.root = lxml.html.fromstring(html)

    def _loaded_root(self) -> "lxml.html.HtmlElement":
        """Return the parsed tree, failing clearly if load() was skipped."""
        if self.root is None:
            raise RuntimeError("Article.load() must be called first")
        return self.root

    def process_links(self) -> None:
        """Process links in parsed wikitext.

        Every ``<a href>`` whose ``title`` is in ``self.links`` gets the
        class ``disambig``.  The first link for each title also gets the id
        ``dab-N`` and is recorded in ``dab_list``, ``dab_order`` and
        ``dab_lookup``.

        Raises ``RuntimeError`` if ``load()`` has not been called.
        """
        root = self._loaded_root()
        dab_num = 0
        seen: set[str] = set()

        for a in root.findall(".//a[@href]"):
            title = a.get("title")
            if title is None or title not in self.links:
                continue
            a.set("class", "disambig")
            if title in seen:
                # Later links to the same page are styled but not numbered.
                continue
            seen.add(title)

            dab_num += 1
            a.set("id", f"dab-{dab_num}")

            dab_html = get_dab_html(dab_num, title)
            dab: DabItem = {"num": dab_num, "title": title, "html": dab_html}
            self.dab_list.append(dab)
            self.dab_order.append(title)
            self.dab_lookup[dab_num] = title

    def article_html(self) -> str:
        """Return the processed article HTML.

        Raises ``RuntimeError`` if ``load()`` has not been called.
        """
        root = self._loaded_root()
        html: str = lxml.html.tostring(root, encoding=str)
        return html
@app.route("/enwiki/<path:enwiki>") @app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> Response: def article_page(enwiki: str) -> Response:
"""Article Page.""" """Article Page."""
enwiki_orig = enwiki enwiki_orig = enwiki
enwiki = enwiki.replace("_", " ") enwiki = enwiki.replace("_", " ")
@ -170,43 +290,18 @@ def article(enwiki: str) -> Response:
return flask.redirect( return flask.redirect(
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore) flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
) )
html = get_article_html(enwiki)
links = get_article_links(enwiki)
root = lxml.html.fromstring(html) article = Article(enwiki)
html_links = defaultdict(list) article.load()
seen = set() article.process_links()
dab_list = []
dab_lookup = {}
dab_num = 0
for a in root.findall(".//a[@href]"):
title = a.get("title")
if title is None:
continue
if title not in links:
continue
a.set("class", "disambig")
if title not in seen:
dab_num += 1
a.set("id", f"dab-{dab_num}")
seen.add(title)
dab_html = get_dab_html(dab_num, title)
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
dab_lookup[dab_num] = title
html_links[title].append(a)
return flask.render_template( return flask.render_template(
"article.html", "article.html",
title=enwiki, article=article,
enwiki_underscore=enwiki_underscore, text=article.article_html(),
text=lxml.html.tostring(root, encoding=str), links=article.links,
links=links, # html_links=article.html_links,
html_links=html_links, dab_list=article.dab_list,
dab_list=dab_list,
dab_lookup=dab_lookup,
) )