This commit is contained in:
Edward Betts 2022-08-14 17:44:07 +01:00
parent a4c533c626
commit 7afe78c1ae
5 changed files with 161 additions and 58 deletions

View file

@ -1,16 +1,15 @@
Rail transport in Indonesia
Canadian Alpine Ski Championships
Media franchise
Orwell Prize
SchleFaZ
List of fatal victims of the September 11 attacks
List of Parkruns in the United Kingdom
Beitar Jerusalem F.C.
List of Hindi songs recorded by Asha Bhosle
Canadian Alpine Ski Championships
Arabic exonyms
Popular Union
The Cantos
Unisex name
2021 Intercontinental GT Challenge
Urban Hymns
47th Saturn Awards
AS Kaloum Star
Akademi Fantasia (season 1)
Athletics at the 2022 Bolivarian Games
@ -21,16 +20,15 @@ Education in the Republic of Ireland
Healthcare in the Republic of Ireland
I Love the 2000s
Kununokuni
List of Belgian football transfers summer 2022
List of Ultimate Marvel characters
List of Wisin & Yandel collaborations
List of comics based on films
List of programs broadcast by Asianet
List of tributaries of the Missouri River
Music of South Africa
Neuruppin
197374 Kilmarnock F.C. season
1979 Sydney City FC season
2007 in Spanish television
2016 Colorado House of Representatives election
2021 Intercontinental GT Challenge
2022 Washington House of Representatives election
2022 World Athletics U20 Championships Men's 4 × 100 metres relay
A2 autostrada (Poland)
@ -41,10 +39,12 @@ Dwayne McDuffie Award for Diversity in Comics
FTSE Italia Mid Cap
Globoplay
Index of Armenia-related articles
List of Denmark national football team hat-tricks
List of Equinox episodes
List of Indian monarchs
List of Italian exonyms in Dalmatia
List of Ultimate Marvel characters
List of cities with historical German exonyms
List of jötnar in Norse mythology
List of language families
List of people with surname Davis
List of political parties in Venezuela

View file

@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title>{{ title }} &ndash; dab mecanic </title>
<title>{{ article.enwiki }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<link rel="stylesheet"
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
@ -47,9 +47,9 @@ a.new { color: red; }
<body>
<div id="dabs" class="p-3">
<h1>{{ title }}</h1>
<h1>{{ article.enwiki }}</h1>
<div id="save-panel" class="d-none">
<form method="POST" action="{{ url_for("save", enwiki=enwiki_underscore) }}">
<form method="POST" action="{{ article.save_endpoint() }}">
<button class="btn btn-primary" id="save-btn">Save</button>
<span id="edit-count"></span>
<input type="hidden" value="{}" id="save-edits" name="edits">
@ -80,7 +80,8 @@ a.new { color: red; }
var edit_set = new Set();
var edits = {};
var dab_lookup = {{ dab_lookup | tojson }};
var dab_lookup = {{ article.dab_lookup | tojson }};
var dab_order = {{ article.dab_order | tojson }};
function jump_to(dab_num) {
var highlight_title = "text-bg-primary";
@ -126,8 +127,9 @@ a.new { color: red; }
}
// Serialise the pending edits into the hidden "save-edits" form field.
//
// The value is a JSON array of [title, replacement] pairs. Only titles that
// have a truthy entry in `edits` are included, and they are emitted in the
// order given by `dab_order`.
function update_edits() {
  var saves = dab_order.filter(t => edits[t]).map(t => [t, edits[t]]);
  var save_edits = document.getElementById("save-edits");
  // Single assignment: an earlier write of JSON.stringify(edits) was
  // immediately overwritten by this one and has been dropped.
  save_edits.value = JSON.stringify(saves);
}
function select_dab(element, dab_num) {

View file

@ -8,7 +8,7 @@
<body>
<ul>
{% for enwiki in articles %}
<li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</li>
{# One list entry per article, linking to its "article_page" view. #}
<li><a href="{{ url_for("article_page", enwiki=enwiki) }}">{{ enwiki }}</a></li>
{% endfor %}
</ul>
</body>

View file

@ -2,11 +2,17 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
<title>{{ title }} &ndash; dab mechanic</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
</head>
<body>
<div class="m-3">
<h2>Save edits: {{ title }}</h2>
{{ edits | pprint }}
<p>Edit summary: {{ edit_summary }}</p>
</div>
<div class="m-3">
<pre>{{ text }}</pre>
</div>
</body>
</html>

View file

@ -1,8 +1,8 @@
#!/usr/bin/python3
import json
from collections import defaultdict
from typing import Any
import re
from typing import Any, TypedDict
import flask
import lxml.html
@ -10,10 +10,25 @@ import requests
from werkzeug.wrappers import Response
# Flask application object; the @app.route decorators in this module attach to it.
app = flask.Flask(__name__)
# Debug mode is switched on unconditionally.
# NOTE(review): presumably intended for local development only -- confirm this
# is not enabled on a publicly reachable deployment.
app.debug = True
# MediaWiki Action API endpoint of English Wikipedia, used by get_content().
api_url = "https://en.wikipedia.org/w/api.php"
def get_content(title: str) -> str:
    """Get article text.

    Queries the MediaWiki API at ``api_url`` for the page's revisions and
    returns the content of the first revision in the response.

    Args:
        title: Title of the article to fetch.

    Returns:
        The revision content as returned by the API.

    Raises:
        requests.exceptions.Timeout: If the API does not answer in time.
        KeyError: If the response lacks the expected
            ``query -> pages -> revisions -> content`` structure.
        IndexError: If the response lists no pages or no revisions.
    """
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "prop": "revisions|info",
        "rvprop": "content|timestamp",
        "titles": title,
    }
    # requests has no default timeout; without one a stalled connection would
    # block this request handler forever.
    response = requests.get(api_url, params=params, timeout=30)
    data = response.json()
    rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
    return rev
@app.route("/")
def index():
@ -153,15 +168,120 @@ def get_dab_html(dab_num: int, title: str) -> str:
return html
def make_disamb_link(edit: tuple[str, str]) -> str:
    """Format an edit as a piped wikilink.

    The second element of ``edit`` becomes the link target and the first
    element becomes the displayed label.
    """
    return "[[{}|{}]]".format(edit[1], edit[0])
def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
    """Apply edits to article text.

    Each bare wikilink ``[[link_from]]`` is rewritten as the piped link
    ``[[link_to|link_from]]``. When matching, a space or underscore in
    ``link_from`` matches either character. Links that already carry a pipe
    are not matched by the pattern and are left untouched.

    Args:
        article_text: Wikitext to modify.
        edits: ``(link_from, link_to)`` pairs, applied in order.

    Returns:
        The wikitext with every matching link replaced.
    """

    def escape(s: str) -> str:
        """Return a regex for ``s`` where space and underscore are equivalent."""
        return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")

    for link_from, link_to in edits:
        replacement = f"[[{link_to}|{link_from}]]"
        article_text = re.sub(
            rf"\[\[{escape(link_from)}\]\]",
            # A callable is inserted verbatim; a plain string would be treated
            # as a template, so a backslash in a title would be interpreted as
            # an escape sequence or group reference.
            lambda _match, replacement=replacement: replacement,
            article_text,
        )

    return article_text
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
    """Render a preview of the disambiguation edits for an article.

    Reads the ``edits`` form field (a JSON array of ``[link_from, link_to]``
    pairs), builds an edit summary, fetches the article text, applies the
    edits and renders ``save.html`` with the result.

    Args:
        enwiki: Article title from the URL; underscores are turned into spaces.

    Returns:
        The rendered ``save.html`` page.

    Raises:
        werkzeug.exceptions.BadRequest: If the ``edits`` field is missing,
            is not valid JSON made of pairs, or contains no edits.
    """
    try:
        # Each pair is (ambiguous title as linked in the article, chosen target).
        edits = [
            (link_from, link_to)
            for link_from, link_to in json.loads(flask.request.form["edits"])
        ]
    except (TypeError, ValueError):
        flask.abort(400, "Malformed edits.")

    # The form's default value ("{}") yields no edits; indexing edits[-1]
    # below would otherwise fail with an IndexError.
    if not edits:
        flask.abort(400, "No edits to save.")

    enwiki = enwiki.replace("_", " ")

    # "A", "A and B", "A, B and C", ...
    links = [make_disamb_link(edit) for edit in edits]
    if len(links) == 1:
        titles = links[0]
    else:
        titles = ", ".join(links[:-1]) + " and " + links[-1]

    edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"

    article_text = apply_edits(get_content(enwiki), edits)

    return flask.render_template(
        "save.html",
        edit_summary=edit_summary,
        title=enwiki,
        edits=edits,
        text=article_text,
    )
class DabItem(TypedDict):
    """Represent a disambiguation page linked from the current article."""

    # Sequence number of the link; Article.process_links counts up from 1.
    num: int
    # Value of the link's "title" attribute in the article HTML.
    title: str
    # Markup returned by get_dab_html() for this number and title.
    html: str
class Article:
    """State for the article whose ambiguous links are being resolved."""

    def __init__(self, enwiki: str) -> None:
        """Store the title, look up its links and start with empty dab tables."""
        self.enwiki = enwiki
        self.links = get_article_links(enwiki)

        self.dab_list: list[DabItem] = []
        self.dab_lookup: dict[int, str] = {}
        self.dab_order: list[str] = []

    def save_endpoint(self) -> str:
        """Return the URL of the ``save`` view for this article."""
        underscored = self.enwiki.replace(" ", "_")
        endpoint: str = flask.url_for("save", enwiki=underscored)
        return endpoint

    def load(self) -> None:
        """Fetch the article HTML and parse it into ``self.root``."""
        self.root = lxml.html.fromstring(get_article_html(self.enwiki))

    def process_links(self) -> None:
        """Mark links found in ``self.links`` and record each distinct title.

        Every matching anchor gets the ``disambig`` class. The first anchor
        for a given title additionally gets a ``dab-N`` id and an entry in
        ``dab_list``, ``dab_order`` and ``dab_lookup``.
        """
        numbered: set[str] = set()

        for anchor in self.root.findall(".//a[@href]"):
            title = anchor.get("title")
            if title is None or title not in self.links:
                continue

            anchor.set("class", "disambig")
            if title in numbered:
                # Later occurrences of a title are only re-classed.
                continue

            number = len(numbered) + 1
            anchor.set("id", f"dab-{number}")
            numbered.add(title)

            item = DabItem(
                num=number, title=title, html=get_dab_html(number, title)
            )
            self.dab_list.append(item)
            self.dab_order.append(title)
            self.dab_lookup[number] = title

    def article_html(self) -> str:
        """Serialise the (possibly modified) parsed tree back to HTML text."""
        serialised: str = lxml.html.tostring(self.root, encoding=str)
        return serialised
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> Response:
def article_page(enwiki: str) -> Response:
"""Article Page."""
enwiki_orig = enwiki
enwiki = enwiki.replace("_", " ")
@ -170,43 +290,18 @@ def article(enwiki: str) -> Response:
return flask.redirect(
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
)
html = get_article_html(enwiki)
links = get_article_links(enwiki)
root = lxml.html.fromstring(html)
html_links = defaultdict(list)
seen = set()
dab_list = []
dab_lookup = {}
dab_num = 0
for a in root.findall(".//a[@href]"):
title = a.get("title")
if title is None:
continue
if title not in links:
continue
a.set("class", "disambig")
if title not in seen:
dab_num += 1
a.set("id", f"dab-{dab_num}")
seen.add(title)
dab_html = get_dab_html(dab_num, title)
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
dab_lookup[dab_num] = title
html_links[title].append(a)
article = Article(enwiki)
article.load()
article.process_links()
return flask.render_template(
"article.html",
title=enwiki,
enwiki_underscore=enwiki_underscore,
text=lxml.html.tostring(root, encoding=str),
links=links,
html_links=html_links,
dab_list=dab_list,
dab_lookup=dab_lookup,
article=article,
text=article.article_html(),
links=article.links,
# html_links=article.html_links,
dab_list=article.dab_list,
)