WIP
This commit is contained in:
parent
a4c533c626
commit
7afe78c1ae
20
article_list
20
article_list
|
@ -1,16 +1,15 @@
|
||||||
Rail transport in Indonesia
|
Rail transport in Indonesia
|
||||||
Canadian Alpine Ski Championships
|
Media franchise
|
||||||
Orwell Prize
|
Orwell Prize
|
||||||
SchleFaZ
|
SchleFaZ
|
||||||
List of fatal victims of the September 11 attacks
|
List of fatal victims of the September 11 attacks
|
||||||
List of Parkruns in the United Kingdom
|
Canadian Alpine Ski Championships
|
||||||
Beitar Jerusalem F.C.
|
|
||||||
List of Hindi songs recorded by Asha Bhosle
|
|
||||||
Arabic exonyms
|
Arabic exonyms
|
||||||
Popular Union
|
Popular Union
|
||||||
The Cantos
|
The Cantos
|
||||||
Unisex name
|
Unisex name
|
||||||
2021 Intercontinental GT Challenge
|
Urban Hymns
|
||||||
|
47th Saturn Awards
|
||||||
AS Kaloum Star
|
AS Kaloum Star
|
||||||
Akademi Fantasia (season 1)
|
Akademi Fantasia (season 1)
|
||||||
Athletics at the 2022 Bolivarian Games
|
Athletics at the 2022 Bolivarian Games
|
||||||
|
@ -21,16 +20,15 @@ Education in the Republic of Ireland
|
||||||
Healthcare in the Republic of Ireland
|
Healthcare in the Republic of Ireland
|
||||||
I Love the 2000s
|
I Love the 2000s
|
||||||
Kununokuni
|
Kununokuni
|
||||||
List of Belgian football transfers summer 2022
|
|
||||||
List of Ultimate Marvel characters
|
|
||||||
List of Wisin & Yandel collaborations
|
List of Wisin & Yandel collaborations
|
||||||
List of comics based on films
|
List of comics based on films
|
||||||
List of programs broadcast by Asianet
|
List of programs broadcast by Asianet
|
||||||
List of tributaries of the Missouri River
|
|
||||||
Music of South Africa
|
|
||||||
Neuruppin
|
Neuruppin
|
||||||
|
1973–74 Kilmarnock F.C. season
|
||||||
1979 Sydney City FC season
|
1979 Sydney City FC season
|
||||||
2007 in Spanish television
|
2007 in Spanish television
|
||||||
|
2016 Colorado House of Representatives election
|
||||||
|
2021 Intercontinental GT Challenge
|
||||||
2022 Washington House of Representatives election
|
2022 Washington House of Representatives election
|
||||||
2022 World Athletics U20 Championships – Men's 4 × 100 metres relay
|
2022 World Athletics U20 Championships – Men's 4 × 100 metres relay
|
||||||
A2 autostrada (Poland)
|
A2 autostrada (Poland)
|
||||||
|
@ -41,10 +39,12 @@ Dwayne McDuffie Award for Diversity in Comics
|
||||||
FTSE Italia Mid Cap
|
FTSE Italia Mid Cap
|
||||||
Globoplay
|
Globoplay
|
||||||
Index of Armenia-related articles
|
Index of Armenia-related articles
|
||||||
List of Denmark national football team hat-tricks
|
|
||||||
List of Equinox episodes
|
List of Equinox episodes
|
||||||
List of Indian monarchs
|
List of Indian monarchs
|
||||||
List of Italian exonyms in Dalmatia
|
List of Italian exonyms in Dalmatia
|
||||||
|
List of Ultimate Marvel characters
|
||||||
List of cities with historical German exonyms
|
List of cities with historical German exonyms
|
||||||
List of jötnar in Norse mythology
|
List of jötnar in Norse mythology
|
||||||
List of language families
|
List of language families
|
||||||
|
List of people with surname Davis
|
||||||
|
List of political parties in Venezuela
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title>{{ title }} – dab mecanic </title>
|
<title>{{ article.enwiki }} – dab mechanic</title>
|
||||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
||||||
<link rel="stylesheet"
|
<link rel="stylesheet"
|
||||||
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
|
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
|
||||||
|
@ -47,9 +47,9 @@ a.new { color: red; }
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
<div id="dabs" class="p-3">
|
<div id="dabs" class="p-3">
|
||||||
<h1>{{ title }}</h1>
|
<h1>{{ article.enwiki }}</h1>
|
||||||
<div id="save-panel" class="d-none">
|
<div id="save-panel" class="d-none">
|
||||||
<form method="POST" action="{{ url_for("save", enwiki=enwiki_underscore) }}">
|
<form method="POST" action="{{ article.save_endpoint() }}">
|
||||||
<button class="btn btn-primary" id="save-btn">Save</button>
|
<button class="btn btn-primary" id="save-btn">Save</button>
|
||||||
<span id="edit-count"></span>
|
<span id="edit-count"></span>
|
||||||
<input type="hidden" value="{}" id="save-edits" name="edits">
|
<input type="hidden" value="{}" id="save-edits" name="edits">
|
||||||
|
@ -80,7 +80,8 @@ a.new { color: red; }
|
||||||
|
|
||||||
var edit_set = new Set();
|
var edit_set = new Set();
|
||||||
var edits = {};
|
var edits = {};
|
||||||
var dab_lookup = {{ dab_lookup | tojson }};
|
var dab_lookup = {{ article.dab_lookup | tojson }};
|
||||||
|
var dab_order = {{ article.dab_order | tojson }};
|
||||||
|
|
||||||
function jump_to(dab_num) {
|
function jump_to(dab_num) {
|
||||||
var highlight_title = "text-bg-primary";
|
var highlight_title = "text-bg-primary";
|
||||||
|
@ -126,8 +127,9 @@ a.new { color: red; }
|
||||||
}
|
}
|
||||||
|
|
||||||
function update_edits() {
|
function update_edits() {
|
||||||
|
var saves = dab_order.filter(t => edits[t]).map(t => [t, edits[t]]);
|
||||||
var save_edits = document.getElementById("save-edits");
|
var save_edits = document.getElementById("save-edits");
|
||||||
save_edits.value = JSON.stringify(edits);
|
save_edits.value = JSON.stringify(saves);
|
||||||
}
|
}
|
||||||
|
|
||||||
function select_dab(element, dab_num) {
|
function select_dab(element, dab_num) {
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
<body>
|
<body>
|
||||||
<ul>
|
<ul>
|
||||||
{% for enwiki in articles %}
|
{% for enwiki in articles %}
|
||||||
<li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</li>
|
<li><a href="{{ url_for("article_page", enwiki=enwiki) }}">{{ enwiki }}</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</ul>
|
</ul>
|
||||||
</body>
|
</body>
|
||||||
|
|
|
@ -2,11 +2,17 @@
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title></title>
|
<title>{{ title }} – dab mechanic</title>
|
||||||
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
<div class="m-3">
|
||||||
<h2>Save edits: {{ title }}</h2>
|
<h2>Save edits: {{ title }}</h2>
|
||||||
{{ edits | pprint }}
|
<p>Edit summary: {{ edit_summary }}</p>
|
||||||
|
</div>
|
||||||
|
<div class="m-3">
|
||||||
|
<pre>{{ text }}</pre>
|
||||||
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
175
web_view.py
175
web_view.py
|
@ -1,8 +1,8 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from collections import defaultdict
|
import re
|
||||||
from typing import Any
|
from typing import Any, TypedDict
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
@ -10,10 +10,25 @@ import requests
|
||||||
from werkzeug.wrappers import Response
|
from werkzeug.wrappers import Response
|
||||||
|
|
||||||
app = flask.Flask(__name__)
|
app = flask.Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
app.debug = True
|
app.debug = True
|
||||||
|
|
||||||
|
api_url = "https://en.wikipedia.org/w/api.php"
|
||||||
|
|
||||||
|
|
||||||
|
def get_content(title: str) -> str:
    """Get article text.

    Query the MediaWiki API at ``api_url`` for the revision content of
    ``title`` and return the content of the first revision of the first
    page in the response.

    Raises ``requests.HTTPError`` when the API answers with an error status,
    ``requests.Timeout`` when it does not answer in time, and ``KeyError``
    when the response carries no revision for the page.
    """
    params: dict[str, str | int] = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "prop": "revisions|info",
        "rvprop": "content|timestamp",
        "titles": title,
    }
    # requests waits indefinitely unless a timeout is given; a stalled API
    # call would otherwise block the request handler forever.
    response = requests.get(api_url, params=params, timeout=30)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    data = response.json()
    # Only one title is requested, so the first page entry is the article.
    rev: str = data["query"]["pages"][0]["revisions"][0]["content"]
    return rev
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def index():
|
def index():
|
||||||
|
@ -153,15 +168,120 @@ def get_dab_html(dab_num: int, title: str) -> str:
|
||||||
return html
|
return html
|
||||||
|
|
||||||
|
|
||||||
|
def make_disamb_link(edit: tuple[str, str]) -> str:
    """Build the piped wikilink describing one edit.

    The first item of ``edit`` becomes the visible label and the second item
    becomes the link target, giving ``[[target|label]]``.
    """
    label = edit[0]
    target = edit[1]
    return "[[{}|{}]]".format(target, label)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_edits(article_text: str, edits: list[tuple[str, str]]) -> str:
    """Apply edits to article text.

    Each edit is a ``(link_from, link_to)`` pair. Every unpiped wikilink
    ``[[link_from]]`` found in ``article_text`` is rewritten to
    ``[[link_to|link_from]]``. When matching, a space or an underscore in
    ``link_from`` matches either character in the text.

    Edits are applied in the order given; the modified text is returned.
    """

    def escape(s: str) -> str:
        # Match the title literally, except that spaces and underscores are
        # interchangeable.
        return re.escape(s).replace("_", "[ _]").replace(r"\ ", "[ _]")

    for link_from, link_to in edits:
        replacement = f"[[{link_to}|{link_from}]]"
        # Use a callable replacement: a plain string would be treated as a
        # template, so a backslash in a title would be parsed as an escape
        # or group reference instead of being inserted literally.
        article_text = re.sub(
            rf"\[\[{escape(link_from)}\]\]",
            lambda _match, text=replacement: text,
            article_text,
        )

    return article_text
|
||||||
|
|
||||||
|
|
||||||
@app.route("/save/<path:enwiki>", methods=["POST"])
|
@app.route("/save/<path:enwiki>", methods=["POST"])
|
||||||
def save(enwiki: str) -> Response | str:
|
def save(enwiki: str) -> Response | str:
|
||||||
"""Save edits to article."""
|
"""Save edits to article."""
|
||||||
edits = json.loads(flask.request.form["edits"])
|
edits = [
|
||||||
return flask.render_template("save.html", title=enwiki, edits=edits)
|
(link_to, link_from)
|
||||||
|
for link_to, link_from in json.loads(flask.request.form["edits"])
|
||||||
|
]
|
||||||
|
|
||||||
|
enwiki = enwiki.replace("_", " ")
|
||||||
|
titles = ", ".join(make_disamb_link(edit) for edit in edits[:-1])
|
||||||
|
if len(titles) > 1:
|
||||||
|
titles += " and "
|
||||||
|
|
||||||
|
titles += make_disamb_link(edits[-1])
|
||||||
|
|
||||||
|
edit_summary = f"Disambiguate {titles} using [[User:Edward/Dab mechanic]]"
|
||||||
|
|
||||||
|
article_text = apply_edits(get_content(enwiki), edits)
|
||||||
|
|
||||||
|
return flask.render_template(
|
||||||
|
"save.html",
|
||||||
|
edit_summary=edit_summary,
|
||||||
|
title=enwiki,
|
||||||
|
edits=edits,
|
||||||
|
text=article_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DabItem(TypedDict):
    """Represent a disambiguation page."""

    # Sequence number of the page; also used in the "dab-<num>" anchor id.
    num: int
    # Value of the title attribute of the link pointing at the page.
    title: str
    # HTML produced by get_dab_html() for this page.
    html: str
|
||||||
|
|
||||||
|
|
||||||
|
class Article:
    """Current article we're working on."""

    def __init__(self, enwiki: str) -> None:
        """Make a new Article object.

        Stores the title, fetches the article's links via
        get_article_links() and starts with empty disambiguation records.
        """
        self.enwiki = enwiki

        # Titles that process_links() treats as needing disambiguation.
        # NOTE(review): presumably the disambiguation pages this article
        # links to -- confirm against get_article_links().
        self.links = get_article_links(enwiki)

        # One entry per distinct title, in first-seen order.
        self.dab_list: list[DabItem] = []
        # Maps the number given to a title back to the title.
        self.dab_lookup: dict[int, str] = {}
        # Titles in the order they were first seen.
        self.dab_order: list[str] = []
        # NOTE: tracking of the anchors per title (html_links) is disabled.

    def save_endpoint(self) -> str:
        """Endpoint for saving changes."""
        underscored = self.enwiki.replace(" ", "_")
        href: str = flask.url_for("save", enwiki=underscored)
        return href

    def load(self) -> None:
        """Load parsed article HTML.

        Sets ``self.root``; process_links() and article_html() read it.
        """
        self.root = lxml.html.fromstring(get_article_html(self.enwiki))

    def process_links(self) -> None:
        """Process links in parsed wikitext.

        Every ``<a href>`` whose title is in ``self.links`` gets the class
        "disambig". The first anchor seen for each title also gets the id
        ``dab-<n>`` and is recorded in dab_list, dab_order and dab_lookup.
        """
        numbered: set[str] = set()
        counter = 0

        for anchor in self.root.findall(".//a[@href]"):
            target = anchor.get("title")
            if target is None or target not in self.links:
                continue

            anchor.set("class", "disambig")
            if target in numbered:
                # Later links to the same title are only marked, not numbered.
                continue

            counter += 1
            anchor.set("id", f"dab-{counter}")
            numbered.add(target)

            entry: DabItem = {
                "num": counter,
                "title": target,
                "html": get_dab_html(counter, target),
            }
            self.dab_list.append(entry)
            self.dab_order.append(target)
            self.dab_lookup[counter] = target

    def article_html(self) -> str:
        """Return the processed article HTML."""
        serialized: str = lxml.html.tostring(self.root, encoding=str)
        return serialized
|
||||||
|
|
||||||
|
|
||||||
@app.route("/enwiki/<path:enwiki>")
|
@app.route("/enwiki/<path:enwiki>")
|
||||||
def article(enwiki: str) -> Response:
|
def article_page(enwiki: str) -> Response:
|
||||||
"""Article Page."""
|
"""Article Page."""
|
||||||
enwiki_orig = enwiki
|
enwiki_orig = enwiki
|
||||||
enwiki = enwiki.replace("_", " ")
|
enwiki = enwiki.replace("_", " ")
|
||||||
|
@ -170,43 +290,18 @@ def article(enwiki: str) -> Response:
|
||||||
return flask.redirect(
|
return flask.redirect(
|
||||||
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
|
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
|
||||||
)
|
)
|
||||||
html = get_article_html(enwiki)
|
|
||||||
links = get_article_links(enwiki)
|
|
||||||
|
|
||||||
root = lxml.html.fromstring(html)
|
article = Article(enwiki)
|
||||||
html_links = defaultdict(list)
|
article.load()
|
||||||
seen = set()
|
article.process_links()
|
||||||
|
|
||||||
dab_list = []
|
|
||||||
dab_lookup = {}
|
|
||||||
dab_num = 0
|
|
||||||
|
|
||||||
for a in root.findall(".//a[@href]"):
|
|
||||||
title = a.get("title")
|
|
||||||
if title is None:
|
|
||||||
continue
|
|
||||||
if title not in links:
|
|
||||||
continue
|
|
||||||
a.set("class", "disambig")
|
|
||||||
if title not in seen:
|
|
||||||
dab_num += 1
|
|
||||||
a.set("id", f"dab-{dab_num}")
|
|
||||||
seen.add(title)
|
|
||||||
dab_html = get_dab_html(dab_num, title)
|
|
||||||
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
|
|
||||||
dab_lookup[dab_num] = title
|
|
||||||
|
|
||||||
html_links[title].append(a)
|
|
||||||
|
|
||||||
return flask.render_template(
|
return flask.render_template(
|
||||||
"article.html",
|
"article.html",
|
||||||
title=enwiki,
|
article=article,
|
||||||
enwiki_underscore=enwiki_underscore,
|
text=article.article_html(),
|
||||||
text=lxml.html.tostring(root, encoding=str),
|
links=article.links,
|
||||||
links=links,
|
# html_links=article.html_links,
|
||||||
html_links=html_links,
|
dab_list=article.dab_list,
|
||||||
dab_list=dab_list,
|
|
||||||
dab_lookup=dab_lookup,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue