This commit is contained in:
Edward Betts 2022-08-13 16:25:07 +01:00
parent c4af550929
commit a4c533c626
3 changed files with 219 additions and 44 deletions

View file

@ -2,16 +2,39 @@
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
<title>{{ title }} &ndash; dab mechanic </title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<link rel="stylesheet"
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
<style>
a.disambig { color: #ff8c00; }
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
a.new { color: red; }
#top {
position: fixed;
left: 0px;
right: 0px;
height: 4em;
background: white;
z-index: 0;
}
#article {
left: 0;
width: 50%;
padding-left: 40%;
}
.dab-highlight {
border: 2px solid blue;
}
#dabs {
position: fixed;
top: 0em;
left: 0px;
width: 40%;
overflow: auto;
bottom: 0px;
}
.card-highlight {
@ -23,45 +46,127 @@ a.new { color: red; }
</head>
<body>
<div class="m-3 container-fluid">
<div class="row">
<div class="col-8">
<div id="dabs" class="p-3">
<h1>{{ title }}</h1>
<div>{{ text | safe }}</div>
</div>
<div class="col-4">
{% for dab in dab_list %}
<div class="card p-1 m-2" id="dab-card-{{ dab.num }}">
<h3 class="card-title">{{ dab.title }}</h3>
<div><a href="#" onclick="return jump_to({{ dab.num }})">Show in article</a></div>
<div>{{ dab.html | safe }}</div>
</div>
{% endfor %}
<div id="save-panel" class="d-none">
<form method="POST" action="{{ url_for("save", enwiki=enwiki_underscore) }}">
<button class="btn btn-primary" id="save-btn">Save</button>
<span id="edit-count"></span>
<input type="hidden" value="{}" id="save-edits" name="edits">
</form>
</div>
<div>There are {{ dab_list | count }} links in the article that need disambiguating.</div>
{% for dab in dab_list %}
<div class="card p-1 m-2">
<h3 class="card-title" id="dab-card-title-{{ dab.num }}" onclick="return jump_to({{ dab.num }})">{{ dab.title }}</h3>
<div>
<a href="#" onclick="return jump_to({{ dab.num }})">highlight link</a>
<span class="d-none" id="cancel-{{ dab.num }}">
|
<a href="#" onclick="return cancel_selection({{ dab.num }})">cancel selection</a>
</span>
</div>
<div class="dab-article" id="dab-article-{{ dab.num }}">{{ dab.html | safe }}</div>
</div>
{% endfor %}
</div>
<div id="article" class="pe-3">
<div>{{ text | safe }}</div>
</div>
</div>
</div>
<script>
var edit_set = new Set();
var edits = {};
var dab_lookup = {{ dab_lookup | tojson }};
function jump_to(dab_num) {
var highlight_title = "text-bg-primary";
var links = document.getElementsByTagName("a");
for(var i=0; i<links.length; i++) {
links[i].classList.remove("disambig-highlight");
}
var cards = document.getElementsByClassName("card");
for(var i=0; i<cards.length; i++) {
cards[i].classList.remove("card-highlight");
var card_titles = document.getElementsByClassName("card-title");
for(var i=0; i<card_titles.length; i++) {
card_titles[i].classList.remove(highlight_title);
}
var card = document.getElementById("dab-card-" + dab_num);
card.classList.add("card-highlight");
var card_title = document.getElementById("dab-card-title-" + dab_num);
card_title.classList.add(highlight_title);
var link = document.getElementById("dab-" + dab_num);
link.scrollIntoView();
link.classList.add("disambig-highlight")
return false;
}
function clear_dab_highlight(dab_num) {
  // Remove the "dab-highlight" class from every link inside the
  // disambiguation page panel whose id is "dab-article-<dab_num>".
  var panel = document.getElementById("dab-article-" + dab_num);
  panel.querySelectorAll("a").forEach(function (anchor) {
    anchor.classList.remove("dab-highlight");
  });
}
function update_edit_count() {
  // Show the save panel with an "N edit(s)" label while at least one
  // selection is pending; hide the panel and blank the label otherwise.
  var pending = edit_set.size;
  var label = "";
  if (pending) {
    label = pending == 1 ? "1 edit" : pending + " edits";
  }
  // "d-none" is present exactly when there is nothing to save.
  document.getElementById("save-panel").classList.toggle("d-none", !pending);
  document.getElementById("edit-count").textContent = label;
}
function update_edits() {
  // Mirror the in-memory `edits` object into the hidden "save-edits" form
  // field as JSON, so it is submitted along with the save form.
  document.getElementById("save-edits").value = JSON.stringify(edits);
}
function select_dab(element, dab_num) {
  // Record `element` (a link inside disambiguation panel `dab_num`) as the
  // chosen replacement for that ambiguous link.
  //
  // element: the clicked <a>; its "title" attribute is stored as the target.
  // dab_num: number of the disambiguation entry, used as a key into
  //          dab_lookup and as the suffix of the related element ids.
  // Returns false so the inline onclick handler suppresses navigation.
  jump_to(dab_num);
  document.getElementById("cancel-" + dab_num).classList.remove("d-none");

  // Store the choice keyed by the disambiguation page title and refresh the
  // hidden form field and the pending-edit counter.
  var title = element.getAttribute("title");
  edits[dab_lookup[dab_num]] = title;
  edit_set.add(dab_num);
  update_edits();
  update_edit_count();

  // Only one link per panel may be highlighted: clear the previous choice
  // before marking the new one.
  clear_dab_highlight(dab_num);
  element.classList.add("dab-highlight");

  return false;
}
function cancel_selection(dab_num) {
  // Undo the choice made for disambiguation entry `dab_num`: forget the
  // stored edit, hide the "cancel selection" link, clear the highlighted
  // link in the panel, and refresh the form field and edit counter.
  // Returns false so the inline onclick handler suppresses navigation.
  var dab_title = dab_lookup[dab_num];
  delete edits[dab_title];

  var cancel_link = document.getElementById("cancel-" + dab_num);
  cancel_link.classList.add("d-none");

  clear_dab_highlight(dab_num);
  edit_set.delete(dab_num);

  update_edits();
  update_edit_count();
  return false;
}
</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>

12
templates/save.html Normal file
View file

@ -0,0 +1,12 @@
<!doctype html>
{# Page rendered by the save view: shows the article title and a pretty-printed dump of the submitted edits. #}
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>Save edits: {{ title }}</title>
</head>
<body>
  <h2>Save edits: {{ title }}</h2>
  {{ edits | pprint }}
</body>
</html>

View file

@ -1,6 +1,8 @@
#!/usr/bin/python3
import json
from collections import defaultdict
from typing import Any
import flask
import lxml.html
@ -21,9 +23,10 @@ def index():
def get_article_html(enwiki: str) -> str:
"""Parse article wikitext and return HTML."""
url = "https://en.wikipedia.org/w/api.php"
params = {
params: dict[str, str | int] = {
"action": "parse",
"format": "json",
"formatversion": 2,
@ -70,11 +73,9 @@ disambig_templates = [
]
def get_article_links(enwiki: str) -> list[str]:
"""Get links that appear in this article."""
url = "https://en.wikipedia.org/w/api.php"
params = {
def link_params(enwiki: str) -> dict[str, str | int]:
"""Parameters for finding article links from the API."""
params: dict[str, str | int] = {
"action": "query",
"format": "json",
"formatversion": 2,
@ -87,35 +88,88 @@ def get_article_links(enwiki: str) -> list[str]:
"tltemplates": "|".join(disambig_templates),
"prop": "templates",
}
return params
links = []
def needs_disambig(link: dict[str, Any]) -> bool:
    """Tell whether a linked page should be offered for disambiguation.

    A page qualifies when its title does not end in " (disambiguation)" and
    it carries a non-empty "templates" entry (presumably the disambiguation
    templates matched by the API query -- confirm against the query params).
    """
    if link["title"].endswith(" (disambiguation)"):
        # Titles that end this way are never treated as needing a fix.
        return False
    return bool(link.get("templates"))
def get_article_links(enwiki: str) -> list[str]:
"""Get links that appear in this article."""
url = "https://en.wikipedia.org/w/api.php"
params: dict[str, str | int] = link_params(enwiki)
links: set[str] = set()
while True:
r = requests.get(url, params=params)
json_data = r.json()
query = json_data.pop("query")
pages = query["pages"]
for page in pages:
title = page["title"]
if title.endswith(" (disambiguation)") or not page.get("templates"):
continue
if title not in links:
links.append(title)
data = requests.get(url, params=params).json()
links.update(
page["title"] for page in data["query"]["pages"] if needs_disambig(page)
)
if "continue" not in json_data:
if "continue" not in data:
break
print(json_data["continue"])
params["gplcontinue"] = json_data["continue"]["gplcontinue"]
params["gplcontinue"] = data["continue"]["gplcontinue"]
return links
return list(links)
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
def delete_toc(root: lxml.html.HtmlElement) -> None:
    """Strip every ``<div class="toc">`` descendant out of ``root`` in place."""
    toc_divs = root.findall(".//div[@class='toc']")
    for toc_div in toc_divs:
        parent = toc_div.getparent()
        parent.remove(toc_div)
def get_dab_html(dab_num: int, title: str) -> str:
    """Parse dab page and rewrite links.

    Fetches the HTML of the page ``title``, removes its table of contents and
    rewrites every ``<a href>`` in it:

    * links that are not in-page fragments get ``href="#"`` and an
      ``onclick`` handler calling ``select_dab(this, dab_num)``;
    * in-page fragment links, and the elements they point at, get ``dab_num``
      prefixed to the id (presumably so ids from several embedded pages do
      not collide -- confirm against the article template).

    Fragment links whose target id is not present in the page (including a
    bare ``#``) are left untouched.

    Returns the rewritten HTML as a string.
    """
    dab_html = get_article_html(title)
    root = lxml.html.fromstring(dab_html)
    delete_toc(root)

    # Snapshot of id -> element taken before any id is rewritten, so several
    # links to the same fragment all resolve to the same element.
    element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}

    for a in root.findall(".//a[@href]"):
        href: str | None = a.get("href")
        if not href:
            continue
        if not href.startswith("#"):
            a.set("href", "#")
            a.set("onclick", f"return select_dab(this, {dab_num})")
            continue

        fragment = href[1:]
        destination_element = element_id_map.get(fragment)
        if destination_element is None:
            # Bare "#" or a dangling fragment: nothing to re-target.
            continue

        new_id = f"{dab_num}{fragment}"
        destination_element.set("id", new_id)
        a.set("href", f"#{new_id}")

    html: str = lxml.html.tostring(root, encoding=str)
    return html
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
    """Save edits to article.

    Reads the JSON-encoded ``edits`` form field and renders ``save.html``
    with the article title and the decoded edits.

    Responds with 400 Bad Request when the ``edits`` value is not valid JSON.
    """
    try:
        edits = json.loads(flask.request.form["edits"])
    except json.JSONDecodeError:
        # Malformed client input is a bad request, not a server error.
        flask.abort(400, description="edits must be valid JSON")
    return flask.render_template("save.html", title=enwiki, edits=edits)
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> Response:
"""Article Page."""
enwiki_orig = enwiki
enwiki = enwiki.replace("_", " ")
enwiki_underscore = enwiki.replace(" ", "_")
if " " in enwiki_orig:
return flask.redirect(
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
)
html = get_article_html(enwiki)
links = get_article_links(enwiki)
@ -124,6 +178,7 @@ def article(enwiki: str) -> Response:
seen = set()
dab_list = []
dab_lookup = {}
dab_num = 0
for a in root.findall(".//a[@href]"):
@ -137,18 +192,21 @@ def article(enwiki: str) -> Response:
dab_num += 1
a.set("id", f"dab-{dab_num}")
seen.add(title)
dab_html = get_article_html(title)
dab_html = get_dab_html(dab_num, title)
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
dab_lookup[dab_num] = title
html_links[title].append(a)
return flask.render_template(
"article.html",
title=enwiki,
enwiki_underscore=enwiki_underscore,
text=lxml.html.tostring(root, encoding=str),
links=links,
html_links=html_links,
dab_list=dab_list,
dab_lookup=dab_lookup,
)