WIP
This commit is contained in:
parent
c4af550929
commit
a4c533c626
|
@ -2,16 +2,39 @@
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="utf-8">
|
<meta charset="utf-8">
|
||||||
<title></title>
|
<title>{{ title }} – dab mecanic </title>
|
||||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
||||||
|
<link rel="stylesheet"
|
||||||
|
href="//en.wikipedia.org/w/load.php?debug=false&lang=en&modules=mediawiki.legacy.commonPrint,shared|skins.vector.styles&only=styles&skin=vector&*">
|
||||||
<style>
|
<style>
|
||||||
a.disambig { color: #ff8c00; }
|
a.disambig { color: #ff8c00; }
|
||||||
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
|
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
|
||||||
a.new { color: red; }
|
a.new { color: red; }
|
||||||
|
|
||||||
|
#top {
|
||||||
|
position: fixed;
|
||||||
|
left: 0px;
|
||||||
|
right: 0px;
|
||||||
|
height: 4em;
|
||||||
|
background: white;
|
||||||
|
z-index: 0;
|
||||||
|
}
|
||||||
|
|
||||||
#article {
|
#article {
|
||||||
left: 0;
|
padding-left: 40%;
|
||||||
width: 50%;
|
}
|
||||||
|
|
||||||
|
.dab-highlight {
|
||||||
|
border: 2px solid blue;
|
||||||
|
}
|
||||||
|
|
||||||
|
#dabs {
|
||||||
|
position: fixed;
|
||||||
|
top: 0em;
|
||||||
|
left: 0px;
|
||||||
|
width: 40%;
|
||||||
|
overflow: auto;
|
||||||
|
bottom: 0px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.card-highlight {
|
.card-highlight {
|
||||||
|
@ -23,45 +46,127 @@ a.new { color: red; }
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body>
|
<body>
|
||||||
|
<div id="dabs" class="p-3">
|
||||||
<div class="m-3 container-fluid">
|
|
||||||
<div class="row">
|
|
||||||
<div class="col-8">
|
|
||||||
<h1>{{ title }}</h1>
|
<h1>{{ title }}</h1>
|
||||||
<div>{{ text | safe }}</div>
|
<div id="save-panel" class="d-none">
|
||||||
</div>
|
<form method="POST" action="{{ url_for("save", enwiki=enwiki_underscore) }}">
|
||||||
<div class="col-4">
|
<button class="btn btn-primary" id="save-btn">Save</button>
|
||||||
{% for dab in dab_list %}
|
<span id="edit-count"></span>
|
||||||
<div class="card p-1 m-2" id="dab-card-{{ dab.num }}">
|
<input type="hidden" value="{}" id="save-edits" name="edits">
|
||||||
<h3 class="card-title">{{ dab.title }}</h3>
|
</form>
|
||||||
<div><a href="#" onclick="return jump_to({{ dab.num }})">Show in article</a></div>
|
</div>
|
||||||
<div>{{ dab.html | safe }}</div>
|
<div>There are {{ dab_list | count }} links in the article that need disambiguating.</div>
|
||||||
</div>
|
{% for dab in dab_list %}
|
||||||
{% endfor %}
|
<div class="card p-1 m-2">
|
||||||
|
<h3 class="card-title" id="dab-card-title-{{ dab.num }}" onclick="return jump_to({{ dab.num }})">{{ dab.title }}</h3>
|
||||||
|
<div>
|
||||||
|
<a href="#" onclick="return jump_to({{ dab.num }})">highlight link</a>
|
||||||
|
<span class="d-none" id="cancel-{{ dab.num }}">
|
||||||
|
|
|
||||||
|
<a href="#" onclick="return cancel_selection({{ dab.num }})">cancel selection</a>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="dab-article" id="dab-article-{{ dab.num }}">{{ dab.html | safe }}</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
<div id="article" class="pe-3">
|
||||||
|
<div>{{ text | safe }}</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
|
|
||||||
|
// Numbers (dab_num) of the disambiguation links that currently have a replacement selected.
var edit_set = new Set();
|
||||||
|
// Pending edits: keyed by dab_lookup[dab_num], value is the title attribute of the chosen link.
var edits = {};
|
||||||
|
// Lookup table rendered by the server as JSON; indexed by dab_num.
var dab_lookup = {{ dab_lookup | tojson }};
|
||||||
|
|
||||||
function jump_to(dab_num) {
|
function jump_to(dab_num) {
|
||||||
|
var highlight_title = "text-bg-primary";
|
||||||
|
|
||||||
var links = document.getElementsByTagName("a");
|
var links = document.getElementsByTagName("a");
|
||||||
for(var i=0; i<links.length; i++) {
|
for(var i=0; i<links.length; i++) {
|
||||||
links[i].classList.remove("disambig-highlight");
|
links[i].classList.remove("disambig-highlight");
|
||||||
}
|
}
|
||||||
|
|
||||||
var cards = document.getElementsByClassName("card");
|
var card_titles = document.getElementsByClassName("card-title");
|
||||||
for(var i=0; i<cards.length; i++) {
|
for(var i=0; i<card_titles.length; i++) {
|
||||||
cards[i].classList.remove("card-highlight");
|
card_titles[i].classList.remove(highlight_title);
|
||||||
}
|
}
|
||||||
|
|
||||||
var card = document.getElementById("dab-card-" + dab_num);
|
var card_title = document.getElementById("dab-card-title-" + dab_num);
|
||||||
card.classList.add("card-highlight");
|
card_title.classList.add(highlight_title);
|
||||||
|
|
||||||
var link = document.getElementById("dab-" + dab_num);
|
var link = document.getElementById("dab-" + dab_num);
|
||||||
link.scrollIntoView();
|
link.scrollIntoView();
|
||||||
link.classList.add("disambig-highlight")
|
link.classList.add("disambig-highlight")
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Remove the "dab-highlight" class from every link inside the
 * "dab-article-<dab_num>" element.
 */
function clear_dab_highlight(dab_num) {
  var panel = document.getElementById("dab-article-" + dab_num);

  panel.querySelectorAll("a").forEach(function (anchor) {
    anchor.classList.remove("dab-highlight");
  });
}
|
||||||
|
|
||||||
|
/**
 * Show the save panel together with an "N edit(s)" label while there are
 * pending edits; hide the panel and blank the label when there are none.
 */
function update_edit_count() {
  var panel = document.getElementById("save-panel");
  var total = edit_set.size;

  if (!total) {
    // Nothing selected: hide the panel and clear the counter text.
    panel.classList.add("d-none");
    document.getElementById("edit-count").textContent = "";
    return;
  }

  panel.classList.remove("d-none");
  var label = total == 1 ? "1 edit" : total + " edits";
  document.getElementById("edit-count").textContent = label;
}
|
||||||
|
|
||||||
|
/**
 * Serialise the pending edits as JSON into the hidden "save-edits" form
 * field so they are sent along when the save form is submitted.
 */
function update_edits() {
  var field = document.getElementById("save-edits");
  var payload = JSON.stringify(edits);
  field.value = payload;
}
|
||||||
|
|
||||||
|
/**
 * Record the link the user clicked inside a disambiguation page as the
 * replacement target for disambiguation link number `dab_num`.
 *
 * @param element  the clicked <a> element; its "title" attribute is stored
 *                 as the replacement.
 * @param dab_num  number of the disambiguation link being resolved.
 * @returns false, so the inline onclick handler suppresses navigation.
 */
function select_dab(element, dab_num) {
  jump_to(dab_num);

  var title = element.getAttribute("title");
  if (!title) {
    // Links without a title attribute give no usable replacement target;
    // ignore the click instead of recording a null edit.
    return false;
  }

  document.getElementById("cancel-" + dab_num).classList.remove("d-none");

  edits[dab_lookup[dab_num]] = title;
  edit_set.add(dab_num);
  update_edits();
  update_edit_count();

  // Only one link per disambiguation page may be highlighted at a time.
  clear_dab_highlight(dab_num);
  element.classList.add("dab-highlight");

  return false;
}
|
||||||
|
|
||||||
|
/**
 * Undo the selection made for disambiguation link number `dab_num`:
 * drop the pending edit, hide the "cancel selection" control, clear the
 * highlight and refresh the save form.
 *
 * @returns false, so the inline onclick handler suppresses navigation.
 */
function cancel_selection(dab_num) {
  var edit_key = dab_lookup[dab_num];
  delete edits[edit_key];

  var cancel_control = document.getElementById("cancel-" + dab_num);
  cancel_control.classList.add("d-none");

  clear_dab_highlight(dab_num);
  edit_set.delete(dab_num);

  // Keep the hidden form field and the visible counter in sync.
  update_edits();
  update_edit_count();

  return false;
}
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>
|
||||||
|
|
12
templates/save.html
Normal file
12
templates/save.html
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title></title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h2>Save edits: {{ title }}</h2>
|
||||||
|
{{ edits | pprint }}
|
||||||
|
</body>
|
||||||
|
</html>
|
102
web_view.py
102
web_view.py
|
@ -1,6 +1,8 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import json
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
@ -21,9 +23,10 @@ def index():
|
||||||
|
|
||||||
|
|
||||||
def get_article_html(enwiki: str) -> str:
|
def get_article_html(enwiki: str) -> str:
|
||||||
|
"""Parse article wikitext and return HTML."""
|
||||||
url = "https://en.wikipedia.org/w/api.php"
|
url = "https://en.wikipedia.org/w/api.php"
|
||||||
|
|
||||||
params = {
|
params: dict[str, str | int] = {
|
||||||
"action": "parse",
|
"action": "parse",
|
||||||
"format": "json",
|
"format": "json",
|
||||||
"formatversion": 2,
|
"formatversion": 2,
|
||||||
|
@ -70,11 +73,9 @@ disambig_templates = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_article_links(enwiki: str) -> list[str]:
|
def link_params(enwiki: str) -> dict[str, str | int]:
|
||||||
"""Get links that appear in this article."""
|
"""Parameters for finding article links from the API."""
|
||||||
url = "https://en.wikipedia.org/w/api.php"
|
params: dict[str, str | int] = {
|
||||||
|
|
||||||
params = {
|
|
||||||
"action": "query",
|
"action": "query",
|
||||||
"format": "json",
|
"format": "json",
|
||||||
"formatversion": 2,
|
"formatversion": 2,
|
||||||
|
@ -87,35 +88,88 @@ def get_article_links(enwiki: str) -> list[str]:
|
||||||
"tltemplates": "|".join(disambig_templates),
|
"tltemplates": "|".join(disambig_templates),
|
||||||
"prop": "templates",
|
"prop": "templates",
|
||||||
}
|
}
|
||||||
|
return params
|
||||||
|
|
||||||
links = []
|
|
||||||
|
def needs_disambig(link: dict[str, Any]) -> bool:
    """Report whether a linked page should be offered for disambiguation.

    A page qualifies when its title does not end in " (disambiguation)"
    and its "templates" entry is present and non-empty.
    """
    if link["title"].endswith(" (disambiguation)"):
        return False
    return bool(link.get("templates"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_links(enwiki: str) -> list[str]:
|
||||||
|
"""Get links that appear in this article."""
|
||||||
|
url = "https://en.wikipedia.org/w/api.php"
|
||||||
|
|
||||||
|
params: dict[str, str | int] = link_params(enwiki)
|
||||||
|
links: set[str] = set()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
r = requests.get(url, params=params)
|
data = requests.get(url, params=params).json()
|
||||||
json_data = r.json()
|
links.update(
|
||||||
query = json_data.pop("query")
|
page["title"] for page in data["query"]["pages"] if needs_disambig(page)
|
||||||
pages = query["pages"]
|
)
|
||||||
for page in pages:
|
|
||||||
title = page["title"]
|
|
||||||
if title.endswith(" (disambiguation)") or not page.get("templates"):
|
|
||||||
continue
|
|
||||||
if title not in links:
|
|
||||||
links.append(title)
|
|
||||||
|
|
||||||
if "continue" not in json_data:
|
if "continue" not in data:
|
||||||
break
|
break
|
||||||
print(json_data["continue"])
|
|
||||||
|
|
||||||
params["gplcontinue"] = json_data["continue"]["gplcontinue"]
|
params["gplcontinue"] = data["continue"]["gplcontinue"]
|
||||||
|
|
||||||
return links
|
return list(links)
|
||||||
|
|
||||||
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
|
# return {link["title"] for link in r.json()["query"]["pages"][0]["links"]}
|
||||||
|
|
||||||
|
|
||||||
|
def delete_toc(root: lxml.html.HtmlElement) -> None:
    """Delete table of contents from article HTML.

    Removes every descendant ``<div>`` whose class attribute is exactly
    ``toc``. The tree is modified in place.
    """
    for toc in root.findall(".//div[@class='toc']"):
        # drop_tree() keeps the text that follows the removed element,
        # whereas getparent().remove() would discard that tail text too.
        toc.drop_tree()
|
||||||
|
|
||||||
|
|
||||||
|
def get_dab_html(dab_num: int, title: str) -> str:
    """Parse dab page and rewrite links.

    Fetches the HTML for ``title`` with get_article_html, removes the table
    of contents and rewrites every link:

    * links that leave the page get ``href="#"`` and an ``onclick`` handler
      calling ``select_dab(this, dab_num)``;
    * in-page fragment links and the elements they point at get their id
      prefixed with ``dab_num`` (presumably so ids stay unique when several
      pages are embedded in one document -- confirm against the template).

    Returns the rewritten HTML as a string.
    """
    dab_html = get_article_html(title)
    root = lxml.html.fromstring(dab_html)
    delete_toc(root)

    # Snapshot of the original ids, taken before any of them is renamed.
    element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}

    for a in root.findall(".//a[@href]"):
        href: str | None = a.get("href")
        if not href:
            continue

        if not href.startswith("#"):
            a.set("href", "#")
            a.set("onclick", f"return select_dab(this, {dab_num})")
            continue

        fragment = href[1:]
        destination_element = element_id_map.get(fragment)
        if destination_element is None:
            # Bare "#" or a fragment with no matching element: nothing to
            # rename, so leave the link as it is rather than raise KeyError.
            continue

        destination_element.set("id", f"{dab_num}{fragment}")
        a.set("href", f"#{dab_num}{fragment}")

    html: str = lxml.html.tostring(root, encoding=str)
    return html
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/save/<path:enwiki>", methods=["POST"])
def save(enwiki: str) -> Response | str:
    """Save edits to article.

    Reads the ``edits`` form field, which must contain a JSON object, and
    renders ``save.html`` with the decoded edits. Responds with HTTP 400
    when the field is not valid JSON or does not decode to an object.
    """
    try:
        edits = json.loads(flask.request.form["edits"])
    except json.JSONDecodeError:
        # The field comes from the client; report bad input as a client
        # error instead of letting the exception become a 500.
        flask.abort(400, "edits is not valid JSON")

    if not isinstance(edits, dict):
        flask.abort(400, "edits must be a JSON object")

    return flask.render_template("save.html", title=enwiki, edits=edits)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/enwiki/<path:enwiki>")
|
@app.route("/enwiki/<path:enwiki>")
|
||||||
def article(enwiki: str) -> Response:
|
def article(enwiki: str) -> Response:
|
||||||
"""Article Page."""
|
"""Article Page."""
|
||||||
|
enwiki_orig = enwiki
|
||||||
|
enwiki = enwiki.replace("_", " ")
|
||||||
|
enwiki_underscore = enwiki.replace(" ", "_")
|
||||||
|
if " " in enwiki_orig:
|
||||||
|
return flask.redirect(
|
||||||
|
flask.url_for(flask.request.endpoint, enwiki=enwiki_underscore)
|
||||||
|
)
|
||||||
html = get_article_html(enwiki)
|
html = get_article_html(enwiki)
|
||||||
links = get_article_links(enwiki)
|
links = get_article_links(enwiki)
|
||||||
|
|
||||||
|
@ -124,6 +178,7 @@ def article(enwiki: str) -> Response:
|
||||||
seen = set()
|
seen = set()
|
||||||
|
|
||||||
dab_list = []
|
dab_list = []
|
||||||
|
dab_lookup = {}
|
||||||
dab_num = 0
|
dab_num = 0
|
||||||
|
|
||||||
for a in root.findall(".//a[@href]"):
|
for a in root.findall(".//a[@href]"):
|
||||||
|
@ -137,18 +192,21 @@ def article(enwiki: str) -> Response:
|
||||||
dab_num += 1
|
dab_num += 1
|
||||||
a.set("id", f"dab-{dab_num}")
|
a.set("id", f"dab-{dab_num}")
|
||||||
seen.add(title)
|
seen.add(title)
|
||||||
dab_html = get_article_html(title)
|
dab_html = get_dab_html(dab_num, title)
|
||||||
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
|
dab_list.append({"num": dab_num, "title": title, "html": dab_html})
|
||||||
|
dab_lookup[dab_num] = title
|
||||||
|
|
||||||
html_links[title].append(a)
|
html_links[title].append(a)
|
||||||
|
|
||||||
return flask.render_template(
|
return flask.render_template(
|
||||||
"article.html",
|
"article.html",
|
||||||
title=enwiki,
|
title=enwiki,
|
||||||
|
enwiki_underscore=enwiki_underscore,
|
||||||
text=lxml.html.tostring(root, encoding=str),
|
text=lxml.html.tostring(root, encoding=str),
|
||||||
links=links,
|
links=links,
|
||||||
html_links=html_links,
|
html_links=html_links,
|
||||||
dab_list=dab_list,
|
dab_list=dab_list,
|
||||||
|
dab_lookup=dab_lookup,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue