Initial commit.

This commit is contained in:
Edward Betts 2022-08-13 13:16:49 +01:00
commit c4af550929
4 changed files with 291 additions and 0 deletions

50
article_list Normal file
View file

@ -0,0 +1,50 @@
Rail transport in Indonesia
Canadian Alpine Ski Championships
Orwell Prize
SchleFaZ
List of fatal victims of the September 11 attacks
List of Parkruns in the United Kingdom
Beitar Jerusalem F.C.
List of Hindi songs recorded by Asha Bhosle
Arabic exonyms
Popular Union
The Cantos
Unisex name
2021 Intercontinental GT Challenge
AS Kaloum Star
Akademi Fantasia (season 1)
Athletics at the 2022 Bolivarian Games
Black to the Future
Demographics of the Republic of Ireland
Education in Northern Ireland
Education in the Republic of Ireland
Healthcare in the Republic of Ireland
I Love the 2000s
Kununokuni
List of Belgian football transfers summer 2022
List of Ultimate Marvel characters
List of Wisin & Yandel collaborations
List of comics based on films
List of programs broadcast by Asianet
List of tributaries of the Missouri River
Music of South Africa
Neuruppin
1979 Sydney City FC season
2007 in Spanish television
2022 Washington House of Representatives election
2022 World Athletics U20 Championships Men's 4 × 100 metres relay
A2 autostrada (Poland)
Chandel (Rajput clan)
County of Isenburg
Dinka people
Dwayne McDuffie Award for Diversity in Comics
FTSE Italia Mid Cap
Globoplay
Index of Armenia-related articles
List of Denmark national football team hat-tricks
List of Equinox episodes
List of Indian monarchs
List of Italian exonyms in Dalmatia
List of cities with historical German exonyms
List of jötnar in Norse mythology
List of language families

70
templates/article.html Normal file
View file

@ -0,0 +1,70 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
<style>
a.disambig { color: #ff8c00; }
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
a.new { color: red; }
#article {
left: 0;
width: 50%;
}
.card-highlight {
color: #000 !important;
background-color: #FFD580 !important;
}
</style>
</head>
<body>
<div class="m-3 container-fluid">
<div class="row">
<div class="col-8">
<h1>{{ title }}</h1>
<div>{{ text | safe }}</div>
</div>
<div class="col-4">
{% for dab in dab_list %}
<div class="card p-1 m-2" id="dab-card-{{ dab.num }}">
<h3 class="card-title">{{ dab.title }}</h3>
<div><a href="#" onclick="return jump_to({{ dab.num }})">Show in article</a></div>
<div>{{ dab.html | safe }}</div>
</div>
{% endfor %}
</div>
</div>
</div>
<script>
// Highlight disambiguation entry number `dab_num`: mark its sidebar card,
// scroll the article to the matching link (id "dab-<num>") and outline it.
// Any highlight left over from a previous call is cleared first.
// Always returns false so an inline onclick handler cancels the "#" navigation.
function jump_to(dab_num) {
  var i;

  // Clear the highlight from every link and card.
  var links = document.getElementsByTagName("a");
  for (i = 0; i < links.length; i++) {
    links[i].classList.remove("disambig-highlight");
  }
  var cards = document.getElementsByClassName("card");
  for (i = 0; i < cards.length; i++) {
    cards[i].classList.remove("card-highlight");
  }

  // Guard against a missing element: an uncaught TypeError here would skip
  // the `return false` below and let the browser follow href="#".
  var card = document.getElementById("dab-card-" + dab_num);
  if (card) {
    card.classList.add("card-highlight");
  }

  var link = document.getElementById("dab-" + dab_num);
  if (link) {
    link.scrollIntoView();
    link.classList.add("disambig-highlight");
  }

  return false;
}
</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>
</body>
</html>

15
templates/index.html Normal file
View file

@ -0,0 +1,15 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
</head>
<body>
<ul>
{% for enwiki in articles %}
<li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</a></li>
{% endfor %}
</ul>
</body>
</html>

156
web_view.py Executable file
View file

@ -0,0 +1,156 @@
#!/usr/bin/python3
from collections import defaultdict
import flask
import lxml.html
import requests
from werkzeug.wrappers import Response
# The Flask application object; the route handlers below are registered on it.
app = flask.Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally. Per the Flask
# documentation this enables the interactive debugger, which must not be
# reachable by untrusted clients -- confirm this app is only run locally.
app.debug = True
@app.route("/")
def index():
    """Render the index page listing every title found in ``article_list``.

    ``article_list`` is opened relative to the current working directory and
    is expected to hold one article title per line.
    """
    # The list contains non-ASCII titles, so do not rely on the locale's
    # default encoding; the context manager closes the file promptly.
    with open("article_list", encoding="utf-8") as f:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final title that has no trailing newline.
        articles = [line.rstrip("\n") for line in f]
    return flask.render_template("index.html", articles=articles)
def get_article_html(enwiki: str) -> str:
    """Return the rendered HTML of the English Wikipedia page titled *enwiki*.

    The page is fetched through the MediaWiki ``action=parse`` API with
    section edit links disabled.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status.
        KeyError: if the JSON reply carries no ``parse`` result (presumably
            what the API returns for a missing page -- verify).
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "parse",
        "format": "json",
        "formatversion": 2,
        "disableeditsection": 1,
        "page": enwiki,
    }
    # Without a timeout a stalled connection would block the request handler
    # indefinitely.
    r = requests.get(url, params=params, timeout=30)
    # Fail loudly on HTTP errors instead of trying to decode an error page.
    r.raise_for_status()
    html: str = r.json()["parse"]["text"]
    return html
# Titles of the templates used to recognise a disambiguation page.
# get_article_links() joins these with "|" and sends them as the
# ``tltemplates`` filter, so a linked page only reports templates from
# this list.
disambig_templates = [
    "Template:Disambiguation",
    "Template:Airport disambiguation",
    "Template:Biology disambiguation",
    "Template:Call sign disambiguation",
    "Template:Caselaw disambiguation",
    "Template:Chinese title disambiguation",
    "Template:Disambiguation cleanup",
    "Template:Genus disambiguation",
    "Template:Hospital disambiguation",
    "Template:Human name disambiguation",
    "Template:Human name disambiguation cleanup",
    "Template:Letter-number combination disambiguation",
    "Template:Mathematical disambiguation",
    "Template:Military unit disambiguation",
    "Template:Music disambiguation",
    "Template:Number disambiguation",
    "Template:Opus number disambiguation",
    "Template:Phonetics disambiguation",
    "Template:Place name disambiguation",
    "Template:Portal disambiguation",
    "Template:Road disambiguation",
    "Template:School disambiguation",
    "Template:Species Latin name abbreviation disambiguation",
    "Template:Species Latin name disambiguation",
    "Template:Station disambiguation",
    "Template:Synagogue disambiguation",
    "Template:Taxonomic authority disambiguation",
    "Template:Taxonomy disambiguation",
    "Template:Template disambiguation",
    "Template:WoO number disambiguation",
]
def get_article_links(enwiki: str) -> list[str]:
    """Get links that appear in this article and point at disambiguation pages.

    Uses the MediaWiki ``links`` generator on the article *enwiki* (main
    namespace only) and asks, for each linked page, which of
    ``disambig_templates`` it transcludes. A linked page is kept when it
    reports at least one such template, unless its title ends in
    " (disambiguation)" (presumably because those links are deliberate).

    Returns:
        The matching titles, de-duplicated, in the order first seen.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": enwiki,
        "generator": "links",
        "gpllimit": "max",
        "gplnamespace": 0,
        "tllimit": "max",
        "tlnamespace": 10,
        "tltemplates": "|".join(disambig_templates),
        "prop": "templates",
    }

    links: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check; `links` keeps the order
    continuation: dict = {}
    while True:
        # Merge the base parameters with only the most recent continuation
        # values, so stale continue keys from earlier batches are dropped.
        r = requests.get(url, params={**params, **continuation}, timeout=30)
        r.raise_for_status()
        json_data = r.json()

        # Tolerate a reply without "query"/"pages" (e.g. an article that has
        # no links) instead of raising KeyError.
        pages = json_data.get("query", {}).get("pages", [])
        for page in pages:
            title = page["title"]
            if title.endswith(" (disambiguation)") or not page.get("templates"):
                continue
            if title not in seen:
                seen.add(title)
                links.append(title)

        if "continue" not in json_data:
            break
        # Pass back every continue value the API returned: it may hold
        # "tlcontinue" instead of (or as well as) "gplcontinue".
        continuation = json_data["continue"]

    return links
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> Response:
    """Article Page.

    Fetches the rendered article *enwiki*, gives every anchor whose ``title``
    attribute names a disambiguation page the CSS class ``disambig``, and
    assigns the first anchor for each such title the id ``dab-<n>``. The
    HTML of each disambiguation page is fetched once and passed to the
    template as ``dab_list`` alongside the annotated article HTML.
    """
    html = get_article_html(enwiki)
    links = get_article_links(enwiki)
    # Set for O(1) membership tests: the loop below runs once per anchor in
    # the article, and `links` itself is a list.
    dab_titles = set(links)

    root = lxml.html.fromstring(html)
    html_links = defaultdict(list)  # title -> every anchor linking to it
    seen = set()  # titles that already have a numbered entry in dab_list
    dab_list = []
    dab_num = 0
    for a in root.findall(".//a[@href]"):
        title = a.get("title")
        if title is None or title not in dab_titles:
            continue
        a.set("class", "disambig")
        if title not in seen:
            # First occurrence: number it so the template's
            # "Show in article" link can find the anchor by id.
            dab_num += 1
            a.set("id", f"dab-{dab_num}")
            seen.add(title)
            dab_html = get_article_html(title)
            dab_list.append({"num": dab_num, "title": title, "html": dab_html})
        html_links[title].append(a)

    return flask.render_template(
        "article.html",
        title=enwiki,
        text=lxml.html.tostring(root, encoding=str),
        links=links,
        html_links=html_links,
        dab_list=dab_list,
    )
# Script entry point: start Flask's built-in server when run directly.
if __name__ == "__main__":
    # host="0.0.0.0" makes the server listen on every network interface.
    # NOTE(review): together with `app.debug = True` near the top of this
    # file, that exposes the debugger to other machines -- confirm intended.
    app.run(host="0.0.0.0")