Initial commit.
This commit is contained in:
commit
c4af550929
50
article_list
Normal file
50
article_list
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
Rail transport in Indonesia
|
||||||
|
Canadian Alpine Ski Championships
|
||||||
|
Orwell Prize
|
||||||
|
SchleFaZ
|
||||||
|
List of fatal victims of the September 11 attacks
|
||||||
|
List of Parkruns in the United Kingdom
|
||||||
|
Beitar Jerusalem F.C.
|
||||||
|
List of Hindi songs recorded by Asha Bhosle
|
||||||
|
Arabic exonyms
|
||||||
|
Popular Union
|
||||||
|
The Cantos
|
||||||
|
Unisex name
|
||||||
|
2021 Intercontinental GT Challenge
|
||||||
|
AS Kaloum Star
|
||||||
|
Akademi Fantasia (season 1)
|
||||||
|
Athletics at the 2022 Bolivarian Games
|
||||||
|
Black to the Future
|
||||||
|
Demographics of the Republic of Ireland
|
||||||
|
Education in Northern Ireland
|
||||||
|
Education in the Republic of Ireland
|
||||||
|
Healthcare in the Republic of Ireland
|
||||||
|
I Love the 2000s
|
||||||
|
Kununokuni
|
||||||
|
List of Belgian football transfers summer 2022
|
||||||
|
List of Ultimate Marvel characters
|
||||||
|
List of Wisin & Yandel collaborations
|
||||||
|
List of comics based on films
|
||||||
|
List of programs broadcast by Asianet
|
||||||
|
List of tributaries of the Missouri River
|
||||||
|
Music of South Africa
|
||||||
|
Neuruppin
|
||||||
|
1979 Sydney City FC season
|
||||||
|
2007 in Spanish television
|
||||||
|
2022 Washington House of Representatives election
|
||||||
|
2022 World Athletics U20 Championships – Men's 4 × 100 metres relay
|
||||||
|
A2 autostrada (Poland)
|
||||||
|
Chandel (Rajput clan)
|
||||||
|
County of Isenburg
|
||||||
|
Dinka people
|
||||||
|
Dwayne McDuffie Award for Diversity in Comics
|
||||||
|
FTSE Italia Mid Cap
|
||||||
|
Globoplay
|
||||||
|
Index of Armenia-related articles
|
||||||
|
List of Denmark national football team hat-tricks
|
||||||
|
List of Equinox episodes
|
||||||
|
List of Indian monarchs
|
||||||
|
List of Italian exonyms in Dalmatia
|
||||||
|
List of cities with historical German exonyms
|
||||||
|
List of jötnar in Norse mythology
|
||||||
|
List of language families
|
70
templates/article.html
Normal file
70
templates/article.html
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>{{ title }}</title>
|
||||||
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
||||||
|
<style>
|
||||||
|
a.disambig { color: #ff8c00; }
|
||||||
|
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
|
||||||
|
a.new { color: red; }
|
||||||
|
|
||||||
|
#article {
|
||||||
|
left: 0;
|
||||||
|
width: 50%;
|
||||||
|
}
|
||||||
|
|
||||||
|
.card-highlight {
|
||||||
|
color: #000 !important;
|
||||||
|
background-color: #FFD580 !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
|
||||||
|
<div class="m-3 container-fluid">
|
||||||
|
<div class="row">
|
||||||
|
<div class="col-8">
|
||||||
|
<h1>{{ title }}</h1>
|
||||||
|
<div>{{ text | safe }}</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-4">
|
||||||
|
{% for dab in dab_list %}
|
||||||
|
<div class="card p-1 m-2" id="dab-card-{{ dab.num }}">
|
||||||
|
<h3 class="card-title">{{ dab.title }}</h3>
|
||||||
|
<div><a href="#" onclick="return jump_to({{ dab.num }})">Show in article</a></div>
|
||||||
|
<div>{{ dab.html | safe }}</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
/**
 * Highlight one disambiguation entry: mark its sidebar card, scroll the
 * matching link in the article into view and outline it.
 *
 * The card is looked up by id "dab-card-<dab_num>" and the article link by
 * id "dab-<dab_num>". Highlights left by a previous call are cleared first.
 *
 * @param {number} dab_num  Number of the disambiguation entry to show.
 * @returns {boolean} Always false, so an inline `onclick="return jump_to(n)"`
 *     cancels the default navigation of the `href="#"` anchor.
 */
function jump_to(dab_num) {
  // Only elements that currently carry a highlight need to be touched.
  var old_links = document.querySelectorAll("a.disambig-highlight");
  for (var i = 0; i < old_links.length; i++) {
    old_links[i].classList.remove("disambig-highlight");
  }

  var old_cards = document.querySelectorAll(".card.card-highlight");
  for (var j = 0; j < old_cards.length; j++) {
    old_cards[j].classList.remove("card-highlight");
  }

  var card = document.getElementById("dab-card-" + dab_num);
  if (card) {
    card.classList.add("card-highlight");
  }

  // Guard against a missing anchor: throwing here would skip the
  // `return false` below and let the browser follow href="#".
  var link = document.getElementById("dab-" + dab_num);
  if (link) {
    link.scrollIntoView();
    link.classList.add("disambig-highlight");
  }

  return false;
}
|
||||||
|
</script>
|
||||||
|
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
15
templates/index.html
Normal file
15
templates/index.html
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title></title>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<ul>
|
||||||
|
{% for enwiki in articles %}
|
||||||
|
<li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</body>
|
||||||
|
</html>
|
156
web_view.py
Executable file
156
web_view.py
Executable file
|
@ -0,0 +1,156 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import flask
|
||||||
|
import lxml.html
|
||||||
|
import requests
|
||||||
|
from werkzeug.wrappers import Response
|
||||||
|
|
||||||
|
# The Flask application object; the @app.route decorators in this module
# register their view functions on it.
app = flask.Flask(__name__)


# NOTE(review): debug mode is switched on unconditionally at import time.
# Flask's debug mode enables the interactive debugger, which should not be
# reachable by untrusted clients -- confirm this is only run locally.
app.debug = True
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/")
def index() -> str:
    """Render the index page listing every article in ``article_list``.

    ``article_list`` is opened relative to the current working directory and
    is read as UTF-8 with one article title per line. Blank lines are skipped.

    Returns:
        The rendered ``index.html`` template.

    Raises:
        OSError: if ``article_list`` cannot be opened.
    """
    # Explicit encoding: the list contains non-ASCII titles, so relying on the
    # locale default can fail or mis-decode them. The context manager closes
    # the file handle deterministically.
    with open("article_list", encoding="utf-8") as article_file:
        # rstrip("\n") rather than line[:-1] so the final title is not
        # truncated when the file has no trailing newline.
        articles = [
            line.rstrip("\n") for line in article_file if line.strip()
        ]

    return flask.render_template("index.html", articles=articles)
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_html(enwiki: str) -> str:
    """Return the rendered HTML of an English Wikipedia page.

    Calls the MediaWiki ``action=parse`` API for the page named ``enwiki``
    with section edit links disabled.

    Args:
        enwiki: Title of the page on en.wikipedia.org.

    Returns:
        The value of ``parse.text`` from the API response.

    Raises:
        requests.RequestException: on a network failure, a timeout or a
            non-2xx HTTP status.
        KeyError: if the response carries no ``parse`` result.
    """
    url = "https://en.wikipedia.org/w/api.php"

    params = {
        "action": "parse",
        "format": "json",
        "formatversion": 2,
        "disableeditsection": 1,
        "page": enwiki,
    }

    # Without a timeout requests waits forever on a stalled connection,
    # which would block the web worker serving the page.
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    html: str = r.json()["parse"]["text"]
    return html
|
||||||
|
|
||||||
|
|
||||||
|
# Template titles whose presence on a linked page marks it as a
# disambiguation page. get_article_links() joins them with "|" and sends
# them as the "tltemplates" filter of its API query.
disambig_templates = [
    "Template:Disambiguation",
    "Template:Airport disambiguation",
    "Template:Biology disambiguation",
    "Template:Call sign disambiguation",
    "Template:Caselaw disambiguation",
    "Template:Chinese title disambiguation",
    "Template:Disambiguation cleanup",
    "Template:Genus disambiguation",
    "Template:Hospital disambiguation",
    "Template:Human name disambiguation",
    "Template:Human name disambiguation cleanup",
    "Template:Letter-number combination disambiguation",
    "Template:Mathematical disambiguation",
    "Template:Military unit disambiguation",
    "Template:Music disambiguation",
    "Template:Number disambiguation",
    "Template:Opus number disambiguation",
    "Template:Phonetics disambiguation",
    "Template:Place name disambiguation",
    "Template:Portal disambiguation",
    "Template:Road disambiguation",
    "Template:School disambiguation",
    "Template:Species Latin name abbreviation disambiguation",
    "Template:Species Latin name disambiguation",
    "Template:Station disambiguation",
    "Template:Synagogue disambiguation",
    "Template:Taxonomic authority disambiguation",
    "Template:Taxonomy disambiguation",
    "Template:Template disambiguation",
    "Template:WoO number disambiguation",
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_links(enwiki: str) -> list[str]:
    """Return the disambiguation pages linked from an article.

    Runs a MediaWiki ``action=query`` request that uses the article's
    main-namespace links as a generator and asks, for each linked page, which
    of ``disambig_templates`` it transcludes. A linked page is kept when it
    reports at least one such template and its title does not end in
    " (disambiguation)".

    Args:
        enwiki: Title of the article on en.wikipedia.org.

    Returns:
        Unique page titles in the order they were first seen.

    Raises:
        requests.RequestException: on a network failure, a timeout or a
            non-2xx HTTP status.
    """
    url = "https://en.wikipedia.org/w/api.php"

    params = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": enwiki,
        "generator": "links",
        "gpllimit": "max",
        "gplnamespace": 0,
        "tllimit": "max",
        "tlnamespace": 10,
        "tltemplates": "|".join(disambig_templates),
        "prop": "templates",
    }

    links: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check; `links` keeps the order
    continuation: dict = {}

    while True:
        # Merge the latest continuation into the *base* parameters each time.
        # The API may continue on "tlcontinue" instead of "gplcontinue", and a
        # stale value from an earlier batch must not be re-sent.
        r = requests.get(url, params={**params, **continuation}, timeout=30)
        r.raise_for_status()
        json_data = r.json()

        # "query" is absent when the generator yields no pages at all.
        pages = json_data.get("query", {}).get("pages", [])
        for page in pages:
            title = page["title"]
            if title.endswith(" (disambiguation)") or not page.get("templates"):
                continue
            if title not in seen:
                seen.add(title)
                links.append(title)

        if "continue" not in json_data:
            break
        continuation = json_data["continue"]

    return links
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> str:
    """Render an article with its disambiguation links marked up.

    Fetches the article HTML and the list of disambiguation pages it links
    to. Every anchor whose ``title`` attribute names one of those pages gets
    the CSS class ``disambig``. The first anchor for each distinct page is
    also given the id ``dab-<n>`` (numbered from 1), and the HTML of that
    disambiguation page is fetched for display alongside the article.

    Args:
        enwiki: Article title taken from the URL path.

    Returns:
        The rendered ``article.html`` template.
    """
    html = get_article_html(enwiki)
    links = get_article_links(enwiki)
    # Set copy for the per-anchor membership test; `links` itself is still
    # passed to the template unchanged.
    dab_titles = set(links)

    root = lxml.html.fromstring(html)
    html_links = defaultdict(list)
    seen = set()

    dab_list = []
    dab_num = 0

    for a in root.findall(".//a[@href]"):
        title = a.get("title")
        if title is None or title not in dab_titles:
            continue

        # Append to the class list instead of replacing it, so classes the
        # anchor already carries are preserved.
        existing_class = a.get("class")
        a.set(
            "class",
            f"{existing_class} disambig" if existing_class else "disambig",
        )

        if title not in seen:
            # Only the first occurrence gets an id and a sidebar entry.
            dab_num += 1
            a.set("id", f"dab-{dab_num}")
            seen.add(title)
            dab_html = get_article_html(title)
            dab_list.append({"num": dab_num, "title": title, "html": dab_html})

        html_links[title].append(a)

    return flask.render_template(
        "article.html",
        title=enwiki,
        text=lxml.html.tostring(root, encoding=str),
        links=links,
        html_links=html_links,
        dab_list=dab_list,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Start Flask's built-in server when the file is executed as a script.
    # NOTE(review): host "0.0.0.0" listens on every network interface, and
    # app.debug is set to True at module level -- confirm the debugger is
    # never reachable from an untrusted network.
    app.run(host="0.0.0.0")
|
Loading…
Reference in a new issue