Initial commit.
This commit is contained in:
commit
c4af550929
50
article_list
Normal file
50
article_list
Normal file
|
@ -0,0 +1,50 @@
|
|||
Rail transport in Indonesia
|
||||
Canadian Alpine Ski Championships
|
||||
Orwell Prize
|
||||
SchleFaZ
|
||||
List of fatal victims of the September 11 attacks
|
||||
List of Parkruns in the United Kingdom
|
||||
Beitar Jerusalem F.C.
|
||||
List of Hindi songs recorded by Asha Bhosle
|
||||
Arabic exonyms
|
||||
Popular Union
|
||||
The Cantos
|
||||
Unisex name
|
||||
2021 Intercontinental GT Challenge
|
||||
AS Kaloum Star
|
||||
Akademi Fantasia (season 1)
|
||||
Athletics at the 2022 Bolivarian Games
|
||||
Black to the Future
|
||||
Demographics of the Republic of Ireland
|
||||
Education in Northern Ireland
|
||||
Education in the Republic of Ireland
|
||||
Healthcare in the Republic of Ireland
|
||||
I Love the 2000s
|
||||
Kununokuni
|
||||
List of Belgian football transfers summer 2022
|
||||
List of Ultimate Marvel characters
|
||||
List of Wisin & Yandel collaborations
|
||||
List of comics based on films
|
||||
List of programs broadcast by Asianet
|
||||
List of tributaries of the Missouri River
|
||||
Music of South Africa
|
||||
Neuruppin
|
||||
1979 Sydney City FC season
|
||||
2007 in Spanish television
|
||||
2022 Washington House of Representatives election
|
||||
2022 World Athletics U20 Championships – Men's 4 × 100 metres relay
|
||||
A2 autostrada (Poland)
|
||||
Chandel (Rajput clan)
|
||||
County of Isenburg
|
||||
Dinka people
|
||||
Dwayne McDuffie Award for Diversity in Comics
|
||||
FTSE Italia Mid Cap
|
||||
Globoplay
|
||||
Index of Armenia-related articles
|
||||
List of Denmark national football team hat-tricks
|
||||
List of Equinox episodes
|
||||
List of Indian monarchs
|
||||
List of Italian exonyms in Dalmatia
|
||||
List of cities with historical German exonyms
|
||||
List of jötnar in Norse mythology
|
||||
List of language families
|
70
templates/article.html
Normal file
70
templates/article.html
Normal file
|
@ -0,0 +1,70 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>{{ title }}</title>
|
||||
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
|
||||
<style>
|
||||
a.disambig { color: #ff8c00; }
|
||||
a.disambig-highlight { color: #ff8c00; border: 2px solid #ff8c00; }
|
||||
a.new { color: red; }
|
||||
|
||||
#article {
|
||||
left: 0;
|
||||
width: 50%;
|
||||
}
|
||||
|
||||
.card-highlight {
|
||||
color: #000 !important;
|
||||
background-color: #FFD580 !important;
|
||||
}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
|
||||
<div class="m-3 container-fluid">
|
||||
<div class="row">
|
||||
<div class="col-8">
|
||||
<h1>{{ title }}</h1>
|
||||
<div>{{ text | safe }}</div>
|
||||
</div>
|
||||
<div class="col-4">
|
||||
{% for dab in dab_list %}
|
||||
<div class="card p-1 m-2" id="dab-card-{{ dab.num }}">
|
||||
<h3 class="card-title">{{ dab.title }}</h3>
|
||||
<div><a href="#" onclick="return jump_to({{ dab.num }})">Show in article</a></div>
|
||||
<div>{{ dab.html | safe }}</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// Highlight the sidebar card numbered `dab_num` and scroll the article to the
// matching link (the anchor whose id is "dab-<dab_num>").
function jump_to(dab_num) {
  // Reset: drop the highlight from every anchor and every card on the page.
  for (const anchor of document.getElementsByTagName("a")) {
    anchor.classList.remove("disambig-highlight");
  }
  for (const panel of document.getElementsByClassName("card")) {
    panel.classList.remove("card-highlight");
  }

  // Mark the selected card.
  document.getElementById("dab-card-" + dab_num).classList.add("card-highlight");

  // Bring the corresponding link into view and mark it as well.
  const target = document.getElementById("dab-" + dab_num);
  target.scrollIntoView();
  target.classList.add("disambig-highlight");

  // The inline onclick handler returns this value; false cancels the
  // default navigation of the "#" href.
  return false;
}
|
||||
</script>
|
||||
|
||||
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.2.0/dist/js/bootstrap.bundle.min.js" integrity="sha384-A3rJD856KowSb7dwlZdYEkO39Gagi7vIsF0jrRAoQmDKKtQBHUuLZ9AsSv4jD4Xa" crossorigin="anonymous"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
15
templates/index.html
Normal file
15
templates/index.html
Normal file
|
@ -0,0 +1,15 @@
|
|||
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title></title>
</head>

<body>
  {# One list item per article title, each linking to the "article" view. #}
  <ul>
  {% for enwiki in articles %}
    <li><a href="{{ url_for("article", enwiki=enwiki) }}">{{ enwiki }}</a></li>
  {% endfor %}
  </ul>
</body>
</html>
|
156
web_view.py
Executable file
156
web_view.py
Executable file
|
@ -0,0 +1,156 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import flask
|
||||
import lxml.html
|
||||
import requests
|
||||
from werkzeug.wrappers import Response
|
||||
|
||||
# The Flask application object; the routes below are registered on it.
app = flask.Flask(__name__)


# Debug mode is switched on unconditionally.
# NOTE(review): the entry point at the bottom of this file binds to 0.0.0.0;
# confirm that running with debug enabled on all interfaces is intended.
app.debug = True
|
||||
|
||||
|
||||
@app.route("/")
def index() -> str:
    """Render the front page listing every title found in ``article_list``.

    ``article_list`` is opened relative to the current working directory and
    is read as one article title per line.

    Returns:
        The rendered ``index.html`` template.
    """
    # The context manager closes the file, and rstrip("\n") (instead of
    # slicing off the last character) keeps the final title intact when the
    # file does not end with a newline. The list contains non-ASCII titles,
    # so the encoding is stated explicitly.
    with open("article_list", encoding="utf-8") as article_file:
        articles = [line.rstrip("\n") for line in article_file]

    return flask.render_template("index.html", articles=articles)
|
||||
|
||||
|
||||
def get_article_html(enwiki: str) -> str:
    """Return the rendered HTML of an English Wikipedia page.

    Calls the MediaWiki ``action=parse`` API with section edit links
    disabled.

    Args:
        enwiki: Title of the page to render.

    Returns:
        The value stored under ``parse`` -> ``text`` in the JSON response.

    Raises:
        requests.RequestException: The request timed out, could not be
            completed, or returned an HTTP error status.
        KeyError: The JSON response has no ``parse`` / ``text`` entry.
    """
    url = "https://en.wikipedia.org/w/api.php"

    params = {
        "action": "parse",
        "format": "json",
        "formatversion": 2,
        "disableeditsection": 1,
        "page": enwiki,
    }

    # Without a timeout a stalled connection would block the web request
    # that triggered this call indefinitely.
    r = requests.get(url, params=params, timeout=30)
    r.raise_for_status()
    html: str = r.json()["parse"]["text"]
    return html
|
||||
|
||||
|
||||
# Template titles sent as ``tltemplates`` by get_article_links(). A linked
# page is kept only when the API reports that it uses at least one of these
# templates, which is how disambiguation pages are recognised here.
disambig_templates = [
    "Template:Disambiguation",
    "Template:Airport disambiguation",
    "Template:Biology disambiguation",
    "Template:Call sign disambiguation",
    "Template:Caselaw disambiguation",
    "Template:Chinese title disambiguation",
    "Template:Disambiguation cleanup",
    "Template:Genus disambiguation",
    "Template:Hospital disambiguation",
    "Template:Human name disambiguation",
    "Template:Human name disambiguation cleanup",
    "Template:Letter-number combination disambiguation",
    "Template:Mathematical disambiguation",
    "Template:Military unit disambiguation",
    "Template:Music disambiguation",
    "Template:Number disambiguation",
    "Template:Opus number disambiguation",
    "Template:Phonetics disambiguation",
    "Template:Place name disambiguation",
    "Template:Portal disambiguation",
    "Template:Road disambiguation",
    "Template:School disambiguation",
    "Template:Species Latin name abbreviation disambiguation",
    "Template:Species Latin name disambiguation",
    "Template:Station disambiguation",
    "Template:Synagogue disambiguation",
    "Template:Taxonomic authority disambiguation",
    "Template:Taxonomy disambiguation",
    "Template:Template disambiguation",
    "Template:WoO number disambiguation",
]
|
||||
|
||||
|
||||
def get_article_links(enwiki: str) -> list[str]:
    """Get the disambiguation pages linked from this article.

    Uses the MediaWiki ``links`` generator (restricted to namespace 0) and,
    for every linked page, asks which of ``disambig_templates`` it uses.
    A linked page is returned only when at least one of those templates is
    reported for it.

    Args:
        enwiki: Title of the article whose outgoing links are inspected.

    Returns:
        Titles of the matching linked pages, without duplicates, in the
        order the API first reported them. Titles ending in
        " (disambiguation)" are left out (presumably because such links are
        deliberate; confirm with the author).

    Raises:
        requests.RequestException: A request timed out, could not be
            completed, or returned an HTTP error status.
    """
    url = "https://en.wikipedia.org/w/api.php"

    params = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": enwiki,
        "generator": "links",
        "gpllimit": "max",
        "gplnamespace": 0,
        "tllimit": "max",
        "tlnamespace": 10,
        "tltemplates": "|".join(disambig_templates),
        "prop": "templates",
    }

    # A dict is used as an insertion-ordered set: O(1) de-duplication while
    # keeping the order in which titles were first seen.
    links: dict[str, None] = {}

    while True:
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        json_data = r.json()

        # "query" is looked up defensively so that a response without it
        # (for example an article with no links) yields no pages instead of
        # raising KeyError.
        pages = json_data.get("query", {}).get("pages", [])
        for page in pages:
            title = page["title"]
            if title.endswith(" (disambiguation)") or not page.get("templates"):
                continue
            links.setdefault(title)

        if "continue" not in json_data:
            break

        # Merge *all* continuation values into the next request. The block
        # may carry "tlcontinue" (more templates for the current batch)
        # instead of, or in addition to, "gplcontinue"; copying only
        # "gplcontinue" raised KeyError or skipped data in that case.
        params.update(json_data["continue"])

    return list(links)
|
||||
|
||||
|
||||
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> str:
    """Article Page.

    Fetches the article HTML, marks every link that points at a
    disambiguation page, and renders it next to the HTML of each of those
    disambiguation pages.

    Args:
        enwiki: Title of the article, taken from the URL path.

    Returns:
        The rendered ``article.html`` template.
    """
    html = get_article_html(enwiki)
    links = get_article_links(enwiki)
    # Set copy for O(1) membership tests inside the anchor loop; the
    # original list is still what gets handed to the template.
    link_titles = set(links)

    root = lxml.html.fromstring(html)
    html_links = defaultdict(list)
    seen = set()

    dab_list = []
    dab_num = 0

    for a in root.findall(".//a[@href]"):
        title = a.get("title")
        if title is None or title not in link_titles:
            continue

        # Replaces the anchor's class attribute so the template CSS can
        # colour it as a disambiguation link.
        a.set("class", "disambig")

        if title not in seen:
            # Only the first occurrence of each title gets a number and an
            # id; the template's jump_to() scrolls to "dab-<num>".
            dab_num += 1
            a.set("id", f"dab-{dab_num}")
            seen.add(title)
            dab_html = get_article_html(title)
            dab_list.append({"num": dab_num, "title": title, "html": dab_html})

        html_links[title].append(a)

    return flask.render_template(
        "article.html",
        title=enwiki,
        text=lxml.html.tostring(root, encoding=str),
        links=links,
        html_links=html_links,
        dab_list=dab_list,
    )
|
||||
|
||||
|
||||
# Script entry point: start Flask's built-in server when run directly.
if __name__ == "__main__":
    # NOTE(review): host "0.0.0.0" listens on every network interface while
    # app.debug is set to True earlier in this file. Flask's documentation
    # warns against exposing the debugger to untrusted networks; confirm
    # this is only run on a trusted host.
    app.run(host="0.0.0.0")
|
Loading…
Reference in a new issue