commit c4af5509295320352934ad67070cbe5cb11e6e9c Author: Edward Betts Date: Sat Aug 13 13:16:49 2022 +0100 Initial commit. diff --git a/article_list b/article_list new file mode 100644 index 0000000..2c37b8d --- /dev/null +++ b/article_list @@ -0,0 +1,50 @@ +Rail transport in Indonesia +Canadian Alpine Ski Championships +Orwell Prize +SchleFaZ +List of fatal victims of the September 11 attacks +List of Parkruns in the United Kingdom +Beitar Jerusalem F.C. +List of Hindi songs recorded by Asha Bhosle +Arabic exonyms +Popular Union +The Cantos +Unisex name +2021 Intercontinental GT Challenge +AS Kaloum Star +Akademi Fantasia (season 1) +Athletics at the 2022 Bolivarian Games +Black to the Future +Demographics of the Republic of Ireland +Education in Northern Ireland +Education in the Republic of Ireland +Healthcare in the Republic of Ireland +I Love the 2000s +Kununokuni +List of Belgian football transfers summer 2022 +List of Ultimate Marvel characters +List of Wisin & Yandel collaborations +List of comics based on films +List of programs broadcast by Asianet +List of tributaries of the Missouri River +Music of South Africa +Neuruppin +1979 Sydney City FC season +2007 in Spanish television +2022 Washington House of Representatives election +2022 World Athletics U20 Championships – Men's 4 × 100 metres relay +A2 autostrada (Poland) +Chandel (Rajput clan) +County of Isenburg +Dinka people +Dwayne McDuffie Award for Diversity in Comics +FTSE Italia Mid Cap +Globoplay +Index of Armenia-related articles +List of Denmark national football team hat-tricks +List of Equinox episodes +List of Indian monarchs +List of Italian exonyms in Dalmatia +List of cities with historical German exonyms +List of jötnar in Norse mythology +List of language families diff --git a/templates/article.html b/templates/article.html new file mode 100644 index 0000000..4f78ca3 --- /dev/null +++ b/templates/article.html @@ -0,0 +1,70 @@ + + + + + + + + + + + +
+
+
+

{{ title }}

+
{{ text | safe }}
+
+
+ {% for dab in dab_list %} +
+

{{ dab.title }}

+ +
{{ dab.html | safe }}
+
+ {% endfor %} +
+
+
#!/usr/bin/python3
# Recovered from a whitespace-collapsed patch. The patch context that shared
# these lines is kept here for reference:
#   diff --git a/templates/index.html b/templates/index.html
#   new file mode 100644
#   index 0000000..94c44d9
#   @@ -0,0 +1,15 @@  (the template markup is absent from the collapsed text)
#   diff --git a/web_view.py b/web_view.py
#   new file mode 100755
#   index 0000000..88140d7
#   @@ -0,0 +1,156 @@
"""Flask app that shows a Wikipedia article with its disambiguation links.

The index page lists the titles stored in the ``article_list`` file. The
article page fetches the rendered article from the English Wikipedia API,
marks every link that points at a disambiguation page, and passes the
rendered HTML of each such disambiguation page to the template.
"""

from collections import defaultdict

import flask
import lxml.html
import requests
from werkzeug.wrappers import Response  # kept: part of the original imports

app = flask.Flask(__name__)


# NOTE(review): debug mode enables the interactive Werkzeug debugger, and the
# entry point below binds to every interface. Together they allow remote code
# execution; switch one of them off before running on an untrusted network.
app.debug = True

API_URL = "https://en.wikipedia.org/w/api.php"

# Seconds to wait for the API before giving up; without a timeout a stalled
# connection would block the request handler forever.
REQUEST_TIMEOUT = 30


def _api_get(params: dict) -> dict:
    """Send a GET request to the Wikipedia API and return the decoded JSON.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no answer arrives within ``REQUEST_TIMEOUT``.
    """
    r = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()
    data: dict = r.json()
    return data


@app.route("/")
def index() -> str:
    """Render the index page listing the titles stored in ``article_list``."""
    # rstrip("\n") instead of line[:-1]: the last line may lack a trailing
    # newline, in which case slicing would cut off a real character.
    # Blank lines are skipped so they do not become empty titles.
    with open("article_list", encoding="utf-8") as f:
        articles = [line.rstrip("\n") for line in f if line.strip()]

    return flask.render_template("index.html", articles=articles)


def get_article_html(enwiki: str) -> str:
    """Return the rendered HTML of the English Wikipedia page *enwiki*.

    Uses ``action=parse`` with section edit links disabled.

    Raises:
        KeyError: if the API response carries no ``parse``/``text`` entry.
    """
    params = {
        "action": "parse",
        "format": "json",
        "formatversion": 2,
        "disableeditsection": 1,
        "page": enwiki,
    }

    html: str = _api_get(params)["parse"]["text"]
    return html


disambig_templates = [
    "Template:Disambiguation",
    "Template:Airport disambiguation",
    "Template:Biology disambiguation",
    "Template:Call sign disambiguation",
    "Template:Caselaw disambiguation",
    "Template:Chinese title disambiguation",
    "Template:Disambiguation cleanup",
    "Template:Genus disambiguation",
    "Template:Hospital disambiguation",
    "Template:Human name disambiguation",
    "Template:Human name disambiguation cleanup",
    "Template:Letter-number combination disambiguation",
    "Template:Mathematical disambiguation",
    "Template:Military unit disambiguation",
    "Template:Music disambiguation",
    "Template:Number disambiguation",
    "Template:Opus number disambiguation",
    "Template:Phonetics disambiguation",
    "Template:Place name disambiguation",
    "Template:Portal disambiguation",
    "Template:Road disambiguation",
    "Template:School disambiguation",
    "Template:Species Latin name abbreviation disambiguation",
    "Template:Species Latin name disambiguation",
    "Template:Station disambiguation",
    "Template:Synagogue disambiguation",
    "Template:Taxonomic authority disambiguation",
    "Template:Taxonomy disambiguation",
    "Template:Template disambiguation",
    "Template:WoO number disambiguation",
]


def get_article_links(enwiki: str) -> list[str]:
    """Return titles of disambiguation pages linked from article *enwiki*.

    A linked page is reported when it transcludes one of
    ``disambig_templates``. Titles ending in " (disambiguation)" are left
    out. Titles are returned without duplicates, in the order the API first
    reports them.
    """
    params = {
        "action": "query",
        "format": "json",
        "formatversion": 2,
        "titles": enwiki,
        "generator": "links",
        "gpllimit": "max",
        "gplnamespace": 0,
        "tllimit": "max",
        "tlnamespace": 10,
        "tltemplates": "|".join(disambig_templates),
        "prop": "templates",
    }

    links: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check; `links` keeps the order
    last_continue: dict = {}

    while True:
        # Each request is the base query plus the whole "continue" object of
        # the previous response. That object may hold "tlcontinue",
        # "gplcontinue" or both, so forwarding only one key would either
        # fail or skip part of the result.
        json_data = _api_get({**params, **last_continue})

        # "query" is absent when the generator yields no pages.
        pages = json_data.get("query", {}).get("pages", [])
        for page in pages:
            title = page["title"]
            if title.endswith(" (disambiguation)") or not page.get("templates"):
                continue
            if title not in seen:
                seen.add(title)
                links.append(title)

        if "continue" not in json_data:
            break
        last_continue = json_data["continue"]

    return links


# NOTE(review): the URL variable was missing from the collapsed source even
# though the view requires ``enwiki``. The ``path`` converter is used so that
# titles containing "/" still match; confirm against the templates' links.
@app.route("/enwiki/<path:enwiki>")
def article(enwiki: str) -> str:
    """Render article *enwiki* with its disambiguation links highlighted.

    Every anchor whose ``title`` attribute names a disambiguation page gets
    the class ``disambig``. The first anchor for each such title also gets
    the id ``dab-N``, and the HTML of that disambiguation page is fetched
    and added to ``dab_list`` for the template.
    """
    html = get_article_html(enwiki)
    links = get_article_links(enwiki)
    dab_titles = set(links)  # anchors are tested against this many times

    root = lxml.html.fromstring(html)
    html_links = defaultdict(list)
    seen = set()

    dab_list = []

    for a in root.findall(".//a[@href]"):
        title = a.get("title")
        if title is None or title not in dab_titles:
            continue

        a.set("class", "disambig")
        if title not in seen:
            seen.add(title)
            # Numbering starts at 1 and follows first appearance.
            dab_num = len(dab_list) + 1
            a.set("id", f"dab-{dab_num}")
            dab_html = get_article_html(title)
            dab_list.append({"num": dab_num, "title": title, "html": dab_html})

        html_links[title].append(a)

    return flask.render_template(
        "article.html",
        title=enwiki,
        text=lxml.html.tostring(root, encoding=str),
        links=links,
        html_links=html_links,
        dab_list=dab_list,
    )


if __name__ == "__main__":
    app.run(host="0.0.0.0")