Show Articles With Multiple Dablinks report on index page.
This commit is contained in:
parent
6f4a5ecc56
commit
d499c896b4
|
@ -2,8 +2,11 @@
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
<ul>
|
<ul>
|
||||||
{% for enwiki in articles %}
|
{% for enwiki, count in articles %}
|
||||||
<li><a href="{{ url_for("article_page", enwiki=enwiki) }}">{{ enwiki }}</li>
|
<li>
|
||||||
|
<a href="{{ url_for("article_page", enwiki=enwiki) }}">{{ enwiki }}
|
||||||
|
({{ count }} links)
|
||||||
|
</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</ul>
|
</ul>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|
51
web_view.py
51
web_view.py
|
@ -3,8 +3,7 @@
|
||||||
import inspect
|
import inspect
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from dab_mechanic import wikidata_oauth
|
from typing import Any, Iterator, Optional, TypedDict
|
||||||
from typing import Any, Iterator, TypedDict
|
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import lxml.html
|
import lxml.html
|
||||||
|
@ -14,6 +13,8 @@ from requests_oauthlib import OAuth1Session
|
||||||
from werkzeug.debug.tbtools import get_current_traceback
|
from werkzeug.debug.tbtools import get_current_traceback
|
||||||
from werkzeug.wrappers import Response
|
from werkzeug.wrappers import Response
|
||||||
|
|
||||||
|
from dab_mechanic import wikidata_oauth
|
||||||
|
|
||||||
app = flask.Flask(__name__)
|
app = flask.Flask(__name__)
|
||||||
app.config.from_object("config.default")
|
app.config.from_object("config.default")
|
||||||
app.debug = True
|
app.debug = True
|
||||||
|
@ -22,6 +23,7 @@ wiki_hostname = "en.wikipedia.org"
|
||||||
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
wiki_api_php = f"https://{wiki_hostname}/w/api.php"
|
||||||
wiki_index_php = f"https://{wiki_hostname}/w/index.php"
|
wiki_index_php = f"https://{wiki_hostname}/w/index.php"
|
||||||
|
|
||||||
|
|
||||||
@app.before_request
|
@app.before_request
|
||||||
def global_user():
|
def global_user():
|
||||||
"""Make username available everywhere."""
|
"""Make username available everywhere."""
|
||||||
|
@ -59,15 +61,35 @@ def get_content(title: str) -> str:
|
||||||
return rev
|
return rev
|
||||||
|
|
||||||
|
|
||||||
|
def parse_articles_with_dab_links(root: lxml.html.Element) -> list[tuple[str, int]]:
    """Parse Articles With Multiple Dablinks.

    Looks up the first ``<table>`` below *root* and reads one article per
    row: the text of the first child of the first cell is the title, and the
    text of the first child of the second cell is the count, which must look
    like ``"<number> links"``.

    Returns a list of ``(title, count)`` tuples in table order, or an empty
    list when the document contains no table.

    Raises ValueError when a count cell does not end with ``" links"``.
    """
    suffix = " links"

    table = root.find(".//table")
    if table is None:
        # find() returns None when nothing matches; iterating None would
        # fail with an unhelpful TypeError.
        return []

    articles: list[tuple[str, int]] = []
    for tr in table:
        title = tr[0][0].text
        count_text = tr[1][0].text
        # Validate explicitly instead of using assert: assertions are
        # stripped when Python runs with -O, and this text is scraped
        # from an external page.
        if not count_text or not count_text.endswith(suffix):
            raise ValueError(f"unexpected link count text: {count_text!r}")
        count = int(count_text[: -len(suffix)])

        articles.append((title, count))

    return articles
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def index():
|
def index():
|
||||||
articles = [line[:-1] for line in open("article_list")]
|
|
||||||
|
r = requests.get("https://dplbot.toolforge.org/articles_with_dab_links.php")
|
||||||
|
root = lxml.html.fromstring(r.content)
|
||||||
|
articles = parse_articles_with_dab_links(root)
|
||||||
|
|
||||||
|
# articles = [line[:-1] for line in open("article_list")]
|
||||||
|
|
||||||
return flask.render_template("index.html", articles=articles)
|
return flask.render_template("index.html", articles=articles)
|
||||||
|
|
||||||
|
|
||||||
def get_article_html(enwiki: str) -> str:
|
def call_parse_api(enwiki: str) -> dict[str, Any]:
|
||||||
"""Parse article wikitext and return HTML."""
|
"""Call mediawiki parse API for given article."""
|
||||||
url = "https://en.wikipedia.org/w/api.php"
|
url = "https://en.wikipedia.org/w/api.php"
|
||||||
|
|
||||||
params: dict[str, str | int] = {
|
params: dict[str, str | int] = {
|
||||||
|
@ -76,11 +98,19 @@ def get_article_html(enwiki: str) -> str:
|
||||||
"formatversion": 2,
|
"formatversion": 2,
|
||||||
"disableeditsection": 1,
|
"disableeditsection": 1,
|
||||||
"page": enwiki,
|
"page": enwiki,
|
||||||
|
"prop": "text|links|headhtml",
|
||||||
|
"disabletoc": 1,
|
||||||
}
|
}
|
||||||
|
|
||||||
r = requests.get(url, params=params)
|
r = requests.get(url, params=params)
|
||||||
html: str = r.json()["parse"]["text"]
|
parse: dict[str, Any] = r.json()["parse"]
|
||||||
return html
|
return parse
|
||||||
|
|
||||||
|
|
||||||
|
def get_article_html(enwiki: str) -> str:
    """Return the "text" entry of the parse API result for the given article."""
    parse_result = call_parse_api(enwiki)
    html: str = parse_result["text"]
    return html
|
||||||
|
|
||||||
|
|
||||||
disambig_templates = [
|
disambig_templates = [
|
||||||
|
@ -267,6 +297,7 @@ class Article:
|
||||||
self.dab_list: list[DabItem] = []
|
self.dab_list: list[DabItem] = []
|
||||||
self.dab_lookup: dict[int, str] = {}
|
self.dab_lookup: dict[int, str] = {}
|
||||||
self.dab_order: list[str] = []
|
self.dab_order: list[str] = []
|
||||||
|
self.parse: Optional[dict[str, Any]] = None
|
||||||
|
|
||||||
def save_endpoint(self) -> str:
|
def save_endpoint(self) -> str:
|
||||||
"""Endpoint for saving changes."""
|
"""Endpoint for saving changes."""
|
||||||
|
@ -275,8 +306,8 @@ class Article:
|
||||||
|
|
||||||
def load(self) -> None:
|
def load(self) -> None:
|
||||||
"""Load parsed article HTML."""
|
"""Load parsed article HTML."""
|
||||||
html = get_article_html(self.enwiki)
|
self.parse = call_parse_api(self.enwiki)
|
||||||
self.root = lxml.html.fromstring(html)
|
self.root = lxml.html.fromstring(self.parse.pop("text"))
|
||||||
|
|
||||||
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
|
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
|
||||||
"""Disambiguation links that need fixing."""
|
"""Disambiguation links that need fixing."""
|
||||||
|
@ -328,6 +359,8 @@ def article_page(enwiki: str) -> Response:
|
||||||
article.load()
|
article.load()
|
||||||
article.process_links()
|
article.process_links()
|
||||||
|
|
||||||
|
assert article.parse
|
||||||
|
|
||||||
return flask.render_template("article.html", article=article)
|
return flask.render_template("article.html", article=article)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue