This commit is contained in:
Edward Betts 2022-08-15 11:43:12 +01:00
parent 53ec386477
commit b12b9459ef

View file

@ -2,7 +2,7 @@
import json
import re
from typing import Any, TypedDict
from typing import Any, Iterator, TypedDict
import flask
import lxml.html
@ -249,24 +249,31 @@ class Article:
html = get_article_html(self.enwiki)
self.root = lxml.html.fromstring(html)
def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
    """Yield the disambiguation links that need fixing, once per title.

    Walks every ``<a>`` element under ``self.root`` that has an ``href``
    attribute. Anchors with no ``title`` attribute, or whose title is not
    in ``self.links``, are ignored. Every remaining anchor gets
    ``class="disambig"``, but only the first anchor seen for a given title
    is yielded.

    Yields:
        ``(anchor, title)`` pairs in document order.
    """
    seen: set[str] = set()
    for a in self.root.findall(".//a[@href]"):
        title = a.get("title")
        if title is None or title not in self.links:
            continue

        # Repeated links to the same title are still marked, just not
        # yielded a second time.
        a.set("class", "disambig")

        if title in seen:
            continue
        seen.add(title)

        yield a, title
def process_links(self) -> None:
    """Number the links from ``iter_links`` and record each one.

    For every ``(anchor, title)`` pair yielded by ``self.iter_links()``:
    the anchor's ``id`` is set to ``dab-<n>``, a ``DabItem`` dict is
    appended to ``self.dab_list``, the title is appended to
    ``self.dab_order``, and ``self.dab_lookup[n]`` is set to the title.
    ``n`` comes from ``enumerate`` and therefore starts at 0.
    """
    for index, pair in enumerate(self.iter_links()):
        anchor, title = pair
        anchor.set("id", f"dab-{index}")

        rendered = get_dab_html(index, title)
        entry: DabItem = {"num": index, "title": title, "html": rendered}

        self.dab_list.append(entry)
        self.dab_order.append(title)
        self.dab_lookup[index] = title