This commit is contained in:
Edward Betts 2022-08-15 11:43:12 +01:00
parent 53ec386477
commit b12b9459ef

View file

@@ -2,7 +2,7 @@
import json import json
import re import re
from typing import Any, TypedDict from typing import Any, Iterator, TypedDict
import flask import flask
import lxml.html import lxml.html
@@ -249,24 +249,31 @@ class Article:
html = get_article_html(self.enwiki) html = get_article_html(self.enwiki)
self.root = lxml.html.fromstring(html) self.root = lxml.html.fromstring(html)
def process_links(self) -> None: def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
"""Process links in parsed wikitext.""" """Disambiguation links that need fixing."""
dab_num = 0
seen = set() seen = set()
for a in self.root.findall(".//a[@href]"): for a in self.root.findall(".//a[@href]"):
title = a.get("title") title = a.get("title")
if title is None: if title is None or title not in self.links:
continue
if title not in self.links:
continue continue
a.set("class", "disambig") a.set("class", "disambig")
if title not in seen:
dab_num += 1 if title in seen:
a.set("id", f"dab-{dab_num}") continue
seen.add(title) seen.add(title)
dab_html = get_dab_html(dab_num, title)
dab: DabItem = {"num": dab_num, "title": title, "html": dab_html} yield a, title
def process_links(self) -> None:
"""Process links in parsed wikitext."""
for dab_num, (a, title) in enumerate(self.iter_links()):
a.set("id", f"dab-{dab_num}")
dab: DabItem = {
"num": dab_num,
"title": title,
"html": get_dab_html(dab_num, title),
}
self.dab_list.append(dab) self.dab_list.append(dab)
self.dab_order.append(title) self.dab_order.append(title)
self.dab_lookup[dab_num] = title self.dab_lookup[dab_num] = title