From daf2a254584b8eb79730a82b665a341f41650450 Mon Sep 17 00:00:00 2001
From: Edward Betts <edward@4angle.com>
Date: Thu, 18 Aug 2022 20:51:18 +0100
Subject: [PATCH] Include all occurrences of dab links, not just the first.

---
 dab_mechanic/wikipedia.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/dab_mechanic/wikipedia.py b/dab_mechanic/wikipedia.py
index 57c03c4..9bcb0fa 100644
--- a/dab_mechanic/wikipedia.py
+++ b/dab_mechanic/wikipedia.py
@@ -121,10 +121,9 @@ def delete_toc(root: lxml.html.HtmlElement) -> None:
         toc.getparent().remove(toc)
 
 
-def get_dab_html(dab_num: int, title: str) -> str:
+def get_dab_html(dab_num: int, html: str) -> str:
     """Parse dab page and rewrite links."""
-    dab_html = get_article_html(title)
-    root = lxml.html.fromstring(dab_html)
+    root = lxml.html.fromstring(html)
     delete_toc(root)
 
     element_id_map = {e.get("id"): e for e in root.findall(".//*[@id]")}
@@ -160,6 +159,7 @@ class Article:
         self.dab_lookup: dict[int, str] = {}
         self.dab_order: list[str] = []
         self.parse: Optional[dict[str, Any]] = None
+        self.dab_html: dict[str, str] = {}
 
     def save_endpoint(self) -> str:
         """Endpoint for saving changes."""
@@ -173,28 +173,25 @@ class Article:
 
     def iter_links(self) -> Iterator[tuple[lxml.html.Element, str]]:
         """Disambiguation links that need fixing."""
-        seen = set()
         for a in self.root.findall(".//a[@href]"):
             title = a.get("title")
             if title is None or title not in self.links:
                 continue
-            a.set("class", "disambig")
-
-            if title in seen:
-                continue
-            seen.add(title)
-
             yield a, title
 
     def process_links(self) -> None:
         """Process links in parsed wikitext."""
         for dab_num, (a, title) in enumerate(self.iter_links()):
+            a.set("class", "disambig")
             a.set("id", f"dab-{dab_num}")
 
+            if title not in self.dab_html:
+                self.dab_html[title] = get_article_html(title)
+
             dab: DabItem = {
                 "num": dab_num,
                 "title": title,
-                "html": get_dab_html(dab_num, title),
+                "html": get_dab_html(dab_num, self.dab_html[title]),
             }
             self.dab_list.append(dab)
             self.dab_order.append(title)