From 14d85392986fe13b2a2ed10f02124ffeba6c2f3d Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 9 Dec 2023 18:42:53 +0000 Subject: [PATCH] Link matching improvements --- add_links/match.py | 79 ++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/add_links/match.py b/add_links/match.py index 3ea90a5..41c9ef5 100644 --- a/add_links/match.py +++ b/add_links/match.py @@ -23,7 +23,7 @@ trans2[en_dash] = trans2[" "] patterns = [ lambda q: re.compile( - r"(?]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*", re.I | re.S + # r"]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*", re.I | re.S + r"]*?)?>.*", + re.I | re.S, ) @@ -173,8 +184,14 @@ def mk_link_matcher(q: str) -> typing.Callable[[str], re.Match[str] | None]: return search_for_link -def add_link(m, replacement, text): - return m.re.sub(lambda m: "[[%s]]" % replacement, text, count=1) +def add_link(m: re.Match[str], replacement: str, text: str) -> str: + """Add link to text.""" + + matched_text = m.group(0) + if matched_text.startswith("[[") and matched_text.endswith("|"): + return m.re.sub(lambda m: f"[[{replacement}|", text, count=1) + else: + return m.re.sub(lambda m: f"[[{replacement}]]", text, count=1) def find_link_in_chunk( @@ -303,30 +320,32 @@ def find_link_and_section(q: str, content: str, linkto: str | None = None): if header: new_content += header for token_type, text in parse_cite(section_text): - if token_type == "text" and not replacement: - new_text = "" - for token_type2, text2 in parse_links(text): - if token_type2 == "link" and not replacement: - link_text = text2[2:-2] - if "|" in link_text: - link_dest, link_text = link_text.split("|", 1) - else: - link_dest = None - m = search_for_link(link_text) - if m: - if link_dest: - found["link_dest"] = link_dest - found["link_text"] = link_text - replacement = match_found(m, q, None) - text2 = add_link(m, replacement, link_text) - new_text += text2 - if replacement: - text = new_text - else: - m = search_for_link(text) + if token_type != "text" or replacement: + new_content += text + continue + new_text = "" + for token_type2, text2 in parse_links(text): + if token_type2 == "link" and not replacement: + link_text = text2[2:-2] + if "|" in link_text: + link_dest, link_text = link_text.split("|", 1) + else: + link_dest = None + m = search_for_link(link_text) if m: - replacement = match_found(m, q, linkto) - text = add_link(m, replacement, text) + if link_dest: + found["link_dest"] = link_dest + found["link_text"] = link_text + replacement = match_found(m, q, None) + text2 = add_link(m, replacement, link_text) + new_text += text2 + if replacement: + text = new_text + else: + m = search_for_link(text) + if m: + replacement = match_found(m, q, linkto) + text = add_link(m, replacement, text) new_content += text if replacement: found.update(