Link matching improvements

This commit is contained in:
Edward Betts 2023-12-09 18:42:53 +00:00
parent 1da620875a
commit 14d8539298

View file

@ -23,7 +23,7 @@ trans2[en_dash] = trans2[" "]
patterns = [
lambda q: re.compile(
r"(?<!-)(?:\[\[(?:[^]]+\|)?)?(%s)%s(?:\]\])?"
r"(?<!-)\[\[(%s)%s\|(?=.*\]\])"
% (
re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
@ -31,10 +31,19 @@ patterns = [
re.I,
),
lambda q: re.compile(
r"(?<!-)\[\[[^|]+\|(%s)%s\]\]" % (re.escape(q[0]), re.escape(q[1:])), re.I
r"(?<!-)\[\[(?:(?!File:)(?:[^]]+\|)?)(%s)%s\]\]"
% (
re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
),
re.I,
),
lambda q: re.compile(
r"(?<!-)\[\[[^|]+\|(%s)%s(?:\]\])?"
r"(?<!-)\[\[(?!File:)[^|]+\|(%s)%s\]\]" % (re.escape(q[0]), re.escape(q[1:])),
re.I,
),
lambda q: re.compile(
r"(?<!-)\[\[(?!File:)[^|]+\|(%s)%s(?:\]\])?"
% (
re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
@ -46,7 +55,7 @@ patterns = [
r"(?<!-)(%s)%s"
% (
re.escape(q[0]),
"".join((trans[c] if c in trans else re.escape(c)) for c in q[1:]),
"".join((trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
),
re.I,
),
@ -58,7 +67,9 @@ class NoMatch(Exception):
# Matches one complete <ref ...>...</ref> footnote, whatever its body holds.
# An earlier, narrower form only accepted a {{cite ...}} template or a bare
# [http(s)://...] external link as the body.
# The body is matched lazily so each footnote is its own match: a greedy ".*"
# under re.S would run from the first <ref> to the last </ref> and swallow all
# of the article text lying between two footnotes.
re_cite = re.compile(
    r"<ref( [^>]*?)?>.*?</ref>",
    re.I | re.S,
)
@ -173,8 +184,14 @@ def mk_link_matcher(q: str) -> typing.Callable[[str], re.Match[str] | None]:
return search_for_link
def add_link(m: re.Match[str], replacement: str, text: str) -> str:
    """Return *text* with the matched phrase turned into a link to *replacement*.

    Args:
        m: Match describing the phrase to link.
        replacement: Link target to insert.
        text: Text to rewrite, normally the string *m* was found in.

    Returns:
        A copy of *text* with one occurrence rewritten. When the match is the
        opening half of a piped link (``[[phrase|``) only the target is
        swapped and the pipe is kept, so the existing label and closing
        brackets that follow stay in place. Otherwise the whole match becomes
        ``[[replacement]]``.
    """
    matched_text = m.group(0)
    if matched_text.startswith("[[") and matched_text.endswith("|"):
        link = f"[[{replacement}|"
    else:
        link = f"[[{replacement}]]"

    if text == m.string:
        # Rewrite exactly the occurrence the match describes. Re-running the
        # pattern would always hit the first occurrence, which is wrong when
        # the caller picked a later one.
        return text[: m.start()] + link + text[m.end() :]

    # *text* is not the searched string, so the match offsets do not apply;
    # fall back to rewriting the first occurrence of the pattern. A callable
    # is used so backslashes in the link are inserted literally.
    return m.re.sub(lambda _match: link, text, count=1)
def find_link_in_chunk(
@ -303,30 +320,32 @@ def find_link_and_section(q: str, content: str, linkto: str | None = None):
if header:
new_content += header
for token_type, text in parse_cite(section_text):
if token_type == "text" and not replacement:
new_text = ""
for token_type2, text2 in parse_links(text):
if token_type2 == "link" and not replacement:
link_text = text2[2:-2]
if "|" in link_text:
link_dest, link_text = link_text.split("|", 1)
else:
link_dest = None
m = search_for_link(link_text)
if m:
if link_dest:
found["link_dest"] = link_dest
found["link_text"] = link_text
replacement = match_found(m, q, None)
text2 = add_link(m, replacement, link_text)
new_text += text2
if replacement:
text = new_text
else:
m = search_for_link(text)
if token_type != "text" or replacement:
new_content += text
continue
new_text = ""
for token_type2, text2 in parse_links(text):
if token_type2 == "link" and not replacement:
link_text = text2[2:-2]
if "|" in link_text:
link_dest, link_text = link_text.split("|", 1)
else:
link_dest = None
m = search_for_link(link_text)
if m:
replacement = match_found(m, q, linkto)
text = add_link(m, replacement, text)
if link_dest:
found["link_dest"] = link_dest
found["link_text"] = link_text
replacement = match_found(m, q, None)
text2 = add_link(m, replacement, link_text)
new_text += text2
if replacement:
text = new_text
else:
m = search_for_link(text)
if m:
replacement = match_found(m, q, linkto)
text = add_link(m, replacement, text)
new_content += text
if replacement:
found.update(