Link matching improvements

This commit is contained in:
Edward Betts 2023-12-09 18:42:53 +00:00
parent 1da620875a
commit 14d8539298

View file

@ -23,7 +23,7 @@ trans2[en_dash] = trans2[" "]
patterns = [ patterns = [
lambda q: re.compile( lambda q: re.compile(
r"(?<!-)(?:\[\[(?:[^]]+\|)?)?(%s)%s(?:\]\])?" r"(?<!-)\[\[(%s)%s\|(?=.*\]\])"
% ( % (
re.escape(q[0]), re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]), "".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
@ -31,10 +31,19 @@ patterns = [
re.I, re.I,
), ),
lambda q: re.compile( lambda q: re.compile(
r"(?<!-)\[\[[^|]+\|(%s)%s\]\]" % (re.escape(q[0]), re.escape(q[1:])), re.I r"(?<!-)\[\[(?:(?!File:)(?:[^]]+\|)?)(%s)%s\]\]"
% (
re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
),
re.I,
), ),
lambda q: re.compile( lambda q: re.compile(
r"(?<!-)\[\[[^|]+\|(%s)%s(?:\]\])?" r"(?<!-)\[\[(?!File:)[^|]+\|(%s)%s\]\]" % (re.escape(q[0]), re.escape(q[1:])),
re.I,
),
lambda q: re.compile(
r"(?<!-)\[\[(?!File:)[^|]+\|(%s)%s(?:\]\])?"
% ( % (
re.escape(q[0]), re.escape(q[0]),
"".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]), "".join("-?" + (trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
@ -46,7 +55,7 @@ patterns = [
r"(?<!-)(%s)%s" r"(?<!-)(%s)%s"
% ( % (
re.escape(q[0]), re.escape(q[0]),
"".join((trans[c] if c in trans else re.escape(c)) for c in q[1:]), "".join((trans2[c] if c in trans2 else re.escape(c)) for c in q[1:]),
), ),
re.I, re.I,
), ),
@ -58,7 +67,9 @@ class NoMatch(Exception):
# Matches one whole <ref> element so citation markup can be separated from
# article prose.
#
# Earlier, narrower pattern (only {{cite ...}} templates or a bare external
# link inside the ref):
#   r"<ref( [^>]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*</ref>"
#
# Group 1 is the attribute text of the opening tag (if any).
#   * ``/>`` is tried first so a self-closing ``<ref name="x" />`` is a match
#     on its own instead of being read as an opening tag.
#   * The body is lazy (``.*?``): with re.S a greedy ``.*`` would run from the
#     first <ref> to the *last* </ref> and swallow all text in between.
re_cite = re.compile(
    r"<ref( [^>]*?)?(?:/>|>.*?</ref>)",
    re.I | re.S,
)
@ -173,8 +184,14 @@ def mk_link_matcher(q: str) -> typing.Callable[[str], re.Match[str] | None]:
return search_for_link return search_for_link
def add_link(m: re.Match[str], replacement: str, text: str) -> str:
    """Return *text* with the phrase matched by *m* turned into a wiki link.

    If the match is the opening half of a piped link (it starts with ``[[``
    and ends with ``|``), only the link target is swapped and the existing
    label and closing brackets after the match are left alone. Otherwise the
    whole match is replaced by ``[[replacement]]``.

    Args:
        m: match object locating the phrase to link.
        replacement: link target to insert.
        text: the text to rewrite.

    Returns:
        The rewritten text; exactly one occurrence is changed.
    """
    matched_text = m.group(0)
    is_piped_prefix = matched_text.startswith("[[") and matched_text.endswith("|")
    new_markup = f"[[{replacement}|" if is_piped_prefix else f"[[{replacement}]]"

    if m.string == text:
        # Splice at the exact span the caller matched. Re-running the pattern
        # over the text would rewrite the first occurrence instead, which can
        # be a different one when the match was not found from position 0.
        start, end = m.span()
        return text[:start] + new_markup + text[end:]

    # The text is not the string the match came from, so the offsets are
    # meaningless; fall back to replacing the first occurrence of the pattern.
    # A callable is used so backslashes in the replacement are not interpreted.
    return m.re.sub(lambda _match: new_markup, text, count=1)
def find_link_in_chunk( def find_link_in_chunk(
@ -303,30 +320,32 @@ def find_link_and_section(q: str, content: str, linkto: str | None = None):
if header: if header:
new_content += header new_content += header
for token_type, text in parse_cite(section_text): for token_type, text in parse_cite(section_text):
if token_type == "text" and not replacement: if token_type != "text" or replacement:
new_text = "" new_content += text
for token_type2, text2 in parse_links(text): continue
if token_type2 == "link" and not replacement: new_text = ""
link_text = text2[2:-2] for token_type2, text2 in parse_links(text):
if "|" in link_text: if token_type2 == "link" and not replacement:
link_dest, link_text = link_text.split("|", 1) link_text = text2[2:-2]
else: if "|" in link_text:
link_dest = None link_dest, link_text = link_text.split("|", 1)
m = search_for_link(link_text) else:
if m: link_dest = None
if link_dest: m = search_for_link(link_text)
found["link_dest"] = link_dest
found["link_text"] = link_text
replacement = match_found(m, q, None)
text2 = add_link(m, replacement, link_text)
new_text += text2
if replacement:
text = new_text
else:
m = search_for_link(text)
if m: if m:
replacement = match_found(m, q, linkto) if link_dest:
text = add_link(m, replacement, text) found["link_dest"] = link_dest
found["link_text"] = link_text
replacement = match_found(m, q, None)
text2 = add_link(m, replacement, link_text)
new_text += text2
if replacement:
text = new_text
else:
m = search_for_link(text)
if m:
replacement = match_found(m, q, linkto)
text = add_link(m, replacement, text)
new_content += text new_content += text
if replacement: if replacement:
found.update( found.update(