diff --git a/add_links/match.py b/add_links/match.py
index cc18fdc..dc3dabc 100644
--- a/add_links/match.py
+++ b/add_links/match.py
@@ -11,7 +11,7 @@ re_link_in_text = re.compile(r"\[\[[^]]+?\]\]", re.I | re.S)
class LinkReplace(Exception):
- """Replaces and existing link."""
+ pass
en_dash = "\u2013"
@@ -23,7 +23,7 @@ trans2[en_dash] = trans2[" "]
patterns = [
lambda q: re.compile(
- r"(?]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*", re.I | re.S
- r"[]*?)?>.*]",
- re.I | re.S,
+ r"[]*?)?>\s*({{cite.*?}}|\[https?://[^]]*?\])\s*]", re.I | re.S
)
@@ -109,7 +98,7 @@ def section_iter(text: str) -> typing.Iterator[tuple[str | None, str]]:
def get_subsections(text: str, section_num: int) -> str:
- """Retrieve the text of subsections for a given section number within an article."""
+ "retrieve the text of subsections for a given section number within an article"
found = ""
collection_level = None
for num, (heading, body) in enumerate(section_iter(text)):
@@ -131,7 +120,7 @@ def get_subsections(text: str, section_num: int) -> str:
return found
-def match_found(m: re.Match[str], q: str, linkto: str | None) -> str:
+def match_found(m, q, linkto):
if q[1:] == m.group(0)[1:]:
replacement = m.group(1) + q[1:]
elif any(c.isupper() for c in q[1:]) or m.group(0) == m.group(0).upper():
@@ -170,34 +159,23 @@ def parse_links(text: str) -> typing.Iterator[tuple[str, str]]:
yield ("text", text[prev:])
-def mk_link_matcher(q: str) -> typing.Callable[[str], re.Match[str] | None]:
- """Make link matcher."""
+def mk_link_matcher(q):
re_links = [p(q) for p in patterns]
- def search_for_link(text: str) -> re.Match[str] | None:
+ def search_for_link(text):
for re_link in re_links:
m = re_link.search(text)
if m and m.group(0).count("[[") < 4:
return m
- return None
return search_for_link
-def add_link(m: re.Match[str], replacement: str, text: str) -> str:
- """Add link to text."""
-
- matched_text = m.group(0)
- if matched_text.startswith("[[") and matched_text.endswith("|"):
- return m.re.sub(lambda m: f"[[{replacement}|", text, count=1)
- else:
- return m.re.sub(lambda m: f"[[{replacement}]]", text, count=1)
+def add_link(m, replacement, text):
+ return m.re.sub(lambda m: "[[%s]]" % replacement, text, count=1)
-def find_link_in_chunk(
- q: str, content: str, linkto: str | None = None
-) -> tuple[str, str | None, str | None]:
- """Find link in chunk."""
+def find_link_in_chunk(q, content, linkto=None):
search_for_link = mk_link_matcher(q)
new_content = ""
replacement = None
@@ -267,7 +245,7 @@ def find_link_in_chunk(
return (new_content, replacement, found_text_to_link)
-def find_link_in_text(q: str, content: str) -> tuple[str, str]:
+def find_link_in_text(q, content):
(new_content, replacement) = find_link_in_chunk(q, content)
if replacement:
return (new_content, replacement)
@@ -302,7 +280,7 @@ def find_link_in_content(q, content, linkto=None):
raise LinkReplace if link_replace else NoMatch
-def find_link_and_section(q: str, content: str, linkto: str | None = None):
+def find_link_and_section(q, content, linkto=None):
if linkto:
try:
return find_link_and_section(linkto, content)
@@ -320,32 +298,30 @@ def find_link_and_section(q: str, content: str, linkto: str | None = None):
if header:
new_content += header
for token_type, text in parse_cite(section_text):
- if token_type != "text" or replacement:
- new_content += text
- continue
- new_text = ""
- for token_type2, text2 in parse_links(text):
- if token_type2 == "link" and not replacement:
- link_text = text2[2:-2]
- if "|" in link_text:
- link_dest, link_text = link_text.split("|", 1)
- else:
- link_dest = None
- m = search_for_link(link_text)
+ if token_type == "text" and not replacement:
+ new_text = ""
+ for token_type2, text2 in parse_links(text):
+ if token_type2 == "link" and not replacement:
+ link_text = text2[2:-2]
+ if "|" in link_text:
+ link_dest, link_text = link_text.split("|", 1)
+ else:
+ link_dest = None
+ m = search_for_link(link_text)
+ if m:
+ if link_dest:
+ found["link_dest"] = link_dest
+ found["link_text"] = link_text
+ replacement = match_found(m, q, None)
+ text2 = add_link(m, replacement, link_text)
+ new_text += text2
+ if replacement:
+ text = new_text
+ else:
+ m = search_for_link(text)
if m:
- if link_dest:
- found["link_dest"] = link_dest
- found["link_text"] = link_text
- replacement = match_found(m, q, None)
- text2 = add_link(m, replacement, link_text)
- new_text += text2
- if replacement:
- text = new_text
- else:
- m = search_for_link(text)
- if m:
- replacement = match_found(m, q, linkto)
- text = add_link(m, replacement, text)
+ replacement = match_found(m, q, linkto)
+ text = add_link(m, replacement, text)
new_content += text
if replacement:
found.update(
@@ -362,7 +338,9 @@ def find_link_and_section(q: str, content: str, linkto: str | None = None):
def find_refs(text: str) -> list[str]:
"""Find <ref> in wikitext."""
+
refs = re.findall("][]*)>(.+?)]", text)
+ print(refs)
return refs
diff --git a/cmdline.py b/cmdline.py
index 1b2edb1..1d1de7c 100755
--- a/cmdline.py
+++ b/cmdline.py
@@ -47,7 +47,6 @@ def search_count_with_link(q: str) -> int:
def parse_contribs() -> list[tuple[str, int]]:
- """Parse user contributions."""
re_comment = re.compile(r"^link \[\[(.*)\]\] using")
links: collections.Counter[str] = collections.Counter()
@@ -71,48 +70,45 @@ def parse_contribs() -> list[tuple[str, int]]:
return links.most_common(200)
-def main() -> None:
- with open("examples") as f:
- seen = {json.loads(line)["title"] for line in f}
+with open("examples") as f:
+ seen = {json.loads(line)["title"] for line in f}
- out = open("examples", "a")
- for from_title, num in parse_contribs():
- if from_title in seen:
- continue
- count = search_count(from_title)
- count_with_link = search_count_with_link(from_title)
- ratio = float(count_with_link) / float(count)
-
- print(from_title, count, count_with_link, f"{ratio:.1%}")
- print(
- json.dumps(
- {"title": from_title, "total": count, "with_links": count_with_link}
- ),
- file=out,
- )
- out.flush()
- time.sleep(0.1)
- out.close()
-
- sys.exit(0)
+out = open("examples", "a")
+for from_title, num in parse_contribs():
+ if from_title in seen:
+ continue
count = search_count(from_title)
count_with_link = search_count_with_link(from_title)
ratio = float(count_with_link) / float(count)
- print(count, count_with_link, f"{ratio:.1%}")
+ print(from_title, count, count_with_link, f"{ratio:.1%}")
+ print(
+ json.dumps(
+ {"title": from_title, "total": count, "with_links": count_with_link}
+ ),
+ file=out,
+ )
+ out.flush()
+ time.sleep(0.1)
+out.close()
- sys.exit(0)
-
- totalhits, search_hits = search_no_link(from_title)
-
- for hit in search_hits:
- print(" ", hit)
- print(count, count_with_link, f"{ratio:.1%}", totalhits, len(search_hits))
-
- # ret = core.do_search(from_title)
- # print(ret)
+sys.exit(0)
-if __name__ == "__main__":
- main()
+count = search_count(from_title)
+count_with_link = search_count_with_link(from_title)
+ratio = float(count_with_link) / float(count)
+
+print(count, count_with_link, f"{ratio:.1%}")
+
+sys.exit(0)
+
+totalhits, search_hits = search_no_link(from_title)
+
+for hit in search_hits:
+ print(" ", hit)
+print(count, count_with_link, f"{ratio:.1%}", totalhits, len(search_hits))
+
+# ret = core.do_search(from_title)
+# print(ret)
diff --git a/web_view.py b/web_view.py
index 354ee95..0f8a1c6 100755
--- a/web_view.py
+++ b/web_view.py
@@ -289,8 +289,8 @@ def get_best_hit(title: str, hits: list[Hit]) -> tuple[Hit, dict[str, typing.Any
for hit in hits:
if hit["title"].lower() == title.lower():
continue
- # if match_type(title, hit["snippet"]) != "exact":
- # continue
+ if match_type(title, hit["snippet"]) != "exact":
+ continue
try:
print(f'get diff: {hit["title"]}, {title}')