# add-links/add_links/util.py
#
# 116 lines
# 2.6 KiB
# Python
# Raw Normal View History
#
# 2023-10-04 12:56:21 +01:00
"""Util functions."""
import re
import urllib
from typing import Any
# util functions that don't access the network
# Namespace prefixes, stored casefolded so that a title prefix can be matched
# against them case-insensitively.
namespaces = set(
    map(
        str.casefold,
        (
            "Special",
            "Media",
            "Talk",
            "Template",
            "Portal",
            "Portal talk",
            "Book",
            "Book talk",
            "Template talk",
            "Draft",
            "Draft talk",
            "Help",
            "Help talk",
            "Category",
            "Category talk",
            "User",
            "Gadget",
            "Gadget talk",
            "Gadget definition",
            "Gadget definition talk",
            "Topic",
            "User talk",
            "Wikipedia",
            "Education Program",
            "Education Program talk",
            "Wikipedia talk",
            "File",
            "File talk",
            "TimedText",
            "TimedText talk",
            "MediaWiki",
            "Module",
            "Module talk",
            "MediaWiki talk",
        ),
    )
)
# Word separator used when checking a phrase: a single space or hyphen.
re_space_or_dash = re.compile("[ -]")


def is_title_case(phrase: str) -> bool:
    """Check whether a given phrase is in Title Case.

    The phrase is split on spaces and hyphens. Every resulting word that
    starts with a letter must begin with an upper-case character and must not
    contain upper-case characters after it. Empty fragments and words that
    start with a non-letter (such as digits) are ignored, so a phrase without
    any alphabetic words is reported as Title Case.
    """
    return all(
        # Compare the tail with its lower-cased form instead of calling
        # ``str.islower()``: ``islower()`` is False when the tail has no cased
        # characters at all, which wrongly rejected single-letter words
        # ("A", "X") and words such as "A1".
        term[0].isupper() and term[1:] == term[1:].lower()
        for term in re_space_or_dash.split(phrase)
        if term and term[0].isalpha()
    )
def urlquote(value: str) -> str:
    """Prepare string for use in URL param.

    The value is encoded as UTF-8 and percent-escaped with
    ``urllib.parse.quote_plus``, so spaces become ``+`` and reserved
    characters (including ``/``) are escaped.
    """
    # A bare ``import urllib`` does not load the ``urllib.parse`` submodule, so
    # ``urllib.parse`` only resolves if some other module happened to import
    # it first. Import the function explicitly to remove that dependency.
    from urllib.parse import quote_plus

    return quote_plus(value.encode("utf-8"))
def strip_parens(q: str) -> str:
    """Drop a trailing parenthesised suffix from a string.

    The suffix must be preceded by a space and run to the end of the string;
    the string is returned unchanged when there is no such suffix.
    """
    match = re.search(r" \(.*?\)$", q)
    if match is None:
        return q
    return q[: match.start()]
def starts_with_namespace(title: str) -> bool:
    """Report whether the text before the first colon is a known namespace.

    The prefix is compared case-insensitively against ``namespaces``; a title
    without a colon never matches.
    """
    prefix, colon, _rest = title.partition(":")
    return bool(colon) and prefix.casefold() in namespaces
def is_disambig(doc: dict[str, Any]) -> bool:
    """Is this a disambiguation page.

    Looks at the lower-cased ``title`` of each entry in ``doc["templates"]``
    (treated as empty when the key is missing) and returns True as soon as
    one of them contains "disambig" or "given name", ends with "dis", or is
    exactly "template:surname".
    """
    for template in doc.get("templates", []):
        name = template["title"].lower()
        if (
            "disambig" in name
            or name.endswith("dis")
            or "given name" in name
            or name == "template:surname"
        ):
            return True
    return False
def norm(s: str) -> str:
    """Normalise string.

    Removes every non-word character, lower-cases the result and then drops
    a single trailing "s" if there is one.
    """
    cleaned = re.sub(r"\W", "", s).lower()
    if cleaned.endswith("s"):
        return cleaned[:-1]
    return cleaned
def case_flip(s: str) -> str:
    """Switch case of character.

    An all-lower-case string is upper-cased and an all-upper-case string is
    lower-cased; anything else (mixed case, or no cased characters) is
    returned unchanged.
    """
    if s.islower():
        flipped = s.upper()
    elif s.isupper():
        flipped = s.lower()
    else:
        flipped = s
    return flipped
def case_flip_first(s: str) -> str:
    """Switch case of first character in string.

    The rest of the string is left untouched. An empty string is returned
    as-is.
    """
    # Guard against empty input: indexing ``s[0]`` would raise IndexError.
    if not s:
        return s
    return case_flip(s[0]) + s[1:]
def lc_alpha(s: str) -> str:
    """Keep only the alphabetic characters of a string, lower-cased."""
    # Lower-case each character on its own (not the joined string) so the
    # result does not depend on neighbouring characters.
    letters = filter(str.isalpha, s)
    return "".join(map(str.lower, letters))
def wiki_space_norm(s: str) -> str:
    """Normalise article title.

    Underscores are turned into spaces and surrounding whitespace is removed.
    """
    spaced = " ".join(s.split("_"))
    return spaced.strip()