Improve article_title_to_search_query function

Add more detailed docstring
This commit is contained in:
Edward Betts 2023-12-06 20:44:20 +00:00
parent 2c267c67e2
commit dd9b7be198

View file

@@ -35,17 +35,41 @@ class Hit(typing.TypedDict):
timestamp: str timestamp: str
# Matches a title of the form "<main title> (<parenthesised text>)":
# group 1 is everything before the final " (", group 2 is the text inside
# the trailing parentheses. Both groups are greedy, so for a title with
# several parenthesised parts only the last one lands in group 2.
re_disambig = re.compile(r"^(.*) \((.*)\)$")
def load_examples() -> list[dict[str, str | int]]: def load_examples() -> list[dict[str, str | int]]:
"""Load examples.""" """Load examples."""
return [json.loads(line) for line in open("examples")] return [json.loads(line) for line in open("examples")]
def article_title_to_search_query(title: str) -> str:
    """Convert a Wikipedia article title to a search query string.

    If the title ends with disambiguation text in parentheses, the main
    title and the disambiguation text are each enclosed in double quotes
    and joined with ``AND``. Otherwise the whole title is enclosed in
    double quotes and returned as a single phrase.

    Only the final parenthesised part is treated as disambiguation text:
    the match is greedy, so any earlier parentheses stay in the main title.

    NOTE: double quotes that are already part of the title are inserted
    verbatim; no escaping is applied.

    Args:
        title: The Wikipedia article title, possibly including
            disambiguation text in parentheses.

    Returns:
        A formatted search query string. If disambiguation text is present,
        returns '"[main title]" AND "[disambiguation text]"'. Otherwise,
        returns '"[title]"'.

    Example:
        >>> article_title_to_search_query("Python (programming language)")
        '"Python" AND "programming language"'
        >>> article_title_to_search_query("London")
        '"London"'
    """
    m = re.match(r"^(.*) \((.*)\)$", title)
    if m is None:
        # No trailing "(...)" part: search for the title as one phrase.
        return f'"{title}"'

    main_title, disambiguation = m.groups()
    return f'"{main_title}" AND "{disambiguation}"'