Improve article_title_to_search_query function
Add more detailed docstring
This commit is contained in:
parent
2c267c67e2
commit
dd9b7be198
34
web_view.py
34
web_view.py
|
@ -35,17 +35,41 @@ class Hit(typing.TypedDict):
|
|||
timestamp: str
|
||||
|
||||
|
||||
# Matches a title of the form "Main title (disambiguation text)".
# Group 1 is the text before the final " (...)" suffix; group 2 is the text
# inside that trailing pair of parentheses.
re_disambig = re.compile(r"^(.*) \((.*)\)$")
|
||||
|
||||
|
||||
def load_examples() -> list[dict[str, str | int]]:
|
||||
"""Load examples."""
|
||||
return [json.loads(line) for line in open("examples")]
|
||||
|
||||
|
||||
def article_title_to_search_query(title: str) -> str:
    """Convert a Wikipedia article title to a search query string.

    If the title ends with disambiguation text in parentheses, the main title
    and the disambiguation text are each wrapped in double quotes and joined
    with ``AND``. Otherwise the whole title is wrapped in double quotes.

    Only a trailing `` (...)`` suffix is treated as disambiguation; when the
    title contains several parenthesised parts, the last one is split off.
    Double quotes already present in the title are not escaped.

    Args:
        title: The Wikipedia article title, possibly including disambiguation
            text in parentheses.

    Returns:
        ``'"[main title]" AND "[disambiguation text]"'`` when disambiguation
        text is present, otherwise ``'"[title]"'``.

    Example:
        >>> article_title_to_search_query("Python (programming language)")
        '"Python" AND "programming language"'
        >>> article_title_to_search_query("London")
        '"London"'
    """
    # Greedy group 1 keeps everything up to the final " (" so only the last
    # parenthesised suffix is treated as the disambiguation text.
    m = re.match(r"^(.*) \((.*)\)$", title)
    return f'"{m.group(1)}" AND "{m.group(2)}"' if m else f'"{title}"'
|
||||
|
||||
|
||||
|
|
Loading…
Reference in a new issue