Improve article_title_to_search_query function
Add more detailed docstring
This commit is contained in:
parent
2c267c67e2
commit
dd9b7be198
34
web_view.py
34
web_view.py
|
@@ -35,17 +35,41 @@ class Hit(typing.TypedDict):
|
||||||
timestamp: str
|
timestamp: str
|
||||||
|
|
||||||
|
|
||||||
re_disambig = re.compile(r"^(.*) \((.*)\)$")
|
|
||||||
|
|
||||||
|
|
||||||
def load_examples() -> list[dict[str, str | int]]:
|
def load_examples() -> list[dict[str, str | int]]:
|
||||||
"""Load examples."""
|
"""Load examples."""
|
||||||
return [json.loads(line) for line in open("examples")]
|
return [json.loads(line) for line in open("examples")]
|
||||||
|
|
||||||
|
|
||||||
def article_title_to_search_query(title: str) -> str:
|
def article_title_to_search_query(title: str) -> str:
|
||||||
"""Convert from article title to search query string."""
|
"""
|
||||||
m = re_disambig.match(title)
|
Convert a Wikipedia article title to a search query string.
|
||||||
|
|
||||||
|
This function takes a Wikipedia article title and parses it to create a
|
||||||
|
search query. If the title contains disambiguation text in parentheses,
|
||||||
|
it separates the main title and the disambiguation text and formats them
|
||||||
|
into a search query using an AND operator. If there's no disambiguation text,
|
||||||
|
the whole title is used, enclosed in double quotes, as the search query.
|
||||||
|
|
||||||
|
The search query is formatted such that the main title and the disambiguation
|
||||||
|
text (if present) are enclosed in double quotes and connected with 'AND'.
|
||||||
|
This format is useful for precise search engine queries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
title (str): The Wikipedia article title, possibly including disambiguation
|
||||||
|
text in parentheses.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: A formatted search query string. If disambiguation text is present,
|
||||||
|
returns '"[main title]" AND "[disambiguation text]"'. Otherwise,
|
||||||
|
returns '"[title]"'.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> article_title_to_search_query("Python (programming language)")
|
||||||
|
'"Python" AND "programming language"'
|
||||||
|
>>> article_title_to_search_query("London")
|
||||||
|
'"London"'
|
||||||
|
"""
|
||||||
|
m = re.match(r"^(.*) \((.*)\)$", title)
|
||||||
return f'"{m.group(1)}" AND "{m.group(2)}"' if m else f'"{title}"'
|
return f'"{m.group(1)}" AND "{m.group(2)}"' if m else f'"{title}"'
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue