Improvements

This commit is contained in:
Edward Betts 2023-09-25 18:38:02 +01:00
parent 6c16b87fff
commit 5ac6260d60

View file

@ -8,12 +8,14 @@ import time
import typing import typing
import requests import requests
import flask
commons_url = "https://www.wikidata.org/w/api.php" commons_url = "https://www.wikidata.org/w/api.php"
wikidata_api = "https://www.wikidata.org/w/api.php" wikidata_api = "https://www.wikidata.org/w/api.php"
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)" user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
CallParams = dict[str, str | int] CallParams = dict[str, str | int]
SearchHit = dict[str, typing.Any]
s = requests.Session() s = requests.Session()
s.headers.update({"User-Agent": user_agent}) s.headers.update({"User-Agent": user_agent})
@ -24,29 +26,39 @@ def md5sum(s: str) -> str:
return hashlib.md5(s.encode("utf-8")).hexdigest() return hashlib.md5(s.encode("utf-8")).hexdigest()
def search(q: str) -> list[dict[str, typing.Any]]: def get_cache_filename(q: str) -> str:
"""Search Wikidata with caching.""" """Cache filename for query."""
q_md5 = md5sum(q) q_md5 = md5sum(q)
data_dir = flask.current_app.config["DATA_DIR"]
return os.path.join(data_dir, "cache", q_md5 + ".json")
cache_filename = os.path.join("cache", q_md5 + ".json")
def get_item_filename(qid: str) -> str:
data_dir = flask.current_app.config["DATA_DIR"]
return os.path.join(data_dir, "item", qid + ".json")
def search(q: str) -> list[SearchHit]:
"""Search Wikidata with caching."""
cache_filename = get_cache_filename(q)
if os.path.exists(cache_filename): if os.path.exists(cache_filename):
data = json.load(open(cache_filename)) data = json.load(open(cache_filename))
else: return typing.cast(list[SearchHit], data["query"]["search"])
params: dict[str, str | int] = {
"action": "query",
"list": "search",
"format": "json",
"formatversion": 2,
"srsearch": q,
"srlimit": "10",
}
r = requests.get(wikidata_api, params=params)
open(cache_filename, "w").write(r.text)
data = r.json()
time.sleep(1)
return typing.cast(list[dict[str, typing.Any]], data["query"]["search"]) params: dict[str, str | int] = {
"action": "query",
"list": "search",
"format": "json",
"formatversion": 2,
"srsearch": q,
"srlimit": "10",
}
r = requests.get(wikidata_api, params=params)
open(cache_filename, "w").write(r.text)
data = r.json()
return typing.cast(list[SearchHit], data["query"]["search"])
def api_image_detail_call(filename: str) -> requests.Response: def api_image_detail_call(filename: str) -> requests.Response:
@ -65,9 +77,9 @@ def api_image_detail_call(filename: str) -> requests.Response:
def get_item(qid: str) -> typing.Any: def get_item(qid: str) -> typing.Any:
"""Get an item from Wikidata.""" """Get an item from Wikidata."""
cache_filename = os.path.join("items", qid + ".json") item_filename = get_item_filename(qid)
if os.path.exists(cache_filename): if os.path.exists(item_filename):
item = json.load(open(cache_filename)) item = json.load(open(item_filename))
else: else:
params: dict[str, str | int] = { params: dict[str, str | int] = {
"action": "wbgetentities", "action": "wbgetentities",
@ -77,9 +89,8 @@ def get_item(qid: str) -> typing.Any:
} }
r = s.get(wikidata_api, params=params) r = s.get(wikidata_api, params=params)
item = r.json()["entities"][qid] item = r.json()["entities"][qid]
with open(cache_filename, "w") as f: with open(item_filename, "w") as f:
json.dump(item, f, indent=2) json.dump(item, f, indent=2)
time.sleep(0.1)
return item return item