Improvements
This commit is contained in:
parent
6c16b87fff
commit
5ac6260d60
|
@ -8,12 +8,14 @@ import time
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import flask
|
||||||
|
|
||||||
commons_url = "https://www.wikidata.org/w/api.php"
|
commons_url = "https://www.wikidata.org/w/api.php"
|
||||||
wikidata_api = "https://www.wikidata.org/w/api.php"
|
wikidata_api = "https://www.wikidata.org/w/api.php"
|
||||||
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
|
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
|
||||||
|
|
||||||
CallParams = dict[str, str | int]
|
CallParams = dict[str, str | int]
|
||||||
|
SearchHit = dict[str, typing.Any]
|
||||||
|
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
s.headers.update({"User-Agent": user_agent})
|
s.headers.update({"User-Agent": user_agent})
|
||||||
|
@ -24,15 +26,26 @@ def md5sum(s: str) -> str:
|
||||||
return hashlib.md5(s.encode("utf-8")).hexdigest()
|
return hashlib.md5(s.encode("utf-8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def search(q: str) -> list[dict[str, typing.Any]]:
|
def get_cache_filename(q: str) -> str:
|
||||||
"""Search Wikidata with caching."""
|
"""Cache filename for query."""
|
||||||
q_md5 = md5sum(q)
|
q_md5 = md5sum(q)
|
||||||
|
data_dir = flask.current_app.config["DATA_DIR"]
|
||||||
|
return os.path.join(data_dir, "cache", q_md5 + ".json")
|
||||||
|
|
||||||
cache_filename = os.path.join("cache", q_md5 + ".json")
|
|
||||||
|
def get_item_filename(qid: str) -> str:
    """Filename of the stored JSON for a Wikidata item.

    The path is ``<DATA_DIR>/item/<qid>.json``, where ``DATA_DIR`` is read
    from the current Flask application's config.
    """
    config = flask.current_app.config
    path_parts = (config["DATA_DIR"], "item", qid + ".json")
    return os.path.join(*path_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def search(q: str) -> list[SearchHit]:
|
||||||
|
"""Search Wikidata with caching."""
|
||||||
|
cache_filename = get_cache_filename(q)
|
||||||
|
|
||||||
if os.path.exists(cache_filename):
|
if os.path.exists(cache_filename):
|
||||||
data = json.load(open(cache_filename))
|
data = json.load(open(cache_filename))
|
||||||
else:
|
return typing.cast(list[SearchHit], data["query"]["search"])
|
||||||
|
|
||||||
params: dict[str, str | int] = {
|
params: dict[str, str | int] = {
|
||||||
"action": "query",
|
"action": "query",
|
||||||
"list": "search",
|
"list": "search",
|
||||||
|
@ -44,9 +57,8 @@ def search(q: str) -> list[dict[str, typing.Any]]:
|
||||||
r = requests.get(wikidata_api, params=params)
|
r = requests.get(wikidata_api, params=params)
|
||||||
open(cache_filename, "w").write(r.text)
|
open(cache_filename, "w").write(r.text)
|
||||||
data = r.json()
|
data = r.json()
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
return typing.cast(list[dict[str, typing.Any]], data["query"]["search"])
|
return typing.cast(list[SearchHit], data["query"]["search"])
|
||||||
|
|
||||||
|
|
||||||
def api_image_detail_call(filename: str) -> requests.Response:
|
def api_image_detail_call(filename: str) -> requests.Response:
|
||||||
|
@ -65,9 +77,9 @@ def api_image_detail_call(filename: str) -> requests.Response:
|
||||||
|
|
||||||
def get_item(qid: str) -> typing.Any:
|
def get_item(qid: str) -> typing.Any:
|
||||||
"""Get an item from Wikidata."""
|
"""Get an item from Wikidata."""
|
||||||
cache_filename = os.path.join("items", qid + ".json")
|
item_filename = get_item_filename(qid)
|
||||||
if os.path.exists(cache_filename):
|
if os.path.exists(item_filename):
|
||||||
item = json.load(open(cache_filename))
|
item = json.load(open(item_filename))
|
||||||
else:
|
else:
|
||||||
params: dict[str, str | int] = {
|
params: dict[str, str | int] = {
|
||||||
"action": "wbgetentities",
|
"action": "wbgetentities",
|
||||||
|
@ -77,9 +89,8 @@ def get_item(qid: str) -> typing.Any:
|
||||||
}
|
}
|
||||||
r = s.get(wikidata_api, params=params)
|
r = s.get(wikidata_api, params=params)
|
||||||
item = r.json()["entities"][qid]
|
item = r.json()["entities"][qid]
|
||||||
with open(cache_filename, "w") as f:
|
with open(item_filename, "w") as f:
|
||||||
json.dump(item, f, indent=2)
|
json.dump(item, f, indent=2)
|
||||||
time.sleep(0.1)
|
|
||||||
return item
|
return item
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue