Code to search Wikidata

This commit is contained in:
Edward Betts 2023-09-24 15:50:53 +01:00
parent eb337fb7e9
commit cd001dd467

View file

@ -1,3 +1,4 @@
import hashlib
import json import json
import os import os
import subprocess import subprocess
@ -13,6 +14,35 @@ s = requests.Session()
s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"}) s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"})
def md5sum(s: str) -> str:
    """Return the hexadecimal MD5 digest of *s*, encoded as UTF-8.

    The digest is used as a stable identifier, not for security, so the hash
    is created with ``usedforsecurity=False``; this keeps the call working on
    Python builds that restrict MD5 (e.g. FIPS mode).
    """
    return hashlib.md5(s.encode("utf-8"), usedforsecurity=False).hexdigest()
def search(q: str) -> list[dict[str, typing.Any]]:
    """Search Wikidata for *q* and return the list of search hits.

    Responses are cached on disk as ``cache/<md5 of q>.json``; a query that
    already has a cache file is answered from it without contacting the API.

    Args:
        q: Search string, sent to the API as ``srsearch``.

    Returns:
        The ``query.search`` list from the API response (``srlimit`` is 10).

    Raises:
        requests.RequestException: On timeout, connection failure or an HTTP
            error status.
        KeyError: If the response carries no ``query.search`` member.
    """
    cache_filename = os.path.join("cache", md5sum(q) + ".json")
    if os.path.exists(cache_filename):
        with open(cache_filename, encoding="utf-8") as f:
            data = json.load(f)
    else:
        params: dict[str, str | int] = {
            "action": "query",
            "list": "search",
            "format": "json",
            "formatversion": 2,
            "srsearch": q,
            "srlimit": "10",
        }
        # Use the module session so the request carries its User-Agent header.
        r = s.get(wikidata_api, params=params, timeout=30)
        r.raise_for_status()
        # Parse before caching so a non-JSON body is never written to disk.
        data = r.json()
        if "query" in data:
            # Only cache usable results; an API error payload would otherwise
            # be replayed from the cache on every later call.
            os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
            with open(cache_filename, "w", encoding="utf-8") as f:
                f.write(r.text)
        # Pause after every live request to stay polite to the API.
        time.sleep(1)

    return typing.cast(list[dict[str, typing.Any]], data["query"]["search"])
def api_image_detail_call(filename: str) -> requests.Response: def api_image_detail_call(filename: str) -> requests.Response:
"""Call the Commons API.""" """Call the Commons API."""
call_params = { call_params = {
@ -75,3 +105,26 @@ def get_photo(filename: str) -> None:
subprocess.run(["convert", "-resize", "1024x", save_to, thumb]) subprocess.run(["convert", "-resize", "1024x", save_to, thumb])
if filename.endswith("jpg") or filename.endswith("jpeg"): if filename.endswith("jpg") or filename.endswith("jpeg"):
subprocess.run(["jpegoptim", "-S1048576", thumb]) subprocess.run(["jpegoptim", "-S1048576", thumb])
# Wikidata properties of interest, one tuple per property:
#   (short key, property ID, property label, URL template or None)
# Where a URL template is given it contains a "$1" placeholder.
# NOTE(review): presumably "$1" is replaced with the property value to build a
# link, and None means the value is used as-is -- confirm against the caller.
wikidata_properties = [
    (
        "website",
        "P856",
        "official website",
        None,
    ),
    (
        "twitter",
        "P2002",
        "Twitter username",
        "https://twitter.com/$1",
    ),
    (
        "github",
        "P2037",
        "GitHub username",
        "https://github.com/$1",
    ),
    (
        "linkedin",
        "P6634",
        "LinkedIn personal profile ID",
        "https://www.linkedin.com/in/$1/",
    ),
    (
        "mastodon_address",
        "P4033",
        "Mastodon address",
        None,
    ),
    (
        "dblp",
        "P2456",
        "DBLP author ID",
        "https://dblp.org/pid/$1",
    ),
    (
        "blog_url",
        "P1581",
        "official blog URL",
        None,
    ),
    (
        "hacker_news",
        "P7171",
        "Hacker News username",
        "https://news.ycombinator.com/user?id=$1",
    ),
    (
        "reddit",
        "P4265",
        "Reddit username",
        "https://www.reddit.com/user/$1",
    ),
]