From cd001dd467efa7b3391f53b7a77a9a1cfabf3f22 Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Sun, 24 Sep 2023 15:50:53 +0100
Subject: [PATCH] Code to search wikidata

---
 confarchive/wikidata.py | 65 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

diff --git a/confarchive/wikidata.py b/confarchive/wikidata.py
index 2191f0f..cb73528 100644
--- a/confarchive/wikidata.py
+++ b/confarchive/wikidata.py
@@ -1,3 +1,4 @@
+import hashlib
 import json
 import os
 import subprocess
@@ -13,6 +14,44 @@ s = requests.Session()
 s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"})
 
 
+def md5sum(s: str) -> str:
+    """Generate hex md5sum."""
+    return hashlib.md5(s.encode("utf-8")).hexdigest()
+
+
+def search(q: str) -> list[dict[str, typing.Any]]:
+    """Search Wikidata for q and return the list of search results.
+
+    The raw API reply is cached in cache/<md5 of q>.json, so repeating a
+    query reads the file instead of calling the API again.
+    """
+    cache_filename = os.path.join("cache", md5sum(q) + ".json")
+
+    if os.path.exists(cache_filename):
+        with open(cache_filename, encoding="utf-8") as f:
+            data = json.load(f)
+    else:
+        params: dict[str, str | int] = {
+            "action": "query",
+            "list": "search",
+            "format": "json",
+            "formatversion": 2,
+            "srsearch": q,
+            "srlimit": "10",
+        }
+        # Go through the module session so the request carries our User-Agent.
+        r = s.get(wikidata_api, params=params)
+        # Fail before caching so an HTTP error body is never stored as a result.
+        r.raise_for_status()
+        data = r.json()
+        os.makedirs(os.path.dirname(cache_filename), exist_ok=True)
+        with open(cache_filename, "w", encoding="utf-8") as f:
+            f.write(r.text)
+        time.sleep(1)  # pause after a live API request
+
+    return typing.cast(list[dict[str, typing.Any]], data["query"]["search"])
+
+
 def api_image_detail_call(filename: str) -> requests.Response:
     """Call the Commons API."""
     call_params = {
@@ -75,3 +114,29 @@ def get_photo(filename: str) -> None:
     subprocess.run(["convert", "-resize", "1024x", save_to, thumb])
     if filename.endswith("jpg") or filename.endswith("jpeg"):
         subprocess.run(["jpegoptim", "-S1048576", thumb])
+
+
+# Wikidata properties of interest, as tuples of
+# (name, property ID, property label, URL pattern or None).
+# NOTE(review): "$1" in a pattern is presumably replaced by the property value.
+wikidata_properties = [
+    ("website", "P856", "official website", None),
+    ("twitter", "P2002", "Twitter username", "https://twitter.com/$1"),
+    ("github", "P2037", "GitHub username", "https://github.com/$1"),
+    (
+        "linkedin",
+        "P6634",
+        "LinkedIn personal profile ID",
+        "https://www.linkedin.com/in/$1/",
+    ),
+    ("mastodon_address", "P4033", "Mastodon address", None),
+    ("dblp", "P2456", "DBLP author ID", "https://dblp.org/pid/$1"),
+    ("blog_url", "P1581", "official blog URL", None),
+    (
+        "hacker_news",
+        "P7171",
+        "Hacker News username",
+        "https://news.ycombinator.com/user?id=$1",
+    ),
+    ("reddit", "P4265", "Reddit username", "https://www.reddit.com/user/$1"),
+]