"""Helpers for fetching Wikidata items and Commons photos, with on-disk caching."""

import hashlib
import json
import os
import subprocess
import time
import typing

import requests

# NOTE(review): both endpoints point at the Wikidata API.  imageinfo queries for
# Commons-hosted files do resolve through Wikidata's foreign file repo, but if
# this was meant to hit Commons directly it should be
# "https://commons.wikimedia.org/w/api.php" -- confirm intent before changing.
commons_url = "https://www.wikidata.org/w/api.php"
wikidata_api = "https://www.wikidata.org/w/api.php"

# Shared session so every request carries the contact User-Agent header.
s = requests.Session()
s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"})


def md5sum(s: str) -> str:
    """Generate hex md5sum."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


def search(q: str) -> list[dict[str, typing.Any]]:
    """Full-text search on Wikidata, caching the raw reply in cache/<md5>.json.

    Returns the list of search-result dicts from the API reply.
    """
    q_md5 = md5sum(q)
    cache_filename = os.path.join("cache", q_md5 + ".json")
    if os.path.exists(cache_filename):
        with open(cache_filename) as f:
            data = json.load(f)
    else:
        params: dict[str, str | int] = {
            "action": "query",
            "list": "search",
            "format": "json",
            "formatversion": 2,
            "srsearch": q,
            "srlimit": "10",
        }
        # Bug fix: was requests.get(), which bypassed the shared session and
        # therefore dropped the User-Agent header.
        r = s.get(wikidata_api, params=params)
        with open(cache_filename, "w") as f:
            f.write(r.text)
        data = r.json()
        time.sleep(1)  # be polite to the API on cache misses
    return typing.cast(list[dict[str, typing.Any]], data["query"]["search"])


def api_image_detail_call(filename: str) -> requests.Response:
    """Call the Commons API for the image URL of File:<filename>."""
    call_params = {
        "format": "json",
        "formatversion": 2,
        "action": "query",
        "prop": "imageinfo",
        "iiprop": "url",
        # Bug fix: the placeholder had been lost from the f-string, so every
        # call asked for the literal title "File:(unknown)".
        "titles": f"File:{filename}",
    }
    return s.get(commons_url, params=call_params, timeout=5)


def get_item(qid: str) -> typing.Any:
    """Fetch one Wikidata entity by QID, caching it in items/<qid>.json."""
    cache_filename = os.path.join("items", qid + ".json")
    if os.path.exists(cache_filename):
        with open(cache_filename) as f:
            item = json.load(f)
    else:
        params: dict[str, str | int] = {
            "action": "wbgetentities",
            "ids": qid,
            "format": "json",
            "formatversion": 2,
        }
        r = s.get(wikidata_api, params=params)
        item = r.json()["entities"][qid]
        with open(cache_filename, "w") as f:
            json.dump(item, f, indent=2)
        time.sleep(0.1)  # throttle uncached entity lookups
    return item


def download_photo(filename: str) -> None:
    """Download the original Commons image to static/wikidata_photo/<filename>."""
    save_to = os.path.join("static", "wikidata_photo", filename)
    r = api_image_detail_call(filename)
    try:
        pages = r.json()["query"]["pages"]
    except requests.exceptions.JSONDecodeError:
        # Surface the non-JSON body (typically an HTML error page) before
        # re-raising so the failure is diagnosable.
        print(r.text)
        raise
    photo = pages[0]["imageinfo"][0]
    photo_url = photo["url"]
    # Retry while the body looks like an HTML error page rather than image
    # bytes.  Bug fix: the original tested startswith(b""), which is always
    # true, so the loop could never exit.  The intended sentinel was
    # presumably b"<" (start of an HTML document) -- TODO confirm upstream.
    while True:
        r = s.get(photo_url)
        if not r.content.startswith(b"<"):
            break
        time.sleep(1)
    with open(save_to, "wb") as out:
        out.write(r.content)
    print(len(r.content), filename)


def get_photo(filename: str) -> None:
    """Ensure the photo and a 1024px-wide thumbnail exist on disk."""
    save_to = os.path.join("static", "wikidata_photo", filename)
    thumb = os.path.join("static", "wikidata_photo", "thumb", filename)
    if not os.path.exists(save_to):
        download_photo(filename)
    if not os.path.exists(thumb):
        subprocess.run(["convert", "-resize", "1024x", save_to, thumb])
        # Only optimise thumbnails we just created; cap size at 1 MiB.
        if filename.endswith(("jpg", "jpeg")):
            subprocess.run(["jpegoptim", "-S1048576", thumb])


# (key, Wikidata property ID, property label, URL template with $1 = raw value;
#  None when the stored value is already a full URL/address.)
wikidata_properties = [
    ("website", "P856", "official website", None),
    ("twitter", "P2002", "Twitter username", "https://twitter.com/$1"),
    ("github", "P2037", "GitHub username", "https://github.com/$1"),
    (
        "linkedin",
        "P6634",
        "LinkedIn personal profile ID",
        "https://www.linkedin.com/in/$1/",
    ),
    ("mastodon_address", "P4033", "Mastodon address", None),
    ("dblp", "P2456", "DBLP author ID", "https://dblp.org/pid/$1"),
    ("blog_url", "P1581", "official blog URL", None),
    (
        "hacker_news",
        "P7171",
        "Hacker News username",
        "https://news.ycombinator.com/user?id=$1",
    ),
    ("reddit", "P4265", "Reddit username", "https://www.reddit.com/user/$1"),
]