"""Wikidata item and photo helpers for the conference archive.

Reconstructed from the patch "Add missing code." (commit 6a0e7644, Edward
Betts, 23 Sep 2023), which creates ``confarchive/wikidata.py``.
"""

import json
import os
import subprocess
import time
import typing

import requests

# NOTE(review): despite its name this points at the Wikidata endpoint rather
# than commons.wikimedia.org. Left unchanged; confirm this is intentional.
commons_url = "https://www.wikidata.org/w/api.php"
wikidata_api = "https://www.wikidata.org/w/api.php"

# One shared session so every request carries the identifying User-Agent.
s = requests.Session()
s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"})


def api_image_detail_call(filename: str) -> requests.Response:
    """Query the MediaWiki API at ``commons_url`` for the URL of an image.

    Args:
        filename: File name without the ``File:`` namespace prefix.

    Returns:
        The raw HTTP response of the ``prop=imageinfo`` query.
    """
    call_params = {
        "format": "json",
        "formatversion": 2,
        "action": "query",
        "prop": "imageinfo",
        "iiprop": "url",
        "titles": f"File:{filename}",
    }

    return s.get(commons_url, params=call_params, timeout=5)


def get_item(qid: str) -> typing.Any:
    """Return the Wikidata entity for *qid*, using an on-disk JSON cache.

    The cache lives in ``items/<qid>.json`` relative to the current working
    directory. On a cache miss the entity is fetched with ``wbgetentities``,
    written to the cache, and the function pauses briefly before returning.

    Args:
        qid: Entity ID, used both as the API ``ids`` value and as the key
            looked up in the response's ``entities`` mapping.

    Returns:
        The decoded JSON entity.
    """
    cache_filename = os.path.join("items", qid + ".json")
    if os.path.exists(cache_filename):
        # Context manager so the cache file handle is closed promptly.
        with open(cache_filename) as f:
            return json.load(f)

    params: dict[str, str | int] = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "formatversion": 2,
    }
    # Timeout matches api_image_detail_call so a stalled connection cannot
    # hang the caller forever.
    r = s.get(wikidata_api, params=params, timeout=5)
    item = r.json()["entities"][qid]
    with open(cache_filename, "w") as f:
        json.dump(item, f, indent=2)
    # Short pause after a real API hit; cache hits return without sleeping.
    # NOTE(review): nesting inferred, the source's indentation was lost.
    time.sleep(0.1)
    return item


def _is_html_page(content: bytes) -> bool:
    """Return True if *content* starts with an HTML doctype declaration."""
    return content.lstrip()[:14].lower() == b"<!doctype html"


def download_photo(filename: str) -> None:
    """Download an image into ``static/wikidata_photo/<filename>``.

    The image URL is looked up via :func:`api_image_detail_call`. The download
    is retried once per second for as long as the server answers with an HTML
    page instead of the file.

    Args:
        filename: File name without the ``File:`` namespace prefix.

    Raises:
        requests.exceptions.JSONDecodeError: The API response was not JSON;
            the response text is printed before re-raising.
    """
    save_to = os.path.join("static", "wikidata_photo", filename)
    r = api_image_detail_call(filename)
    try:
        pages = r.json()["query"]["pages"]
    except requests.exceptions.JSONDecodeError:
        print(r.text)
        raise
    photo = pages[0]["imageinfo"][0]
    photo_url = photo["url"]
    while True:
        r = s.get(photo_url, timeout=30)
        # The source compared against b"", which every bytes value starts
        # with, so the loop could never exit. NOTE(review): the intended
        # marker is presumed to be an HTML doctype (an error page served in
        # place of the image) - confirm against the upstream repository.
        if not _is_html_page(r.content):
            break
        time.sleep(1)
    with open(save_to, "wb") as out:
        out.write(r.content)
    print(len(r.content), filename)


def get_photo(filename: str) -> None:
    """Ensure the photo and its 1024px-wide thumbnail exist on disk.

    Downloads the original into ``static/wikidata_photo`` if it is missing,
    then creates ``static/wikidata_photo/thumb/<filename>`` with ImageMagick
    ``convert`` and, for JPEG names, runs ``jpegoptim`` on the thumbnail.

    Args:
        filename: File name without the ``File:`` namespace prefix.
    """
    save_to = os.path.join("static", "wikidata_photo", filename)
    thumb = os.path.join("static", "wikidata_photo", "thumb", filename)
    if not os.path.exists(save_to):
        download_photo(filename)
    if not os.path.exists(thumb):
        subprocess.run(["convert", "-resize", "1024x", save_to, thumb])
        # NOTE(review): the source's indentation was lost; optimising only
        # freshly created thumbnails is assumed - confirm.
        if filename.endswith(("jpg", "jpeg")):
            subprocess.run(["jpegoptim", "-S1048576", thumb])