"""Fetch Wikidata items and Commons photos, caching the results on disk."""

import json
import os
import subprocess
import time
import typing

import requests

# NOTE(review): commons_url points at the wikidata.org host, same as
# wikidata_api. Left unchanged; confirm this is intentional.
commons_url = "https://www.wikidata.org/w/api.php"
wikidata_api = "https://www.wikidata.org/w/api.php"

s = requests.Session()
s.headers.update({"User-Agent": "conference-archive/0.1 (contact: edward@4angle.com)"})

# Timeouts (seconds) so a stalled connection cannot hang the caller forever.
_API_TIMEOUT = 5
_DOWNLOAD_TIMEOUT = 30

# Upper bound on photo download attempts before giving up.
_MAX_DOWNLOAD_ATTEMPTS = 10

# Leading bytes (lower-cased) that mark a response body as an HTML page.
_HTML_PREFIXES = (b"<!doctype html", b"<html")


def _looks_like_html(content: bytes) -> bool:
    """Return True if *content* starts like an HTML document, not image data."""
    head = content.lstrip()[:32].lower()
    return head.startswith(_HTML_PREFIXES)


def api_image_detail_call(filename: str) -> requests.Response:
    """Call the Commons API.

    Requests the ``imageinfo`` property (URL only) for ``File:{filename}``
    and returns the raw response without inspecting it.
    """
    call_params = {
        "format": "json",
        "formatversion": 2,
        "action": "query",
        "prop": "imageinfo",
        "iiprop": "url",
        "titles": f"File:{filename}",
    }
    return s.get(commons_url, params=call_params, timeout=_API_TIMEOUT)


def get_item(qid: str) -> typing.Any:
    """Return the entity data for *qid*, using ``items/<qid>.json`` as a cache.

    On a cache miss the entity is fetched with ``wbgetentities``, written to
    the cache file, and a short pause is taken before returning.
    """
    cache_filename = os.path.join("items", qid + ".json")

    if os.path.exists(cache_filename):
        # Context manager so the cache file handle is closed promptly.
        with open(cache_filename, encoding="utf-8") as f:
            return json.load(f)

    params: dict[str, str | int] = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "formatversion": 2,
    }
    r = s.get(wikidata_api, params=params, timeout=_API_TIMEOUT)
    item = r.json()["entities"][qid]
    with open(cache_filename, "w", encoding="utf-8") as f:
        json.dump(item, f, indent=2)
    # Brief pause after a network fetch (presumably to be gentle on the API).
    time.sleep(0.1)

    return item


def download_photo(filename: str) -> None:
    """Download *filename* into ``static/wikidata_photo``.

    The file URL is looked up via :func:`api_image_detail_call`. The download
    is retried (one second apart) while the server answers with an HTML page
    instead of file data.

    Raises:
        requests.exceptions.JSONDecodeError: the API response was not JSON
            (the response text is printed before re-raising).
        RuntimeError: every download attempt returned an HTML page.
    """
    save_to = os.path.join("static", "wikidata_photo", filename)
    r = api_image_detail_call(filename)
    try:
        pages = r.json()["query"]["pages"]
    except requests.exceptions.JSONDecodeError:
        print(r.text)
        raise
    photo_url = pages[0]["imageinfo"][0]["url"]

    # The previous check was ``startswith(b"")``, which is always true, so the
    # loop could never exit. Presumably the intended marker was an HTML
    # doctype; retry only while the body looks like an HTML page.
    for attempt in range(_MAX_DOWNLOAD_ATTEMPTS):
        if attempt:
            time.sleep(1)
        r = s.get(photo_url, timeout=_DOWNLOAD_TIMEOUT)
        if not _looks_like_html(r.content):
            break
    else:
        raise RuntimeError(
            f"giving up on {filename}: received an HTML page "
            f"{_MAX_DOWNLOAD_ATTEMPTS} times from {photo_url}"
        )

    with open(save_to, "wb") as out:
        out.write(r.content)
    print(len(r.content), filename)


def get_photo(filename: str) -> None:
    """Ensure the photo and its thumbnail exist locally.

    Downloads the original if ``static/wikidata_photo/<filename>`` is missing,
    then, if the thumbnail is missing, creates a 1024px-wide one with
    ``convert`` and shrinks JPEG thumbnails with ``jpegoptim``.
    """
    save_to = os.path.join("static", "wikidata_photo", filename)
    thumb = os.path.join("static", "wikidata_photo", "thumb", filename)

    if not os.path.exists(save_to):
        download_photo(filename)

    if os.path.exists(thumb):
        return

    subprocess.run(["convert", "-resize", "1024x", save_to, thumb])
    if filename.endswith(("jpg", "jpeg")):
        # -S1048576 is passed through unchanged as jpegoptim's size target.
        subprocess.run(["jpegoptim", "-S1048576", thumb])