owl-map/matcher/commons.py

58 lines
1.6 KiB
Python
Raw Permalink Normal View History

2023-05-13 20:57:58 +01:00
"""Use mediawiki API to look up images on Wikimedia Commons."""
2021-05-08 09:39:06 +01:00
import urllib.parse
2023-05-14 10:07:14 +01:00
from typing import Any
2023-05-13 20:57:58 +01:00
import requests
2023-05-14 10:07:14 +01:00
from . import CallParams, utils
2021-05-08 09:39:06 +01:00
# URI prefix that commons_uri_to_filename strips to obtain a filename.
commons_start = "http://commons.wikimedia.org/wiki/Special:FilePath/"
# MediaWiki API endpoint that api_call sends its requests to.
# NOTE(review): this points at www.wikidata.org rather than
# commons.wikimedia.org -- confirm that is intentional.
commons_url = "https://www.wikidata.org/w/api.php"
# Number of filenames image_detail puts in a single API request.
page_size = 50
2023-05-13 20:57:58 +01:00
def commons_uri_to_filename(uri: str) -> str:
    """Convert the URI of a Commons file into the file's name.

    The ``commons_start`` prefix is removed with ``utils.drop_start`` and the
    remaining text is percent-decoded.
    """
    quoted_name = utils.drop_start(uri, commons_start)
    return urllib.parse.unquote(quoted_name)
2023-05-14 10:07:14 +01:00
def api_call(params: CallParams) -> requests.Response:
    """Send a GET request to the API endpoint in ``commons_url``.

    ``format=json`` and ``formatversion=2`` are supplied as defaults; any
    entry in ``params`` with the same key takes precedence. The request uses
    a timeout of 5 and the raw response is returned without any status check.
    """
    query: CallParams = {"format": "json", "formatversion": 2}
    # Caller-supplied values are applied last so they override the defaults.
    query.update(params)
    return requests.get(commons_url, params=query, timeout=5)
2023-05-14 10:07:14 +01:00
def image_detail(
    filenames: list[str], thumbheight: int | None = None, thumbwidth: int | None = None
) -> dict[str, Any]:
    """Fetch image info for a list of filenames.

    Filenames are sent to the API in batches of ``page_size``, each prefixed
    with ``File:``. When given, ``thumbheight`` and ``thumbwidth`` are passed
    as ``iiurlheight`` and ``iiurlwidth``.

    Returns a dict keyed by the title reported in the response (with the
    ``File:`` prefix dropped). Each value is the first ``imageinfo`` entry of
    that page, or ``None`` when the page has no ``imageinfo`` key.
    """
    base_params: CallParams = {"action": "query", "prop": "imageinfo", "iiprop": "url"}

    # Only include the thumbnail options the caller actually supplied.
    for option, size in (("iiurlheight", thumbheight), ("iiurlwidth", thumbwidth)):
        if size is not None:
            base_params[option] = size

    details: dict[str, Any] = {}
    for batch in utils.chunk(filenames, page_size):
        titles = "|".join("File:" + name for name in batch)
        response = api_call({**base_params, "titles": titles})
        for page in response.json()["query"]["pages"]:
            name = utils.drop_start(page["title"], "File:")
            if "imageinfo" in page:
                details[name] = page["imageinfo"][0]
            else:
                details[name] = None

    return details