owl-map/matcher/wikidata_api.py

109 lines
2.7 KiB
Python
Raw Normal View History

"""Wikidata API."""
import json
2023-05-14 15:12:36 +01:00
import typing
from typing import cast
2023-05-14 15:12:36 +01:00
import requests
import simplejson.errors
2023-05-14 15:12:36 +01:00
from . import CallParams, user_agent_headers
# Endpoint of the Wikidata web API; api_get() sends every request to this URL.
wd_api_url = "https://www.wikidata.org/w/api.php"

# Type of the "claims" field of an entity: a dict whose values are lists of
# dicts. Presumably keyed by property ID -- verify against API output.
Claims = dict[str, list[dict[str, typing.Any]]]

# Type of the "sitelinks" field of an entity: a dict of dicts.
# Presumably keyed by site ID -- verify against API output.
Sitelinks = dict[str, dict[str, typing.Any]]
class EntityType(typing.TypedDict, total=False):
    """Wikidata Entity.

    Typed view of a single entity dict, as returned by get_entity() and
    yielded by get_entities(). The type is declared with ``total=False``, so
    every key is optional and may be missing from a given dict.
    """

    id: str
    # NOTE(review): the API appears to return the namespace as an integer;
    # confirm whether ``str`` is the intended type here.
    ns: str
    type: str
    pageid: int
    title: str
    # labels/descriptions are presumably keyed by language code -- TODO confirm.
    labels: dict[str, typing.Any]
    descriptions: dict[str, typing.Any]
    claims: Claims
    lastrevid: int
    sitelinks: Sitelinks
    modified: str
    redirects: dict[str, typing.Any]
    # Each value is a list of dicts (one key can map to several aliases).
    aliases: dict[str, list[dict[str, typing.Any]]]
2023-05-14 15:12:36 +01:00
2023-05-14 15:12:36 +01:00
def api_get(
    params: CallParams, timeout: float | None = 60.0
) -> requests.Response:
    """Call the Wikidata API and return the raw HTTP response.

    Args:
        params: Query parameters for the call. They are merged over the
            defaults ``format=json`` and ``formatversion=2``, so a caller can
            override either default by passing the same key.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises ``requests.exceptions.Timeout``. Pass ``None`` to
            disable the timeout.

    Returns:
        The ``requests.Response`` for the GET request. The body is not
        decoded and the HTTP status is not checked here.
    """
    call_params: CallParams = {
        "format": "json",
        "formatversion": 2,
        **params,
    }
    # requests has no default timeout: without one a stalled connection would
    # block the caller forever.
    return requests.get(
        wd_api_url,
        params=call_params,
        headers=user_agent_headers(),
        timeout=timeout,
    )
2023-05-14 15:12:36 +01:00
def get_revision_timestamp(revid: int) -> str:
    """Get the timestamp for the given revid.

    Sends an ``action=query`` / ``prop=revisions`` request for ``revid`` and
    reads the first revision of the first page in the response.

    Args:
        revid: Revision ID to look up. It is compared after ``int()``
            conversion, so a numeric string matches as well.

    Returns:
        The ``timestamp`` value of the revision, as returned by the API.

    Raises:
        AssertionError: If the revision in the response does not have the
            requested ID.
    """
    params: CallParams = {
        "action": "query",
        "prop": "revisions",
        "revids": revid,
        "rvprop": "ids|timestamp",
    }
    r = api_get(params)
    rev = r.json()["query"]["pages"][0]["revisions"][0]
    # Raised explicitly instead of using ``assert`` so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if rev["revid"] != int(revid):
        raise AssertionError(
            f"requested revision {revid}, API returned {rev['revid']}"
        )
    return cast(str, rev["timestamp"])
def get_recent_changes(
    rcstart: str | None = None, rccontinue: str | None = None
) -> requests.Response:
    """Request one batch of recent changes from the Wikidata API.

    The query uses ``list=recentchanges`` with ``rcnamespace=0``,
    ``rclimit=max`` and ``rcdir=newer``.

    Args:
        rcstart: Sent as the ``rcstart`` parameter when it is not ``None``.
        rccontinue: Sent as the ``rccontinue`` parameter when it is not
            ``None``.

    Returns:
        The response from ``api_get``; the body is not decoded here.
    """
    # Properties requested for each change, joined into ``rcprop`` below.
    wanted = (
        "title",
        "ids",
        "comment",
        "parsedcomment",
        "timestamp",
        "redirect",
        "loginfo",
    )
    query: CallParams = {
        "action": "query",
        "list": "recentchanges",
        "rcnamespace": 0,
        "rclimit": "max",
        "rcdir": "newer",
        "rcprop": "|".join(wanted),
    }
    # Optional parameters are only included when the caller supplied them.
    optional = {"rcstart": rcstart, "rccontinue": rccontinue}
    query.update(
        {key: value for key, value in optional.items() if value is not None}
    )
    return api_get(query)
2023-05-14 15:12:36 +01:00
def get_entity(qid: str) -> EntityType:
    """Fetch one Wikidata item by QID with a ``wbgetentities`` API call.

    Args:
        qid: ID of the item to fetch; also the key looked up under
            ``entities`` in the decoded response.

    Returns:
        The entry for ``qid`` from the ``entities`` mapping of the response.

    Raises:
        simplejson.errors.JSONDecodeError: If the response body cannot be
            decoded. The raw text is printed before re-raising.
        KeyError: If the decoded response has no ``entities`` key (the
            response is printed first) or no entry for ``qid``.
    """
    response = api_get({"action": "wbgetentities", "ids": qid})
    try:
        payload = response.json()
    except simplejson.errors.JSONDecodeError:
        # Show the undecodable body to help debugging, then propagate.
        print(response.text)
        raise

    has_entities = "entities" in payload
    if not has_entities:
        # Dump the unexpected reply; the lookup below then fails.
        print(json.dumps(payload, indent=2))

    entity = payload["entities"][qid]
    return cast(EntityType, entity)
2023-05-14 15:12:36 +01:00
def get_entities(ids: list[str]) -> typing.Iterator[tuple[str, EntityType]]:
    """Get Wikidata item entities with the given QIDs.

    Args:
        ids: QIDs to fetch. They are joined with ``|`` and sent in a single
            ``wbgetentities`` request.

    Returns:
        An iterator of ``(qid, entity)`` pairs taken from the ``entities``
        mapping of the response. The iterator is empty when ``ids`` is empty.
    """
    if not ids:
        # Nothing to ask for: skip the request instead of sending ``ids=""``.
        return iter(())

    r = api_get({"action": "wbgetentities", "ids": "|".join(ids)})
    return iter(r.json()["entities"].items())