2023-11-01 20:54:19 +00:00
|
|
|
"""Wikidata API."""
|
|
|
|
|
2021-05-08 10:05:00 +01:00
|
|
|
import json
|
2023-05-14 15:12:36 +01:00
|
|
|
import typing
|
2023-11-01 20:54:19 +00:00
|
|
|
from typing import cast
|
2023-05-14 15:12:36 +01:00
|
|
|
|
|
|
|
import requests
|
2021-06-20 13:47:04 +01:00
|
|
|
import simplejson.errors
|
2021-05-08 10:05:00 +01:00
|
|
|
|
2023-05-14 15:12:36 +01:00
|
|
|
from . import CallParams, user_agent_headers
|
|
|
|
|
2021-05-08 10:05:00 +01:00
|
|
|
# Wikidata API endpoint; api_get() sends every request to this URL.
wd_api_url = "https://www.wikidata.org/w/api.php"
|
|
|
|
|
2023-11-01 20:54:19 +00:00
|
|
|
# Type of EntityType's "claims" field: a mapping from string keys to lists of
# free-form dicts (presumably property IDs to statements - verify against the API).
Claims = dict[str, list[dict[str, typing.Any]]]
|
|
|
|
# Type of EntityType's "sitelinks" field: a mapping from string keys to
# free-form dicts (presumably site names to link details - verify against the API).
Sitelinks = dict[str, dict[str, typing.Any]]
|
|
|
|
|
|
|
|
|
|
|
|
class EntityType(typing.TypedDict, total=False):
    """Wikidata Entity.

    Typed view of an entity dict; get_entity() casts its result to this type
    and get_entities() yields values annotated with it. The class is declared
    with total=False, so every key is optional.
    """

    id: str
    # NOTE(review): annotated as str, but the API appears to return the
    # namespace as an integer - confirm before relying on this type.
    ns: str
    type: str
    pageid: int
    title: str
    # presumably keyed by language code - verify against the API response
    labels: dict[str, typing.Any]
    # presumably keyed by language code - verify against the API response
    descriptions: dict[str, typing.Any]
    claims: Claims
    lastrevid: int
    sitelinks: Sitelinks
    # kept as the raw string from the response; not parsed into a datetime here
    modified: str
    redirects: dict[str, typing.Any]
    aliases: dict[str, list[dict[str, typing.Any]]]
|
2023-05-14 15:12:36 +01:00
|
|
|
|
2021-05-08 10:05:00 +01:00
|
|
|
|
2023-05-14 15:12:36 +01:00
|
|
|
def api_get(params: CallParams, timeout: float | None = 60) -> requests.Response:
    """Call the wikidata API.

    Sends a GET request to ``wd_api_url``. ``format=json`` and
    ``formatversion=2`` are supplied as defaults; any key in ``params``
    overrides the default of the same name because ``params`` is unpacked last.

    :param params: query-string parameters for the API call.
    :param timeout: seconds to wait for the server before giving up; pass
        ``None`` to wait indefinitely (the previous behaviour).
    :return: the raw ``requests.Response``; the status code is not checked.
    :raises requests.exceptions.Timeout: if the server does not respond in time.
    """
    call_params: CallParams = {
        "format": "json",
        "formatversion": 2,
        **params,
    }
    # Without a timeout, requests waits forever on a stalled connection.
    return requests.get(
        wd_api_url,
        params=call_params,
        headers=user_agent_headers(),
        timeout=timeout,
    )
|
2021-05-08 10:05:00 +01:00
|
|
|
|
|
|
|
|
2023-05-14 15:12:36 +01:00
|
|
|
def get_revision_timestamp(revid: int) -> str:
    """Get the timestamp for the given revid.

    Queries the API for the revision and returns the ``timestamp`` value of
    the first revision of the first page in the response.
    """
    query: CallParams = {
        "action": "query",
        "prop": "revisions",
        "revids": revid,
        "rvprop": "ids|timestamp",
    }
    reply = api_get(query).json()
    first_page = reply["query"]["pages"][0]
    revision = first_page["revisions"][0]
    # Sanity check: the API must hand back the revision that was asked for.
    assert revision["revid"] == int(revid)
    return cast(str, revision["timestamp"])
|
2021-05-08 10:05:00 +01:00
|
|
|
|
|
|
|
|
2023-05-14 15:22:37 +01:00
|
|
|
def get_recent_changes(
    rcstart: str | None = None, rccontinue: str | None = None
) -> requests.Response:
    """Get list of recent changes.

    Requests recent changes in namespace 0, oldest first, with the maximum
    allowed batch size. ``rcstart`` and ``rccontinue`` are only sent when they
    are not ``None``. The raw response is returned without being parsed.
    """
    rcprop = "|".join(
        (
            "title",
            "ids",
            "comment",
            "parsedcomment",
            "timestamp",
            "redirect",
            "loginfo",
        )
    )

    query: CallParams = {
        "action": "query",
        "list": "recentchanges",
        "rcnamespace": 0,
        "rclimit": "max",
        "rcdir": "newer",
        "rcprop": rcprop,
    }
    # Optional parameters are appended in a fixed order, skipping unset ones.
    for key, value in (("rcstart", rcstart), ("rccontinue", rccontinue)):
        if value is not None:
            query[key] = value

    return api_get(query)
|
|
|
|
|
|
|
|
|
2023-05-14 15:12:36 +01:00
|
|
|
def get_entity(qid: str) -> EntityType:
    """Retrieve a Wikidata item with the given QID using the API.

    The entity is looked up under ``qid`` in the response's ``entities``
    mapping. Diagnostic output is printed when the body is not valid JSON
    (the decode error is then re-raised) or when ``entities`` is absent
    (the lookup below then raises ``KeyError``).
    """
    response = api_get({"action": "wbgetentities", "ids": qid})
    try:
        payload = response.json()
    except simplejson.errors.JSONDecodeError:
        # Show the raw body so the failure can be diagnosed, then propagate.
        print(response.text)
        raise
    if "entities" not in payload:
        # Dump the unexpected payload before the lookup fails.
        print(json.dumps(payload, indent=2))
    entities = payload["entities"]
    return cast(EntityType, entities[qid])
|
2021-05-08 10:05:00 +01:00
|
|
|
|
|
|
|
|
2023-05-14 15:12:36 +01:00
|
|
|
def get_entities(ids: list[str]) -> typing.Iterator[tuple[str, EntityType]]:
    """Get Wikidata item entities with the given QIDs.

    All IDs are sent in a single ``wbgetentities`` request, joined with ``|``.
    The request is made when this function is called, not when the result is
    iterated.

    :param ids: entity IDs to fetch; an empty list yields nothing and makes
        no API request.
    :return: iterator of ``(id, entity)`` pairs taken from the response's
        ``entities`` mapping.
    :raises KeyError: if the response has no ``entities`` key.
    """
    if not ids:
        # Nothing to look up: a request with an empty "ids" value cannot
        # return entities, so skip the round trip entirely.
        return iter(())
    r = api_get({"action": "wbgetentities", "ids": "|".join(ids)})
    entities = r.json()["entities"]
    return ((qid, entity) for qid, entity in entities.items())
|