owl-map/matcher/wikidata_api.py

86 lines
2.2 KiB
Python
Raw Normal View History

import json
2023-05-14 15:12:36 +01:00
import typing
from typing import Any, cast
import requests
import simplejson.errors
2023-05-14 15:12:36 +01:00
from . import CallParams, user_agent_headers
# Endpoint of the Wikidata API; api_get() sends its requests to this URL.
wd_api_url = "https://www.wikidata.org/w/api.php"
2023-05-14 15:12:36 +01:00
# Alias for an entity as returned by the API: a dict with string keys and
# values of any type.
EntityType = dict[str, Any]
2023-05-14 15:12:36 +01:00
def api_get(params: CallParams, timeout: float = 30.0) -> requests.Response:
    """Call the Wikidata API.

    The caller's parameters are merged over the defaults ``format=json`` and
    ``formatversion=2``, so either default can be overridden via *params*.

    Args:
        params: Query parameters for the API call.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The HTTP response. The status code is not checked here.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    call_params: CallParams = {
        "format": "json",
        "formatversion": 2,
        **params,
    }
    return requests.get(
        wd_api_url,
        params=call_params,
        headers=user_agent_headers(),
        timeout=timeout,
    )
2023-05-14 15:12:36 +01:00
def get_revision_timestamp(revid: int) -> str:
    """Return the timestamp of the revision with the given revid."""
    response = api_get(
        {
            "action": "query",
            "prop": "revisions",
            "revids": revid,
            "rvprop": "ids|timestamp",
        }
    )
    page = response.json()["query"]["pages"][0]
    revision = page["revisions"][0]
    # Sanity check: the API must hand back the revision that was asked for.
    assert revision["revid"] == int(revid)
    return cast(str, revision["timestamp"])
2023-05-14 15:12:36 +01:00
def get_recent_changes(**kwargs: CallParams) -> requests.Response:
    """Query the API for the list of recent changes.

    The request asks for namespace 0, the maximum result limit and
    oldest-first ordering. Keyword arguments with a truthy value are merged
    over these defaults; falsy values are ignored.
    """
    rcprop = "|".join(
        (
            "title",
            "ids",
            "comment",
            "parsedcomment",
            "timestamp",
            "redirect",
            "loginfo",
        )
    )
    params: CallParams = {
        "action": "query",
        "list": "recentchanges",
        "rcnamespace": 0,
        "rclimit": "max",
        "rcdir": "newer",
        "rcprop": rcprop,
    }
    # Caller-supplied options win over the defaults above.
    for key, value in kwargs.items():
        if value:
            params[key] = cast(str | int, value)
    return api_get(params)
2023-05-14 15:12:36 +01:00
def get_entity(qid: str) -> EntityType:
    """Fetch the Wikidata item with the given QID from the API.

    The raw response text is printed if the body is not valid JSON, and the
    decoded reply is printed if it has no "entities" key; in both cases the
    resulting exception still propagates to the caller.
    """
    response = api_get({"action": "wbgetentities", "ids": qid})
    try:
        payload = response.json()
    except simplejson.errors.JSONDecodeError:
        # Show what the server actually sent before re-raising.
        print(response.text)
        raise
    has_entities = "entities" in payload
    if not has_entities:
        # Dump the unexpected reply; the lookup below then raises KeyError.
        print(json.dumps(payload, indent=2))
    entities = payload["entities"]
    return cast(EntityType, entities[qid])
2023-05-14 15:12:36 +01:00
def get_entities(ids: list[str]) -> typing.Iterator[tuple[str, EntityType]]:
    """Fetch several Wikidata items in one API call.

    The QIDs are joined with "|" into a single request, which is made
    immediately. The result iterates over (qid, entity) pairs taken from the
    "entities" mapping of the response.
    """
    joined_ids = "|".join(ids)
    response = api_get({"action": "wbgetentities", "ids": joined_ids})
    entities = response.json()["entities"]
    return ((qid, entity) for qid, entity in entities.items())