From 269c6bce54de3422c539d70e668da62469bf4425 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 6 Jun 2024 13:47:22 +0000 Subject: [PATCH 1/2] Need to use BigInteger for lastrevid --- matcher/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matcher/model.py b/matcher/model.py index 40d56f5..4f61118 100644 --- a/matcher/model.py +++ b/matcher/model.py @@ -107,7 +107,7 @@ class Item(Base): aliases = Column(postgresql.JSONB) sitelinks = Column(postgresql.JSONB) claims = Column(postgresql.JSONB, nullable=False) - lastrevid = Column(Integer, nullable=False, unique=True) + lastrevid = Column(BigInteger, nullable=False, unique=True) locations: Mapped[list["ItemLocation"]] = relationship( "ItemLocation", cascade="all, delete-orphan", backref="item" ) From 13ecf4526d1f85a00199c89ccbcaf3ccc50450fa Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Thu, 6 Jun 2024 13:48:18 +0000 Subject: [PATCH 2/2] Catch MediaWiki database timeout errors and retry --- update.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/update.py b/update.py index f3626a7..ee26a3b 100755 --- a/update.py +++ b/update.py @@ -6,6 +6,8 @@ import json import typing from time import sleep +import requests.exceptions + from matcher import model, wikidata, wikidata_api from matcher.database import init_db, session @@ -81,7 +83,14 @@ def handle_edit(change: Change) -> None: print(f"{ts}: no need to update {qid}") return - entity = wikidata_api.get_entity(qid) + for attempt in range(100): + try: + entity = wikidata_api.get_entity(qid) + except requests.exceptions.ConnectionError: + print("connection error, retrying.") + sleep(10) + else: + break entity_qid = entity.pop("id") if entity_qid != qid: print(f"{ts}: item {qid} replaced with redirect") @@ -123,6 +132,15 @@ def update_database() -> None: r = wikidata_api.get_recent_changes(rcstart=start, rccontinue=rccontinue) reply = r.json() + if ( + "error" in reply + and reply["error"]["code"] == "internal_api_error_DBQueryTimeoutError" + ): + print(reply) + sleep(10) + continue + if "query" not in reply: + print(reply) for change in reply["query"]["recentchanges"]: rctype = change["type"] timestamp = change["timestamp"]