Remove unused code.

This commit is contained in:
Edward Betts 2023-05-14 13:50:11 +00:00
parent 40e9499eb6
commit f4b7dd681c

121
update.py
View file

@ -3,124 +3,17 @@
"""Download Wikidata recent changes and update items in local database.""" """Download Wikidata recent changes and update items in local database."""
import json import json
import os
import sys
import typing import typing
from time import sleep from time import sleep
from matcher import database, model, utils, wikidata, wikidata_api from matcher import database, model, wikidata, wikidata_api
DB_URL = "postgresql:///matcher" DB_URL = "postgresql:///matcher"
database.init_db(DB_URL) database.init_db(DB_URL)
previous_max_lastrevid = 1888214110 # Q118129609
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"} entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
def read_changes() -> None:
    """Summarise the recent-changes replies saved in the ``changes`` directory.

    Every file in ``changes/`` is read in numeric order of the part of its
    name before the first ``.``.  The distinct change titles are collected and
    the highest ``revid`` is tracked.  For each file the filename and the
    running number of distinct titles are printed; once all files are read
    the final title count and the highest revid are printed.

    Raises:
        ValueError: if a filename does not start with an integer.
        AssertionError: if a change title does not start with ``"Q"``.
    """
    qids: set[str] = set()
    max_lastrevid = 0

    filenames = sorted(
        os.listdir("changes"),
        key=lambda name: int(name.partition(".")[0]),
    )
    for filename in filenames:
        # Use a context manager so the handle is closed as soon as the reply
        # is parsed, rather than whenever the garbage collector gets to it.
        with open(os.path.join("changes", filename)) as fh:
            reply = json.load(fh)
        print(filename, len(qids))

        for change in reply["query"]["recentchanges"]:
            title = change["title"]
            revid = change["revid"]
            # A falsy revid (0 or None) never raises the maximum.
            if revid and revid > max_lastrevid:
                max_lastrevid = revid
            assert title.startswith("Q")
            qids.add(title)

    print(len(qids))
    print(max_lastrevid)
    # NOTE: a later step that fetched each entity in chunks of 50 and wrote it
    # to items/<qid>.json sat behind an unconditional ``return`` and could
    # never run, so it is not reproduced here.
def get_changes() -> None:
    """Download recent changes and save every API reply under ``changes/``.

    Starting from a fixed timestamp, ``wikidata_api.query_wd_api`` is called
    repeatedly.  The raw text of each reply is written to
    ``changes/NNNNNN.json``, where ``NNNNNN`` is a six-digit counter starting
    at 1.  The timestamp of the first change in each reply is printed as a
    progress marker.  The loop ends when a reply has no ``"continue"`` key;
    otherwise its ``rccontinue`` value is printed and passed to the next call.
    """
    start = "2021-03-24T11:56:11"
    rccontinue = None
    page_number = 0
    while True:
        page_number += 1
        r = wikidata_api.query_wd_api(rcstart=start, rccontinue=rccontinue)
        with open(f"changes/{page_number:06d}.json", "w") as out:
            out.write(r.text)

        reply = r.json()
        try:
            print(reply["query"]["recentchanges"][0]["timestamp"])
        except (KeyError, IndexError) as exc:
            # KeyError: an expected key is missing from the reply.
            # IndexError: the reply has an empty "recentchanges" list.
            # Either way there is no timestamp to show, so report the
            # exception name and carry on.
            print(type(exc).__name__)

        if "continue" not in reply:
            break

        rccontinue = reply["continue"]["rccontinue"]
        print(rccontinue)
        sleep(1)  # presumably a courtesy pause between API requests
def get_timestamp() -> None:
    """Print the revision timestamp looked up for ``previous_max_lastrevid``."""
    print(wikidata_api.get_revision_timestamp(previous_max_lastrevid))
def handle_new(change): def handle_new(change):
qid = change["title"] qid = change["title"]
ts = change["timestamp"] ts = change["timestamp"]
@ -243,10 +136,12 @@ def update_database() -> None:
print("finished") print("finished")
# read_changes() def main() -> None:
# get_timestamp() """Infinite loop."""
# get_changes() while True:
while True:
update_database() update_database()
sleep(60) sleep(60)
if __name__ == "__main__":
main()