Download missing IsA items from Wikidata

This commit is contained in:
Edward Betts 2021-05-08 11:02:59 +02:00
parent 63e0cd904c
commit 471022877a
1 changed file with 28 additions and 1 deletion

View File

@ -3,7 +3,7 @@
from flask import Flask, render_template, request, jsonify, redirect, url_for from flask import Flask, render_template, request, jsonify, redirect, url_for
from sqlalchemy import func from sqlalchemy import func
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
from matcher import nominatim, model, database, commons from matcher import nominatim, model, database, commons, wikidata, wikidata_api
from collections import Counter from collections import Counter
from time import time from time import time
import GeoIP import GeoIP
@ -15,6 +15,7 @@ app.debug = True
DB_URL = "postgresql:///matcher" DB_URL = "postgresql:///matcher"
database.init_db(DB_URL) database.init_db(DB_URL)
entity_keys = {"labels", "sitelinks", "aliases", "claims", "descriptions", "lastrevid"}
property_map = [ property_map = [
("P238", ["iata"], "IATA airport code"), ("P238", ["iata"], "IATA airport code"),
@ -301,6 +302,29 @@ def get_isa_count(items):
return isa_count.most_common() return isa_count.most_common()
def get_and_save_item(qid):
    """Fetch the entity for *qid* and add it to the database session.

    The entity is retrieved with ``wikidata_api.get_entity``. If the id of
    the returned entity differs from *qid*, the mismatch is printed as a
    redirect and ``None`` is returned. Otherwise a ``model.Item`` is built
    from the entity fields named in ``entity_keys``, its locations are set
    from the entity's claims, and the item is added to ``database.session``.
    Nothing is committed here.

    Returns the new ``model.Item``, or ``None`` for an id mismatch.
    Re-raises ``TypeError`` from the ``model.Item`` constructor after
    printing some details of the entity.
    """
    entity = wikidata_api.get_entity(qid)
    if entity["id"] != qid:
        print(f'redirect {qid} -> {entity["id"]}')
        return

    coords = wikidata.get_entity_coords(entity["claims"])
    # Numeric part of the QID: everything after the first character.
    numeric_id = int(qid[1:])

    # Keep only the entity fields listed in entity_keys.
    fields = {}
    for key, value in entity.items():
        if key in entity_keys:
            fields[key] = value

    try:
        item = model.Item(item_id=numeric_id, **fields)
    except TypeError:
        # Print what was received before propagating the error.
        print(qid)
        print(f'{entity["pageid"]=} {entity["ns"]=} {entity["type"]=}')
        print(entity.keys())
        raise
    else:
        item.locations = model.location_objects(coords)
        database.session.add(item)
        return item
@app.route("/api/1/items") @app.route("/api/1/items")
def api_wikidata_items(): def api_wikidata_items():
bounds = request.args.get("bounds") bounds = request.args.get("bounds")
@ -318,6 +342,9 @@ def api_wikidata_items():
isa_count = [] isa_count = []
for qid, count in counts: for qid, count in counts:
item = isa_items.get(qid) item = isa_items.get(qid)
if not item:
item = get_and_save_item(qid)
label = item.label() if item else "[missing]" label = item.label() if item else "[missing]"
isa = { isa = {
"qid": qid, "qid": qid,