132 lines
3.3 KiB
Python
132 lines
3.3 KiB
Python
|
#!/usr/bin/python3
|
||
|
|
||
|
import sys
|
||
|
|
||
|
from confarchive import database, model, wikidata
|
||
|
|
||
|
# Connection URL handed to database.init_db() below. It names the
# "confarchive" PostgreSQL database and gives no host, user or password.
DB_URL = "postgresql:///confarchive"

# Set up the database layer at import time so that both model queries and
# database.session are usable by the functions defined further down.
database.init_db(DB_URL)
|
||
|
|
||
|
|
||
|
def add_country(name: str) -> None:
    """Look up a country on Wikidata by name and add it to the database.

    Wikidata is searched for *name*, restricted to items that carry a P297
    statement (the value is stored here as the country's ``alpha2`` code).
    Hits whose English label does not contain *name* (case-insensitively)
    are discarded. Exactly one hit must remain; it is saved as a
    ``model.Country`` row and the session is committed.

    Progress is reported on stdout with ``print``.

    Args:
        name: Country name to search for.

    Raises:
        LookupError: If no hit, or more than one hit, matches *name*.
    """
    print(name)

    hits = wikidata.search(name + " haswbstatement:P297")

    found = []
    wanted = name.lower()

    for hit in hits:
        qid = hit["title"]
        item = wikidata.get_item(qid)

        labels = item["labels"]
        if "en" in labels:
            label = labels["en"]["value"]
        else:
            label = "[no english label]"

        descriptions = item["descriptions"]
        if "en" in descriptions:
            description = descriptions["en"]["value"]
        else:
            description = "[no english description]"

        # The search is fuzzy, so only keep items whose English label
        # actually contains the requested name.
        if wanted not in label.lower():
            print("not picking", label)
            continue

        # Only the first P297 claim is consulted.
        alpha2 = item["claims"]["P297"][0]["mainsnak"]["datavalue"]["value"]

        wd_hit = {
            "qid": qid,
            "label": label,
            "description": description,
            "alpha2": alpha2,
        }
        print(wd_hit)

        found.append(wd_hit)

    # Explicit check instead of ``assert``: asserts are removed under
    # ``python -O``, which would let an ambiguous or empty result through.
    if len(found) != 1:
        raise LookupError(
            "expected exactly one Wikidata country matching {!r}, "
            "found {}: {}".format(name, len(found), [h["qid"] for h in found])
        )
    hit = found[0]

    item = model.Country(
        alpha2=hit["alpha2"], name=hit["label"], wikidata_qid=hit["qid"]
    )
    database.session.add(item)
    database.session.commit()

    print(hit["alpha2"], hit["label"], hit["qid"], "added to database")
|
||
|
|
||
|
|
||
|
def add_city(country_code: str, name: str) -> None:
    """Look up a city on Wikidata by name and add it to the database.

    The country is loaded from the database by its ``alpha2`` code. Wikidata
    is then searched for *name*, and the first hit whose first P17 claim
    points at that country's Wikidata item is used. The city is saved as a
    ``model.City`` row, with a slug derived from the English label
    (lower-cased, spaces replaced by underscores), and the session is
    committed.

    Progress is reported on stdout with ``print``.

    Args:
        country_code: ``alpha2`` value of an existing ``model.Country`` row.
        name: City name to search for.

    Raises:
        ValueError: If the matching item's label is 30 characters or longer.
        LookupError: If no search hit belongs to the requested country.

    The country lookup uses ``.one()``, which presumably raises when there
    is not exactly one matching row (TODO confirm against the ORM in use).
    """
    print(name)

    country = model.Country.query.filter_by(alpha2=country_code).one()
    country_qid = country.wikidata_qid

    # The search itself is not narrowed with "haswbstatement:P17=<country>";
    # the country is verified per hit in the loop below instead.
    hits = wikidata.search(name)

    match = None

    for hit in hits:
        qid = hit["title"]
        item = wikidata.get_item(qid)
        claims = item["claims"]

        labels = item["labels"]
        if "en" in labels:
            label = labels["en"]["value"]
        else:
            label = "[no english label]"

        descriptions = item["descriptions"]
        if "en" in descriptions:
            description = descriptions["en"]["value"]
        else:
            description = "[no english description]"

        # An item with no usable P17 claim is treated as a different country.
        try:
            hit_country_qid = claims["P17"][0]["mainsnak"]["datavalue"]["value"]["id"]
        except KeyError:
            hit_country_qid = None

        if hit_country_qid != country_qid:
            print("skipping:", label, description)
            print(" ", hit_country_qid, "!=", country_qid)
            continue

        wd_hit = {
            "qid": qid,
            "label": label,
            "description": description,
            "country": country,
        }
        print(wd_hit)

        # Explicit check instead of ``assert`` so it still runs under
        # ``python -O``. NOTE(review): the 30-character limit presumably
        # mirrors a column size on model.City -- confirm.
        if len(label) >= 30:
            raise ValueError(
                "label {!r} for {} is too long ({} characters, limit is 29)".format(
                    label, qid, len(label)
                )
            )

        # The first hit in the right country wins; later hits are ignored.
        match = wd_hit
        break

    if match is None:
        raise LookupError(
            "no Wikidata item matching {!r} found in country {} ({})".format(
                name, country_code, country_qid
            )
        )

    slug = match["label"].lower().replace(" ", "_")

    item = model.City(
        slug=slug, name=match["label"], country=country, wikidata_qid=match["qid"]
    )
    database.session.add(item)
    database.session.commit()

    print(match["label"], match["qid"], "added to database")
|
||
|
|
||
|
|
||
|
# Command-line dispatch.
#
#   <script> country NAME
#   <script> city COUNTRY_CODE NAME
#
# Arguments are validated explicitly (rather than with ``assert`` and bare
# sys.argv indexing) so that a wrong invocation produces a usage message
# instead of an IndexError/AssertionError traceback, and so that the checks
# still run under ``python -O``. Extra trailing arguments are ignored.
if len(sys.argv) < 2 or sys.argv[1] not in ("country", "city", "venue"):
    sys.exit(
        "usage: {0} country NAME\n"
        "       {0} city COUNTRY_CODE NAME".format(sys.argv[0])
    )

obj_type = sys.argv[1]

if obj_type == "country":
    if len(sys.argv) < 3:
        sys.exit("usage: {} country NAME".format(sys.argv[0]))
    add_country(sys.argv[2])
elif obj_type == "city":
    if len(sys.argv) < 4:
        sys.exit("usage: {} city COUNTRY_CODE NAME".format(sys.argv[0]))
    add_city(sys.argv[2], sys.argv[3])
else:
    # "venue" is a recognised object type but no add_venue() exists yet.
    # Fail loudly instead of exiting successfully without doing anything.
    # TODO: call add_venue(sys.argv[2], sys.argv[3]) once it is implemented.
    sys.exit("adding a venue is not implemented yet")
|