#!/usr/bin/python3
"""Add a country or a city to the confarchive database, using Wikidata.

Usage:
    add_place.py country NAME
    add_place.py city COUNTRY_CODE NAME
"""

# Provenance: this script was introduced by commit
# eb337fb7e991b206b0ba38d3bb21a939c5bfad84 (Edward Betts, 2023-09-24),
# "Add countries and cities to the database".

import sys
from typing import Any, Mapping, Optional, Sequence

from confarchive import database, model, wikidata

DB_URL = "postgresql:///confarchive"

USAGE = (
    "usage: add_place.py country NAME\n"
    "       add_place.py city COUNTRY_CODE NAME"
)

# City labels must be strictly shorter than this many characters.
# NOTE(review): presumably mirrors a column width in the City model --
# confirm against the schema.
CITY_LABEL_LIMIT = 30

# Kept at module level so that the database is initialised on import as well
# as when the file is run as a script.
database.init_db(DB_URL)


class PlaceError(Exception):
    """Raised when a place cannot be resolved unambiguously or is invalid."""


def _english_value(item: Mapping[str, Any], field: str, fallback: str) -> str:
    """Return the English entry of ``item[field]``, or *fallback* if absent.

    *field* is a key of the Wikidata item such as ``"labels"`` or
    ``"descriptions"`` whose value maps language codes to ``{"value": ...}``.
    """
    values = item[field]
    if "en" in values:
        return values["en"]["value"]
    return fallback


def _country_qid(claims: Mapping[str, Any]) -> Optional[str]:
    """Return the QID in the first P17 claim, or None if it is missing."""
    try:
        return claims["P17"][0]["mainsnak"]["datavalue"]["value"]["id"]
    except KeyError:
        # Either there is no P17 claim or the claim carries no datavalue.
        return None


def add_country(name: str) -> None:
    """Add a country to the database.

    Searches Wikidata for *name* among items that have a P297 statement,
    keeps the hits whose English label contains *name* (case-insensitive)
    and stores the single remaining hit as a ``model.Country``.

    Raises:
        PlaceError: if the search does not yield exactly one matching item.
    """
    print(name)

    hits = wikidata.search(name + " haswbstatement:P297")

    found = []

    for hit in hits:
        qid = hit["title"]
        item = wikidata.get_item(qid)
        label = _english_value(item, "labels", "[no english label]")
        description = _english_value(
            item, "descriptions", "[no english description]"
        )

        if name.lower() not in label.lower():
            print("not picking", label)
            continue

        alpha2 = item["claims"]["P297"][0]["mainsnak"]["datavalue"]["value"]

        wd_hit = {
            "qid": qid,
            "label": label,
            "description": description,
            "alpha2": alpha2,
        }
        print(wd_hit)

        found.append(wd_hit)

    # Explicit check instead of ``assert``: asserts vanish under ``python -O``
    # and an ambiguous match must never be written to the database.
    if len(found) != 1:
        raise PlaceError(
            f"expected exactly one Wikidata country matching {name!r}, "
            f"found {len(found)}"
        )
    hit = found[0]

    item = model.Country(
        alpha2=hit["alpha2"], name=hit["label"], wikidata_qid=hit["qid"]
    )
    database.session.add(item)
    database.session.commit()

    print(hit["alpha2"], hit["label"], hit["qid"], "added to database")


def add_city(country_code: str, name: str) -> None:
    """Add a city to the database.

    Looks up the country with alpha2 code *country_code* in the database,
    searches Wikidata for *name* and stores the first hit whose first P17
    claim equals the country's Wikidata QID as a ``model.City``.

    Raises:
        PlaceError: if no hit belongs to the country, or if the label of the
            chosen hit is not shorter than ``CITY_LABEL_LIMIT`` characters.
    """
    print(name)

    country = model.Country.query.filter_by(alpha2=country_code).one()
    country_qid = country.wikidata_qid

    # The search query is not restricted by country; hits are filtered on
    # their P17 claim below instead.
    hits = wikidata.search(name)

    chosen = None

    for hit in hits:
        qid = hit["title"]
        item = wikidata.get_item(qid)
        label = _english_value(item, "labels", "[no english label]")
        description = _english_value(
            item, "descriptions", "[no english description]"
        )

        hit_country_qid = _country_qid(item["claims"])
        if hit_country_qid != country_qid:
            print("skipping:", label, description)
            print(" ", hit_country_qid, "!=", country_qid)
            continue

        wd_hit = {
            "qid": qid,
            "label": label,
            "description": description,
            "country": country,
        }
        print(wd_hit)
        if len(label) >= CITY_LABEL_LIMIT:
            raise PlaceError(
                f"label {label!r} is {len(label)} characters long; "
                f"it must be shorter than {CITY_LABEL_LIMIT}"
            )

        # Only the first hit in the right country is used.
        chosen = wd_hit
        break

    if chosen is None:
        raise PlaceError(
            f"no Wikidata item matching {name!r} found in country "
            f"{country_code} ({country_qid})"
        )

    slug = chosen["label"].lower().replace(" ", "_")

    item = model.City(
        slug=slug,
        name=chosen["label"],
        country=country,
        wikidata_qid=chosen["qid"],
    )
    database.session.add(item)
    database.session.commit()

    print(chosen["label"], chosen["qid"], "added to database")


# Maps each implemented object type to its handler and the number of
# positional command-line arguments the handler takes.
_COMMANDS = {
    "country": (add_country, 1),
    "city": (add_city, 2),
}


def main(argv: Sequence[str]) -> None:
    """Dispatch to ``add_country`` or ``add_city`` based on *argv*.

    *argv* has the shape of ``sys.argv``: the program name, the object type
    and then the arguments of the handler. Surplus arguments are ignored.
    Exits with a message on bad usage or when a place cannot be added.
    """
    if len(argv) < 2:
        sys.exit(USAGE)

    obj_type = argv[1]
    args = argv[2:]

    if obj_type == "venue":
        # Recognised as an object type, but there is no handler for it yet.
        sys.exit("error: adding a venue is not implemented yet")
    if obj_type not in _COMMANDS:
        sys.exit(USAGE)

    handler, arg_count = _COMMANDS[obj_type]
    if len(args) < arg_count:
        sys.exit(USAGE)

    try:
        handler(*args[:arg_count])
    except PlaceError as exc:
        sys.exit(f"error: {exc}")


if __name__ == "__main__":
    main(sys.argv)