Compare commits
2 commits
b12a89c3b6
...
0ccaebe418
Author | SHA1 | Date | |
---|---|---|---|
Edward Betts | 0ccaebe418 | ||
Edward Betts | 5c934e19bc |
|
@ -13,6 +13,8 @@ commons_url = "https://www.wikidata.org/w/api.php"
|
||||||
wikidata_api = "https://www.wikidata.org/w/api.php"
|
wikidata_api = "https://www.wikidata.org/w/api.php"
|
||||||
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
|
user_agent = "conference-archive/0.1 (contact: edward@4angle.com)"
|
||||||
|
|
||||||
|
CallParams = dict[str, str | int]
|
||||||
|
|
||||||
s = requests.Session()
|
s = requests.Session()
|
||||||
s.headers.update({"User-Agent": user_agent})
|
s.headers.update({"User-Agent": user_agent})
|
||||||
|
|
||||||
|
@ -49,7 +51,7 @@ def search(q: str) -> list[dict[str, typing.Any]]:
|
||||||
|
|
||||||
def api_image_detail_call(filename: str) -> requests.Response:
|
def api_image_detail_call(filename: str) -> requests.Response:
|
||||||
"""Call the Commons API."""
|
"""Call the Commons API."""
|
||||||
call_params = {
|
call_params: CallParams = {
|
||||||
"format": "json",
|
"format": "json",
|
||||||
"formatversion": 2,
|
"formatversion": 2,
|
||||||
"action": "query",
|
"action": "query",
|
||||||
|
@ -102,6 +104,7 @@ def download_photo(filename: str) -> None:
|
||||||
|
|
||||||
|
|
||||||
def get_photo(filename: str) -> None:
|
def get_photo(filename: str) -> None:
|
||||||
|
"""Download filename and resize."""
|
||||||
save_to = os.path.join("static", "wikidata_photo", filename)
|
save_to = os.path.join("static", "wikidata_photo", filename)
|
||||||
thumb = os.path.join("static", "wikidata_photo", "thumb", filename)
|
thumb = os.path.join("static", "wikidata_photo", "thumb", filename)
|
||||||
if not os.path.exists(save_to):
|
if not os.path.exists(save_to):
|
||||||
|
|
51
main.py
51
main.py
|
@ -1,14 +1,8 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import time
|
|
||||||
import typing
|
|
||||||
from typing import cast
|
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import requests
|
|
||||||
import sqlalchemy
|
import sqlalchemy
|
||||||
from sqlalchemy import func, or_, update
|
from sqlalchemy import func, or_, update
|
||||||
from werkzeug.wrappers import Response
|
from werkzeug.wrappers import Response
|
||||||
|
@ -21,37 +15,6 @@ app.debug = True
|
||||||
app.config.from_object("config.default")
|
app.config.from_object("config.default")
|
||||||
database.init_app(app)
|
database.init_app(app)
|
||||||
|
|
||||||
wikidata_api = "https://www.wikidata.org/w/api.php"
|
|
||||||
|
|
||||||
|
|
||||||
def md5sum(s: str) -> str:
|
|
||||||
return hashlib.md5(s.encode("utf-8")).hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def wikidata_search(q: str) -> list[dict[str, typing.Any]]:
|
|
||||||
q += " haswbstatement:P31=Q5"
|
|
||||||
q_md5 = md5sum(q)
|
|
||||||
|
|
||||||
cache_filename = os.path.join("cache", q_md5 + ".json")
|
|
||||||
|
|
||||||
if os.path.exists(cache_filename):
|
|
||||||
data = json.load(open(cache_filename))
|
|
||||||
else:
|
|
||||||
params: dict[str, str | int] = {
|
|
||||||
"action": "query",
|
|
||||||
"list": "search",
|
|
||||||
"format": "json",
|
|
||||||
"formatversion": 2,
|
|
||||||
"srsearch": q,
|
|
||||||
"srlimit": "max",
|
|
||||||
}
|
|
||||||
r = requests.get(wikidata_api, params=params)
|
|
||||||
open(cache_filename, "w").write(r.text)
|
|
||||||
data = r.json()
|
|
||||||
time.sleep(1)
|
|
||||||
|
|
||||||
return cast(list[dict[str, typing.Any]], data["query"]["search"])
|
|
||||||
|
|
||||||
|
|
||||||
def top_speakers() -> sqlalchemy.orm.query.Query:
|
def top_speakers() -> sqlalchemy.orm.query.Query:
|
||||||
q = (
|
q = (
|
||||||
|
@ -89,6 +52,7 @@ def top_events() -> sqlalchemy.orm.query.Query:
|
||||||
|
|
||||||
|
|
||||||
def drop_start(s: str, start: str) -> str:
|
def drop_start(s: str, start: str) -> str:
|
||||||
|
"""Remove text from the start of a string."""
|
||||||
return s[len(start) :] if s.startswith(start) else s
|
return s[len(start) :] if s.startswith(start) else s
|
||||||
|
|
||||||
|
|
||||||
|
@ -121,7 +85,8 @@ def person(person_id: int) -> str | Response:
|
||||||
|
|
||||||
wikidata_hits: list[dict[str, str]] = []
|
wikidata_hits: list[dict[str, str]] = []
|
||||||
if False and item.wikidata_qid is None:
|
if False and item.wikidata_qid is None:
|
||||||
search_hits = wikidata_search(item.name)
|
q = item.name + " haswbstatement:P31=Q5"
|
||||||
|
search_hits = wikidata.search(q)
|
||||||
print(len(search_hits))
|
print(len(search_hits))
|
||||||
for search_hit in search_hits:
|
for search_hit in search_hits:
|
||||||
qid = search_hit["title"]
|
qid = search_hit["title"]
|
||||||
|
@ -350,7 +315,8 @@ def link_to_wikidata() -> str:
|
||||||
for person, num in top_speakers2():
|
for person, num in top_speakers2():
|
||||||
if person.wikidata_qid:
|
if person.wikidata_qid:
|
||||||
continue
|
continue
|
||||||
search_hits = wikidata_search(person.name)
|
q = person.name + " haswbstatement:P31=Q5"
|
||||||
|
search_hits = wikidata.search(q)
|
||||||
if not search_hits:
|
if not search_hits:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -361,7 +327,7 @@ def link_to_wikidata() -> str:
|
||||||
|
|
||||||
for search_hit in search_hits:
|
for search_hit in search_hits:
|
||||||
qid = search_hit["title"]
|
qid = search_hit["title"]
|
||||||
item = wikidata_get_item(qid)
|
item = wikidata.get_item(qid)
|
||||||
if "en" in item["labels"]:
|
if "en" in item["labels"]:
|
||||||
label = item["labels"]["en"]["value"]
|
label = item["labels"]["en"]["value"]
|
||||||
else:
|
else:
|
||||||
|
@ -425,7 +391,8 @@ def delete_person(person_id: int) -> str | Response:
|
||||||
return flask.redirect(flask.url_for("index"))
|
return flask.redirect(flask.url_for("index"))
|
||||||
|
|
||||||
|
|
||||||
def person_image_filename(person_id):
|
def person_image_filename(person_id: int) -> str:
|
||||||
|
"""Filename for speaker photo."""
|
||||||
person = model.Person.query.get(person_id)
|
person = model.Person.query.get(person_id)
|
||||||
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
|
return os.path.join("wikidata_photo", "thumb", person.wikidata_photo[0])
|
||||||
|
|
||||||
|
@ -440,6 +407,7 @@ def person_image_filename(person_id):
|
||||||
|
|
||||||
@app.route("/github_wikidata")
|
@app.route("/github_wikidata")
|
||||||
def github_wikidata() -> str:
|
def github_wikidata() -> str:
|
||||||
|
"""Look for speakers on Wikidata based on the GitHub property."""
|
||||||
items = []
|
items = []
|
||||||
for line in open("found_wikidata_github"):
|
for line in open("found_wikidata_github"):
|
||||||
person_id, person_name, qid, wd_name, github, photo = eval(line)
|
person_id, person_name, qid, wd_name, github, photo = eval(line)
|
||||||
|
@ -453,6 +421,7 @@ def github_wikidata() -> str:
|
||||||
|
|
||||||
@app.route("/reports")
|
@app.route("/reports")
|
||||||
def reports_page() -> str:
|
def reports_page() -> str:
|
||||||
|
"""Page showing statistics."""
|
||||||
event_count = model.Event.query.count()
|
event_count = model.Event.query.count()
|
||||||
|
|
||||||
missing_event_date_count = model.Event.query.filter(
|
missing_event_date_count = model.Event.query.filter(
|
||||||
|
|
Loading…
Reference in a new issue