From bb68a46f5924b120c2e97e52ebc270ac871e2561 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Tue, 8 Oct 2019 11:40:48 +0100 Subject: [PATCH] There can be more than 50 people. --- depicts/human.py | 31 ++++++++++++++++--------------- depicts/mediawiki.py | 5 ++++- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/depicts/human.py b/depicts/human.py index 9409fca..d7c71af 100644 --- a/depicts/human.py +++ b/depicts/human.py @@ -1,5 +1,5 @@ from .model import HumanItem -from . import mediawiki +from . import mediawiki, utils import re re_four_digits = re.compile(r'\b\d{4}\b') @@ -41,19 +41,20 @@ def from_name(name): qids = list(lookup.keys()) found = [] - for entity in mediawiki.get_entities_with_cache(qids, props='labels|descriptions'): - qid = entity['id'] - item = lookup[qid] - i = { - 'qid': entity['id'], - 'year_of_birth': item.year_of_birth, - 'year_of_death': item.year_of_death, - } - label = mediawiki.get_entity_label(entity) - if label: - i['label'] = label - if 'en' in entity['descriptions']: - i['description'] = entity['descriptions']['en']['value'] - found.append(i) + for cur in utils.chunk(qids, 50): + for entity in mediawiki.get_entities_with_cache(cur, props='labels|descriptions'): + qid = entity['id'] + item = lookup[qid] + i = { + 'qid': entity['id'], + 'year_of_birth': item.year_of_birth, + 'year_of_death': item.year_of_death, + } + label = mediawiki.get_entity_label(entity) + if label: + i['label'] = label + if 'en' in entity['descriptions']: + i['description'] = entity['descriptions']['en']['value'] + found.append(i) found.sort(key=lambda i: i['label']) return found diff --git a/depicts/mediawiki.py b/depicts/mediawiki.py index 2bd96f7..9a05c63 100644 --- a/depicts/mediawiki.py +++ b/depicts/mediawiki.py @@ -1,6 +1,7 @@ import requests import os import json +import hashlib from .category import Category wikidata_url = 'https://www.wikidata.org/w/api.php' @@ -64,7 +65,9 @@ def get_entity_with_cache(qid, refresh=False): return entity def get_entities_with_cache(ids, **params): - filename = f'cache/entities_{"_".join(ids)}.json' + md5 = hashlib.md5(' '.join(ids).encode('utf-8')).hexdigest() + + filename = f'cache/entities_{md5}.json' if os.path.exists(filename): entity = json.load(open(filename)) else: