diff --git a/depicts/human.py b/depicts/human.py index 9409fca..d7c71af 100644 --- a/depicts/human.py +++ b/depicts/human.py @@ -1,5 +1,5 @@ from .model import HumanItem -from . import mediawiki +from . import mediawiki, utils import re re_four_digits = re.compile(r'\b\d{4}\b') @@ -41,19 +41,20 @@ def from_name(name): qids = list(lookup.keys()) found = [] - for entity in mediawiki.get_entities_with_cache(qids, props='labels|descriptions'): - qid = entity['id'] - item = lookup[qid] - i = { - 'qid': entity['id'], - 'year_of_birth': item.year_of_birth, - 'year_of_death': item.year_of_death, - } - label = mediawiki.get_entity_label(entity) - if label: - i['label'] = label - if 'en' in entity['descriptions']: - i['description'] = entity['descriptions']['en']['value'] - found.append(i) + for cur in utils.chunk(qids, 50): + for entity in mediawiki.get_entities_with_cache(cur, props='labels|descriptions'): + qid = entity['id'] + item = lookup[qid] + i = { + 'qid': entity['id'], + 'year_of_birth': item.year_of_birth, + 'year_of_death': item.year_of_death, + } + label = mediawiki.get_entity_label(entity) + if label: + i['label'] = label + if 'en' in entity['descriptions']: + i['description'] = entity['descriptions']['en']['value'] + found.append(i) found.sort(key=lambda i: i['label']) return found diff --git a/depicts/mediawiki.py b/depicts/mediawiki.py index 2bd96f7..9a05c63 100644 --- a/depicts/mediawiki.py +++ b/depicts/mediawiki.py @@ -1,6 +1,7 @@ import requests import os import json +import hashlib from .category import Category wikidata_url = 'https://www.wikidata.org/w/api.php' @@ -64,7 +65,9 @@ def get_entity_with_cache(qid, refresh=False): return entity def get_entities_with_cache(ids, **params): - filename = f'cache/entities_{"_".join(ids)}.json' + md5 = hashlib.md5(' '.join(ids).encode('utf-8')).hexdigest() + + filename = f'cache/entities_{md5}.json' if os.path.exists(filename): entity = json.load(open(filename)) else: