From 039240b3c58a73d1033cb8ac9b0c0c6179f5a40f Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 14 Oct 2019 11:08:42 +0100 Subject: [PATCH] Chunk requests to Wikimedia Commons --- depicts/commons.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/depicts/commons.py b/depicts/commons.py index dd7df26..c19e4f5 100644 --- a/depicts/commons.py +++ b/depicts/commons.py @@ -1,6 +1,7 @@ from . import mediawiki, utils commons_url = 'https://www.wikidata.org/w/api.php' +page_size = 50 def image_detail(filenames, thumbheight=None, thumbwidth=None): if not isinstance(filenames, list): @@ -10,7 +11,6 @@ def image_detail(filenames, thumbheight=None, thumbwidth=None): params = { 'action': 'query', - 'titles': '|'.join(f'File:{f}' for f in filenames), 'prop': 'imageinfo', 'iiprop': 'url', } @@ -18,13 +18,18 @@ def image_detail(filenames, thumbheight=None, thumbwidth=None): params['iiurlheight'] = thumbheight if thumbwidth is not None: params['iiurlwidth'] = thumbwidth - r = mediawiki.api_call(params, api_url=commons_url) images = {} - for image in r.json()['query']['pages']: - filename = utils.drop_start(image['title'], 'File:') - images[filename] = image['imageinfo'][0] + for cur in utils.chunk(filenames, page_size): + call_params = params.copy() + call_params['titles'] = '|'.join(f'File:{f}' for f in cur) + + r = mediawiki.api_call(call_params, api_url=commons_url) + + for image in r.json()['query']['pages']: + filename = utils.drop_start(image['title'], 'File:') + images[filename] = image['imageinfo'][0] return images