From 04091988de2a6c204a2ae39807e362177da1118e Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Wed, 9 Oct 2019 22:03:51 +0100
Subject: [PATCH] Bug fix 'Detroit Institute of Arts' parser.

---
Review note: get_html() now keeps the re.Match object in `m`, so a URL
that does not match re_url makes it return None instead of raising.
The earlier revision called .group(1) on the search result before the
None check and then again on the resulting string, which raised
AttributeError for every URL. The blob id on the index line below
predates this correction.

 depicts/dia.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/depicts/dia.py b/depicts/dia.py
index 9d627e6..3d86ab3 100644
--- a/depicts/dia.py
+++ b/depicts/dia.py
@@ -6,7 +6,10 @@ import re
 re_url = re.compile(r'https?://www.dia.org/art/collection/object/(.+)$')
 
 def get_html(url):
-    catalog_id = re_url.search(url).group(1).replace('/', '_')
+    m = re_url.search(url)
+    if not m:
+        return
+    catalog_id = m.group(1).replace('/', '_')
 
     filename = f'cache/dia_{catalog_id}.html'
 
@@ -47,4 +50,6 @@ def parse_html(html):
     }
 
 def get_catalog(url):
-    return parse_html(get_html(url))
+    html = get_html(url)
+    if html:
+        return parse_html(html)