Add a 2-second timeout to catalog page requests and ignore read timeouts in item_page
This commit is contained in:
parent
42a8353ecc
commit
6eb79ccfd5
67
app.py
67
app.py
|
@ -388,7 +388,7 @@ def get_catalog_page(property_id, value):
|
||||||
if os.path.exists(filename):
|
if os.path.exists(filename):
|
||||||
html = open(filename).read()
|
html = open(filename).read()
|
||||||
else:
|
else:
|
||||||
r = requests.get(url, headers={'User-Agent': user_agent})
|
r = requests.get(url, headers={'User-Agent': user_agent}, timeout=2)
|
||||||
html = r.text
|
html = r.text
|
||||||
open(filename, 'w').write(html)
|
open(filename, 'w').write(html)
|
||||||
|
|
||||||
|
@ -429,38 +429,41 @@ def item_page(item_id):
|
||||||
catalog_url = first_datavalue(entity, 'P973')
|
catalog_url = first_datavalue(entity, 'P973')
|
||||||
|
|
||||||
catalog = None
|
catalog = None
|
||||||
if 'P4704' in entity['claims']:
|
try:
|
||||||
saam_id = first_datavalue(entity, 'P4704')
|
if 'P4704' in entity['claims']:
|
||||||
catalog = saam.get_catalog(saam_id)
|
saam_id = first_datavalue(entity, 'P4704')
|
||||||
elif 'P4709' in entity['claims']:
|
catalog = saam.get_catalog(saam_id)
|
||||||
catalog_id = first_datavalue(entity, 'P4709')
|
elif 'P4709' in entity['claims']:
|
||||||
catalog = barnesfoundation.get_catalog(catalog_id)
|
catalog_id = first_datavalue(entity, 'P4709')
|
||||||
elif catalog_url and 'www.dia.org' in catalog_url:
|
catalog = barnesfoundation.get_catalog(catalog_id)
|
||||||
catalog = dia.get_catalog(catalog_url)
|
elif catalog_url and 'www.dia.org' in catalog_url:
|
||||||
elif catalog_url and 'www.rijksmuseum.nl' in catalog_url:
|
catalog = dia.get_catalog(catalog_url)
|
||||||
catalog = rijksmuseum.get_catalog(catalog_url)
|
elif catalog_url and 'www.rijksmuseum.nl' in catalog_url:
|
||||||
elif catalog_url and 'www.npg.org.uk' in catalog_url:
|
catalog = rijksmuseum.get_catalog(catalog_url)
|
||||||
catalog = npg.get_catalog(catalog_url)
|
elif catalog_url and 'www.npg.org.uk' in catalog_url:
|
||||||
elif catalog_url and 'www.museodelprado.es' in catalog_url:
|
catalog = npg.get_catalog(catalog_url)
|
||||||
catalog = museodelprado.get_catalog(catalog_url)
|
elif catalog_url and 'www.museodelprado.es' in catalog_url:
|
||||||
|
catalog = museodelprado.get_catalog(catalog_url)
|
||||||
|
|
||||||
if not catalog and catalog_ids:
|
if not catalog and catalog_ids:
|
||||||
for property_id in sorted(catalog_ids):
|
for property_id in sorted(catalog_ids):
|
||||||
if property_id == 'P350':
|
if property_id == 'P350':
|
||||||
continue # RKDimages ID
|
continue # RKDimages ID
|
||||||
value = first_datavalue(entity, property_id)
|
value = first_datavalue(entity, property_id)
|
||||||
detail = wd_catalog.lookup(property_id, value)
|
detail = wd_catalog.lookup(property_id, value)
|
||||||
try:
|
try:
|
||||||
html = get_catalog_page(property_id, value)
|
html = get_catalog_page(property_id, value)
|
||||||
except requests.exceptions.SSLError:
|
except requests.exceptions.SSLError:
|
||||||
continue # ignore this error
|
continue # ignore this error
|
||||||
description = get_description_from_page(html)
|
description = get_description_from_page(html)
|
||||||
if not description:
|
if not description:
|
||||||
continue
|
continue
|
||||||
catalog = {
|
catalog = {
|
||||||
'institution': detail['label'],
|
'institution': detail['label'],
|
||||||
'description': description,
|
'description': description,
|
||||||
}
|
}
|
||||||
|
except requests.exceptions.ReadTimeout:
|
||||||
|
pass
|
||||||
|
|
||||||
return render_template('item.html',
|
return render_template('item.html',
|
||||||
qid=qid,
|
qid=qid,
|
||||||
|
|
Loading…
Reference in a new issue