Bug fix: Smithsonian American Art Museum parsing.
This commit is contained in:
parent
ac570b05d2
commit
d030f745de
|
@@ -21,6 +21,8 @@ def parse_html(html):
|
||||||
ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
|
ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
|
||||||
|
|
||||||
ul = root.find('.//ul[@class="ontology-list"]')
|
ul = root.find('.//ul[@class="ontology-list"]')
|
||||||
|
if ul is None:
|
||||||
|
return {'ld': ld, 'keywords': []}
|
||||||
assert ul.tag == 'ul'
|
assert ul.tag == 'ul'
|
||||||
keywords = [li.text for li in ul]
|
keywords = [li.text for li in ul]
|
||||||
return {'ld': ld, 'keywords': keywords}
|
return {'ld': ld, 'keywords': keywords}
|
||||||
|
@@ -29,10 +31,10 @@ def get_catalog(saam_id):
|
||||||
data = parse_html(get_html(saam_id))
|
data = parse_html(get_html(saam_id))
|
||||||
ret = {
|
ret = {
|
||||||
'institution': 'Smithsonian American Art Museum',
|
'institution': 'Smithsonian American Art Museum',
|
||||||
'keywords': data['keywords'],
|
|
||||||
}
|
}
|
||||||
|
if data['keywords']:
|
||||||
|
ret['keywords'] = data['keywords']
|
||||||
if 'description' in data['ld']:
|
if 'description' in data['ld']:
|
||||||
ret['description'] = data['ld']['description']
|
ret['description'] = data['ld']['description']
|
||||||
return ret
|
|
||||||
|
|
||||||
|
|
||||||
|
return ret if 'description' in ret or 'keywords' in ret else {}
|
||||||
|
|
Loading…
Reference in a new issue