Bug fix saam parser
This commit is contained in:
parent
dc9989157c
commit
3191ae51b0
|
@ -18,17 +18,22 @@ def get_html(saam_id):
|
|||
|
||||
def parse_html(html):
    """Extract the JSON-LD metadata and ontology keywords from a page.

    Args:
        html: HTML markup accepted by ``lxml.html.fromstring``.

    Returns:
        A dict with two keys:

        * ``'ld'`` -- the decoded content of the first
          ``<script type="application/ld+json">`` element, or ``{}`` when
          the page has no such element.
        * ``'keywords'`` -- the ``.text`` of each child of the first
          ``<ul class="ontology-list">`` element, or ``[]`` when that list
          (or the JSON-LD script) is absent.

    Raises:
        json.JSONDecodeError: if the JSON-LD script text is not valid JSON.
    """
    root = lxml.html.fromstring(html)

    # findtext() returns None when nothing matches; json.loads(None) would
    # raise TypeError, so the lookup must be checked before it is decoded.
    ld_json = root.findtext('.//script[@type="application/ld+json"]')
    if ld_json is None:
        return {'ld': {}, 'keywords': []}
    ld = json.loads(ld_json)

    ul = root.find('.//ul[@class="ontology-list"]')
    if ul is None:
        # Keep the return shape identical to the success path so callers
        # can index 'ld' / 'keywords' unconditionally.
        return {'ld': ld, 'keywords': []}

    keywords = [li.text for li in ul]
    return {'ld': ld, 'keywords': keywords}
|
||||
|
||||
def get_catalog(saam_id):
|
||||
data = parse_html(get_html(saam_id))
|
||||
if not data:
|
||||
return {}
|
||||
ret = {
|
||||
'institution': 'Smithsonian American Art Museum',
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue