Fix a bug in Smithsonian American Art Museum parsing.
This commit is contained in:
		
							parent
							
								
									ac570b05d2
								
							
						
					
					
						commit
						d030f745de
					
				| 
						 | 
					@ -21,6 +21,8 @@ def parse_html(html):
 | 
				
			||||||
    ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
 | 
					    ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ul = root.find('.//ul[@class="ontology-list"]')
 | 
					    ul = root.find('.//ul[@class="ontology-list"]')
 | 
				
			||||||
 | 
					    if ul is None:
 | 
				
			||||||
 | 
					        return {'ld': ld, 'keywords': []}
 | 
				
			||||||
    assert ul.tag == 'ul'
 | 
					    assert ul.tag == 'ul'
 | 
				
			||||||
    keywords = [li.text for li in ul]
 | 
					    keywords = [li.text for li in ul]
 | 
				
			||||||
    return {'ld': ld, 'keywords': keywords}
 | 
					    return {'ld': ld, 'keywords': keywords}
 | 
				
			||||||
| 
						 | 
					@ -29,10 +31,10 @@ def get_catalog(saam_id):
 | 
				
			||||||
    data = parse_html(get_html(saam_id))
 | 
					    data = parse_html(get_html(saam_id))
 | 
				
			||||||
    ret = {
 | 
					    ret = {
 | 
				
			||||||
        'institution': 'Smithsonian American Art Museum',
 | 
					        'institution': 'Smithsonian American Art Museum',
 | 
				
			||||||
        'keywords': data['keywords'],
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    if data['keywords']:
 | 
				
			||||||
 | 
					        ret['keywords'] = data['keywords']
 | 
				
			||||||
    if 'description' in data['ld']:
 | 
					    if 'description' in data['ld']:
 | 
				
			||||||
        ret['description'] = data['ld']['description']
 | 
					        ret['description'] = data['ld']['description']
 | 
				
			||||||
    return ret
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return ret if 'description' in ret or 'keywords' in ret else {}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in a new issue