Add Smithsonian American Art Museum lookup code

This commit is contained in:
Edward Betts 2019-09-25 13:44:39 +01:00
parent 0cef952f17
commit d123c23270

32
depicts/saam.py Executable file
View file

@ -0,0 +1,32 @@
#!/usr/bin/python3
import requests
import lxml.html
import json
import os
from pprint import pprint
def get_html(saam_id):
filename = f'cache/saam_{saam_id}.html'
url = 'http://americanart.si.edu/collections/search/artwork/'
if os.path.exists(filename):
html = open(filename).read()
else:
r = requests.get(url, params={'id': saam_id})
html = r.text
open(filename, 'w').write(html)
return html
def parse_html(html):
root = lxml.html.fromstring(html)
ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
ul = root.find('.//ul[@class="ontology-list"]')
assert ul.tag == 'ul'
keywords = [li.text for li in ul]
return {'ld': ld, 'keywords': keywords}
def get_catalog(saam_id):
return parse_html(get_html(saam_id))