Add Smithsonian American Art Museum lookup code
This commit is contained in:
parent
0cef952f17
commit
d123c23270
32
depicts/saam.py
Executable file
32
depicts/saam.py
Executable file
|
@ -0,0 +1,32 @@
|
|||
#!/usr/bin/python3
|
||||
|
||||
import requests
|
||||
import lxml.html
|
||||
import json
|
||||
import os
|
||||
from pprint import pprint
|
||||
|
||||
def get_html(saam_id):
|
||||
filename = f'cache/saam_{saam_id}.html'
|
||||
url = 'http://americanart.si.edu/collections/search/artwork/'
|
||||
|
||||
if os.path.exists(filename):
|
||||
html = open(filename).read()
|
||||
else:
|
||||
r = requests.get(url, params={'id': saam_id})
|
||||
html = r.text
|
||||
open(filename, 'w').write(html)
|
||||
|
||||
return html
|
||||
|
||||
def parse_html(html):
|
||||
root = lxml.html.fromstring(html)
|
||||
ld = json.loads(root.findtext('.//script[@type="application/ld+json"]'))
|
||||
|
||||
ul = root.find('.//ul[@class="ontology-list"]')
|
||||
assert ul.tag == 'ul'
|
||||
keywords = [li.text for li in ul]
|
||||
return {'ld': ld, 'keywords': keywords}
|
||||
|
||||
def get_catalog(saam_id):
|
||||
return parse_html(get_html(saam_id))
|
Loading…
Reference in a new issue