Parse catalog HTML as bytes

This commit is contained in:
Edward Betts 2019-10-07 10:39:26 +01:00
parent 807353303e
commit a4b10945ce

12
app.py
View file

@@ -447,11 +447,11 @@ def get_catalog_page(property_id, value):
filename = f'cache/{property_id}_{catalog_id}.html' filename = f'cache/{property_id}_{catalog_id}.html'
if os.path.exists(filename): if os.path.exists(filename):
html = open(filename).read() html = open(filename, 'rb').read()
else: else:
r = requests.get(url, headers={'User-Agent': user_agent}, timeout=2) r = requests.get(url, headers={'User-Agent': user_agent}, timeout=2)
html = r.text html = r.content
open(filename, 'w').write(html) open(filename, 'wb').write(html)
return html return html
@@ -460,13 +460,13 @@ def get_catalog_url(url):
filename = 'cache/' + md5_filename filename = 'cache/' + md5_filename
if os.path.exists(filename): if os.path.exists(filename):
html = open(filename).read() html = open(filename, 'rb').read()
else: else:
r = relaxed_ssl.get(url, r = relaxed_ssl.get(url,
headers={'User-Agent': user_agent}, headers={'User-Agent': user_agent},
timeout=2) timeout=2)
html = r.text html = r.content
open(filename, 'w').write(html) open(filename, 'wb').write(html)
return html return html