From a4b10945cee094e7f1ee43fd43f9cba0c2490cc4 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 7 Oct 2019 10:39:26 +0100 Subject: [PATCH] Parse catalog HTML as bytes --- app.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app.py b/app.py index 7f80622..656565c 100755 --- a/app.py +++ b/app.py @@ -447,11 +447,11 @@ def get_catalog_page(property_id, value): filename = f'cache/{property_id}_{catalog_id}.html' if os.path.exists(filename): - html = open(filename).read() + html = open(filename, 'rb').read() else: r = requests.get(url, headers={'User-Agent': user_agent}, timeout=2) - html = r.text - open(filename, 'w').write(html) + html = r.content + open(filename, 'wb').write(html) return html @@ -460,13 +460,13 @@ def get_catalog_url(url): filename = 'cache/' + md5_filename if os.path.exists(filename): - html = open(filename).read() + html = open(filename, 'rb').read() else: r = relaxed_ssl.get(url, headers={'User-Agent': user_agent}, timeout=2) - html = r.text - open(filename, 'w').write(html) + html = r.content + open(filename, 'wb').write(html) return html