From 99844005d67ac36ecc5c4bff174a286ed61b00d9 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Wed, 17 Apr 2024 10:02:26 +0100 Subject: [PATCH] Update code style --- get.py | 59 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/get.py b/get.py index e654472..150cc35 100755 --- a/get.py +++ b/get.py @@ -1,13 +1,16 @@ #!/usr/bin/python3 -import requests -from http.cookiejar import LWPCookieJar +"""Download shelves from goodreads.""" + import os import re -import lxml.html +from http.cookiejar import LWPCookieJar from random import shuffle from time import sleep +import lxml.html +import requests + re_recommend = re.compile( ' ' ) @@ -20,10 +23,11 @@ cookie_file = os.path.join(cookie_dir, "goodreads") cj = LWPCookieJar(cookie_file) if os.path.exists(cookie_file): cj.load() -s.cookies = cj +s.cookies = cj # type: ignore -def login(): +def login() -> None: + """Login.""" sign_in_page = "https://www.goodreads.com/user/sign_in" page = s.get(sign_in_page).text open("sign_in.html", "w").write(page) @@ -33,9 +37,12 @@ def login(): re_token = re.compile( '' ) - re_n = re.compile("") + re_n = re.compile(r"") + m_n = re_n.search(page) + m_token = re_token.search(page) - token = re_token.search(page).group(1) + assert m_token and m_n + token = m_token.group(1) data = { "utf8": "\u2713", @@ -44,7 +51,7 @@ def login(): "user[password]": "8V8~9:3~U!Ly", "remember_me": 1, "next": "Sign in", - "n": re_n.search(page).group(1), + "n": m_n.group(1), } print(token) @@ -62,7 +69,8 @@ def login(): cj.save(ignore_discard=True) -def get_index(): +def get_index() -> None: + """Get index.""" # url = 'https://www.goodreads.com/recommendations' url = "https://www.goodreads.com/recommendations/?recs_current_view=list" @@ -71,6 +79,7 @@ def get_index(): def get_individual(): + """Get individual page.""" for line in open("recommendations.html"): if "actionLinkLite" not in line: continue @@ -79,19 +88,25 @@ def get_individual(): yield m.groups() -# art = 'https://www.goodreads.com/recommendations/genre/art' -login() -get_index() -recommend_list = list(get_individual()) -shuffle(recommend_list) +def main() -> None: + """Login and download shelves.""" + # art = 'https://www.goodreads.com/recommendations/genre/art' + login() + get_index() + recommend_list = list(get_individual()) + shuffle(recommend_list) -headers = {"Accept": "text/html"} + headers = {"Accept": "text/html"} -for a, b, c in recommend_list: - print((b, c)) - url = "https://www.goodreads.com" + a + for a, b, c in recommend_list: + print((b, c)) + url = "https://www.goodreads.com" + a - r = s.get(url, headers=headers) - filename = os.path.join(b, c + ".html") - open(filename, "w").write(r.text) - sleep(0.5) + r = s.get(url, headers=headers) + filename = os.path.join(b, c + ".html") + open(filename, "w").write(r.text) + sleep(0.5) + + +if __name__ == "__main__": + main()