Update code style

This commit is contained in:
Edward Betts 2024-04-17 10:02:26 +01:00
parent c1cbfd5f79
commit 99844005d6

59
get.py
View file

@ -1,13 +1,16 @@
#!/usr/bin/python3 #!/usr/bin/python3
import requests """Download shelves from goodreads."""
from http.cookiejar import LWPCookieJar
import os import os
import re import re
import lxml.html from http.cookiejar import LWPCookieJar
from random import shuffle from random import shuffle
from time import sleep from time import sleep
import lxml.html
import requests
re_recommend = re.compile( re_recommend = re.compile(
' <a class="actionLinkLite " href="(/recommendations/([^/]*?)/([^/]*?))">' ' <a class="actionLinkLite " href="(/recommendations/([^/]*?)/([^/]*?))">'
) )
@ -20,10 +23,11 @@ cookie_file = os.path.join(cookie_dir, "goodreads")
cj = LWPCookieJar(cookie_file) cj = LWPCookieJar(cookie_file)
if os.path.exists(cookie_file): if os.path.exists(cookie_file):
cj.load() cj.load()
s.cookies = cj s.cookies = cj # type: ignore
def login(): def login() -> None:
"""Login."""
sign_in_page = "https://www.goodreads.com/user/sign_in" sign_in_page = "https://www.goodreads.com/user/sign_in"
page = s.get(sign_in_page).text page = s.get(sign_in_page).text
open("sign_in.html", "w").write(page) open("sign_in.html", "w").write(page)
@ -33,9 +37,12 @@ def login():
re_token = re.compile( re_token = re.compile(
'<input type="hidden" name="authenticity_token" value="([^"]*?)" />' '<input type="hidden" name="authenticity_token" value="([^"]*?)" />'
) )
re_n = re.compile("<input name='n' type='hidden' value='(\d+)'>") re_n = re.compile(r"<input name='n' type='hidden' value='(\d+)'>")
m_n = re_n.search(page)
m_token = re_token.search(page)
token = re_token.search(page).group(1) assert m_token and m_n
token = m_token.group(1)
data = { data = {
"utf8": "\u2713", "utf8": "\u2713",
@ -44,7 +51,7 @@ def login():
"user[password]": "8V8~9:3~U!Ly", "user[password]": "8V8~9:3~U!Ly",
"remember_me": 1, "remember_me": 1,
"next": "Sign in", "next": "Sign in",
"n": re_n.search(page).group(1), "n": m_n.group(1),
} }
print(token) print(token)
@ -62,7 +69,8 @@ def login():
cj.save(ignore_discard=True) cj.save(ignore_discard=True)
def get_index(): def get_index() -> None:
"""Get index."""
# url = 'https://www.goodreads.com/recommendations' # url = 'https://www.goodreads.com/recommendations'
url = "https://www.goodreads.com/recommendations/?recs_current_view=list" url = "https://www.goodreads.com/recommendations/?recs_current_view=list"
@ -71,6 +79,7 @@ def get_index():
def get_individual(): def get_individual():
"""Get individual page."""
for line in open("recommendations.html"): for line in open("recommendations.html"):
if "actionLinkLite" not in line: if "actionLinkLite" not in line:
continue continue
@ -79,19 +88,25 @@ def get_individual():
yield m.groups() yield m.groups()
# art = 'https://www.goodreads.com/recommendations/genre/art' def main() -> None:
login() """Login and download shelves."""
get_index() # art = 'https://www.goodreads.com/recommendations/genre/art'
recommend_list = list(get_individual()) login()
shuffle(recommend_list) get_index()
recommend_list = list(get_individual())
shuffle(recommend_list)
headers = {"Accept": "text/html"} headers = {"Accept": "text/html"}
for a, b, c in recommend_list: for a, b, c in recommend_list:
print((b, c)) print((b, c))
url = "https://www.goodreads.com" + a url = "https://www.goodreads.com" + a
r = s.get(url, headers=headers) r = s.get(url, headers=headers)
filename = os.path.join(b, c + ".html") filename = os.path.join(b, c + ".html")
open(filename, "w").write(r.text) open(filename, "w").write(r.text)
sleep(0.5) sleep(0.5)
if __name__ == "__main__":
main()