Update code style

This commit is contained in:
Edward Betts 2024-04-17 10:02:26 +01:00
parent c1cbfd5f79
commit 99844005d6

59
get.py
View file

@ -1,13 +1,16 @@
#!/usr/bin/python3
import requests
from http.cookiejar import LWPCookieJar
"""Download shelves from goodreads."""
import os
import re
import lxml.html
from http.cookiejar import LWPCookieJar
from random import shuffle
from time import sleep
import lxml.html
import requests
re_recommend = re.compile(
' <a class="actionLinkLite " href="(/recommendations/([^/]*?)/([^/]*?))">'
)
@ -20,10 +23,11 @@ cookie_file = os.path.join(cookie_dir, "goodreads")
cj = LWPCookieJar(cookie_file)
if os.path.exists(cookie_file):
cj.load()
s.cookies = cj
s.cookies = cj # type: ignore
def login():
def login() -> None:
"""Login."""
sign_in_page = "https://www.goodreads.com/user/sign_in"
page = s.get(sign_in_page).text
open("sign_in.html", "w").write(page)
@ -33,9 +37,12 @@ def login():
re_token = re.compile(
'<input type="hidden" name="authenticity_token" value="([^"]*?)" />'
)
re_n = re.compile("<input name='n' type='hidden' value='(\d+)'>")
re_n = re.compile(r"<input name='n' type='hidden' value='(\d+)'>")
m_n = re_n.search(page)
m_token = re_token.search(page)
token = re_token.search(page).group(1)
assert m_token and m_n
token = m_token.group(1)
data = {
"utf8": "\u2713",
@ -44,7 +51,7 @@ def login():
"user[password]": "8V8~9:3~U!Ly",
"remember_me": 1,
"next": "Sign in",
"n": re_n.search(page).group(1),
"n": m_n.group(1),
}
print(token)
@ -62,7 +69,8 @@ def login():
cj.save(ignore_discard=True)
def get_index():
def get_index() -> None:
"""Get index."""
# url = 'https://www.goodreads.com/recommendations'
url = "https://www.goodreads.com/recommendations/?recs_current_view=list"
@ -71,6 +79,7 @@ def get_index():
def get_individual():
"""Get individual page."""
for line in open("recommendations.html"):
if "actionLinkLite" not in line:
continue
@ -79,19 +88,25 @@ def get_individual():
yield m.groups()
# art = 'https://www.goodreads.com/recommendations/genre/art'
login()
get_index()
recommend_list = list(get_individual())
shuffle(recommend_list)
def main() -> None:
"""Login and download shelves."""
# art = 'https://www.goodreads.com/recommendations/genre/art'
login()
get_index()
recommend_list = list(get_individual())
shuffle(recommend_list)
headers = {"Accept": "text/html"}
headers = {"Accept": "text/html"}
for a, b, c in recommend_list:
print((b, c))
url = "https://www.goodreads.com" + a
for a, b, c in recommend_list:
print((b, c))
url = "https://www.goodreads.com" + a
r = s.get(url, headers=headers)
filename = os.path.join(b, c + ".html")
open(filename, "w").write(r.text)
sleep(0.5)
r = s.get(url, headers=headers)
filename = os.path.join(b, c + ".html")
open(filename, "w").write(r.text)
sleep(0.5)
if __name__ == "__main__":
main()