diff --git a/backup.py b/backup.py index c1f7d35..45fea0f 100644 --- a/backup.py +++ b/backup.py @@ -68,7 +68,7 @@ def run_backup(page: Page) -> None: def run(playwright: Playwright) -> None: """Download export.""" - browser = playwright.chromium.launch(headless=True) + browser = playwright.chromium.launch(headless=False) auth_json = os.path.join(script_dir, "auth.json") context = browser.new_context(storage_state=auth_json) page = context.new_page() diff --git a/get.py b/get.py deleted file mode 100755 index e654472..0000000 --- a/get.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/python3 - -import requests -from http.cookiejar import LWPCookieJar -import os -import re -import lxml.html -from random import shuffle -from time import sleep - -re_recommend = re.compile( - ' ' -) - -s = requests.Session() - -cookie_dir = "/home/edward/lib/cookies" -cookie_file = os.path.join(cookie_dir, "goodreads") - -cj = LWPCookieJar(cookie_file) -if os.path.exists(cookie_file): - cj.load() -s.cookies = cj - - -def login(): - sign_in_page = "https://www.goodreads.com/user/sign_in" - page = s.get(sign_in_page).text - open("sign_in.html", "w").write(page) - if '"name":"Edward Betts"' in page: - return # already signed in - - re_token = re.compile( - '' - ) - re_n = re.compile("") - - token = re_token.search(page).group(1) - - data = { - "utf8": "\u2713", - "authenticity_token": token, - "user[email]": "edward@4angle.com", - "user[password]": "8V8~9:3~U!Ly", - "remember_me": 1, - "next": "Sign in", - "n": re_n.search(page).group(1), - } - - print(token) - print(data["n"]) - - r = s.post(sign_in_page, data=data, headers={"referer": sign_in_page}) - - open("signed_in.html", "w").write(r.text) - - root = lxml.html.fromstring(r.content) - flash = root.find_class("flash") - if flash: - print("flash:", flash[0].text) - - cj.save(ignore_discard=True) - - -def get_index(): - # url = 'https://www.goodreads.com/recommendations' - url = "https://www.goodreads.com/recommendations/?recs_current_view=list" - - r = s.get(url) - open("recommendations.html", "w").write(r.text) - - -def get_individual(): - for line in open("recommendations.html"): - if "actionLinkLite" not in line: - continue - m = re_recommend.match(line) - if m: - yield m.groups() - - -# art = 'https://www.goodreads.com/recommendations/genre/art' -login() -get_index() -recommend_list = list(get_individual()) -shuffle(recommend_list) - -headers = {"Accept": "text/html"} - -for a, b, c in recommend_list: - print((b, c)) - url = "https://www.goodreads.com" + a - - r = s.get(url, headers=headers) - filename = os.path.join(b, c + ".html") - open(filename, "w").write(r.text) - sleep(0.5) diff --git a/parse.py b/parse.py deleted file mode 100755 index f0f6294..0000000 --- a/parse.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/python3 -import os -import re -import lxml.html -import jinja2 -import json -import sys -import requests -from time import sleep -from pprint import pprint - -parser = lxml.html.HTMLParser(encoding="utf-8") - -env = jinja2.Environment(loader=jinja2.FileSystemLoader("templates")) - -re_book = re.compile( - "function refreshGroupBox(group_id, book_id) \{(.*?)\n *\}", re.DOTALL -) -re_book = re.compile( - "function refreshGroupBox\(group_id, book_id\) \{(.*?)\n *\}", re.DOTALL -) - -re_tip = re.compile(rb'var newTip = new Tip\(\$\(\'[^\']+\'\), "(.*?)", {') - -dirs = ["shelf", "genre"] -start = "https://www.goodreads.com/book/show/" - -existing = {book["title"] for book in json.load(open("calibre_book_list.json"))} - - -def iter_books(): - for d in dirs: - for f in sorted(os.listdir(d)): - filename = os.path.join(d, f) - root = lxml.html.parse(filename, parser=parser).getroot() - for div in root.find_class("bookInformation"): - link = div.find(".//a") - rating = div.find('.//span[@class="minirating"]') - description = div.find('./div[@class="bookDescription"]') - r = rating[0].tail.strip() - - cover = div.getnext().find(".//img").get("src") - - book = { - "title": link.text, - "url": link.get("href"), - "rating": r, - "r": float(r[:3]), - "cover": cover, - "authors": [a.text for a in div.find_class("authorName")], - } - if description is not None: - index = 1 if len(description) == 3 else 0 - book["description"] = description[index].text - yield d, f, book - continue - - # print(filename) - for line in open(filename, "rb"): - if b"var newTip" not in line: - continue - print(line) - m = re_tip.search(line) - tip = m.group(1).decode("unicode_escape").replace("\/", "/") - # tip = m.group(1) # .replace('\/', '/') - # print(tip) - if '