#!/usr/bin/python3
"""Backup list of books from Goodreads."""

import argparse
import configparser
import os
import sys
import time
from datetime import date
from urllib.parse import urljoin

# Playwright's TimeoutError is aliased so it does not shadow the builtin.
from playwright.sync_api import (
    Page,
    Playwright,
    TimeoutError as PlaywrightTimeoutError,
    sync_playwright,
)

refresh_backup = True
today = date.today().isoformat()  # current date in ISO format

# Playwright timeouts are expressed in milliseconds.
EXPORT_CLEAR_TIMEOUT_MS = 30 * 1000
EXPORT_READY_TIMEOUT_MS = 15 * 60 * 1000
EXPORT_LINK_TIMEOUT_MS = 2 * 60 * 1000
DOWNLOAD_EVENT_TIMEOUT_MS = 30 * 1000

# Fallback HTTP download: number of attempts and pause between them (seconds).
DOWNLOAD_RETRY_ATTEMPTS = 60
DOWNLOAD_RETRY_DELAY_S = 2

config_file_path = os.path.expanduser(
    os.path.join(os.getenv("XDG_CONFIG_HOME", "~/.config"), "goodreads", "config")
)
# Explicit check instead of `assert`: assertions are stripped under `python -O`,
# and ConfigParser.read() silently ignores files it cannot open.
if not os.path.exists(config_file_path):
    raise FileNotFoundError(f"Goodreads config file not found: {config_file_path}")
config = configparser.ConfigParser()
config.read(config_file_path)

script_dir = os.path.dirname(os.path.abspath(__file__))


def login(page: Page) -> None:
    """Login to Goodreads.

    Fills the sign-in form with the ``email`` and ``password`` values from the
    ``[login]`` section of the config file and submits it.
    """
    page.get_by_role("link", name="Sign In").click()
    page.get_by_role("button", name="Sign in with email").click()
    page.get_by_label("Email").fill(config.get("login", "email"))
    page.get_by_label("Password").fill(config.get("login", "password"))
    page.get_by_role("checkbox", name="Keep me signed in").check()
    try:
        page.get_by_label("Sign in").click()
    except PlaywrightTimeoutError:
        # The submit can succeed but never reach Playwright's expected navigation state.
        pass


def _request_fresh_export(page: Page) -> None:
    """Click "Export Library" if it is available and wait for the new export link."""
    print("backup requested")
    export_button = page.get_by_role("button", name="Export Library")
    if export_button.count() > 0 and export_button.first.is_visible():
        export_button.first.click()
        print("waiting for export to be ready...")
        # JS clears #exportFile when generation starts, then populates it when ready.
        export_file_link = page.locator("#exportFile a")
        try:
            export_file_link.wait_for(state="hidden", timeout=EXPORT_CLEAR_TIMEOUT_MS)
        except PlaywrightTimeoutError:
            # Best effort: the old link was not seen disappearing; still wait for
            # a visible link below.
            pass
        export_file_link.wait_for(state="visible", timeout=EXPORT_READY_TIMEOUT_MS)
        print("export ready")
    else:
        print("Export Library button not found, using latest ready export link")


def _download_export(page: Page, save_to: str) -> None:
    """Save the export linked from the current page to ``save_to``.

    First tries a browser download triggered by clicking the link. If no
    download event arrives in time, falls back to fetching the link's URL
    through the browser context's request API, retrying until it succeeds.

    Raises:
        RuntimeError: if the fallback is needed but the link has no ``href``,
            or if every fallback attempt returns a non-OK status.
    """
    print("download export")
    export_link = page.get_by_role("link", name="Your export from").first
    export_link.wait_for(state="visible", timeout=EXPORT_LINK_TIMEOUT_MS)
    print(export_link.text_content())
    href = export_link.get_attribute("href")
    # Resolve against the current page URL so a relative href still works.
    download_url = urljoin(page.url, href) if href else None

    try:
        with page.expect_download(timeout=DOWNLOAD_EVENT_TIMEOUT_MS) as download_info:
            export_link.click()
        download = download_info.value
        download.save_as(save_to)
    except PlaywrightTimeoutError:
        if not download_url:
            raise RuntimeError("Could not find export download URL")
        response = None
        for _ in range(DOWNLOAD_RETRY_ATTEMPTS):
            response = page.context.request.get(download_url)
            if response.ok:
                with open(save_to, "wb") as fh:
                    fh.write(response.body())
                break
            time.sleep(DOWNLOAD_RETRY_DELAY_S)
        else:
            # Only reached when no attempt succeeded; narrows the type for the message.
            assert response is not None
            raise RuntimeError(f"Export download failed with status {response.status}")


def run_backup(page: Page) -> None:
    """Run backup.

    Opens the Goodreads import/export page, optionally requests a fresh export
    (when the module-level ``refresh_backup`` flag is true), and saves the CSV
    as ``<today>_goodreads_library_export.csv`` in the directory configured
    under ``[backup] dir``.
    """
    import_url = "https://www.goodreads.com/review/import"
    page.goto(import_url)

    if refresh_backup:
        _request_fresh_export(page)

    backup_dir = config.get("backup", "dir")
    save_to = os.path.join(backup_dir, f"{today}_goodreads_library_export.csv")
    _download_export(page, save_to)


def has_auth(path: str) -> bool:
    """Check if auth.json exists and is non-empty."""
    return os.path.exists(path) and os.path.getsize(path) > 0


def run(
    playwright: Playwright,
    headless: bool = True,
    har_path: str | None = None,
    mode: str = "backup",
) -> int:
    """Download export.

    Args:
        playwright: Active Playwright instance used to launch Chromium.
        headless: Run the browser without a visible window.
        har_path: If given, record a HAR file (minimal mode) at this path.
        mode: ``"backup"`` to log in if needed and download the export,
            ``"login_only"`` to log in and save the session without exporting,
            or ``"check_login"`` to only report whether the saved session is
            logged in.

    Returns:
        Process exit code. In ``"check_login"`` mode, 0 when logged in and 1
        otherwise; in the other modes, 0 on success.
    """
    auth_json = os.path.join(script_dir, "auth.json")

    # Build the context options once instead of branching over every combination.
    context_options: dict[str, str] = {}
    if has_auth(auth_json):
        context_options["storage_state"] = auth_json
    if har_path:
        context_options["record_har_path"] = har_path
        context_options["record_har_mode"] = "minimal"

    browser = playwright.chromium.launch(headless=headless)
    try:
        context = browser.new_context(**context_options)
        try:
            page = context.new_page()
            page.goto("https://www.goodreads.com/")
            logged_in = not page.get_by_role("link", name="Sign In").is_visible()

            if mode == "check_login":
                print("logged in" if logged_in else "not logged in")
                return 0 if logged_in else 1

            if not logged_in:
                print("not logged in, logging in...")
                login(page)

            if mode == "login_only":
                print("login complete")
            else:
                run_backup(page)

            page.close()
            # Persist the session only after a successful run.
            context.storage_state(path=auth_json)
            return 0
        finally:
            # Always close the context, even on failure: the HAR recording is
            # only written to disk when the context is closed.
            context.close()
    finally:
        browser.close()


def main() -> int:
    """Parse command-line arguments and run the selected mode.

    Returns:
        The exit code produced by ``run``.
    """
    parser = argparse.ArgumentParser(description="Backup list of books from Goodreads.")
    parser.add_argument(
        "--no-headless", action="store_true", help="run browser with a visible window"
    )
    parser.add_argument(
        "--har-path",
        help="record a HAR file for this run (includes login/export requests)",
    )
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--check-login",
        action="store_true",
        help="check whether current session is logged in and exit",
    )
    mode_group.add_argument(
        "--login-only",
        action="store_true",
        help="perform login and save auth state without exporting CSV",
    )
    args = parser.parse_args()

    mode = "backup"
    if args.check_login:
        mode = "check_login"
    elif args.login_only:
        mode = "login_only"

    with sync_playwright() as playwright:
        return run(
            playwright,
            headless=not args.no_headless,
            har_path=args.har_path,
            mode=mode,
        )


if __name__ == "__main__":
    sys.exit(main())