diff --git a/crawl.py b/crawl.py index daf36d5..9e24331 100755 --- a/crawl.py +++ b/crawl.py @@ -4,7 +4,6 @@ import collections import configparser import decimal -import sqlite3 import logging import os import random @@ -74,8 +73,7 @@ def get_product_list(n: str, page: Optional[int] = None) -> str: "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})" ) pg = context.new_page() - pg.goto(url, wait_until="domcontentloaded", timeout=60000) - pg.wait_for_selector("div.item-container", timeout=60000) + pg.goto(url, wait_until="networkidle", timeout=60000) if "areyouahuman" in pg.url: logger.info("bot detection triggered, simulating mouse movement...") @@ -335,46 +333,6 @@ def group_items( yield {"name": name, "label": label, "items": items} -db_path = os.path.join(data_root, "prices.db") -sqlite3.register_adapter(Decimal, str) - - -def init_db(conn: sqlite3.Connection) -> None: - """Create tables if they don't exist.""" - conn.execute(""" - CREATE TABLE IF NOT EXISTS price ( - item_number TEXT NOT NULL, - title TEXT NOT NULL, - size_gb NUMERIC NOT NULL, - price NUMERIC NOT NULL, - category TEXT NOT NULL, - seen_at DATE NOT NULL, - PRIMARY KEY (item_number, seen_at) - ) - """) - conn.commit() - - -def record_prices(data: list[Grouped], today: date) -> None: - """Record today's prices to the database.""" - conn = sqlite3.connect(db_path) - init_db(conn) - for cat in data: - for item in cat["items"]: - conn.execute( - """ - INSERT OR REPLACE INTO price - (item_number, title, size_gb, price, category, seen_at) - VALUES (?, ?, ?, ?, ?, ?) - """, - (item["number"], item["title"], item["size_gb"], - item["price"], cat["name"], today), - ) - conn.commit() - conn.close() - logger.info("prices recorded", db=db_path, date=today) - - def get_build_root() -> str: """Read build_dir from ~/.config/newegg-hdd/config, fall back to output/.""" config_path = os.path.expanduser("~/.config/newegg-hdd/config") @@ -395,7 +353,6 @@ def build() -> None: env = Environment(loader=FileSystemLoader(templates_dir)) data = list(group_items(today)) - record_prices(data, today) index = os.path.join(build_root, "index.html") index_template = env.get_template("index.html") page = index_template.render(best=data, today=today)