Fix item_number including &SoldByNewegg=1 suffix

Truncate at the first & when extracting the item number from the URL, so that trailing query parameters such as &SoldByNewegg=1 are not included in item_number.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-03 15:37:11 +01:00 · 2026-04-03 15:37:11 +01:00 · 47aaa52320
commit 47aaa52320
parent 052b598069
1 changed file with 1 addition and 1 deletion
--- a/crawl.py
+++ b/crawl.py
@@ -225,7 +225,7 @@ def parse_page(filename: str) -> list[Item]:
    for item in root.xpath("//div[contains(@class, 'item-container')]"):
        title_link = item.find('.//a[@class="item-title"]')
        href = title_link.get("href")
-        item_number = href[href.find("Item=") + 5 :]
+        item_number = href[href.find("Item=") + 5 :].split("&")[0]
        title = title_link.text_content()
        #        compare = item.find('.//div[@class="item-compare-box"]//input')