From 47aaa52320badd17a1ea7f18953b160f62f4866c Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Fri, 3 Apr 2026 15:37:11 +0100 Subject: [PATCH] Fix item_number including &SoldByNewegg=1 suffix Truncate at the first & when extracting the item number from the URL. Co-Authored-By: Claude Sonnet 4.6 --- crawl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl.py b/crawl.py index 72374c1..57e0de6 100755 --- a/crawl.py +++ b/crawl.py @@ -225,7 +225,7 @@ def parse_page(filename: str) -> list[Item]: for item in root.xpath("//div[contains(@class, 'item-container')]"): title_link = item.find('.//a[@class="item-title"]') href = title_link.get("href") - item_number = href[href.find("Item=") + 5 :] + item_number = href[href.find("Item=") + 5 :].split("&")[0] title = title_link.text_content() # compare = item.find('.//div[@class="item-compare-box"]//input')