Fix item_number including &SoldByNewegg=1 suffix
Truncate at the first & when extracting the item number from the URL.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
052b598069
commit
47aaa52320
1 changed file with 1 addition and 1 deletion
2
crawl.py
2
crawl.py
|
|
@@ -225,7 +225,7 @@ def parse_page(filename: str) -> list[Item]:
|
||||||
for item in root.xpath("//div[contains(@class, 'item-container')]"):
|
for item in root.xpath("//div[contains(@class, 'item-container')]"):
|
||||||
title_link = item.find('.//a[@class="item-title"]')
|
title_link = item.find('.//a[@class="item-title"]')
|
||||||
href = title_link.get("href")
|
href = title_link.get("href")
|
||||||
item_number = href[href.find("Item=") + 5 :]
|
item_number = href[href.find("Item=") + 5 :].split("&")[0]
|
||||||
title = title_link.text_content()
|
title = title_link.text_content()
|
||||||
|
|
||||||
# compare = item.find('.//div[@class="item-compare-box"]//input')
|
# compare = item.find('.//div[@class="item-compare-box"]//input')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue