Handle Google CAPTCHA by opening headed browser for manual solve
Also wait for `[data-attrid="Price"]` instead of `#center_col` to ensure the finance widget has rendered before parsing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0f77442fd4
commit
3ab79893c3
1 changed files with 12 additions and 4 deletions
|
|
@@ -96,18 +96,26 @@ class Index:
|
|||
accept_cookies(page)
|
||||
stay_signed_out(page)
|
||||
|
||||
expect(page.locator("#center_col")).to_be_visible()
|
||||
if page.locator("#recaptcha, #captcha-form").count() > 0:
|
||||
captcha_url = page.url
|
||||
context.close()
|
||||
browser.close()
|
||||
|
||||
print("Google is showing a CAPTCHA. Solve it in the browser window...", flush=True)
|
||||
browser = playwright.chromium.launch(headless=False)
|
||||
context = browser.new_context(storage_state=auth_file)
|
||||
page = context.new_page()
|
||||
page.goto(captcha_url, wait_until="domcontentloaded")
|
||||
|
||||
page.wait_for_selector('[data-attrid="Price"]', timeout=120000)
|
||||
|
||||
html = page.content()
|
||||
context.storage_state(path=auth_file)
|
||||
filename = data_filename("serp")
|
||||
with open(filename, "w") as out:
|
||||
out.write(html)
|
||||
|
||||
self.parse_html(html)
|
||||
|
||||
page.close()
|
||||
|
||||
context.storage_state(path=auth_file)
|
||||
context.close()
|
||||
browser.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue