Handle Google CAPTCHA by opening a headed browser for manual solve

Also wait for [data-attrid="Price"] instead of #center_col to ensure the finance widget has rendered before parsing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0f77442fd4
commit
3ab79893c3
1 changed file with 12 additions and 4 deletions
|
|
@@ -96,18 +96,26 @@ class Index:
|
||||||
accept_cookies(page)
|
accept_cookies(page)
|
||||||
stay_signed_out(page)
|
stay_signed_out(page)
|
||||||
|
|
||||||
expect(page.locator("#center_col")).to_be_visible()
|
if page.locator("#recaptcha, #captcha-form").count() > 0:
|
||||||
|
captcha_url = page.url
|
||||||
|
context.close()
|
||||||
|
browser.close()
|
||||||
|
|
||||||
|
print("Google is showing a CAPTCHA. Solve it in the browser window...", flush=True)
|
||||||
|
browser = playwright.chromium.launch(headless=False)
|
||||||
|
context = browser.new_context(storage_state=auth_file)
|
||||||
|
page = context.new_page()
|
||||||
|
page.goto(captcha_url, wait_until="domcontentloaded")
|
||||||
|
|
||||||
|
page.wait_for_selector('[data-attrid="Price"]', timeout=120000)
|
||||||
|
|
||||||
html = page.content()
|
html = page.content()
|
||||||
context.storage_state(path=auth_file)
|
|
||||||
filename = data_filename("serp")
|
filename = data_filename("serp")
|
||||||
with open(filename, "w") as out:
|
with open(filename, "w") as out:
|
||||||
out.write(html)
|
out.write(html)
|
||||||
|
|
||||||
self.parse_html(html)
|
self.parse_html(html)
|
||||||
|
|
||||||
page.close()
|
|
||||||
|
|
||||||
context.storage_state(path=auth_file)
|
context.storage_state(path=auth_file)
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue