Handle Google CAPTCHA by opening a headed browser for manual solving

Also wait for [data-attrid="Price"] instead of #center_col to ensure
the finance widget has rendered before parsing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-18 12:35:03 +00:00
parent 0f77442fd4
commit 3ab79893c3

View file

@ -96,18 +96,26 @@ class Index:
accept_cookies(page)
stay_signed_out(page)
expect(page.locator("#center_col")).to_be_visible()
if page.locator("#recaptcha, #captcha-form").count() > 0:
captcha_url = page.url
context.close()
browser.close()
print("Google is showing a CAPTCHA. Solve it in the browser window...", flush=True)
browser = playwright.chromium.launch(headless=False)
context = browser.new_context(storage_state=auth_file)
page = context.new_page()
page.goto(captcha_url, wait_until="domcontentloaded")
page.wait_for_selector('[data-attrid="Price"]', timeout=120000)
html = page.content()
context.storage_state(path=auth_file)
filename = data_filename("serp")
with open(filename, "w") as out:
out.write(html)
self.parse_html(html)
page.close()
context.storage_state(path=auth_file)
context.close()
browser.close()