diff --git a/agenda/waste_schedule.py b/agenda/waste_schedule.py index 5e63423..e9b9994 100644 --- a/agenda/waste_schedule.py +++ b/agenda/waste_schedule.py @@ -2,6 +2,7 @@ import json import os +import re import typing from collections import defaultdict from datetime import date, datetime, timedelta @@ -41,9 +42,12 @@ async def get_html(data_dir: str, postcode: str, uprn: str) -> str: now_str = now.strftime("%Y-%m-%d_%H:%M") filename = f"{waste_dir}/{now_str}.html" + forms_base_url = "https://forms.n-somerset.gov.uk" + # url2 = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule/ViewSchedule" + url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule" async with httpx.AsyncClient() as client: r = await client.post( - "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule", + url, data={ "PreviousHouse": "", "PreviousPostcode": "-", @@ -51,6 +55,11 @@ async def get_html(data_dir: str, postcode: str, uprn: str) -> str: "SelectedUprn": uprn, }, ) + form_post_html = r.text + pattern = r'

Object moved to here<\/a>\.<\/h2>' + m = re.search(pattern, form_post_html) + if m: + r = await client.get(forms_base_url + m.group(1)) html = r.text open(filename, "w").write(html) return html @@ -139,7 +148,7 @@ async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response: _uprn = str(uprn).zfill(12) - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=10) as client: # Initialise form payload = {"servicetypeid": "7dce896c-b3ba-ea11-a812-000d3a7f1cdc"} response = await client.get(