"""Waste collection schedules.""" import os import re from collections import defaultdict from datetime import date, datetime, time, timedelta import httpx import lxml.html from . import uk_time from .event import Event from .utils import make_waste_dir ttl_hours = 12 async def get_html(data_dir: str, postcode: str, uprn: str) -> str: """Get waste schedule.""" now = datetime.now() waste_dir = os.path.join(data_dir, "waste") make_waste_dir(data_dir) existing_data = os.listdir(waste_dir) existing = [f for f in existing_data if f.endswith(".html")] if existing: recent_filename = max(existing) recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html") delta = now - recent if existing and delta < timedelta(hours=ttl_hours): return open(os.path.join(waste_dir, recent_filename)).read() now_str = now.strftime("%Y-%m-%d_%H:%M") filename = f"{waste_dir}/{now_str}.html" forms_base_url = "https://forms.n-somerset.gov.uk" url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule" async with httpx.AsyncClient() as client: r = await client.post( url, data={ "PreviousHouse": "", "PreviousPostcode": "-", "Postcode": postcode, "SelectedUprn": uprn, }, ) form_post_html = r.text pattern = r'

Object moved to here<\/a>\.<\/h2>' m = re.search(pattern, form_post_html) if m: r = await client.get(forms_base_url + m.group(1)) html = r.text open(filename, "w").write(html) return html def parse_waste_schedule_date(day_and_month: str) -> date: """Parse waste schedule date.""" today = date.today() fmt = "%A %d %B %Y" d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date() if d < today: d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date() return d def parse(root: lxml.html.HtmlElement) -> list[Event]: """Parse waste schedule.""" tbody = root.find(".//table/tbody") assert tbody is not None by_date = defaultdict(list) for e_service, e_next_date, e_following in tbody: assert e_service.text and e_next_date.text and e_following.text service = e_service.text next_date = parse_waste_schedule_date(e_next_date.text) following_date = parse_waste_schedule_date(e_following.text) by_date[next_date].append(service) by_date[following_date].append(service) return [ Event( name="waste_schedule", date=uk_time(d, time(6, 30)), title="Backwell: " + ", ".join(services), ) for d, services in by_date.items() ]