# agenda/agenda/waste_schedule.py

"""Waste collection schedules."""

import json
import os
import re
import typing
from collections import defaultdict
from datetime import date, datetime, time, timedelta

import httpx
import lxml.html

from . import uk_time
from .types import Event

# Maximum age, in hours, of a cached schedule file before it is fetched again.
ttl_hours = 12


def make_waste_dir(data_dir: str) -> None:
    """Make the ``waste`` directory under *data_dir* if it is missing.

    The call is idempotent: an already existing directory is left untouched.
    Missing parent directories (including *data_dir* itself) are created too.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(os.path.join(data_dir, "waste"), exist_ok=True)


async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get the North Somerset waste schedule page, using a file cache.

    The newest ``*.html`` file in ``<data_dir>/waste`` is returned when it is
    younger than ``ttl_hours``. Otherwise the council form is posted with
    *postcode* and *uprn*, and the resulting page is saved under a
    timestamped file name and returned.

    NOTE(review): cache file names carry only a timestamp, so the cache is
    shared between every postcode/uprn that uses the same *data_dir*.

    Raises:
        ValueError: if the newest ``.html`` file name is not a timestamp in
            the ``%Y-%m-%d_%H:%M.html`` format.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        # File names are zero-padded timestamps, so the greatest name is the
        # most recent download.
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            cache_path = os.path.join(waste_dir, recent_filename)
            with open(cache_path, encoding="utf-8") as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    # url2 = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule/ViewSchedule"
    url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        form_post_html = r.text
        # The form answers with an "Object moved" page; follow its link by
        # hand to reach the actual schedule.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, form_post_html)
        if m:
            r = await client.get(forms_base_url + m.group(1))
    html = r.text

    # Explicit encoding: the page may contain characters that the locale's
    # default encoding cannot represent.
    with open(filename, "w", encoding="utf-8") as out:
        out.write(html)
    return html


def parse_waste_schedule_date(day_and_month: str) -> date:
    """Parse a waste schedule date that has no year, e.g. ``Monday 6 January``.

    The year is inferred: the result is the first occurrence of the given day
    and month that falls on or after today. The weekday name must be present
    but is not checked against the resulting date.

    Raises:
        ValueError: if *day_and_month* does not match ``%A %d %B``.
    """
    today = date.today()
    date_format = "%A %d %B %Y"

    last_error: ValueError | None = None
    # Usually this year or the next one matches. "29 February" only exists in
    # leap years, which can be up to eight years apart, so look that far ahead
    # instead of failing on the first non-leap year.
    for year in range(today.year, today.year + 9):
        try:
            d = datetime.strptime(f"{day_and_month} {year}", date_format).date()
        except ValueError as err:
            last_error = err
            continue
        if d >= today:
            return d

    raise ValueError(
        f"cannot parse waste schedule date: {day_and_month!r}"
    ) from last_error


def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Build one event per collection date from the schedule page.

    The first ``table/tbody`` found under *root* is read. Each of its rows
    must hold exactly three cells with text: the service name, the next
    collection date and the following collection date. Services that share a
    date are merged into a single event title.
    """
    tbody = root.find(".//table/tbody")
    assert tbody is not None

    services_by_date = defaultdict(list)
    for row in tbody:
        service_cell, next_cell, following_cell = row
        assert service_cell.text and next_cell.text and following_cell.text
        service_name = service_cell.text
        # Parse both dates before recording either of them.
        upcoming = parse_waste_schedule_date(next_cell.text)
        after_that = parse_waste_schedule_date(following_cell.text)
        for collection_day in (upcoming, after_that):
            services_by_date[collection_day].append(service_name)

    events: list[Event] = []
    for collection_day, service_names in services_by_date.items():
        title = "Backwell: " + ", ".join(service_names)
        # Each event is placed at 06:30 on its collection day via uk_time.
        when = uk_time(collection_day, time(6, 30))
        events.append(Event(name="waste_schedule", date=when, title=title))
    return events


# JSON-decoded "data" list taken from the Bristol collection dates response.
BristolSchedule = list[dict[str, typing.Any]]


async def get_bristol_data(data_dir: str, uprn: str) -> BristolSchedule:
    """Get Bristol Waste schedule, with cache.

    Cached responses live in ``<data_dir>/waste`` as
    ``<timestamp>_<uprn>.json``. The newest one for *uprn* is used when it is
    younger than ``ttl_hours``; otherwise fresh data is downloaded and stored.
    If the download hits a read timeout, the newest cached response is
    returned regardless of its age.

    Raises:
        httpx.ReadTimeout: if the download times out and no cached response
            exists for *uprn*.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    suffix = f"_{uprn}.json"
    existing = [f for f in os.listdir(waste_dir) if f.endswith(suffix)]
    # File names start with a zero-padded timestamp, so the greatest name is
    # the most recent download.
    recent_filename = max(existing) if existing else None

    def get_from_recent(filename: str) -> BristolSchedule:
        # The cache is written as raw bytes, so read it back as bytes and let
        # the json module detect the encoding.
        with open(os.path.join(waste_dir, filename), "rb") as cached:
            json_data = json.load(cached)
        return typing.cast(BristolSchedule, json_data["data"])

    if recent_filename is not None:
        recent = datetime.strptime(recent_filename, f"%Y-%m-%d_%H:%M_{uprn}.json")
        if now - recent < timedelta(hours=ttl_hours):
            return get_from_recent(recent_filename)

    try:
        r = await get_bristol_gov_uk_data(uprn)
    except httpx.ReadTimeout:
        if recent_filename is None:
            # Nothing to fall back on: report the timeout itself.
            raise
        return get_from_recent(recent_filename)

    # Decode before caching so that a response without usable data is never
    # stored and then served from the cache until the TTL expires.
    data = typing.cast(BristolSchedule, r.json()["data"])

    with open(f'{waste_dir}/{now.strftime("%Y-%m-%d_%H:%M")}_{uprn}.json', "wb") as out:
        out.write(r.content)

    return data


async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response:
    """Fetch the collection dates response for *uprn* from Bristol City Council.

    Three requests are sent in order over one client with a 20 second
    timeout: a GET to the council's form portal, a POST of the UPRN to the
    ``DetailedLLPG`` endpoint and a POST to ``NextCollectionDates``. Only the
    response of the last request is returned; the first two are discarded
    (presumably they set up state the API expects -- TODO confirm).
    """
    # The API is sent the UPRN left-padded with zeros to twelve characters.
    padded_uprn = str(uprn).zfill(12)

    request_headers = {
        "Accept": "*/*",
        "Accept-Language": "en-GB,en;q=0.9",
        "Connection": "keep-alive",
        "Ocp-Apim-Subscription-Key": "47ffd667d69c4a858f92fc38dc24b150",
        "Ocp-Apim-Trace": "true",
        "Origin": "https://bristolcouncil.powerappsportals.com",
        "Referer": "https://bristolcouncil.powerappsportals.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "cross-site",
        "Sec-GPC": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    }

    form_url = "https://bristolcouncil.powerappsportals.com/completedynamicformunauth/"
    api_host = "bcprdapidyna002.azure-api.net"
    address_url = f"https://{api_host}/bcprdfundyna001-llpg/DetailedLLPG"
    schedule_url = f"https://{api_host}/bcprdfundyna001-alloy/NextCollectionDates"

    async with httpx.AsyncClient(timeout=20) as client:
        # Initialise form
        await client.get(
            form_url,
            headers=request_headers,
            params={"servicetypeid": "7dce896c-b3ba-ea11-a812-000d3a7f1cdc"},
        )

        # Set the search criteria
        await client.post(
            address_url,
            headers=request_headers,
            json={"Uprn": "UPRN" + padded_uprn},
        )

        # Retrieve the schedule
        schedule_response = await client.post(
            schedule_url,
            headers=request_headers,
            json={"uprn": padded_uprn},
        )

    return schedule_response


async def get_bristol_gov_uk(start_date: date, data_dir: str, uprn: str) -> list[Event]:
    """Build waste collection events for *uprn* from Bristol City Council data.

    Both the next and the last collection date of every container are
    considered; dates before *start_date* are dropped. Containers collected
    on the same day share one event, each service being listed once.
    """
    schedule = await get_bristol_data(data_dir, uprn)
    date_keys = ("nextCollectionDate", "lastCollectionDate")

    services_by_date: dict[date, list[str]] = {}
    for container in schedule:
        container_name = container["containerName"]
        if "Recycling" in container_name:
            label = "Recycling"
        else:
            # Keep only what follows the first space of the container name.
            _, _, label = container_name.partition(" ")

        for collection in container["collection"]:
            for key in date_keys:
                # Only the leading YYYY-MM-DD part of the value is used.
                day = date.fromisoformat(collection[key][:10])
                if day >= start_date:
                    labels = services_by_date.setdefault(day, [])
                    if label not in labels:
                        labels.append(label)

    events: list[Event] = []
    for day, labels in services_by_date.items():
        title = "Bristol: " + ", ".join(labels)
        events.append(Event(name="waste_schedule", date=day, title=title))
    return events