wheeliefreshbins/schedule.py

#!/usr/bin/python3
"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
import configparser
import json
import os
import re
import sys
from datetime import date, datetime, timedelta, timezone
from typing import NoReturn

import ics
import jinja2
import requests
from playwright.sync_api import Playwright, sync_playwright
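
# Paths are resolved relative to this script; the Jinja2 template in
# templates/schedule.html wraps the scraped schedule HTML for saving as a web page.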
base_dir = os.path.dirname(__file__)
templates_dir = os.path.join(base_dir, "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
template = env.get_template("schedule.html")
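
# Credentials, paths and the booking id come from an INI-style config file next
# to this script; the Playwright session state is cached in auth.json.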
config_location = os.path.join(base_dir, "config")
auth_json_path = os.path.join(base_dir, "auth.json")
assert os.path.exists(config_location)
assert os.path.exists(auth_json_path)
config = configparser.ConfigParser()
config.read(config_location)
username = config["login"]["username"]
password = config["login"]["password"]
data_dir = config["location"]["data"]
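
# Response body the portal returns when the request is not authorised;
# get_schedule_html() treats it as a signal to log in again.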
no_permission = "You do not have permission to view this directory or page."
booking_id = config["booking"]["booking_id"]
login_url = "https://portal.wheeliefreshbins.com/Account/Login"
summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
dest = config["location"]["dest"]
ics_file = config["location"]["ics_file"]
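

# Playwright is only needed for the interactive login; once auth.json exists,
# the schedule itself is fetched with plain requests calls.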
def run(playwright: Playwright) -> None:
    """Log in to the Wheelie Fresh Bins website and save the session state."""
    browser = playwright.chromium.launch(headless=True)
    context = browser.new_context()
    page = context.new_page()
    page.goto(login_url)
    page.locator('input[name="UserName"]').fill(username)
    page.locator('input[name="Password"]').fill(password)
    page.locator('input[name="RememberMe"]').check()
    with page.expect_navigation(url=summary_url):
        page.locator('input:has-text("Log in")').click()
    page.locator('a:has-text("Schedule")').click()
    page.close()
    # Persist the cookies so later runs can call the schedule API with requests alone.
    context.storage_state(path=auth_json_path)
    context.close()
    browser.close()


def get_cookie_value() -> str:
    """Get the value of the cookie we need from auth.json."""
    with open(auth_json_path) as fh:
        auth = json.load(fh)
    v: str = next(
        cookie["value"]
        for cookie in auth["cookies"]
        if cookie["name"] == ".AspNet.Cookies"
    )
    return v


def retrieve_schedule() -> requests.models.Response:
    """Retrieve the bin cleaning schedule from the user dashboard."""
    return requests.post(
        "https://portal.wheeliefreshbins.com/home/schedule",
        json={"bookingId": booking_id},
        cookies={".AspNet.Cookies": get_cookie_value()},
        timeout=60,  # avoid hanging indefinitely if the portal does not respond
    )


def read_html_from_json(r: requests.models.Response) -> str:
    """Return HTML from the JSON response."""
    html: str = r.json()["html"]
    return html


def login() -> None:
    """Log in to Wheelie Fresh Bins."""
    with sync_playwright() as playwright:
        run(playwright)


def get_schedule_html() -> str | NoReturn:
    """Grab the schedule and return the HTML part of the response."""
    if not os.path.exists(auth_json_path):
        login()
    r = retrieve_schedule()
    if r.text != no_permission:
        return read_html_from_json(r)
    # The saved cookie must have expired: log in again and retry once.
    login()
    r = retrieve_schedule()
    if r.text != no_permission:
        return read_html_from_json(r)
    print("login failed", file=sys.stderr)
    sys.exit(1)
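

# Regular expressions for picking bin dates out of the schedule HTML:
# re_div finds each <div> cell in a week row, re_bin matches a coloured bin
# cell (capturing the colour and date text), and re_date captures the week date.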
re_div = re.compile(r"<div[^>]*?>.*?</div>")
re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')


def parse_bin_date(bin_date: str) -> date:
    """Parse a bin date like 'Thursday, 05 Oct 2023'."""
    return datetime.strptime(bin_date, "%A, %d %b %Y").date()


def find_date(d1: date, target: str) -> date:
    """Find the next occurrence of the same day and month."""
    # The schedule cells omit the year, so try the current year first and roll
    # over to the next year if that date has already passed.
    d2 = parse_bin_date(f"{target} {d1.year}")
    if d2 < d1:
        d2 = parse_bin_date(f"{target} {d1.year + 1}")
    assert d1 <= d2
    return d2


def get_date_from_line(line: str) -> date:
    """Read date from line."""
    m_date = re_date.match(line)
    assert m_date
    return datetime.strptime(m_date.group(1), "%d %b %Y").date()


def parse_part(d: date, part: str) -> date | None:
    """Parse one cell of a schedule row, returning its bin date or None."""
    if "bincell" not in part:
        return None
    m = re_bin.match(part)
    if not m:
        # Show the offending cell before the assertion fires, to help debugging.
        print(part)
    assert m
    bin_colour, date_str = m.groups()
    if date_str.endswith("Christmas Closure"):
        return None
    return find_date(d, date_str)


def html_to_ics(html: str) -> ics.Calendar:
    """Parse the schedule HTML and return a calendar of cleaning dates."""
    bin_dates: set[date] = set()
    for line in html.splitlines():
        if "weekcell" not in line:
            continue
        line = line.strip()
        week_date = get_date_from_line(line)
        # The first <div> in the row holds the week date; the rest are bin cells.
        cells = re_div.findall(line)[1:]
        bin_dates.update(
            bin_date
            for bin_date in (parse_part(week_date, cell) for cell in cells)
            if bin_date
        )
    cal = ics.Calendar()
    for d in bin_dates:
        event = ics.Event()
        event.name = "Wheelie Fresh Bins"
        event.begin = d
        event.end = d + timedelta(days=1)
        cal.events.add(event)
    return cal


def main() -> None:
    """Get schedule and save as web page."""
    html = get_schedule_html()
    page = template.render(html=html)
    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar.
    with open(dest, "w") as fh:
        fh.write(page.replace("schedbody ", ""))
    cal = html_to_ics(html)
    with open(ics_file, "w") as fh:
        fh.write(cal.serialize())
    # Also keep a timestamped copy of the rendered page in the data directory.
    now = datetime.now(timezone.utc)
    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(data_dir, now_str + ".html")
    with open(filename, "w") as fh:
        fh.write(page)


if __name__ == "__main__":
    main()