wheeliefreshbins/schedule.py
#!/usr/bin/python3
"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
import configparser
import json
import os
import re
import sys
import typing
from datetime import date, datetime, timezone

import ics # type: ignore
import jinja2
import requests
from playwright.sync_api import Playwright, sync_playwright # type: ignore

base_dir = os.path.dirname(__file__)
templates_dir = os.path.join(base_dir, "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
template = env.get_template("schedule.html")
config_location = os.path.join(base_dir, "config")
auth_json_path = os.path.join(base_dir, "auth.json")
assert os.path.exists(config_location)  # auth.json is created by login() if missing
config = configparser.ConfigParser()
config.read(config_location)
username = config["login"]["username"]
password = config["login"]["password"]
data_dir = config["location"]["data"]
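# Response body the portal returns when the saved cookie is missing or has expired.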
no_permission = "You do not have permission to view this directory or page."
booking_id = config["booking"]["booking_id"]
login_url = "https://portal.wheeliefreshbins.com/Account/Login"
summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
dest = config["location"]["dest"]
ics_file = config["location"]["ics_file"]


def run(playwright: Playwright) -> None:
"""Login to the Wheelie Fresh Bin website."""
browser = playwright.chromium.launch(headless=True)
context = browser.new_context()
page = context.new_page()
page.goto(login_url)
page.locator('input[name="UserName"]').fill(username)
page.locator('input[name="Password"]').fill(password)
page.locator('input[name="RememberMe"]').check()
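    # Submitting the form should navigate to the summary page once login succeeds.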
with page.expect_navigation(url=summary_url):
page.locator('input:has-text("Log in")').click()
page.locator('a:has-text("Schedule")').click()
page.close()
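    # Save the session cookies so later requests-based calls can reuse the login.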
context.storage_state(path=auth_json_path)
context.close()
browser.close()


def get_cookie_value() -> str:
    """Get the value of the cookie we need from auth.json."""
    with open(auth_json_path) as fh:
        auth = json.load(fh)
v: str = next(
cookie["value"]
for cookie in auth["cookies"]
if cookie["name"] == ".AspNet.Cookies"
)
return v


def retrieve_schedule() -> requests.models.Response:
"""Retrieve the bin cleaning schedule from the user dashboard."""
return requests.post(
"https://portal.wheeliefreshbins.com/home/schedule",
json={"bookingId": booking_id},
cookies={".AspNet.Cookies": get_cookie_value()},
)


def read_html_from_json(r: requests.models.Response) -> str:
"""Return HTML from the JSON response."""
return typing.cast(str, r.json()["html"])


def login() -> None:
"""Login to Wheelie Fresh Bins."""
with sync_playwright() as playwright:
run(playwright)


def get_schedule_html() -> str | typing.NoReturn:
"""Grab the schedule and return the HTML part of the response."""
if not os.path.exists(auth_json_path):
login()
r = retrieve_schedule()
if r.text != no_permission:
return read_html_from_json(r)
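    # The saved cookie may have expired; log in again and retry once.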
login()
r = retrieve_schedule()
if r.text != no_permission:
return read_html_from_json(r)
    print("login failed", file=sys.stderr)
sys.exit(1)
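

# Regexes for pulling dates out of the schedule HTML: each "weekcell" row holds
# the week date in its first div, followed by per-colour "bincell" divs.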
re_div = re.compile(r"<div[^>]*?>.*?</div>")
re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')


def parse_bin_date(bin_date: str) -> date:
"""Parse bin date with year."""
return datetime.strptime(bin_date, "%A, %d %b %Y").date()


def find_date(d1: date, target: str) -> date:
"""Find the next occurrence of the same day and month."""
d2 = parse_bin_date(f"{target} {d1.year}")
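    # No year in the bin cell text: if this year's date has already passed, use next year's.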
if d2 < d1:
d2 = parse_bin_date(f"{target} {d1.year + 1}")
assert d1 <= d2
return d2


def get_date_from_line(line: str) -> date:
"""Read date from line."""
m_date = re_date.match(line)
assert m_date
return datetime.strptime(m_date.group(1), "%d %b %Y").date()


def parse_part(d: date, part: str) -> date | None:
"""Parse part."""
if "bincell" not in part:
return None
m = re_bin.match(part)
if not m:
print(part)
assert m
    _bin_colour, date_str = m.groups()
if date_str.endswith("Christmas Closure"):
return None
return find_date(d, date_str)


def html_to_ics(html: str) -> ics.Calendar:
"""Parse HTML file, return calendar."""
bin_dates: set[date] = set()
for line in html.splitlines():
if "weekcell" not in line:
continue
line = line.strip()
d = get_date_from_line(line)
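        # The first div is the week date itself; the remaining divs are the bin cells.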
bin_dates.update(
d for d in (parse_part(d, part) for part in re_div.findall(line)[1:]) if d
)
cal = ics.Calendar()
for d in bin_dates:
event = ics.Event()
event.name = "Wheelie Fresh Bins"
event.begin = d
event.make_all_day()
cal.events.add(event)
return cal


def main() -> None:
"""Get schedule and save as web page."""
html = get_schedule_html()
page = template.render(html=html)
    # Drop the schedbody class because it sets max-height to 400px and adds a scrollbar.
    with open(dest, "w") as fh:
        fh.write(page.replace("schedbody ", ""))
cal = html_to_ics(html)
with open(ics_file, "w") as fh:
fh.write(cal.serialize())
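
    # Also archive the rendered page under a UTC timestamp in the data directory.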
now_str = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H:%M")
filename = os.path.join(data_dir, now_str + ".html")
with open(filename, "w") as fh:
fh.write(page)


if __name__ == "__main__":
main()