commit 775826349bfac69a37c86c7b681139f42646c64b
Author: Edward Betts <edward@4angle.com>
Date:   Mon Aug 7 18:08:30 2023 +0100

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a0cbd2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.mypy_cache/
+__pycache__
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..379270c
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Edward Betts <edward@4angle.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4834592
--- /dev/null
+++ b/README.md
@@ -0,0 +1,92 @@
+# Wheelie Fresh Bins cleaning schedule retrieval
+
+## Overview
+
+`schedule.py` is a Python script designed to retrieve the cleaning schedule
+information from Wheelie Fresh Bins and save it as HTML and an ICS (iCalendar)
+file. This tool automates the process of accessing your cleaning schedule and
+provides you with easily accessible calendar data.
+
+## Prerequisites
+
+Before using this script, make sure you have the following prerequisites:
+
+- Python 3.10 or later (the script uses `X | Y` union type annotations)
+- Required Python modules: `ics`, `jinja2`, `lxml`, and `requests`.
+- Playwright (used for headless web scraping).
+
+## Installation
+
+1. Clone or download this repository to your local machine.
+
+2. Install the required Python modules by running:
+
+   ```
+   pip install playwright lxml ics jinja2 requests
+   ```
+
+3. Install the browser that Playwright drives (the script uses headless
+   Chromium), for example with `playwright install chromium`. See the
+   [Playwright installation guide](https://playwright.dev/python/docs/intro).
+
+4. Customize the configuration in the `config` file to match your requirements.
+
+## Usage
+
+To use the script, run it from the command line:
+
+```
+python schedule.py
+```
+
+The script will log in to the Wheelie Fresh Bins website, retrieve your
+cleaning schedule, and save it as an HTML file and an ICS file at the paths
+set by `dest` and `ics_file` in `config`. The HTML file can be opened in a web
+browser, while the ICS file can be imported into your calendar application.
+
+## Scheduling with Crontab
+
+You can automate the execution of `schedule.py` by scheduling it to run once per
+day using the crontab utility. Here's how to do it:
+
+1. Edit your crontab file using the following command:
+
+   ```
+   crontab -e
+   ```
+
+2. Add the following line to schedule the script to run daily at a specific
+   time. Replace `/path/to/schedule.py` with the actual path to your
+   `schedule.py` script:
+
+   ```
+   0 0 * * * /usr/bin/python3 /path/to/schedule.py
+   ```
+
+   This example schedules the script to run every day at midnight. You can
+   adjust the time and frequency according to your preferences. Save the crontab
+   file.
+
+3. Cron will automatically execute the script at the specified time each
+   day, and the schedule data will be updated accordingly.
+
+
+## Customization
+
+- You can customize the script's behavior by editing the `config` file: the
+  `[login]` section holds `username` and `password`, `[booking]` holds
+  `booking_id`, and `[location]` holds `data`, `dest` and `ics_file`.
+
+- The script uses Jinja2 templates to render the HTML output. You can modify
+  the HTML template in the `templates` directory to change the appearance of
+  the schedule.
+
+## Author
+
+This script was created by Edward Betts (edward@4angle.com). Feel free to
+contact me for support or improvements.
+
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
+for details.
diff --git a/schedule.py b/schedule.py
new file mode 100755
index 0000000..9a89588
--- /dev/null
+++ b/schedule.py
@@ -0,0 +1,207 @@
+#!/usr/bin/python3
+"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
+
+import configparser
+import json
+import os
+import re
+import sys
+from datetime import date, datetime, timedelta
+from typing import NoReturn
+
+import ics
+import jinja2
+import requests
+from playwright.sync_api import Playwright, sync_playwright
+
+base_dir = os.path.dirname(__file__)
+
+templates_dir = os.path.join(base_dir, "templates")
+env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
+
+template = env.get_template("schedule.html")
+
+config_location = os.path.join(base_dir, "config")
+auth_json_path = os.path.join(base_dir, "auth.json")
+
+assert os.path.exists(config_location)
+assert os.path.exists(auth_json_path)
+config = configparser.ConfigParser()
+config.read(config_location)
+username = config["login"]["username"]
+password = config["login"]["password"]
+data_dir = config["location"]["data"]
+
+no_permission = "You do not have permission to view this directory or page."
+booking_id = config["booking"]["booking_id"]
+
+login_url = "https://portal.wheeliefreshbins.com/Account/Login"
+summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
+
+dest = config["location"]["dest"]
+ics_file = config["location"]["ics_file"]
+
+
+def run(playwright: Playwright) -> None:
+    """Login to the Wheelie Fresh Bin website."""
+    browser = playwright.chromium.launch(headless=True)
+    context = browser.new_context()
+
+    page = context.new_page()
+
+    page.goto(login_url)
+    page.locator('input[name="UserName"]').fill(username)
+    page.locator('input[name="Password"]').fill(password)
+    page.locator('input[name="RememberMe"]').check()
+
+    with page.expect_navigation(url=summary_url):
+        page.locator('input:has-text("Log in")').click()
+
+    page.locator('a:has-text("Schedule")').click()
+
+    page.close()
+
+    context.storage_state(path=auth_json_path)
+    context.close()
+    browser.close()
+
+
+def get_cookie_value() -> str:
+    """Get the value of the cookie we need from auth.json."""
+    auth = json.load(open(auth_json_path))
+    v: str = next(
+        cookie["value"]
+        for cookie in auth["cookies"]
+        if cookie["name"] == ".AspNet.Cookies"
+    )
+    return v
+
+
+def retrieve_schedule() -> requests.models.Response:
+    """Retrieve the bin cleaning schedule from the user dashboard."""
+    return requests.post(
+        "https://portal.wheeliefreshbins.com/home/schedule",
+        json={"bookingId": booking_id},
+        cookies={".AspNet.Cookies": get_cookie_value()},
+    )
+
+
+def read_html_from_json(r: requests.models.Response) -> str:
+    """Return HTML from the JSON response."""
+    html: str = r.json()["html"]
+    return html
+
+
+def login() -> None:
+    """Login to Wheelie Fresh Bins."""
+    with sync_playwright() as playwright:
+        run(playwright)
+
+
+def get_schedule_html() -> str | NoReturn:
+    """Grab the schedule and return the HTML part of the response."""
+    if not os.path.exists(auth_json_path):
+        login()
+    r = retrieve_schedule()
+    if r.text != no_permission:
+        return read_html_from_json(r)
+
+    login()
+
+    r = retrieve_schedule()
+    if r.text != no_permission:
+        return read_html_from_json(r)
+
+    print("login failed")
+    sys.exit(1)
+
+
+re_div = re.compile(r"<div[^>]*?>.*?</div>")  # shortest <div ...>...</div> span
+re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
+re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')
+
+
+def parse_bin_date(bin_date: str) -> date:
+    """Convert text such as "Monday, 07 Aug 2023" into a date object."""
+    return datetime.strptime(bin_date, "%A, %d %b %Y").date()
+
+
+def find_date(d1: date, target: str) -> date:
+    """Find the next occurrence of the same day and month."""
+    d2 = parse_bin_date(f"{target} {d1.year}")
+    if d2 < d1:
+        d2 = parse_bin_date(f"{target} {d1.year + 1}")
+        assert d1 <= d2
+
+    return d2
+
+
+def get_date_from_line(line: str) -> date:
+    """Read date from line."""
+    m_date = re_date.match(line)
+    assert m_date
+    return datetime.strptime(m_date.group(1), "%d %b %Y").date()
+
+
+def parse_part(d: date, part: str) -> date | None:
+    """Parse part."""
+    if "bincell" not in part:
+        return None
+    m = re_bin.match(part)
+    if not m:
+        print(part)
+    assert m
+    bin_colour, date_str = m.groups()
+    if date_str.endswith("Christmas Closure"):
+        return None
+    return find_date(d, date_str)
+
+
+def html_to_ics(html: str) -> ics.Calendar:
+    """Parse HTML file, return calendar."""
+    bin_dates: set[date] = set()
+
+    for line in html.splitlines():
+        if "weekcell" not in line:
+            continue
+        line = line.strip()
+        d = get_date_from_line(line)
+
+        bin_dates.update(
+            d for d in (parse_part(d, part) for part in re_div.findall(line)[1:]) if d
+        )
+
+    cal = ics.Calendar()
+
+    for d in bin_dates:
+        event = ics.Event()
+        event.name = "Wheelie Fresh Bins"
+        event.begin = d
+        event.end = d + timedelta(days=1)
+        cal.events.add(event)
+
+    return cal
+
+
+def main() -> None:
+    """Get schedule and save as web page."""
+    html = get_schedule_html()
+    page = template.render(html=html)
+
+    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar
+    with open(dest, "w") as fh:
+        fh.write(page.replace("schedbody ", '"'))
+
+    cal = html_to_ics(html)
+    with open(ics_file, "w") as fh:
+        fh.write(cal.serialize())
+
+    now = datetime.utcnow()
+    now_str = now.strftime("%Y-%m-%d_%H:%M")
+    filename = os.path.join(data_dir, now_str + ".html")
+    with open(filename, "w") as fh:
+        fh.write(page)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/templates/schedule.html b/templates/schedule.html
new file mode 100644
index 0000000..4c3eef6
--- /dev/null
+++ b/templates/schedule.html
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Bin cleaning schedule</title>
+  <link href="https://portal.wheeliefreshbins.com/Content/xplugin" rel="stylesheet"/>
+  <link href="https://portal.wheeliefreshbins.com/Content/css" rel="stylesheet"/>
+</head>
+
+<body>
+  {{ html | safe }}
+</body>
+</html>