Initial commit

2023-08-07 18:08:30 +01:00 · 2023-08-07 18:08:30 +01:00 · 775826349b
commit 775826349b
5 changed files with 335 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,2 @@
 .mypy_cache/
 __pycache__
--- a/21
+++ b/21
@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2023 Edward Betts <edward@4angle.com>
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,92 @@
 # Wheelie Fresh Bins cleaning schedule retrieval
 ## Overview
 `schedule.py` is a Python script designed to retrieve the cleaning schedule
 information from Wheelie Fresh Bins and save it as HTML and an ICS (iCalendar)
 file. This tool automates the process of accessing your cleaning schedule and
 provides you with easily accessible calendar data.
 ## Prerequisites
 Before using this script, make sure you have the following prerequisites:
 - Python 3
 - Required Python modules: `ics`, `jinja2`, `lxml`, and `requests`.
 - Playwright (used for headless web scraping).
 ## Installation
 1. Clone or download this repository to your local machine.
 2. Install the required Python modules by running:
   ```
   pip install playwright lxml ics jinja2 requests
   ```
 3. Install the Playwright browser binaries for your platform. The script
   drives headless Chromium, so `playwright install chromium` is enough. Full
   installation instructions for Playwright are
   [here](https://playwright.dev/python/docs/intro).
 4. Customize the configuration in the `config` file to match your requirements.
 ## Usage
 To use the script, run it from the command line:
 ```
 python schedule.py
 ```
 The script will log in to the Wheelie Fresh Bins website, retrieve your
 cleaning schedule, and save it as an HTML file (`dest`) and an ICS file
 (`ics_file`). The HTML file can be opened in a web browser, while the ICS file
 can be imported into your favorite calendar application.
 ## Scheduling with Crontab
 You can automate the execution of `schedule.py` by scheduling it to run once per
 day using the crontab utility. Here's how to do it:
 1. Edit your crontab file using the following command:
   ```
   crontab -e
   ```
 2. Add the following line to schedule the script to run daily at a specific
   time. Replace `/path/to/schedule.py` with the actual path to your
   `schedule.py` script:
   ```
   0 0 * * * /usr/bin/python3 /path/to/schedule.py
   ```
   This example schedules the script to run every day at midnight. You can
   adjust the time and frequency according to your preferences. Save the crontab
   file.
 3. Crontab will automatically execute the script at the specified time each
   day, and the schedule data will be updated accordingly.
 ## Customization
 - You can customize the script's behavior by editing the configuration in the
  `config` file, such as specifying your login credentials, file paths, and
  other options.
 - The script uses Jinja2 templates to render the HTML output. You can modify
  the HTML template in the `templates` directory to change the appearance of
  the schedule.
 ## Author
 This script was created by Edward Betts (edward@4angle.com). Feel free to
 contact me for support or improvements.
 ## License
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 for details.
--- a/schedule.py
+++ b/schedule.py
@ -0,0 +1,207 @@
 #!/usr/bin/python3
 """Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
 import configparser
 import json
 import os
 import re
 import sys
 from datetime import date, datetime, timedelta
 from typing import NoReturn
 import ics
 import jinja2
 import requests
 from playwright.sync_api import Playwright, sync_playwright
 # Templates, the config file and the saved login state all live next to this
 # script, so build their paths from the script's own directory.
 base_dir = os.path.dirname(__file__)
 templates_dir = os.path.join(base_dir, "templates")
 env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
 template = env.get_template("schedule.html")
 config_location = os.path.join(base_dir, "config")
 auth_json_path = os.path.join(base_dir, "auth.json")
 # ConfigParser.read() silently skips files it cannot open, so check for the
 # config explicitly. Use a real exception rather than assert, because asserts
 # are stripped when Python runs with -O.
 if not os.path.exists(config_location):
    raise FileNotFoundError(f"config file not found: {config_location}")
 # auth.json is deliberately NOT required at import time: get_schedule_html()
 # logs in and creates it when it is missing, which a check here would prevent
 # on the very first run.
 config = configparser.ConfigParser()
 config.read(config_location)
 # Credentials for the customer portal.
 username = config["login"]["username"]
 password = config["login"]["password"]
 # Directory that receives a timestamped copy of every rendered page.
 data_dir = config["location"]["data"]
 # Response body the portal sends when the saved session cookie is rejected.
 no_permission = "You do not have permission to view this directory or page."
 booking_id = config["booking"]["booking_id"]
 login_url = "https://portal.wheeliefreshbins.com/Account/Login"
 summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
 # Output locations for the rendered HTML page and the iCalendar file.
 dest = config["location"]["dest"]
 ics_file = config["location"]["ics_file"]
 def run(playwright: Playwright) -> None:
    """Log in with headless Chromium and save the session to auth.json.

    Fills in the login form with the configured credentials, waits for the
    navigation to the summary page, clicks the Schedule link and then writes
    the browser context's storage state to ``auth_json_path``.
    """
    chromium = playwright.chromium.launch(headless=True)
    session = chromium.new_context()
    tab = session.new_page()
    tab.goto(login_url)
    # Complete the login form.
    user_field = tab.locator('input[name="UserName"]')
    user_field.fill(username)
    password_field = tab.locator('input[name="Password"]')
    password_field.fill(password)
    remember_me = tab.locator('input[name="RememberMe"]')
    remember_me.check()
    # Submit and wait until the browser has navigated to the summary page.
    with tab.expect_navigation(url=summary_url):
        tab.locator('input:has-text("Log in")').click()
    schedule_link = tab.locator('a:has-text("Schedule")')
    schedule_link.click()
    tab.close()
    # Persist the cookies so later runs can skip the browser entirely.
    session.storage_state(path=auth_json_path)
    session.close()
    chromium.close()
 def get_cookie_value() -> str:
    """Get the value of the ``.AspNet.Cookies`` cookie from auth.json.

    Raises:
        StopIteration: if the saved state has no cookie with that name.
    """
    # Open via a context manager so the file handle is closed straight away
    # instead of being left to the garbage collector; JSON files are UTF-8.
    with open(auth_json_path, encoding="utf-8") as fh:
        auth = json.load(fh)
    v: str = next(
        cookie["value"]
        for cookie in auth["cookies"]
        if cookie["name"] == ".AspNet.Cookies"
    )
    return v
 def retrieve_schedule() -> requests.models.Response:
    """Retrieve the bin cleaning schedule from the user dashboard.

    Posts the configured booking ID to the portal, authenticated with the
    cookie saved in auth.json, and returns the raw response.

    Raises:
        requests.exceptions.Timeout: if the portal does not answer in time.
    """
    return requests.post(
        "https://portal.wheeliefreshbins.com/home/schedule",
        json={"bookingId": booking_id},
        cookies={".AspNet.Cookies": get_cookie_value()},
        # requests waits forever by default; a bounded wait stops an
        # unattended (cron) run from hanging if the portal stops responding.
        timeout=60,
    )
 def read_html_from_json(r: requests.models.Response) -> str:
    """Decode the JSON body of the response and return its "html" entry."""
    payload = r.json()
    markup: str = payload["html"]
    return markup
 def login() -> None:
    """Start Playwright and perform the browser login."""
    # The context manager shuts Playwright down again once run() returns.
    with sync_playwright() as pw:
        run(pw)
 def get_schedule_html() -> str | NoReturn:
    """Fetch the schedule and return the HTML part of the response.

    Logs in first when no saved session exists. If the portal rejects the
    saved session, logs in again and retries once; if that also fails, prints
    "login failed" and exits with status 1.
    """
    if not os.path.exists(auth_json_path):
        login()
    for attempt in range(2):
        # The second attempt only happens after a fresh login.
        if attempt:
            login()
        response = retrieve_schedule()
        if response.text != no_permission:
            return read_html_from_json(response)
    print("login failed")
    sys.exit(1)
 # Matches one <div ...>...</div> element. Both quantifiers are non-greedy, so
 # each match ends at the first closing </div> tag it reaches.
 re_div = re.compile(r"<div[^>]*?>.*?</div>")
 # Matches a div whose opening tag starts with `<div class="col-xs-3 bincell`
 # and ends with `blackbin">`, `bluebin">` or `greenbin">`. Group 1 is the
 # colour and group 2 is the text inside the div.
 re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
 # Matches a div with a class attribute whose whole content is a date written
 # as two-digit day, three-letter month and four-digit year ("07 Aug 2023").
 # Group 1 is the date text.
 re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')
 def parse_bin_date(bin_date: str) -> date:
    """Parse a bin date that includes the year, e.g. "Monday, 07 Aug 2023".

    Raises:
        ValueError: if the text does not match the format or is not a real
            calendar date.
    """
    return datetime.strptime(bin_date, "%A, %d %b %Y").date()
 def find_date(d1: date, target: str) -> date:
    """Find the next occurrence of the same day and month.

    ``target`` is a date without a year (e.g. "Monday, 07 Aug"). The year of
    ``d1`` is tried first and then the following year; the first result that
    is not before ``d1`` is returned.

    Raises:
        ValueError: if neither year gives a valid date on or after ``d1``.
    """
    failure: ValueError | None = None
    for year in (d1.year, d1.year + 1):
        try:
            d2 = parse_bin_date(f"{target} {year}")
        except ValueError as exc:
            # 29 Feb is not a valid date in a non-leap year; skip that year
            # rather than aborting, because the other year may still be valid.
            failure = exc
            continue
        if d1 <= d2:
            return d2
    raise ValueError(f"no date matching {target!r} on or after {d1}") from failure
 def get_date_from_line(line: str) -> date:
    """Return the date held in the div at the start of the line.

    The line must begin with a div matched by ``re_date``; otherwise the
    assertion fails.
    """
    found = re_date.match(line)
    assert found
    date_text = found.group(1)
    parsed = datetime.strptime(date_text, "%d %b %Y")
    return parsed.date()
 def parse_part(d: date, part: str) -> date | None:
    """Return the date shown in one bin cell, or None if there is none.

    ``part`` is a single div from a schedule line and ``d`` is the date taken
    from that line. Divs without "bincell" and cells whose text ends with
    "Christmas Closure" give None. A bincell div that ``re_bin`` cannot match
    is printed before the assertion fails.
    """
    is_bin_cell = "bincell" in part
    if not is_bin_cell:
        return None
    found = re_bin.match(part)
    if found is None:
        # Show the offending markup before the assertion below aborts.
        print(part)
    assert found
    # Group 1 is the bin colour, which is not needed; group 2 is the cell text.
    cell_text = found.group(2)
    if cell_text.endswith("Christmas Closure"):
        return None
    return find_date(d, cell_text)
 def html_to_ics(html: str) -> ics.Calendar:
    """Build a calendar from the schedule HTML.

    Every line containing "weekcell" supplies a date (from its first div) and
    a series of further divs; each bin cell among those that yields a date
    becomes one "Wheelie Fresh Bins" event ending one day after it begins.
    Dates are collected in a set, so a date that occurs more than once gives
    a single event.
    """
    bin_dates: set[date] = set()
    for raw_line in html.splitlines():
        if "weekcell" not in raw_line:
            continue
        stripped = raw_line.strip()
        line_date = get_date_from_line(stripped)
        # The first div carries the line's own date; the rest are the cells.
        for part in re_div.findall(stripped)[1:]:
            clean_date = parse_part(line_date, part)
            if clean_date:
                bin_dates.add(clean_date)
    cal = ics.Calendar()
    for day in bin_dates:
        event = ics.Event()
        event.name = "Wheelie Fresh Bins"
        event.begin = day
        event.end = day + timedelta(days=1)
        cal.events.add(event)
    return cal
 def main() -> None:
    """Get the schedule and save it as a web page, a calendar and an archive copy.

    Writes three files:

    - ``dest``: the rendered page with the ``schedbody`` class text replaced,
    - ``ics_file``: the serialized calendar,
    - a copy of the unmodified page in ``data_dir``, named after the current
      UTC time.
    """
    html = get_schedule_html()
    page = template.render(html=html)
    # The template declares <meta charset="utf-8">, so write the files as UTF-8
    # explicitly instead of relying on the locale's default encoding (which
    # may differ, e.g. when run from cron).
    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar
    with open(dest, "w", encoding="utf-8") as fh:
        fh.write(page.replace("schedbody ", '"'))
    cal = html_to_ics(html)
    with open(ics_file, "w", encoding="utf-8") as fh:
        fh.write(cal.serialize())
    now = datetime.utcnow()
    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(data_dir, now_str + ".html")
    with open(filename, "w", encoding="utf-8") as fh:
        fh.write(page)
 if __name__ == "__main__":
    main()
--- a/templates/schedule.html
+++ b/templates/schedule.html
@ -0,0 +1,13 @@
 <!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>Bin cleaning schedule</title>
  <link href="https://portal.wheeliefreshbins.com/Content/xplugin" rel="stylesheet"/>
  <link href="https://portal.wheeliefreshbins.com/Content/css" rel="stylesheet"/>
 </head>
 <body>
  {{ html | safe }}
 </body>
 </html>