From 43cfb21523ee44dcfa87aa48a88f8cfb24db7fc8 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Mon, 7 Aug 2023 18:08:30 +0100 Subject: [PATCH] Initial commit --- .gitignore | 2 + LICENSE | 21 +++++ README.md | 92 ++++++++++++++++++ schedule.py | 204 ++++++++++++++++++++++++++++++++++++++++ templates/schedule.html | 13 +++ 5 files changed, 332 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100755 schedule.py create mode 100644 templates/schedule.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9a0cbd2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.mypy_cache/ +__pycache__ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..379270c --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Edward Betts + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..4834592 --- /dev/null +++ b/README.md @@ -0,0 +1,92 @@ +# Wheelie Fresh Bins cleaning schedule retrieval + +## Overview + +`schedule.py` is a Python script designed to retrieve the cleaning schedule +information from Wheelie Fresh Bins and save it as HTML and an ICS (iCalendar) +file. This tool automates the process of accessing your cleaning schedule and +provides you with easily accessible calendar data. + +## Prerequisites + +Before using this script, make sure you have the following prerequisites: + +- Python 3 +- Required Python modules: `ics`, `jinja2`, `lxml`, and `requests`. +- Playwright (used for headless web scraping). + +## Installation + +1. Clone or download this repository to your local machine. + +2. Install the required Python modules by running: + + ``` + pip install playwright lxml ics jinja2 requests + ``` + +3. Make sure you have the Playwright dependencies installed for your platform. + You can follow the installation instructions for Playwright + [here](https://playwright.dev/python/docs/intro). + +4. Customize the configuration in the `config` file to match your requirements. + +## Usage + +To use the script, run it from the command line: + +``` +python schedule.py +``` + +The script will log in to the Wheelie Fresh Bins website, retrieve your +cleaning schedule, and save it as an HTML file (`dest`) and an ICS file +(`ics_file`). The HTML file can be opened in a web browser, while the ICS file +can be imported into your favorite calendar application. + +## Scheduling with Crontab + +You can automate the execution of `schedule.py` by scheduling it to run once per +day using the crontab utility. Here's how to do it: + +1. Edit your crontab file using the following command: + + ``` + crontab -e + ``` + +2. Add the following line to schedule the script to run daily at a specific + time. Replace `/path/to/schedule.py` with the actual path to your + `schedule.py` script: + + ``` + 0 0 * * * /usr/bin/python3 /path/to/schedule.py + ``` + + This example schedules the script to run every day at midnight. You can + adjust the time and frequency according to your preferences. Save the crontab + file. + +3. Crontab will automatically execute the script at the specified time each + day, and the schedule data will be updated accordingly. + + +## Customization + +- You can customize the script's behavior by editing the configuration in the + `config` file, such as specifying your login credentials, file paths, and + other options. + +- The script uses Jinja2 templates to render the HTML output. You can modify + the HTML template in the `templates` directory to change the appearance of + the schedule. + +## Author + +This script was created by Edward Betts (edward@4angle.com). Feel free to +contact me for support or improvements. + +## License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file +for details. diff --git a/schedule.py b/schedule.py new file mode 100755 index 0000000..beaac7f --- /dev/null +++ b/schedule.py @@ -0,0 +1,204 @@ +#!/usr/bin/python3 +"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file.""" + +import configparser +import json +import os +import re +import sys +from datetime import date, datetime, timedelta +from typing import NoReturn + +import ics +import jinja2 +import requests +from playwright.sync_api import Playwright, sync_playwright + +base_dir = os.path.dirname(__file__) + +templates_dir = os.path.join(base_dir, "templates") +env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir)) + +template = env.get_template("schedule.html") + +config_location = os.path.join(base_dir, "config") +auth_json_path = os.path.join(base_dir, "auth.json") + +config = configparser.ConfigParser() +config.read(config_location) +username = config["login"]["username"] +password = config["login"]["password"] +data_dir = config["locations"]["data"] + +no_permission = "You do not have permission to view this directory or page." +booking_id = config["booking"]["booking_id"] + +login_url = "https://portal.wheeliefreshbins.com/Account/Login" +summary_url = "https://portal.wheeliefreshbins.com/Home/Summary" + +dest = config["location"]["dest"] +ics_file = config["location"]["ics_file"] + + +def run(playwright: Playwright) -> None: + """Login to the Wheelie Fresh Bin website.""" + browser = playwright.chromium.launch(headless=True) + context = browser.new_context() + + page = context.new_page() + + page.goto(login_url) + page.locator('input[name="UserName"]').fill(username) + page.locator('input[name="Password"]').fill(password) + page.locator('input[name="RememberMe"]').check() + + with page.expect_navigation(url=summary_url): + page.locator('input:has-text("Log in")').click() + + page.locator('a:has-text("Schedule")').click() + + page.close() + + context.storage_state(path=auth_json_path) + context.close() + browser.close() + + +def get_cookie_value() -> str: + """Get the value of the cookie we need from auth.json.""" + auth = json.load(open(auth_json_path)) + v: str = next( + cookie["value"] + for cookie in auth["cookies"] + if cookie["name"] == ".AspNet.Cookies" + ) + return v + + +def retrieve_schedule() -> requests.models.Response: + """Retrieve the bin cleaning schedule from the user dashboard.""" + return requests.post( + "https://portal.wheeliefreshbins.com/home/schedule", + json={"bookingId": booking_id}, + cookies={".AspNet.Cookies": get_cookie_value()}, + ) + + +def read_html_from_json(r: requests.models.Response) -> str: + """Return HTML from the JSON response.""" + html: str = r.json()["html"] + return html + + +def login() -> None: + """Login to Wheelie Fresh Bins.""" + with sync_playwright() as playwright: + run(playwright) + + +def get_schedule_html() -> str | NoReturn: + """Grab the schedule and return the HTML part of the response.""" + if not os.path.exists(auth_json_path): + login() + r = retrieve_schedule() + if r.text != no_permission: + return read_html_from_json(r) + + login() + + r = retrieve_schedule() + if r.text != no_permission: + return read_html_from_json(r) + + print("login failed") + sys.exit(1) + + +re_div = re.compile(r"]*?>.*?") +re_bin = re.compile('
(.*?)
') +re_date = re.compile(r'
(\d{2} [A-Za-z]{3} \d{4})<\/div>') + + +def parse_bin_date(bin_date: str) -> date: + """Parse bin date with year.""" + return datetime.strptime(bin_date, "%A, %d %b %Y").date() + + +def find_date(d1: date, target: str) -> date: + """Find the next occurrence of the same day and month.""" + d2 = parse_bin_date(f"{target} {d1.year}") + if d2 < d1: + d2 = parse_bin_date(f"{target} {d1.year + 1}") + assert d1 <= d2 + + return d2 + + +def get_date_from_line(line: str) -> date: + """Read date from line.""" + m_date = re_date.match(line) + assert m_date + return datetime.strptime(m_date.group(1), "%d %b %Y").date() + + +def parse_part(d: date, part: str) -> date | None: + """Parse part.""" + if "bincell" not in part: + return None + m = re_bin.match(part) + if not m: + print(part) + assert m + bin_colour, date_str = m.groups() + if date_str.endswith("Christmas Closure"): + return None + return find_date(d, date_str) + + +def html_to_ics(html: str) -> ics.Calendar: + """Parse HTML file, return calendar.""" + bin_dates: set[date] = set() + + for line in html.splitlines(): + if "weekcell" not in line: + continue + line = line.strip() + d = get_date_from_line(line) + + bin_dates.update( + d for d in (parse_part(d, part) for part in re_div.findall(line)[1:]) if d + ) + + cal = ics.Calendar() + + for d in bin_dates: + event = ics.Event() + event.name = "Wheelie Fresh Bins" + event.begin = d + event.end = d + timedelta(days=1) + cal.events.add(event) + + return cal + + +def main() -> None: + """Get schedule and save as web page.""" + html = get_schedule_html() + page = template.render(html=html) + + # Drop the schedbody class because it sets max height to 400px and adds a scrollbar + with open(dest, "w") as fh: + fh.write(page.replace("schedbody ", '"')) + + cal = html_to_ics(html) + with open(ics_file, "w") as fh: + fh.write(cal.serialize()) + + now = datetime.utcnow() + now_str = now.strftime("%Y-%m-%d_%H:%M") + filename = os.path.join(data_dir, now_str + ".html") + with open(filename, "w") as fh: + fh.write(page) + +if __name__ == "__main__": + main() diff --git a/templates/schedule.html b/templates/schedule.html new file mode 100644 index 0000000..4c3eef6 --- /dev/null +++ b/templates/schedule.html @@ -0,0 +1,13 @@ + + + + + Bin cleaning schedule + + + + + + {{ html | safe }} + +