Initial commit

This commit is contained in:
Edward Betts 2023-08-07 18:08:30 +01:00
commit 43cfb21523
5 changed files with 332 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
.mypy_cache/
__pycache__

21
LICENSE Normal file
View file

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Edward Betts <edward@4angle.com>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

92
README.md Normal file
View file

@ -0,0 +1,92 @@
# Wheelie Fresh Bins cleaning schedule retrieval
## Overview
`schedule.py` is a Python script designed to retrieve the cleaning schedule
information from Wheelie Fresh Bins and save it as HTML and an ICS (iCalendar)
file. This tool automates the process of accessing your cleaning schedule and
provides you with easily accessible calendar data.
## Prerequisites
Before using this script, make sure you have the following prerequisites:
- Python 3
- Required Python modules: `ics`, `jinja2`, `lxml`, and `requests`.
- Playwright (used for headless web scraping).
## Installation
1. Clone or download this repository to your local machine.
2. Install the required Python modules by running:
```
pip install playwright lxml ics jinja2 requests
```
3. Make sure you have the Playwright dependencies installed for your platform.
The script drives a headless Chromium browser, so install it with
`playwright install chromium`. The full Playwright installation instructions
are [here](https://playwright.dev/python/docs/intro).
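4. Customize the configuration in the `config` file (INI format, in the same
directory as `schedule.py`) to match your requirements. The script reads
`username` and `password` from `[login]`, `booking_id` from `[booking]`,
`data` from `[locations]`, and `dest` and `ics_file` from `[location]`.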
## Usage
To use the script, run it from the command line:
```
python schedule.py
```
The script will log in to the Wheelie Fresh Bins website, retrieve your
cleaning schedule, and save it as an HTML file (`dest`) and an ICS file
(`ics_file`). A timestamped copy of the HTML page is also written to the
`data` directory on every run. The HTML file can be opened in a web browser,
while the ICS file can be imported into your favorite calendar application.
## Scheduling with Crontab
You can automate the execution of `schedule.py` by scheduling it to run once per
day using the crontab utility. Here's how to do it:
1. Edit your crontab file using the following command:
```
crontab -e
```
2. Add the following line to schedule the script to run daily at a specific
time. Replace `/path/to/schedule.py` with the actual path to your
`schedule.py` script:
```
0 0 * * * /usr/bin/python3 /path/to/schedule.py
```
This example schedules the script to run every day at midnight. You can
adjust the time and frequency according to your preferences. Save the crontab
file.
3. Crontab will automatically execute the script at the specified time each
day, and the schedule data will be updated accordingly.
## Customization
- You can customize the script's behavior by editing the configuration in the
`config` file, such as specifying your login credentials, file paths, and
other options.
- The script uses Jinja2 templates to render the HTML output. You can modify
the HTML template in the `templates` directory to change the appearance of
the schedule.
## Author
This script was created by Edward Betts (edward@4angle.com). Feel free to
contact me for support or improvements.
## License
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
for details.

204
schedule.py Executable file
View file

@ -0,0 +1,204 @@
#!/usr/bin/python3
"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
import configparser
import json
import os
import re
import sys
from datetime import date, datetime, timedelta
from typing import NoReturn
import ics
import jinja2
import requests
from playwright.sync_api import Playwright, sync_playwright
# Everything below runs at import time: paths are resolved relative to the
# directory containing this script and settings are read from the "config" file.
base_dir = os.path.dirname(__file__)
templates_dir = os.path.join(base_dir, "templates")
# Jinja2 template that main() renders with the schedule HTML.
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
template = env.get_template("schedule.html")
config_location = os.path.join(base_dir, "config")
# Browser storage state written by run() and read back by get_cookie_value().
auth_json_path = os.path.join(base_dir, "auth.json")
# ConfigParser.read() silently skips a missing file, so an absent "config"
# surfaces as a KeyError on the first section lookup below.
config = configparser.ConfigParser()
config.read(config_location)
username = config["login"]["username"]
password = config["login"]["password"]
# NOTE(review): this reads section "locations" while dest and ics_file below
# read section "location" -- confirm that two separate sections are intended.
data_dir = config["locations"]["data"]
# Response body that get_schedule_html() compares against to detect that the
# saved session was not accepted.
no_permission = "You do not have permission to view this directory or page."
booking_id = config["booking"]["booking_id"]
login_url = "https://portal.wheeliefreshbins.com/Account/Login"
summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
# Output paths for the rendered HTML page and the iCalendar file.
dest = config["location"]["dest"]
ics_file = config["location"]["ics_file"]
def run(playwright: Playwright) -> None:
    """Log in to the Wheelie Fresh Bins website and save the session state.

    Launches headless Chromium, submits the login form with the configured
    username and password, and writes the browser context's storage state
    (including cookies) to ``auth_json_path``.

    The browser is closed in a ``finally`` clause so that a failure part-way
    through (for example a navigation timeout) still releases it.
    """
    browser = playwright.chromium.launch(headless=True)
    try:
        context = browser.new_context()
        page = context.new_page()
        page.goto(login_url)
        page.locator('input[name="UserName"]').fill(username)
        page.locator('input[name="Password"]').fill(password)
        page.locator('input[name="RememberMe"]').check()
        # Wait for the post-login redirect to the summary page.
        with page.expect_navigation(url=summary_url):
            page.locator('input:has-text("Log in")').click()
        page.locator('a:has-text("Schedule")').click()
        page.close()
        context.storage_state(path=auth_json_path)
        context.close()
    finally:
        browser.close()
def get_cookie_value() -> str:
    """Get the value of the cookie we need from auth.json.

    Returns:
        The value of the ``.AspNet.Cookies`` cookie stored in the saved state.

    Raises:
        FileNotFoundError: if ``auth_json_path`` does not exist.
        StopIteration: if the saved state has no cookie with that name.
    """
    # Use a context manager so the file handle is closed straight away.
    with open(auth_json_path, encoding="utf-8") as fh:
        auth = json.load(fh)
    v: str = next(
        cookie["value"]
        for cookie in auth["cookies"]
        if cookie["name"] == ".AspNet.Cookies"
    )
    return v
def retrieve_schedule(timeout: float = 30.0) -> requests.models.Response:
    """Retrieve the bin cleaning schedule from the user dashboard.

    Args:
        timeout: seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would leave an
            unattended (cron) run hanging.

    Returns:
        The raw response to the schedule POST, authenticated with the saved
        ``.AspNet.Cookies`` cookie.
    """
    return requests.post(
        "https://portal.wheeliefreshbins.com/home/schedule",
        json={"bookingId": booking_id},
        cookies={".AspNet.Cookies": get_cookie_value()},
        timeout=timeout,
    )
def read_html_from_json(r: requests.models.Response) -> str:
    """Decode the JSON body of *r* and return its ``html`` field."""
    payload = r.json()
    markup: str = payload["html"]
    return markup
def login() -> None:
    """Start a Playwright session and perform the website login with run()."""
    with sync_playwright() as pw:
        run(pw)
def get_schedule_html() -> str | NoReturn:
    """Fetch the schedule and return the HTML carried in the JSON response.

    Logs in first when no saved session file exists. If the server answers
    with the "no permission" text, logs in once more and retries; if the retry
    is refused as well, prints "login failed" and exits with status 1.
    """
    if not os.path.exists(auth_json_path):
        login()

    for attempt in range(2):
        # Only the second pass re-authenticates before asking again.
        if attempt:
            login()
        response = retrieve_schedule()
        if response.text != no_permission:
            return read_html_from_json(response)

    print("login failed")
    sys.exit(1)
# One <div ...>...</div> element; non-greedy, so a match ends at the first
# closing tag it meets.
re_div = re.compile(r"<div[^>]*?>.*?</div>")
# A "bincell" div: group 1 is the bin colour taken from the class name,
# group 2 is the text inside the div.
re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
# A div with a class attribute whose text is a date such as "07 Aug 2023"
# (captured as group 1).
re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')
def parse_bin_date(bin_date: str) -> date:
    """Convert text such as ``"Monday, 07 Aug 2023"`` into a date."""
    parsed = datetime.strptime(bin_date, "%A, %d %b %Y")
    return parsed.date()
def find_date(d1: date, target: str) -> date:
    """Return the first date on or after *d1* matching the day and month in *target*.

    *target* is text without a year, e.g. ``"Monday, 07 Aug"``. The year of
    *d1* is tried first and the following year is used when that would land
    before *d1*.
    """
    same_year = parse_bin_date(f"{target} {d1.year}")
    if same_year >= d1:
        return same_year

    following_year = parse_bin_date(f"{target} {d1.year + 1}")
    assert d1 <= following_year
    return following_year
def get_date_from_line(line: str) -> date:
    """Read the date from the start of a schedule line.

    Args:
        line: one stripped line of schedule HTML; it must begin with a div
            whose text is a date formatted like ``07 Aug 2023``.

    Returns:
        The parsed date.

    Raises:
        ValueError: if *line* does not start with such a div.
    """
    m_date = re_date.match(line)
    if m_date is None:
        # Checked explicitly rather than with ``assert`` so the validation of
        # this externally supplied HTML still runs under ``python -O``.
        raise ValueError(f"no date found in schedule line: {line!r}")
    return datetime.strptime(m_date.group(1), "%d %b %Y").date()
def parse_part(d: date, part: str) -> date | None:
    """Return the date described by one div of a schedule line.

    Args:
        d: reference date; the day and month in the cell are resolved to the
            first matching date on or after it.
        part: HTML of a single ``<div>`` element.

    Returns:
        The resolved date, or ``None`` when *part* is not a bin cell or its
        text ends with "Christmas Closure".

    Raises:
        ValueError: if *part* mentions "bincell" but does not have the expected
            structure; the offending markup is included in the message.
    """
    if "bincell" not in part:
        return None
    m = re_bin.match(part)
    if m is None:
        # Raised explicitly (instead of print + assert) so the check survives
        # ``python -O`` and the markup ends up in the traceback.
        raise ValueError(f"unrecognised bin cell: {part!r}")
    # Group 1 is the bin colour, which is not needed here.
    date_str = m.group(2)
    if date_str.endswith("Christmas Closure"):
        return None
    return find_date(d, date_str)
def html_to_ics(html: str) -> ics.Calendar:
    """Build a calendar with one event for every bin date found in *html*.

    Only lines containing "weekcell" are examined. Each distinct date becomes
    an event named "Wheelie Fresh Bins" that begins on that date and ends on
    the following one.
    """
    cleaning_days: set[date] = set()
    for raw_line in html.splitlines():
        if "weekcell" not in raw_line:
            continue
        row = raw_line.strip()
        week_date = get_date_from_line(row)
        # The first div is skipped -- presumably the one holding the week date.
        for cell in re_div.findall(row)[1:]:
            found = parse_part(week_date, cell)
            if found:
                cleaning_days.add(found)

    calendar = ics.Calendar()
    for day in cleaning_days:
        entry = ics.Event()
        entry.name = "Wheelie Fresh Bins"
        entry.begin = day
        entry.end = day + timedelta(days=1)
        calendar.events.add(entry)
    return calendar
def main() -> None:
    """Get schedule and save as web page, calendar file and dated archive copy.

    Writes three files:
      * ``dest``: the rendered page with the "schedbody " text replaced;
      * ``ics_file``: the calendar built from the schedule HTML;
      * ``<data_dir>/<UTC timestamp>.html``: the rendered page as-is.
    """
    # Imported here because the module-level import only brings in date,
    # datetime and timedelta.
    from datetime import timezone

    html = get_schedule_html()
    page = template.render(html=html)

    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar
    # NOTE(review): this swaps the text 'schedbody ' for a double quote; confirm
    # against the live markup that the resulting class attribute is well-formed.
    # The template declares charset utf-8, so write with that encoding
    # regardless of the locale the script runs under.
    with open(dest, "w", encoding="utf-8") as fh:
        fh.write(page.replace("schedbody ", '"'))

    cal = html_to_ics(html)
    with open(ics_file, "w", encoding="utf-8") as fh:
        fh.write(cal.serialize())

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC timestamp
    # formats to the same string.
    now = datetime.now(timezone.utc)
    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(data_dir, now_str + ".html")
    with open(filename, "w", encoding="utf-8") as fh:
        fh.write(page)


if __name__ == "__main__":
    main()

13
templates/schedule.html Normal file
View file

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bin cleaning schedule</title>
<link href="https://portal.wheeliefreshbins.com/Content/xplugin" rel="stylesheet"/>
<link href="https://portal.wheeliefreshbins.com/Content/css" rel="stylesheet"/>
</head>
<body>
<!-- "html" is the schedule markup supplied by schedule.py; the "safe" filter inserts it as markup rather than escaped text. -->
{{ html | safe }}
</body>
</html>