Initial commit
This commit is contained in:
commit
43cfb21523
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
.mypy_cache/
|
||||
__pycache__
|
21
LICENSE
Normal file
21
LICENSE
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2023 Edward Betts <edward@4angle.com>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
92
README.md
Normal file
92
README.md
Normal file
|
@ -0,0 +1,92 @@
|
|||
# Wheelie Fresh Bins cleaning schedule retrieval
|
||||
|
||||
## Overview
|
||||
|
||||
`schedule.py` is a Python script designed to retrieve the cleaning schedule
|
||||
information from Wheelie Fresh Bins and save it as HTML and an ICS (iCalendar)
|
||||
file. This tool automates the process of accessing your cleaning schedule and
|
||||
provides you with easily accessible calendar data.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before using this script, make sure you have the following prerequisites:
|
||||
|
||||
- Python 3
|
||||
- Required Python modules: `ics`, `jinja2`, `lxml`, and `requests`.
|
||||
- Playwright (used for headless web scraping).
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone or download this repository to your local machine.
|
||||
|
||||
2. Install the required Python modules by running:
|
||||
|
||||
```
|
||||
pip install playwright lxml ics jinja2 requests
|
||||
```
|
||||
|
||||
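3. Make sure the Playwright browser binaries and their system dependencies are installed for your platform (the script launches headless Chromium, so `playwright install chromium` is sufficient).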
|
||||
You can follow the installation instructions for Playwright
|
||||
[here](https://playwright.dev/python/docs/intro).
|
||||
|
||||
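4. Customize the configuration in the `config` file (INI format, stored next to `schedule.py`) to match your requirements. The script reads `username` and `password` from `[login]`, `booking_id` from `[booking]`, `data` from `[locations]`, and `dest` and `ics_file` from `[location]`.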
|
||||
|
||||
## Usage
|
||||
|
||||
To use the script, run it from the command line:
|
||||
|
||||
```
|
||||
python schedule.py
|
||||
```
|
||||
|
||||
The script will log in to the Wheelie Fresh Bins website, retrieve your
|
||||
cleaning schedule, and save it as an HTML file (`dest`) and an ICS file
|
||||
(`ics_file`). The HTML file can be opened in a web browser, while the ICS file
|
||||
can be imported into your favorite calendar application.
|
||||
|
||||
## Scheduling with Crontab
|
||||
|
||||
You can automate the execution of `schedule.py` by scheduling it to run once per
|
||||
day using the crontab utility. Here's how to do it:
|
||||
|
||||
1. Edit your crontab file using the following command:
|
||||
|
||||
```
|
||||
crontab -e
|
||||
```
|
||||
|
||||
2. Add the following line to schedule the script to run daily at a specific
|
||||
time. Replace `/path/to/schedule.py` with the actual path to your
|
||||
`schedule.py` script:
|
||||
|
||||
```
|
||||
0 0 * * * /usr/bin/python3 /path/to/schedule.py
|
||||
```
|
||||
|
||||
This example schedules the script to run every day at midnight. You can
|
||||
adjust the time and frequency according to your preferences. Save the crontab
|
||||
file.
|
||||
|
||||
3. Crontab will automatically execute the script at the specified time each
|
||||
day, and the schedule data will be updated accordingly.
|
||||
|
||||
|
||||
## Customization
|
||||
|
||||
- You can customize the script's behavior by editing the configuration in the
|
||||
`config` file, such as specifying your login credentials, file paths, and
|
||||
other options.
|
||||
|
||||
- The script uses Jinja2 templates to render the HTML output. You can modify
|
||||
the HTML template in the `templates` directory to change the appearance of
|
||||
the schedule.
|
||||
|
||||
## Author
|
||||
|
||||
This script was created by Edward Betts (edward@4angle.com). Feel free to
|
||||
contact me for support or improvements.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
||||
for details.
|
204
schedule.py
Executable file
204
schedule.py
Executable file
|
@ -0,0 +1,204 @@
|
|||
#!/usr/bin/python3
|
||||
"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""
|
||||
|
||||
import configparser
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from datetime import date, datetime, timedelta
|
||||
from typing import NoReturn
|
||||
|
||||
import ics
|
||||
import jinja2
|
||||
import requests
|
||||
from playwright.sync_api import Playwright, sync_playwright
|
||||
|
||||
# Directory containing this script; the templates directory, the config file
# and auth.json are all resolved relative to it.
base_dir = os.path.dirname(__file__)

templates_dir = os.path.join(base_dir, "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))

# Page template that main() renders with the schedule HTML.
template = env.get_template("schedule.html")

config_location = os.path.join(base_dir, "config")
# Browser storage state (including cookies) written by run() after logging in.
auth_json_path = os.path.join(base_dir, "auth.json")

# ConfigParser.read() silently skips files it cannot open, so a missing
# config file shows up as a KeyError on the lookups below.
config = configparser.ConfigParser()
config.read(config_location)
username = config["login"]["username"]
password = config["login"]["password"]
# Directory where main() stores a timestamped copy of each rendered page.
# NOTE(review): this is read from the "locations" section while dest and
# ics_file below are read from "location" - confirm both section names are
# intended.
data_dir = config["locations"]["data"]

# Response body that get_schedule_html() treats as "session not accepted".
no_permission = "You do not have permission to view this directory or page."
booking_id = config["booking"]["booking_id"]

login_url = "https://portal.wheeliefreshbins.com/Account/Login"
# URL the browser is expected to reach after a successful login.
summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"

# Output paths for the rendered HTML page and the iCalendar file.
dest = config["location"]["dest"]
ics_file = config["location"]["ics_file"]
|
||||
|
||||
|
||||
def run(playwright: Playwright) -> None:
    """Log in to the Wheelie Fresh Bins portal and save the session state.

    Drives a headless Chromium browser through the login form using the
    module-level ``username`` and ``password``, then writes the browser
    context's storage state to ``auth_json_path``.

    The browser is closed even when one of the steps raises (for example a
    timeout while waiting for the post-login navigation).
    """
    browser = playwright.chromium.launch(headless=True)
    try:
        context = browser.new_context()
        page = context.new_page()

        page.goto(login_url)
        page.locator('input[name="UserName"]').fill(username)
        page.locator('input[name="Password"]').fill(password)
        page.locator('input[name="RememberMe"]').check()

        # Submitting the form navigates away; wait for the summary page.
        with page.expect_navigation(url=summary_url):
            page.locator('input:has-text("Log in")').click()

        page.locator('a:has-text("Schedule")').click()
        page.close()

        # Persist the storage state so later runs can reuse the session.
        context.storage_state(path=auth_json_path)
        context.close()
    finally:
        browser.close()
|
||||
|
||||
|
||||
def get_cookie_value() -> str:
    """Return the value of the ``.AspNet.Cookies`` cookie saved in auth.json.

    Reads the storage-state file at ``auth_json_path`` and looks through its
    ``cookies`` list for the entry named ``.AspNet.Cookies``.

    Raises:
        LookupError: if the file contains no cookie with that name.
    """
    # Use a context manager so the file handle is closed promptly.
    with open(auth_json_path, encoding="utf-8") as fh:
        auth = json.load(fh)

    for cookie in auth["cookies"]:
        if cookie["name"] == ".AspNet.Cookies":
            value: str = cookie["value"]
            return value

    raise LookupError(f".AspNet.Cookies cookie not found in {auth_json_path}")
|
||||
|
||||
|
||||
def retrieve_schedule() -> requests.models.Response:
    """Request the bin cleaning schedule from the portal.

    Sends a POST with the configured ``booking_id`` as JSON, authenticated by
    the ``.AspNet.Cookies`` cookie taken from auth.json.

    Returns:
        The raw HTTP response; callers inspect ``.text`` / ``.json()``.

    Raises:
        requests.exceptions.Timeout: if the server does not respond in time.
    """
    return requests.post(
        "https://portal.wheeliefreshbins.com/home/schedule",
        json={"bookingId": booking_id},
        cookies={".AspNet.Cookies": get_cookie_value()},
        # Without a timeout requests waits indefinitely, which would leave an
        # unattended (cron) run hanging if the portal stops responding.
        timeout=60,
    )
|
||||
|
||||
|
||||
def read_html_from_json(r: requests.models.Response) -> str:
    """Decode the response body as JSON and return its ``html`` entry."""
    payload = r.json()
    html: str = payload["html"]
    return html
|
||||
|
||||
|
||||
def login() -> None:
    """Start Playwright and run the browser login flow."""
    with sync_playwright() as pw:
        run(pw)
|
||||
|
||||
|
||||
def get_schedule_html() -> str | NoReturn:
    """Fetch the schedule and return the HTML carried in the JSON response.

    Logs in first when no saved session (auth.json) exists. If the portal
    answers with the "no permission" message, logs in again and retries once;
    if the retry is rejected too, prints "login failed" and exits with
    status 1.
    """
    if not os.path.exists(auth_json_path):
        login()

    response = retrieve_schedule()
    if response.text == no_permission:
        # The saved session was not accepted: refresh it and try once more.
        login()
        response = retrieve_schedule()
        if response.text == no_permission:
            print("login failed")
            sys.exit(1)

    return read_html_from_json(response)
|
||||
|
||||
|
||||
# One complete <div ...>...</div> element (non-greedy, within a single line).
re_div = re.compile(r"<div[^>]*?>.*?</div>")
# A bin cell: class starts with "col-xs-3 bincell" and ends with a
# black/blue/green "...bin" class. Captures the colour and the cell text.
re_bin = re.compile('<div class="col-xs-3 bincell.*(black|blue|green)bin">(.*?)</div>')
# A div whose text is a date such as "02 Oct 2023"; captures the date text.
re_date = re.compile(r'<div class="[^"].*?">(\d{2} [A-Za-z]{3} \d{4})<\/div>')
|
||||
|
||||
|
||||
def parse_bin_date(bin_date: str) -> date:
    """Convert text such as ``"Monday, 02 Oct 2023"`` into a date.

    The input must follow the ``"%A, %d %b %Y"`` format (weekday name, day,
    abbreviated month, year); anything else raises ``ValueError``.
    """
    parsed = datetime.strptime(bin_date, "%A, %d %b %Y")
    return parsed.date()
|
||||
|
||||
|
||||
def find_date(d1: date, target: str) -> date:
    """Return the first date on or after ``d1`` that matches ``target``.

    ``target`` holds a weekday, day and month but no year (it is completed
    with a year and handed to ``parse_bin_date``). The year of ``d1`` is
    tried first; if that gives a date before ``d1`` the following year is
    used instead.
    """
    for year in (d1.year, d1.year + 1):
        candidate = parse_bin_date(f"{target} {year}")
        if candidate >= d1:
            break
    assert d1 <= candidate

    return candidate
|
||||
|
||||
|
||||
def get_date_from_line(line: str) -> date:
    """Read the date from the start of a schedule line.

    ``line`` must begin with a div matched by ``re_date``, i.e. one whose
    text is a date such as ``02 Oct 2023``.

    Raises:
        ValueError: if the line does not start with such a div.
    """
    m_date = re_date.match(line)
    # Raise explicitly rather than assert: asserts are stripped under -O.
    if m_date is None:
        raise ValueError(f"no date found at start of line: {line!r}")
    return datetime.strptime(m_date.group(1), "%d %b %Y").date()
|
||||
|
||||
|
||||
def parse_part(d: date, part: str) -> date | None:
    """Return the date held in one bin cell, or None if it has none.

    Args:
        d: Reference date; the result is the first matching date on or
            after it (see ``find_date``).
        part: HTML of a single ``<div>`` element.

    Returns:
        The cell's date, or ``None`` when ``part`` is not a bin cell or its
        text ends with "Christmas Closure".

    Raises:
        ValueError: if ``part`` mentions "bincell" but does not match
            ``re_bin``.
    """
    if "bincell" not in part:
        return None

    m = re_bin.match(part)
    # Raise explicitly rather than assert (asserts are stripped under -O) and
    # carry the offending HTML in the error instead of printing it.
    if m is None:
        raise ValueError(f"unrecognised bin cell: {part!r}")

    # Group 1 is the bin colour, which is not needed here.
    date_str = m.group(2)
    if date_str.endswith("Christmas Closure"):
        return None
    return find_date(d, date_str)
|
||||
|
||||
|
||||
def html_to_ics(html: str) -> ics.Calendar:
    """Build a calendar from the schedule HTML.

    Every line containing "weekcell" is parsed: the date at the start of the
    line is the reference date, and each following div is passed to
    ``parse_part`` to obtain a bin date. Each distinct bin date becomes one
    "Wheelie Fresh Bins" event ending one day after it begins.
    """
    bin_dates: set[date] = set()

    for raw_line in html.splitlines():
        if "weekcell" not in raw_line:
            continue
        row = raw_line.strip()
        row_date = get_date_from_line(row)

        # The first div is skipped; presumably it is the date cell just read.
        for cell in re_div.findall(row)[1:]:
            bin_date = parse_part(row_date, cell)
            if bin_date:
                bin_dates.add(bin_date)

    cal = ics.Calendar()

    for bin_date in bin_dates:
        event = ics.Event()
        event.name = "Wheelie Fresh Bins"
        event.begin = bin_date
        event.end = bin_date + timedelta(days=1)
        cal.events.add(event)

    return cal
|
||||
|
||||
|
||||
def main() -> None:
    """Get the schedule and save it as a web page, an ICS file and an archive.

    Writes three files:

    * ``dest``: the rendered page with the "schedbody " text replaced.
    * ``ics_file``: the calendar built from the schedule HTML.
    * ``<data_dir>/<UTC timestamp>.html``: the rendered page, unmodified.
    """
    # Local import: the file-level import only brings in date/datetime/timedelta.
    from datetime import timezone

    html = get_schedule_html()
    page = template.render(html=html)

    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar
    # NOTE(review): the replacement text is a lone double quote; confirm this
    # produces valid markup for the HTML the portal actually returns.
    # The template declares charset utf-8, so write with that encoding
    # instead of relying on the locale default.
    with open(dest, "w", encoding="utf-8") as fh:
        fh.write(page.replace("schedbody ", '"'))

    cal = html_to_ics(html)
    with open(ics_file, "w", encoding="utf-8") as fh:
        fh.write(cal.serialize())

    # datetime.utcnow() is deprecated; an aware UTC datetime formats identically.
    now = datetime.now(timezone.utc)
    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(data_dir, now_str + ".html")
    with open(filename, "w", encoding="utf-8") as fh:
        fh.write(page)
|
||||
|
||||
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|
13
templates/schedule.html
Normal file
13
templates/schedule.html
Normal file
|
@ -0,0 +1,13 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Bin cleaning schedule</title>
|
||||
<link href="https://portal.wheeliefreshbins.com/Content/xplugin" rel="stylesheet"/>
|
||||
<link href="https://portal.wheeliefreshbins.com/Content/css" rel="stylesheet"/>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
{{ html | safe }}
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in a new issue