#!/usr/bin/python3
"""Retrieve Wheelie Fresh Bins cleaning schedule and save HTML to file."""

import configparser
import json
import os
import re
import sys
import typing
from datetime import date, datetime, timedelta, timezone

import ics  # type: ignore
import jinja2
import requests
from playwright.sync_api import Playwright, sync_playwright  # type: ignore

base_dir = os.path.dirname(__file__)

templates_dir = os.path.join(base_dir, "templates")
env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
template = env.get_template("schedule.html")

config_location = os.path.join(base_dir, "config")
auth_json_path = os.path.join(base_dir, "auth.json")

# Only the config file is mandatory: ConfigParser.read() silently ignores a
# missing file, so fail loudly here. auth.json is deliberately NOT required,
# because get_schedule_html() logs in and creates it when it is absent.
if not os.path.exists(config_location):
    raise FileNotFoundError(f"config file not found: {config_location}")

config = configparser.ConfigParser()
config.read(config_location)

username = config["login"]["username"]
password = config["login"]["password"]
data_dir = config["location"]["data"]

# Response body the portal sends when the session cookie is not accepted.
no_permission = "You do not have permission to view this directory or page."

booking_id = config["booking"]["booking_id"]
login_url = "https://portal.wheeliefreshbins.com/Account/Login"
summary_url = "https://portal.wheeliefreshbins.com/Home/Summary"
dest = config["location"]["dest"]
ics_file = config["location"]["ics_file"]

# Seconds to wait for the portal before giving up on an HTTP request.
REQUEST_TIMEOUT = 30


def run(playwright: Playwright) -> None:
    """Log in to the Wheelie Fresh Bins website and save the session state.

    Drives a headless Chromium through the login form and writes the browser
    context's storage state to ``auth_json_path``.
    """
    browser = playwright.chromium.launch(headless=True)
    try:
        context = browser.new_context()
        page = context.new_page()

        page.goto(login_url)
        page.locator('input[name="UserName"]').fill(username)
        page.locator('input[name="Password"]').fill(password)
        page.locator('input[name="RememberMe"]').check()

        with page.expect_navigation(url=summary_url):
            page.locator('input:has-text("Log in")').click()

        page.locator('a:has-text("Schedule")').click()
        page.close()

        context.storage_state(path=auth_json_path)
        context.close()
    finally:
        # Close the browser even when one of the steps above fails.
        browser.close()


def get_cookie_value() -> str:
    """Get the value of the cookie we need from auth.json.

    Raises:
        LookupError: if auth.json holds no ``.AspNet.Cookies`` cookie.
    """
    with open(auth_json_path) as fh:
        auth = json.load(fh)

    for cookie in auth["cookies"]:
        if cookie["name"] == ".AspNet.Cookies":
            value: str = cookie["value"]
            return value

    raise LookupError(f"no .AspNet.Cookies cookie in {auth_json_path}")


def retrieve_schedule() -> requests.models.Response:
    """Retrieve the bin cleaning schedule from the user dashboard."""
    return requests.post(
        "https://portal.wheeliefreshbins.com/home/schedule",
        json={"bookingId": booking_id},
        cookies={".AspNet.Cookies": get_cookie_value()},
        timeout=REQUEST_TIMEOUT,  # without this a stalled server blocks forever
    )


def read_html_from_json(r: requests.models.Response) -> str:
    """Return the ``html`` member of the JSON response."""
    return typing.cast(str, r.json()["html"])


def login() -> None:
    """Login to Wheelie Fresh Bins."""
    with sync_playwright() as playwright:
        run(playwright)


def get_schedule_html() -> str | typing.NoReturn:
    """Grab the schedule and return the HTML part of the response.

    Logs in first when auth.json does not exist. If the portal rejects the
    stored cookie, logs in once more and retries; if that also fails, prints
    "login failed" and exits with status 1.
    """
    if not os.path.exists(auth_json_path):
        login()

    r = retrieve_schedule()
    if r.text != no_permission:
        return read_html_from_json(r)

    # Stored session was rejected: refresh it and try exactly once more.
    login()
    r = retrieve_schedule()
    if r.text != no_permission:
        return read_html_from_json(r)

    print("login failed")
    sys.exit(1)


# NOTE(review): the three patterns below are reproduced exactly as they appear
# in this copy of the file (including the embedded line breaks). The HTML tag
# text inside them appears to have been lost: re_div begins with what looks
# like the tail of a character class, and re_bin has a single capture group
# although parse_part() unpacks two. Restore the original patterns from
# version control before relying on the parser.
re_div = re.compile(r"]*?>.*?")
re_bin = re.compile('\n(.*?)\n')
re_date = re.compile("\n" r'(\d{2} [A-Za-z]{3} \d{4})<\/div>')


def parse_bin_date(bin_date: str) -> date:
    """Parse a bin date such as ``"Monday, 05 Aug 2024"`` (year included)."""
    return datetime.strptime(bin_date, "%A, %d %b %Y").date()


def find_date(d1: date, target: str) -> date:
    """Find the next occurrence of the same day and month.

    ``target`` is a date string without a year; it is tried with ``d1``'s year
    first and, if that lands before ``d1``, with the following year.
    """
    d2 = parse_bin_date(f"{target} {d1.year}")
    if d2 < d1:
        d2 = parse_bin_date(f"{target} {d1.year + 1}")
    assert d1 <= d2  # internal invariant, not input validation
    return d2


def get_date_from_line(line: str) -> date:
    """Read the date from the start of a line.

    Raises:
        ValueError: if the line does not start with text matching ``re_date``.
    """
    m_date = re_date.match(line)
    if not m_date:
        raise ValueError(f"no date found in line: {line!r}")
    return datetime.strptime(m_date.group(1), "%d %b %Y").date()


def parse_part(d: date, part: str) -> date | None:
    """Parse one fragment of a schedule line.

    Returns the bin date found in ``part`` (resolved relative to ``d`` by
    find_date), or None when the fragment does not mention "bincell" or its
    text ends with "Christmas Closure".

    Raises:
        ValueError: if the fragment mentions "bincell" but ``re_bin`` does not
            match it.
    """
    if "bincell" not in part:
        return None

    m = re_bin.match(part)
    if not m:
        raise ValueError(f"unrecognised bin cell: {part!r}")

    _bin_colour, date_str = m.groups()
    if date_str.endswith("Christmas Closure"):
        return None

    return find_date(d, date_str)


def html_to_ics(html: str) -> ics.Calendar:
    """Parse the schedule HTML and return a calendar of all-day events.

    Only lines containing "weekcell" are examined. The first ``re_div`` match
    on such a line is skipped; each remaining match is handed to parse_part().
    """
    bin_dates: set[date] = set()

    for raw_line in html.splitlines():
        if "weekcell" not in raw_line:
            continue

        line = raw_line.strip()
        week_date = get_date_from_line(line)

        for part in re_div.findall(line)[1:]:
            bin_date = parse_part(week_date, part)
            if bin_date:
                bin_dates.add(bin_date)

    cal = ics.Calendar()
    for bin_date in sorted(bin_dates):
        event = ics.Event()
        event.name = "Wheelie Fresh Bins"
        event.begin = bin_date
        event.make_all_day()
        cal.events.add(event)

    return cal


def main() -> None:
    """Get schedule and save as web page, calendar file and dated archive copy."""
    html = get_schedule_html()
    page = template.render(html=html)

    # Drop the schedbody class because it sets max height to 400px and adds a scrollbar
    with open(dest, "w") as fh:
        fh.write(page.replace("schedbody ", '"'))

    cal = html_to_ics(html)
    with open(ics_file, "w") as fh:
        fh.write(cal.serialize())

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted string is the same.
    now = datetime.now(timezone.utc)
    now_str = now.strftime("%Y-%m-%d_%H:%M")

    # The archive copy is the rendered page as-is (schedbody class left in).
    filename = os.path.join(data_dir, now_str + ".html")
    with open(filename, "w") as fh:
        fh.write(page)


if __name__ == "__main__":
    main()