92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
|
"""Waste collection schedules."""
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
from collections import defaultdict
|
||
|
from datetime import date, datetime, time, timedelta
|
||
|
|
||
|
import httpx
|
||
|
import lxml.html
|
||
|
|
||
|
from . import uk_time
|
||
|
from .types import Event
|
||
|
from .utils import make_waste_dir
|
||
|
|
||
|
# Maximum age, in hours, of a cached schedule page before get_html fetches a
# fresh copy.
ttl_hours = 12
|
||
|
|
||
|
|
||
|
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Return the waste schedule page, using a cached copy when it is fresh.

    Pages are cached as ``<data_dir>/waste/YYYY-MM-DD_HH:MM.html``. When the
    newest cached file is younger than ``ttl_hours`` its contents are
    returned. Otherwise the collection schedule form on
    forms.n-somerset.gov.uk is submitted, and the resulting page is saved
    under the current timestamp and returned.

    Args:
        data_dir: Base data directory; the cache lives in its ``waste``
            subdirectory.
        postcode: Value sent in the form's ``Postcode`` field.
        uprn: Value sent in the form's ``SelectedUprn`` field.

    Returns:
        The HTML text of the schedule page.

    Raises:
        ValueError: If the newest ``.html`` file in the cache directory is
            not named with the expected timestamp format.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    cached = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if cached:
        # File names are zero-padded timestamps, so the lexicographic maximum
        # is also the most recent one.
        recent_filename = max(cached)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            with open(os.path.join(waste_dir, recent_filename)) as cache_file:
                return cache_file.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = f"{forms_base_url}/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        # The POST response body is expected to be an "Object moved to here"
        # page; follow its link manually. If the pattern is absent, the POST
        # response itself is used as the schedule page.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, r.text)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    # Use a context manager so the cache file is flushed and closed promptly.
    with open(filename, "w") as cache_file:
        cache_file.write(html)
    return html
|
||
|
|
||
|
|
||
|
def parse_waste_schedule_date(
    day_and_month: str, today: "date | None" = None
) -> date:
    """Parse a year-less schedule date such as ``"Friday 14 March"``.

    The schedule omits the year, so the date is resolved to its first
    occurrence on or after ``today``: the current year is tried first and,
    if that date has already passed (or does not exist in the current year,
    as with 29 February), the following year is used.

    Args:
        day_and_month: Text in ``"%A %d %B"`` form (weekday name, day of
            month, month name).
        today: Reference date; defaults to ``date.today()``.

    Returns:
        The resolved collection date.

    Raises:
        ValueError: If the text does not match the expected format, or names
            a day that exists in neither the current nor the following year.
    """
    if today is None:
        today = date.today()
    fmt = "%A %d %B %Y"

    try:
        d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
    except ValueError:
        # Either malformed text or a day missing from this year (29 February
        # in a non-leap year). The next-year attempt below separates the two:
        # malformed text raises again there.
        d = None

    if d is None or d < today:
        d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
    return d
|
||
|
|
||
|
|
||
|
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Turn the schedule table of a parsed page into collection events.

    Each row of the first ``table/tbody`` found must hold exactly three
    cells: the service name, its next collection date and the following
    collection date. Services are grouped by date, giving one event per
    distinct date, timed at 06:30 via ``uk_time``.

    Args:
        root: Parsed HTML of the schedule page.

    Returns:
        One ``Event`` named ``"waste_schedule"`` per collection date, in the
        order the dates are first seen in the table.
    """
    table_body = root.find(".//table/tbody")
    assert table_body is not None

    services_by_day = defaultdict(list)
    for row in table_body:
        service_cell, next_cell, following_cell = row
        assert service_cell.text and next_cell.text and following_cell.text

        label = service_cell.text
        # Resolve both dates before recording either, then file the service
        # under each of them.
        collection_days = (
            parse_waste_schedule_date(next_cell.text),
            parse_waste_schedule_date(following_cell.text),
        )
        for day in collection_days:
            services_by_day[day].append(label)

    events = []
    for day, labels in services_by_day.items():
        events.append(
            Event(
                name="waste_schedule",
                date=uk_time(day, time(6, 30)),
                title="Backwell: " + ", ".join(labels),
            )
        )
    return events
|