diff --git a/agenda/data.py b/agenda/data.py index ae050bd..b7f9cb8 100644 --- a/agenda/data.py +++ b/agenda/data.py @@ -15,7 +15,6 @@ import pytz from . import ( accommodation, birthday, - bristol_waste, busy, carnival, conference, @@ -27,13 +26,13 @@ from . import ( hn, holidays, meetup, - n_somerset_waste, stock_market, subscription, sun, thespacedevs, travel, uk_holiday, + waste_schedule, ) from .types import Event, StrDict @@ -63,9 +62,9 @@ async def waste_collection_events( data_dir: str, postcode: str, uprn: str ) -> list[Event]: """Waste colllection events.""" - html = await n_somerset_waste.get_html(data_dir, postcode, uprn) + html = await waste_schedule.get_html(data_dir, postcode, uprn) root = lxml.html.fromstring(html) - events = n_somerset_waste.parse(root) + events = waste_schedule.parse(root) return events @@ -73,7 +72,7 @@ async def bristol_waste_collection_events( data_dir: str, start_date: date, uprn: str ) -> list[Event]: """Waste colllection events.""" - return await bristol_waste.get(start_date, data_dir, uprn) + return await waste_schedule.get_bristol_gov_uk(start_date, data_dir, uprn) def find_events_during_stay( diff --git a/agenda/n_somerset_waste.py b/agenda/n_somerset_waste.py deleted file mode 100644 index 4621b2e..0000000 --- a/agenda/n_somerset_waste.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Waste collection schedules.""" - -import os -import re -from collections import defaultdict -from datetime import date, datetime, time, timedelta - -import httpx -import lxml.html - -from . import uk_time -from .types import Event -from .utils import make_waste_dir - -ttl_hours = 12 - - -async def get_html(data_dir: str, postcode: str, uprn: str) -> str: - """Get waste schedule.""" - now = datetime.now() - waste_dir = os.path.join(data_dir, "waste") - - make_waste_dir(data_dir) - - existing_data = os.listdir(waste_dir) - existing = [f for f in existing_data if f.endswith(".html")] - if existing: - recent_filename = max(existing) - recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html") - delta = now - recent - - if existing and delta < timedelta(hours=ttl_hours): - return open(os.path.join(waste_dir, recent_filename)).read() - - now_str = now.strftime("%Y-%m-%d_%H:%M") - filename = f"{waste_dir}/{now_str}.html" - - forms_base_url = "https://forms.n-somerset.gov.uk" - url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule" - async with httpx.AsyncClient() as client: - r = await client.post( - url, - data={ - "PreviousHouse": "", - "PreviousPostcode": "-", - "Postcode": postcode, - "SelectedUprn": uprn, - }, - ) - form_post_html = r.text - pattern = r'

Object moved to here<\/a>\.<\/h2>' - m = re.search(pattern, form_post_html) - if m: - r = await client.get(forms_base_url + m.group(1)) - html = r.text - open(filename, "w").write(html) - return html - - -def parse_waste_schedule_date(day_and_month: str) -> date: - """Parse waste schedule date.""" - today = date.today() - fmt = "%A %d %B %Y" - d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date() - if d < today: - d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date() - return d - - -def parse(root: lxml.html.HtmlElement) -> list[Event]: - """Parse waste schedule.""" - tbody = root.find(".//table/tbody") - assert tbody is not None - by_date = defaultdict(list) - for e_service, e_next_date, e_following in tbody: - assert e_service.text and e_next_date.text and e_following.text - service = e_service.text - next_date = parse_waste_schedule_date(e_next_date.text) - following_date = parse_waste_schedule_date(e_following.text) - - by_date[next_date].append(service) - by_date[following_date].append(service) - - return [ - Event( - name="waste_schedule", - date=uk_time(d, time(6, 30)), - title="Backwell: " + ", ".join(services), - ) - for d, services in by_date.items() - ] diff --git a/agenda/thespacedevs.py b/agenda/thespacedevs.py index 8168d8c..4092f40 100644 --- a/agenda/thespacedevs.py +++ b/agenda/thespacedevs.py @@ -32,7 +32,7 @@ def next_launch_api_data(rocket_dir: str, limit: int = 200) -> StrDict | None: return data -def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Summary] | None: +def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Launch] | None: """Get the next upcoming launches from the API.""" data = next_launch_api_data(rocket_dir, limit) if not data: @@ -158,7 +158,7 @@ def read_cached_launches(rocket_dir: str) -> list[Summary]: def get_launches( rocket_dir: str, limit: int = 200, refresh: bool = False -) -> list[Summary] | None: +) -> list[Summary]: """Get rocket launches with caching.""" now = datetime.now() existing = [ diff --git a/agenda/utils.py b/agenda/utils.py index 00a4e50..27e3ad0 100644 --- a/agenda/utils.py +++ b/agenda/utils.py @@ -77,10 +77,3 @@ def get_most_recent_file(directory: str, ext: str) -> str | None: return None existing.sort(reverse=True) return os.path.join(directory, existing[0][1]) - - -def make_waste_dir(data_dir: str) -> None: - """Make waste dir if missing.""" - waste_dir = os.path.join(data_dir, "waste") - if not os.path.exists(waste_dir): - os.mkdir(waste_dir) diff --git a/agenda/bristol_waste.py b/agenda/waste_schedule.py similarity index 50% rename from agenda/bristol_waste.py rename to agenda/waste_schedule.py index 0738d32..947eb6c 100644 --- a/agenda/bristol_waste.py +++ b/agenda/waste_schedule.py @@ -2,39 +2,109 @@ import json import os +import re import typing from collections import defaultdict -from datetime import date, datetime, timedelta +from datetime import date, datetime, time, timedelta import httpx +import lxml.html +from . import uk_time from .types import Event -from .utils import make_waste_dir ttl_hours = 12 +def make_waste_dir(data_dir: str) -> None: + """Make waste dir if missing.""" + waste_dir = os.path.join(data_dir, "waste") + if not os.path.exists(waste_dir): + os.mkdir(waste_dir) + + +async def get_html(data_dir: str, postcode: str, uprn: str) -> str: + """Get waste schedule.""" + now = datetime.now() + waste_dir = os.path.join(data_dir, "waste") + + make_waste_dir(data_dir) + + existing_data = os.listdir(waste_dir) + existing = [f for f in existing_data if f.endswith(".html")] + if existing: + recent_filename = max(existing) + recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html") + delta = now - recent + + if existing and delta < timedelta(hours=ttl_hours): + return open(os.path.join(waste_dir, recent_filename)).read() + + now_str = now.strftime("%Y-%m-%d_%H:%M") + filename = f"{waste_dir}/{now_str}.html" + + forms_base_url = "https://forms.n-somerset.gov.uk" + url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule" + async with httpx.AsyncClient() as client: + r = await client.post( + url, + data={ + "PreviousHouse": "", + "PreviousPostcode": "-", + "Postcode": postcode, + "SelectedUprn": uprn, + }, + ) + form_post_html = r.text + pattern = r'

Object moved to here<\/a>\.<\/h2>' + m = re.search(pattern, form_post_html) + if m: + r = await client.get(forms_base_url + m.group(1)) + html = r.text + open(filename, "w").write(html) + return html + + +def parse_waste_schedule_date(day_and_month: str) -> date: + """Parse waste schedule date.""" + today = date.today() + fmt = "%A %d %B %Y" + d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date() + if d < today: + d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date() + return d + + +def parse(root: lxml.html.HtmlElement) -> list[Event]: + """Parse waste schedule.""" + tbody = root.find(".//table/tbody") + assert tbody is not None + by_date = defaultdict(list) + for e_service, e_next_date, e_following in tbody: + assert e_service.text and e_next_date.text and e_following.text + service = e_service.text + next_date = parse_waste_schedule_date(e_next_date.text) + following_date = parse_waste_schedule_date(e_following.text) + + by_date[next_date].append(service) + by_date[following_date].append(service) + + return [ + Event( + name="waste_schedule", + date=uk_time(d, time(6, 30)), + title="Backwell: " + ", ".join(services), + ) + for d, services in by_date.items() + ] + + BristolSchedule = list[dict[str, typing.Any]] -async def get( - start_date: date, data_dir: str, uprn: str, refresh: bool = False -) -> list[Event]: - """Get waste collection schedule from Bristol City Council.""" - by_date: defaultdict[date, list[str]] = defaultdict(list) - for item in await get_data(data_dir, uprn, refresh): - service = get_service(item) - for d in collections(item): - if d < start_date and service not in by_date[d]: - by_date[d].append(service) - - return [ - Event(name="waste_schedule", date=d, title="Bristol: " + ", ".join(services)) - for d, services in by_date.items() - ] - - -async def get_data(data_dir: str, uprn: str, refresh: bool = False) -> BristolSchedule: +async def get_bristol_data( + data_dir: str, uprn: str, refresh: bool = False +) -> BristolSchedule: """Get Bristol Waste schedule, with cache.""" now = datetime.now() waste_dir = os.path.join(data_dir, "waste") @@ -56,7 +126,7 @@ async def get_data(data_dir: str, uprn: str, refresh: bool = False) -> BristolSc return get_from_recent() try: - r = await get_web_data(uprn) + r = await get_bristol_gov_uk_data(uprn) except httpx.ReadTimeout: return get_from_recent() @@ -66,7 +136,7 @@ async def get_data(data_dir: str, uprn: str, refresh: bool = False) -> BristolSc return typing.cast(BristolSchedule, r.json()["data"]) -async def get_web_data(uprn: str) -> httpx.Response: +async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response: """Get JSON from Bristol City Council.""" UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" HEADERS = { @@ -116,14 +186,39 @@ async def get_web_data(uprn: str) -> httpx.Response: return response -def get_service(item: dict[str, typing.Any]) -> str: +def bristol_service(item: dict[str, typing.Any]) -> str: """Bristol waste service name.""" service: str = item["containerName"] return "Recycling" if "Recycling" in service else service.partition(" ")[2] -def collections(item: dict[str, typing.Any]) -> typing.Iterable[date]: +def bristol_collections(item: dict[str, typing.Any]) -> typing.Iterable[date]: """Bristol dates from collections.""" for collection in item["collection"]: for collection_date_key in ["nextCollectionDate", "lastCollectionDate"]: yield date.fromisoformat(collection[collection_date_key][:10]) + + +def bristol_by_date(data: BristolSchedule, start_date: date) -> dict[date, list[str]]: + """Build list of Bristol services by date.""" + by_date: defaultdict[date, list[str]] = defaultdict(list) + + for item in data: + service = bristol_service(item) + for d in bristol_collections(item): + if d < start_date and service not in by_date[d]: + by_date[d].append(service) + return by_date + + +async def get_bristol_gov_uk( + start_date: date, data_dir: str, uprn: str, refresh: bool = False +) -> list[Event]: + """Get waste collection schedule from Bristol City Council.""" + data = await get_bristol_data(data_dir, uprn, refresh) + + by_date = bristol_by_date(data, start_date) + return [ + Event(name="waste_schedule", date=d, title="Bristol: " + ", ".join(services)) + for d, services in by_date.items() + ] diff --git a/templates/birthday_list.html b/templates/birthday_list.html index 6ab4811..7346bae 100644 --- a/templates/birthday_list.html +++ b/templates/birthday_list.html @@ -8,7 +8,7 @@ {% for event in items %} - + {% endfor %} diff --git a/update.py b/update.py index a86b360..d4a1f0b 100755 --- a/update.py +++ b/update.py @@ -13,7 +13,6 @@ import deepdiff # type: ignore import flask import requests -import agenda.bristol_waste import agenda.fx import agenda.geomob import agenda.gwr @@ -21,6 +20,7 @@ import agenda.mail import agenda.thespacedevs import agenda.types import agenda.uk_holiday +import agenda.waste_schedule from agenda.types import StrDict from web_view import app @@ -39,7 +39,7 @@ async def update_bank_holidays(config: flask.config.Config) -> None: async def update_bristol_bins(config: flask.config.Config) -> None: """Update waste schedule from Bristol City Council.""" t0 = time() - events = await agenda.bristol_waste.get( + events = await agenda.waste_schedule.get_bristol_gov_uk( date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True ) time_taken = time() - t0
{{event.as_date.strftime("%a, %d, %b %Y")}}{{event.as_date.strftime("%a, %d, %b")}} {{ event.title }}