Compare commits

...

5 commits

7 changed files with 132 additions and 128 deletions

View file

@ -2,109 +2,39 @@
import json import json
import os import os
import re
import typing import typing
from collections import defaultdict from collections import defaultdict
from datetime import date, datetime, time, timedelta from datetime import date, datetime, timedelta
import httpx import httpx
import lxml.html
from . import uk_time
from .types import Event from .types import Event
from .utils import make_waste_dir
ttl_hours = 12 ttl_hours = 12
def make_waste_dir(data_dir: str) -> None:
    """Make waste dir if missing.

    Creates ``<data_dir>/waste`` along with any missing parent directories.
    Calling it again when the directory already exists is a no-op.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(os.path.join(data_dir, "waste"), exist_ok=True)
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get waste schedule.

    Return the collection schedule HTML for the given postcode and UPRN.

    The newest ``*.html`` file in ``<data_dir>/waste`` is returned when it is
    younger than ``ttl_hours``. Otherwise the schedule form is posted to the
    North Somerset forms site, the response is saved under a timestamped
    filename in that directory, and the HTML is returned.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")
    make_waste_dir(data_dir)

    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        # Filenames are zero-padded timestamps, so the lexicographic maximum
        # is also the most recent file.
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            # Context manager closes the handle instead of leaking it.
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        form_post_html = r.text
        # The POST may answer with an "Object moved" page; when it does,
        # fetch the linked page, otherwise keep the POST response itself.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, form_post_html)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    with open(filename, "w") as out:
        out.write(html)
    return html
def parse_waste_schedule_date(day_and_month: str) -> date:
    """Parse waste schedule date.

    ``day_and_month`` looks like ``"Monday 01 January"`` and carries no year.
    The current year is tried first; if that date is already in the past, or
    does not exist in the current year (29 February), next year is used.

    Raises ``ValueError`` if the text cannot be parsed for next year either.
    """
    today = date.today()
    fmt = "%A %d %B %Y"
    try:
        d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
    except ValueError:
        # Either malformed text or a leap day in a non-leap year; the
        # next-year attempt below re-raises for genuinely bad input.
        d = None
    if d is not None and d >= today:
        return d
    return datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Parse waste schedule.

    Reads the first ``table/tbody`` under ``root``. Each row must hold three
    cells: service name, next collection date and following collection date.
    Returns one event per distinct date, timed at 06:30 via ``uk_time``, with
    the services due that day joined into the title.
    """
    tbody = root.find(".//table/tbody")
    assert tbody is not None

    services_by_date = defaultdict(list)
    for row in tbody:
        service_cell, next_cell, following_cell = row
        assert service_cell.text and next_cell.text and following_cell.text
        next_date = parse_waste_schedule_date(next_cell.text)
        following_date = parse_waste_schedule_date(following_cell.text)
        for collection_date in (next_date, following_date):
            services_by_date[collection_date].append(service_cell.text)

    events = []
    for collection_date, services in services_by_date.items():
        title = "Backwell: " + ", ".join(services)
        when = uk_time(collection_date, time(6, 30))
        events.append(Event(name="waste_schedule", date=when, title=title))
    return events
BristolSchedule = list[dict[str, typing.Any]] BristolSchedule = list[dict[str, typing.Any]]
async def get_bristol_data( async def get(
data_dir: str, uprn: str, refresh: bool = False start_date: date, data_dir: str, uprn: str, refresh: bool = False
) -> BristolSchedule: ) -> list[Event]:
"""Get waste collection schedule from Bristol City Council."""
by_date: defaultdict[date, list[str]] = defaultdict(list)
for item in await get_data(data_dir, uprn, refresh):
service = get_service(item)
for d in collections(item):
if d < start_date and service not in by_date[d]:
by_date[d].append(service)
return [
Event(name="waste_schedule", date=d, title="Bristol: " + ", ".join(services))
for d, services in by_date.items()
]
async def get_data(data_dir: str, uprn: str, refresh: bool = False) -> BristolSchedule:
"""Get Bristol Waste schedule, with cache.""" """Get Bristol Waste schedule, with cache."""
now = datetime.now() now = datetime.now()
waste_dir = os.path.join(data_dir, "waste") waste_dir = os.path.join(data_dir, "waste")
@ -126,7 +56,7 @@ async def get_bristol_data(
return get_from_recent() return get_from_recent()
try: try:
r = await get_bristol_gov_uk_data(uprn) r = await get_web_data(uprn)
except httpx.ReadTimeout: except httpx.ReadTimeout:
return get_from_recent() return get_from_recent()
@ -136,7 +66,7 @@ async def get_bristol_data(
return typing.cast(BristolSchedule, r.json()["data"]) return typing.cast(BristolSchedule, r.json()["data"])
async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response: async def get_web_data(uprn: str) -> httpx.Response:
"""Get JSON from Bristol City Council.""" """Get JSON from Bristol City Council."""
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)" UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
HEADERS = { HEADERS = {
@ -186,39 +116,14 @@ async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response:
return response return response
def bristol_service(item: dict[str, typing.Any]) -> str: def get_service(item: dict[str, typing.Any]) -> str:
"""Bristol waste service name.""" """Bristol waste service name."""
service: str = item["containerName"] service: str = item["containerName"]
return "Recycling" if "Recycling" in service else service.partition(" ")[2] return "Recycling" if "Recycling" in service else service.partition(" ")[2]
def bristol_collections(item: dict[str, typing.Any]) -> typing.Iterable[date]: def collections(item: dict[str, typing.Any]) -> typing.Iterable[date]:
"""Bristol dates from collections.""" """Bristol dates from collections."""
for collection in item["collection"]: for collection in item["collection"]:
for collection_date_key in ["nextCollectionDate", "lastCollectionDate"]: for collection_date_key in ["nextCollectionDate", "lastCollectionDate"]:
yield date.fromisoformat(collection[collection_date_key][:10]) yield date.fromisoformat(collection[collection_date_key][:10])
def bristol_by_date(data: BristolSchedule, start_date: date) -> dict[date, list[str]]:
    """Group Bristol service names by collection date.

    For every item in ``data`` the service name comes from
    ``bristol_service`` and the dates from ``bristol_collections``. Each
    service is listed at most once per date.
    """
    grouped: defaultdict[date, list[str]] = defaultdict(list)
    for item in data:
        service = bristol_service(item)
        for collection_date in bristol_collections(item):
            # NOTE(review): only dates strictly before start_date are kept;
            # given the parameter name this may be inverted - confirm intent.
            if collection_date >= start_date:
                continue
            services = grouped[collection_date]
            if service in services:
                continue
            services.append(service)
    return grouped
async def get_bristol_gov_uk(
    start_date: date, data_dir: str, uprn: str, refresh: bool = False
) -> list[Event]:
    """Get waste collection schedule from Bristol City Council.

    Loads the schedule through ``get_bristol_data``, groups it with
    ``bristol_by_date`` and returns one ``waste_schedule`` event per date,
    titled with the services for that date.
    """
    schedule = await get_bristol_data(data_dir, uprn, refresh)
    events: list[Event] = []
    for collection_date, services in bristol_by_date(schedule, start_date).items():
        title = "Bristol: " + ", ".join(services)
        events.append(Event(name="waste_schedule", date=collection_date, title=title))
    return events

View file

@ -15,6 +15,7 @@ import pytz
from . import ( from . import (
accommodation, accommodation,
birthday, birthday,
bristol_waste,
busy, busy,
carnival, carnival,
conference, conference,
@ -26,13 +27,13 @@ from . import (
hn, hn,
holidays, holidays,
meetup, meetup,
n_somerset_waste,
stock_market, stock_market,
subscription, subscription,
sun, sun,
thespacedevs, thespacedevs,
travel, travel,
uk_holiday, uk_holiday,
waste_schedule,
) )
from .types import Event, StrDict from .types import Event, StrDict
@ -62,9 +63,9 @@ async def waste_collection_events(
data_dir: str, postcode: str, uprn: str data_dir: str, postcode: str, uprn: str
) -> list[Event]: ) -> list[Event]:
"""Waste colllection events.""" """Waste colllection events."""
html = await waste_schedule.get_html(data_dir, postcode, uprn) html = await n_somerset_waste.get_html(data_dir, postcode, uprn)
root = lxml.html.fromstring(html) root = lxml.html.fromstring(html)
events = waste_schedule.parse(root) events = n_somerset_waste.parse(root)
return events return events
@ -72,7 +73,7 @@ async def bristol_waste_collection_events(
data_dir: str, start_date: date, uprn: str data_dir: str, start_date: date, uprn: str
) -> list[Event]: ) -> list[Event]:
"""Waste colllection events.""" """Waste colllection events."""
return await waste_schedule.get_bristol_gov_uk(start_date, data_dir, uprn) return await bristol_waste.get(start_date, data_dir, uprn)
def find_events_during_stay( def find_events_during_stay(

View file

@ -0,0 +1,91 @@
"""Waste collection schedules."""
import os
import re
from collections import defaultdict
from datetime import date, datetime, time, timedelta
import httpx
import lxml.html
from . import uk_time
from .types import Event
from .utils import make_waste_dir
ttl_hours = 12
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get waste schedule.

    Return the collection schedule HTML for the given postcode and UPRN.

    The newest ``*.html`` file in ``<data_dir>/waste`` is returned when it is
    younger than ``ttl_hours``. Otherwise the schedule form is posted to the
    North Somerset forms site, the response is saved under a timestamped
    filename in that directory, and the HTML is returned.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")
    make_waste_dir(data_dir)

    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        # Filenames are zero-padded timestamps, so the lexicographic maximum
        # is also the most recent file.
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            # Context manager closes the handle instead of leaking it.
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        form_post_html = r.text
        # The POST may answer with an "Object moved" page; when it does,
        # fetch the linked page, otherwise keep the POST response itself.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, form_post_html)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    with open(filename, "w") as out:
        out.write(html)
    return html
def parse_waste_schedule_date(day_and_month: str) -> date:
    """Parse waste schedule date.

    ``day_and_month`` looks like ``"Monday 01 January"`` and carries no year.
    The current year is tried first; if that date is already in the past, or
    does not exist in the current year (29 February), next year is used.

    Raises ``ValueError`` if the text cannot be parsed for next year either.
    """
    today = date.today()
    fmt = "%A %d %B %Y"
    try:
        d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
    except ValueError:
        # Either malformed text or a leap day in a non-leap year; the
        # next-year attempt below re-raises for genuinely bad input.
        d = None
    if d is not None and d >= today:
        return d
    return datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Parse waste schedule.

    Reads the first ``table/tbody`` under ``root``. Each row must hold three
    cells: service name, next collection date and following collection date.
    Returns one event per distinct date, timed at 06:30 via ``uk_time``, with
    the services due that day joined into the title.
    """
    tbody = root.find(".//table/tbody")
    assert tbody is not None

    services_by_date = defaultdict(list)
    for row in tbody:
        service_cell, next_cell, following_cell = row
        assert service_cell.text and next_cell.text and following_cell.text
        next_date = parse_waste_schedule_date(next_cell.text)
        following_date = parse_waste_schedule_date(following_cell.text)
        for collection_date in (next_date, following_date):
            services_by_date[collection_date].append(service_cell.text)

    events = []
    for collection_date, services in services_by_date.items():
        title = "Backwell: " + ", ".join(services)
        when = uk_time(collection_date, time(6, 30))
        events.append(Event(name="waste_schedule", date=when, title=title))
    return events

View file

@ -32,7 +32,7 @@ def next_launch_api_data(rocket_dir: str, limit: int = 200) -> StrDict | None:
return data return data
def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Launch] | None: def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Summary] | None:
"""Get the next upcoming launches from the API.""" """Get the next upcoming launches from the API."""
data = next_launch_api_data(rocket_dir, limit) data = next_launch_api_data(rocket_dir, limit)
if not data: if not data:
@ -158,7 +158,7 @@ def read_cached_launches(rocket_dir: str) -> list[Summary]:
def get_launches( def get_launches(
rocket_dir: str, limit: int = 200, refresh: bool = False rocket_dir: str, limit: int = 200, refresh: bool = False
) -> list[Summary]: ) -> list[Summary] | None:
"""Get rocket launches with caching.""" """Get rocket launches with caching."""
now = datetime.now() now = datetime.now()
existing = [ existing = [

View file

@ -77,3 +77,10 @@ def get_most_recent_file(directory: str, ext: str) -> str | None:
return None return None
existing.sort(reverse=True) existing.sort(reverse=True)
return os.path.join(directory, existing[0][1]) return os.path.join(directory, existing[0][1])
def make_waste_dir(data_dir: str) -> None:
    """Make waste dir if missing.

    Creates ``<data_dir>/waste`` along with any missing parent directories.
    Calling it again when the directory already exists is a no-op.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(os.path.join(data_dir, "waste"), exist_ok=True)

View file

@ -8,7 +8,7 @@
<table class="w-auto table"> <table class="w-auto table">
{% for event in items %} {% for event in items %}
<tr> <tr>
<td class="text-end">{{event.as_date.strftime("%a, %d, %b")}}</td> <td class="text-end">{{event.as_date.strftime("%a, %d, %b %Y")}}</td>
<td>{{ event.title }}</td> <td>{{ event.title }}</td>
</tr> </tr>
{% endfor %} {% endfor %}

View file

@ -13,6 +13,7 @@ import deepdiff # type: ignore
import flask import flask
import requests import requests
import agenda.bristol_waste
import agenda.fx import agenda.fx
import agenda.geomob import agenda.geomob
import agenda.gwr import agenda.gwr
@ -20,7 +21,6 @@ import agenda.mail
import agenda.thespacedevs import agenda.thespacedevs
import agenda.types import agenda.types
import agenda.uk_holiday import agenda.uk_holiday
import agenda.waste_schedule
from agenda.types import StrDict from agenda.types import StrDict
from web_view import app from web_view import app
@ -39,7 +39,7 @@ async def update_bank_holidays(config: flask.config.Config) -> None:
async def update_bristol_bins(config: flask.config.Config) -> None: async def update_bristol_bins(config: flask.config.Config) -> None:
"""Update waste schedule from Bristol City Council.""" """Update waste schedule from Bristol City Council."""
t0 = time() t0 = time()
events = await agenda.waste_schedule.get_bristol_gov_uk( events = await agenda.bristol_waste.get(
date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True
) )
time_taken = time() - t0 time_taken = time() - t0