Compare commits
5 commits
17eca6a95a
...
ace517c482
Author | SHA1 | Date | |
---|---|---|---|
Edward Betts | ace517c482 | ||
Edward Betts | ef695af7af | ||
Edward Betts | 28ad4950fd | ||
Edward Betts | 5c4eac60ee | ||
Edward Betts | 0f3f596cb3 |
|
@ -2,109 +2,39 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import typing
|
import typing
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import date, datetime, time, timedelta
|
from datetime import date, datetime, timedelta
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
import lxml.html
|
|
||||||
|
|
||||||
from . import uk_time
|
|
||||||
from .types import Event
|
from .types import Event
|
||||||
|
from .utils import make_waste_dir
|
||||||
|
|
||||||
ttl_hours = 12
|
ttl_hours = 12
|
||||||
|
|
||||||
|
|
||||||
def make_waste_dir(data_dir: str) -> None:
    """Make waste dir if missing.

    Creates ``<data_dir>/waste``.  Calling it again when the directory already
    exists is a no-op.

    Args:
        data_dir: Base data directory that should contain the ``waste`` folder.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir,
    # and makedirs also creates data_dir itself when it is missing.
    os.makedirs(os.path.join(data_dir, "waste"), exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get the waste schedule HTML, reusing a recent cached copy when possible.

    Cached pages live in ``<data_dir>/waste`` and are named after their fetch
    time (``%Y-%m-%d_%H:%M.html``).  If the newest one is younger than
    ``ttl_hours`` it is returned without any network request; otherwise the
    schedule form on forms.n-somerset.gov.uk is posted, and the resulting page
    is saved as a new cache file and returned.

    Args:
        data_dir: Base data directory; its ``waste`` subdirectory is created
            if missing.
        postcode: Sent as the ``Postcode`` form field.
        uprn: Sent as the ``SelectedUprn`` form field.

    Returns:
        The HTML of the schedule page.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    # File names start with the timestamp, so the lexicographic maximum is
    # the most recently fetched page.
    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(waste_dir, f"{now_str}.html")

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        # When the reply is an "Object moved" page, follow its link by hand;
        # otherwise the POST response itself is used as the schedule page.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, r.text)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    # Context manager so the cache file is flushed and closed deterministically.
    with open(filename, "w") as out:
        out.write(html)
    return html
|
|
||||||
|
|
||||||
|
|
||||||
def parse_waste_schedule_date(day_and_month: str) -> date:
|
|
||||||
"""Parse waste schedule date."""
|
|
||||||
today = date.today()
|
|
||||||
fmt = "%A %d %B %Y"
|
|
||||||
d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
|
|
||||||
if d < today:
|
|
||||||
d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
|
|
||||||
return d
|
|
||||||
|
|
||||||
|
|
||||||
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Turn the schedule table into one event per collection day.

    Every child of the first ``table/tbody`` under ``root`` must unpack into
    exactly three elements: service name, next collection date and following
    collection date.  Services that share a date are merged into one event
    whose title is ``"Backwell: "`` followed by the comma-separated services.

    Args:
        root: Parsed HTML of the schedule page.

    Returns:
        One ``Event`` named ``waste_schedule`` per distinct date, dated via
        ``uk_time`` at 06:30.
    """
    tbody = root.find(".//table/tbody")
    assert tbody is not None

    services_on: defaultdict[date, list[str]] = defaultdict(list)
    for row in tbody:
        e_service, e_next_date, e_following = row
        assert e_service.text and e_next_date.text and e_following.text

        # Parse both dates before recording either of them.
        days = [
            parse_waste_schedule_date(cell.text)
            for cell in (e_next_date, e_following)
        ]
        for day in days:
            services_on[day].append(e_service.text)

    events = []
    for day, names in services_on.items():
        events.append(
            Event(
                name="waste_schedule",
                date=uk_time(day, time(6, 30)),
                title="Backwell: " + ", ".join(names),
            )
        )
    return events
|
|
||||||
|
|
||||||
|
|
||||||
BristolSchedule = list[dict[str, typing.Any]]
|
BristolSchedule = list[dict[str, typing.Any]]
|
||||||
|
|
||||||
|
|
||||||
async def get_bristol_data(
|
async def get(
    start_date: date, data_dir: str, uprn: str, refresh: bool = False
) -> list[Event]:
    """Get waste collection schedule from Bristol City Council.

    Loads the schedule through ``get_data`` and groups service names by
    collection date, listing each service at most once per date.

    Args:
        start_date: Reference date used to filter collection dates.
        data_dir: Passed through to ``get_data``.
        uprn: Passed through to ``get_data``.
        refresh: Passed through to ``get_data``.

    Returns:
        One ``Event`` named ``waste_schedule`` per date, titled ``"Bristol: "``
        followed by the comma-separated services.
    """
    schedule = await get_data(data_dir, uprn, refresh)

    services_on: defaultdict[date, list[str]] = defaultdict(list)
    for item in schedule:
        name = get_service(item)
        for day in collections(item):
            # NOTE(review): only dates strictly *before* start_date are kept;
            # confirm this is intended and not an inverted comparison.
            if not day < start_date:
                continue
            if name in services_on[day]:
                continue
            services_on[day].append(name)

    events = []
    for day, names in services_on.items():
        events.append(
            Event(name="waste_schedule", date=day, title="Bristol: " + ", ".join(names))
        )
    return events
|
||||||
|
|
||||||
|
|
||||||
|
async def get_data(data_dir: str, uprn: str, refresh: bool = False) -> BristolSchedule:
|
||||||
"""Get Bristol Waste schedule, with cache."""
|
"""Get Bristol Waste schedule, with cache."""
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
waste_dir = os.path.join(data_dir, "waste")
|
waste_dir = os.path.join(data_dir, "waste")
|
||||||
|
@ -126,7 +56,7 @@ async def get_bristol_data(
|
||||||
return get_from_recent()
|
return get_from_recent()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
r = await get_bristol_gov_uk_data(uprn)
|
r = await get_web_data(uprn)
|
||||||
except httpx.ReadTimeout:
|
except httpx.ReadTimeout:
|
||||||
return get_from_recent()
|
return get_from_recent()
|
||||||
|
|
||||||
|
@ -136,7 +66,7 @@ async def get_bristol_data(
|
||||||
return typing.cast(BristolSchedule, r.json()["data"])
|
return typing.cast(BristolSchedule, r.json()["data"])
|
||||||
|
|
||||||
|
|
||||||
async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response:
|
async def get_web_data(uprn: str) -> httpx.Response:
|
||||||
"""Get JSON from Bristol City Council."""
|
"""Get JSON from Bristol City Council."""
|
||||||
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
|
||||||
HEADERS = {
|
HEADERS = {
|
||||||
|
@ -186,39 +116,14 @@ async def get_bristol_gov_uk_data(uprn: str) -> httpx.Response:
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
def bristol_service(item: dict[str, typing.Any]) -> str:
|
def get_service(item: dict[str, typing.Any]) -> str:
    """Bristol waste service name.

    Args:
        item: Schedule entry; its ``containerName`` value is read.

    Returns:
        ``"Recycling"`` when the container name mentions recycling, otherwise
        the container name without its first word.  A name with no space is
        returned unchanged rather than as an empty string.
    """
    service: str = item["containerName"]
    if "Recycling" in service:
        return "Recycling"
    # Drop the leading word (presumably a size or colour prefix - confirm
    # against real data); keep the whole name when there is nothing after it.
    return service.partition(" ")[2] or service
|
||||||
|
|
||||||
|
|
||||||
def bristol_collections(item: dict[str, typing.Any]) -> typing.Iterable[date]:
|
def collections(item: dict[str, typing.Any]) -> typing.Iterable[date]:
    """Yield the collection dates recorded for one schedule entry.

    For every element of ``item["collection"]`` the next collection date is
    yielded first, then the last collection date.  Only the first ten
    characters of each value (the ``YYYY-MM-DD`` part) are parsed.
    """
    date_keys = ("nextCollectionDate", "lastCollectionDate")
    for entry in item["collection"]:
        yield from (date.fromisoformat(entry[key][:10]) for key in date_keys)
|
||||||
|
|
||||||
|
|
||||||
def bristol_by_date(data: BristolSchedule, start_date: date) -> dict[date, list[str]]:
    """Group Bristol service names by collection date.

    Args:
        data: Schedule entries; each is passed to ``bristol_service`` and
            ``bristol_collections``.
        start_date: Reference date used to filter collection dates.

    Returns:
        Mapping of date to service names, each service listed at most once
        per date.
    """
    services_on: defaultdict[date, list[str]] = defaultdict(list)

    for item in data:
        name = bristol_service(item)
        for day in bristol_collections(item):
            # NOTE(review): only dates strictly *before* start_date are kept;
            # confirm this is intended and not an inverted comparison.
            if not day < start_date:
                continue
            if name in services_on[day]:
                continue
            services_on[day].append(name)
    return services_on
|
|
||||||
|
|
||||||
|
|
||||||
async def get_bristol_gov_uk(
    start_date: date, data_dir: str, uprn: str, refresh: bool = False
) -> list[Event]:
    """Get waste collection schedule from Bristol City Council.

    Fetches the schedule with ``get_bristol_data``, groups it with
    ``bristol_by_date`` and returns one ``Event`` named ``waste_schedule`` per
    date, titled ``"Bristol: "`` followed by the comma-separated services.
    """
    schedule = await get_bristol_data(data_dir, uprn, refresh)

    events = []
    for day, names in bristol_by_date(schedule, start_date).items():
        events.append(
            Event(name="waste_schedule", date=day, title="Bristol: " + ", ".join(names))
        )
    return events
|
|
|
@ -15,6 +15,7 @@ import pytz
|
||||||
from . import (
|
from . import (
|
||||||
accommodation,
|
accommodation,
|
||||||
birthday,
|
birthday,
|
||||||
|
bristol_waste,
|
||||||
busy,
|
busy,
|
||||||
carnival,
|
carnival,
|
||||||
conference,
|
conference,
|
||||||
|
@ -26,13 +27,13 @@ from . import (
|
||||||
hn,
|
hn,
|
||||||
holidays,
|
holidays,
|
||||||
meetup,
|
meetup,
|
||||||
|
n_somerset_waste,
|
||||||
stock_market,
|
stock_market,
|
||||||
subscription,
|
subscription,
|
||||||
sun,
|
sun,
|
||||||
thespacedevs,
|
thespacedevs,
|
||||||
travel,
|
travel,
|
||||||
uk_holiday,
|
uk_holiday,
|
||||||
waste_schedule,
|
|
||||||
)
|
)
|
||||||
from .types import Event, StrDict
|
from .types import Event, StrDict
|
||||||
|
|
||||||
|
@ -62,9 +63,9 @@ async def waste_collection_events(
|
||||||
data_dir: str, postcode: str, uprn: str
|
data_dir: str, postcode: str, uprn: str
|
||||||
) -> list[Event]:
|
) -> list[Event]:
|
||||||
"""Waste colllection events."""
|
"""Waste colllection events."""
|
||||||
html = await waste_schedule.get_html(data_dir, postcode, uprn)
|
html = await n_somerset_waste.get_html(data_dir, postcode, uprn)
|
||||||
root = lxml.html.fromstring(html)
|
root = lxml.html.fromstring(html)
|
||||||
events = waste_schedule.parse(root)
|
events = n_somerset_waste.parse(root)
|
||||||
return events
|
return events
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,7 +73,7 @@ async def bristol_waste_collection_events(
|
||||||
data_dir: str, start_date: date, uprn: str
|
data_dir: str, start_date: date, uprn: str
|
||||||
) -> list[Event]:
|
) -> list[Event]:
|
||||||
"""Waste colllection events."""
|
"""Waste colllection events."""
|
||||||
return await waste_schedule.get_bristol_gov_uk(start_date, data_dir, uprn)
|
return await bristol_waste.get(start_date, data_dir, uprn)
|
||||||
|
|
||||||
|
|
||||||
def find_events_during_stay(
|
def find_events_during_stay(
|
||||||
|
|
91
agenda/n_somerset_waste.py
Normal file
91
agenda/n_somerset_waste.py
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
"""Waste collection schedules."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import date, datetime, time, timedelta
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import lxml.html
|
||||||
|
|
||||||
|
from . import uk_time
|
||||||
|
from .types import Event
|
||||||
|
from .utils import make_waste_dir
|
||||||
|
|
||||||
|
ttl_hours = 12
|
||||||
|
|
||||||
|
|
||||||
|
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get the waste schedule HTML, reusing a recent cached copy when possible.

    Cached pages live in ``<data_dir>/waste`` and are named after their fetch
    time (``%Y-%m-%d_%H:%M.html``).  If the newest one is younger than
    ``ttl_hours`` it is returned without any network request; otherwise the
    schedule form on forms.n-somerset.gov.uk is posted, and the resulting page
    is saved as a new cache file and returned.

    Args:
        data_dir: Base data directory; its ``waste`` subdirectory is created
            if missing.
        postcode: Sent as the ``Postcode`` form field.
        uprn: Sent as the ``SelectedUprn`` form field.

    Returns:
        The HTML of the schedule page.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    # File names start with the timestamp, so the lexicographic maximum is
    # the most recently fetched page.
    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = os.path.join(waste_dir, f"{now_str}.html")

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = "https://forms.n-somerset.gov.uk/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        # When the reply is an "Object moved" page, follow its link by hand;
        # otherwise the POST response itself is used as the schedule page.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, r.text)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    # Context manager so the cache file is flushed and closed deterministically.
    with open(filename, "w") as out:
        out.write(html)
    return html
|
||||||
|
|
||||||
|
|
||||||
|
def parse_waste_schedule_date(day_and_month: str) -> date:
|
||||||
|
"""Parse waste schedule date."""
|
||||||
|
today = date.today()
|
||||||
|
fmt = "%A %d %B %Y"
|
||||||
|
d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
|
||||||
|
if d < today:
|
||||||
|
d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Turn the schedule table into one event per collection day.

    Every child of the first ``table/tbody`` under ``root`` must unpack into
    exactly three elements: service name, next collection date and following
    collection date.  Services that share a date are merged into one event
    whose title is ``"Backwell: "`` followed by the comma-separated services.

    Args:
        root: Parsed HTML of the schedule page.

    Returns:
        One ``Event`` named ``waste_schedule`` per distinct date, dated via
        ``uk_time`` at 06:30.
    """
    tbody = root.find(".//table/tbody")
    assert tbody is not None

    services_on: defaultdict[date, list[str]] = defaultdict(list)
    for row in tbody:
        e_service, e_next_date, e_following = row
        assert e_service.text and e_next_date.text and e_following.text

        # Parse both dates before recording either of them.
        days = [
            parse_waste_schedule_date(cell.text)
            for cell in (e_next_date, e_following)
        ]
        for day in days:
            services_on[day].append(e_service.text)

    events = []
    for day, names in services_on.items():
        events.append(
            Event(
                name="waste_schedule",
                date=uk_time(day, time(6, 30)),
                title="Backwell: " + ", ".join(names),
            )
        )
    return events
|
|
@ -32,7 +32,7 @@ def next_launch_api_data(rocket_dir: str, limit: int = 200) -> StrDict | None:
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Launch] | None:
|
def next_launch_api(rocket_dir: str, limit: int = 200) -> list[Summary] | None:
|
||||||
"""Get the next upcoming launches from the API."""
|
"""Get the next upcoming launches from the API."""
|
||||||
data = next_launch_api_data(rocket_dir, limit)
|
data = next_launch_api_data(rocket_dir, limit)
|
||||||
if not data:
|
if not data:
|
||||||
|
@ -158,7 +158,7 @@ def read_cached_launches(rocket_dir: str) -> list[Summary]:
|
||||||
|
|
||||||
def get_launches(
|
def get_launches(
|
||||||
rocket_dir: str, limit: int = 200, refresh: bool = False
|
rocket_dir: str, limit: int = 200, refresh: bool = False
|
||||||
) -> list[Summary]:
|
) -> list[Summary] | None:
|
||||||
"""Get rocket launches with caching."""
|
"""Get rocket launches with caching."""
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
existing = [
|
existing = [
|
||||||
|
|
|
@ -77,3 +77,10 @@ def get_most_recent_file(directory: str, ext: str) -> str | None:
|
||||||
return None
|
return None
|
||||||
existing.sort(reverse=True)
|
existing.sort(reverse=True)
|
||||||
return os.path.join(directory, existing[0][1])
|
return os.path.join(directory, existing[0][1])
|
||||||
|
|
||||||
|
|
||||||
|
def make_waste_dir(data_dir: str) -> None:
    """Make waste dir if missing.

    Creates ``<data_dir>/waste``.  Calling it again when the directory already
    exists is a no-op.

    Args:
        data_dir: Base data directory that should contain the ``waste`` folder.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir,
    # and makedirs also creates data_dir itself when it is missing.
    os.makedirs(os.path.join(data_dir, "waste"), exist_ok=True)
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
<table class="w-auto table">
|
<table class="w-auto table">
|
||||||
{% for event in items %}
|
{% for event in items %}
|
||||||
<tr>
|
<tr>
|
||||||
<td class="text-end">{{event.as_date.strftime("%a, %d, %b")}}</td>
|
<td class="text-end">{{event.as_date.strftime("%a, %d, %b %Y")}}</td>
|
||||||
<td>{{ event.title }}</td>
|
<td>{{ event.title }}</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
|
@ -13,6 +13,7 @@ import deepdiff # type: ignore
|
||||||
import flask
|
import flask
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
import agenda.bristol_waste
|
||||||
import agenda.fx
|
import agenda.fx
|
||||||
import agenda.geomob
|
import agenda.geomob
|
||||||
import agenda.gwr
|
import agenda.gwr
|
||||||
|
@ -20,7 +21,6 @@ import agenda.mail
|
||||||
import agenda.thespacedevs
|
import agenda.thespacedevs
|
||||||
import agenda.types
|
import agenda.types
|
||||||
import agenda.uk_holiday
|
import agenda.uk_holiday
|
||||||
import agenda.waste_schedule
|
|
||||||
from agenda.types import StrDict
|
from agenda.types import StrDict
|
||||||
from web_view import app
|
from web_view import app
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ async def update_bank_holidays(config: flask.config.Config) -> None:
|
||||||
async def update_bristol_bins(config: flask.config.Config) -> None:
|
async def update_bristol_bins(config: flask.config.Config) -> None:
|
||||||
"""Update waste schedule from Bristol City Council."""
|
"""Update waste schedule from Bristol City Council."""
|
||||||
t0 = time()
|
t0 = time()
|
||||||
events = await agenda.waste_schedule.get_bristol_gov_uk(
|
events = await agenda.bristol_waste.get(
|
||||||
date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True
|
date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True
|
||||||
)
|
)
|
||||||
time_taken = time() - t0
|
time_taken = time() - t0
|
||||||
|
|
Loading…
Reference in a new issue