Split waste_schedule.py in two
This commit is contained in:
parent
17eca6a95a
commit
0f3f596cb3
|
@ -2,103 +2,18 @@
|
|||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import typing
|
||||
from collections import defaultdict
|
||||
from datetime import date, datetime, time, timedelta
|
||||
from datetime import date, datetime, timedelta
|
||||
|
||||
import httpx
|
||||
import lxml.html
|
||||
|
||||
from . import uk_time
|
||||
from .types import Event
|
||||
from .utils import make_waste_dir
|
||||
|
||||
# Maximum age, in hours, of a cached schedule page before get_html re-fetches it.
ttl_hours = 12
|
||||
|
||||
|
||||
def make_waste_dir(data_dir: str) -> None:
    """Make the ``waste`` directory inside *data_dir* if it is missing.

    Only the final ``waste`` component is created; a missing *data_dir*
    still raises ``FileNotFoundError`` from ``os.mkdir``.
    """
    waste_dir = os.path.join(data_dir, "waste")
    # Try to create and tolerate "already there" rather than checking first:
    # a check-then-create sequence can fail if something else creates the
    # directory between the two steps.
    try:
        os.mkdir(waste_dir)
    except FileExistsError:
        pass
|
||||
|
||||
|
||||
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get the North Somerset waste schedule HTML, using a file cache.

    Pages are cached in ``<data_dir>/waste`` as ``YYYY-MM-DD_HH:MM.html``.
    If the newest cached page is younger than ``ttl_hours`` it is returned
    without any network access. Otherwise the collection-schedule form is
    posted with *postcode* and *uprn*, the "Object moved" link in the reply
    is followed when present, and the resulting page is cached and returned.

    Raises ``ValueError`` if the newest ``.html`` file in the cache directory
    does not match the expected timestamp file name format.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        # File names are zero-padded timestamps, so the lexical maximum is
        # also the most recent one.
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = forms_base_url + "/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        # The form replies with an "Object moved" page whose link points at
        # the real schedule; follow it by hand when it is present.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, r.text)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    # Use a context manager so the cache file is flushed and closed promptly.
    with open(filename, "w") as out:
        out.write(html)
    return html
|
||||
|
||||
|
||||
def parse_waste_schedule_date(day_and_month: str) -> date:
|
||||
"""Parse waste schedule date."""
|
||||
today = date.today()
|
||||
fmt = "%A %d %B %Y"
|
||||
d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
|
||||
if d < today:
|
||||
d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
|
||||
return d
|
||||
|
||||
|
||||
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Build one event per collection date from the schedule table.

    Every row of the ``table/tbody`` found under *root* must hold exactly
    three cells: service name, next collection date and following collection
    date. Services collected on the same date are merged into a single event
    titled ``"Backwell: <service>, <service>, ..."`` timed at 06:30.
    """
    table_body = root.find(".//table/tbody")
    assert table_body is not None

    services_on = defaultdict(list)
    for row in table_body:
        name_cell, next_cell, following_cell = row
        assert name_cell.text and next_cell.text and following_cell.text
        # Resolve both dates before recording either of them.
        first_day = parse_waste_schedule_date(next_cell.text)
        second_day = parse_waste_schedule_date(following_cell.text)
        for collection_day in (first_day, second_day):
            services_on[collection_day].append(name_cell.text)

    events = []
    for collection_day, names in services_on.items():
        events.append(
            Event(
                name="waste_schedule",
                date=uk_time(collection_day, time(6, 30)),
                title="Backwell: " + ", ".join(names),
            )
        )
    return events
|
||||
|
||||
|
||||
# Alias for a list of dicts with string keys and values of any type.
# NOTE(review): the name suggests Bristol schedule data; usage is not visible here.
BristolSchedule = list[dict[str, typing.Any]]
|
||||
|
||||
|
|
@ -15,6 +15,7 @@ import pytz
|
|||
from . import (
|
||||
accommodation,
|
||||
birthday,
|
||||
bristol_waste,
|
||||
busy,
|
||||
carnival,
|
||||
conference,
|
||||
|
@ -26,13 +27,13 @@ from . import (
|
|||
hn,
|
||||
holidays,
|
||||
meetup,
|
||||
n_somerset_waste,
|
||||
stock_market,
|
||||
subscription,
|
||||
sun,
|
||||
thespacedevs,
|
||||
travel,
|
||||
uk_holiday,
|
||||
waste_schedule,
|
||||
)
|
||||
from .types import Event, StrDict
|
||||
|
||||
|
@ -62,9 +63,9 @@ async def waste_collection_events(
|
|||
data_dir: str, postcode: str, uprn: str
|
||||
) -> list[Event]:
|
||||
"""Waste colllection events."""
|
||||
html = await waste_schedule.get_html(data_dir, postcode, uprn)
|
||||
html = await n_somerset_waste.get_html(data_dir, postcode, uprn)
|
||||
root = lxml.html.fromstring(html)
|
||||
events = waste_schedule.parse(root)
|
||||
events = n_somerset_waste.parse(root)
|
||||
return events
|
||||
|
||||
|
||||
|
@ -72,7 +73,7 @@ async def bristol_waste_collection_events(
|
|||
data_dir: str, start_date: date, uprn: str
|
||||
) -> list[Event]:
|
||||
"""Waste colllection events."""
|
||||
return await waste_schedule.get_bristol_gov_uk(start_date, data_dir, uprn)
|
||||
return await bristol_waste.get_bristol_gov_uk(start_date, data_dir, uprn)
|
||||
|
||||
|
||||
def find_events_during_stay(
|
||||
|
|
91
agenda/n_somerset_waste.py
Normal file
91
agenda/n_somerset_waste.py
Normal file
|
@ -0,0 +1,91 @@
|
|||
"""Waste collection schedules."""
|
||||
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from datetime import date, datetime, time, timedelta
|
||||
|
||||
import httpx
|
||||
import lxml.html
|
||||
|
||||
from . import uk_time
|
||||
from .types import Event
|
||||
from .utils import make_waste_dir
|
||||
|
||||
# Maximum age, in hours, of a cached schedule page before get_html re-fetches it.
ttl_hours = 12
|
||||
|
||||
|
||||
async def get_html(data_dir: str, postcode: str, uprn: str) -> str:
    """Get the North Somerset waste schedule HTML, using a file cache.

    Pages are cached in ``<data_dir>/waste`` as ``YYYY-MM-DD_HH:MM.html``.
    If the newest cached page is younger than ``ttl_hours`` it is returned
    without any network access. Otherwise the collection-schedule form is
    posted with *postcode* and *uprn*, the "Object moved" link in the reply
    is followed when present, and the resulting page is cached and returned.

    Raises ``ValueError`` if the newest ``.html`` file in the cache directory
    does not match the expected timestamp file name format.
    """
    now = datetime.now()
    waste_dir = os.path.join(data_dir, "waste")

    make_waste_dir(data_dir)

    existing = [f for f in os.listdir(waste_dir) if f.endswith(".html")]
    if existing:
        # File names are zero-padded timestamps, so the lexical maximum is
        # also the most recent one.
        recent_filename = max(existing)
        recent = datetime.strptime(recent_filename, "%Y-%m-%d_%H:%M.html")
        if now - recent < timedelta(hours=ttl_hours):
            with open(os.path.join(waste_dir, recent_filename)) as cached:
                return cached.read()

    now_str = now.strftime("%Y-%m-%d_%H:%M")
    filename = f"{waste_dir}/{now_str}.html"

    forms_base_url = "https://forms.n-somerset.gov.uk"
    url = forms_base_url + "/Waste/CollectionSchedule"
    async with httpx.AsyncClient() as client:
        r = await client.post(
            url,
            data={
                "PreviousHouse": "",
                "PreviousPostcode": "-",
                "Postcode": postcode,
                "SelectedUprn": uprn,
            },
        )
        # The form replies with an "Object moved" page whose link points at
        # the real schedule; follow it by hand when it is present.
        pattern = r'<h2>Object moved to <a href="([^"]*)">here<\/a>\.<\/h2>'
        m = re.search(pattern, r.text)
        if m:
            r = await client.get(forms_base_url + m.group(1))
        html = r.text

    # Use a context manager so the cache file is flushed and closed promptly.
    with open(filename, "w") as out:
        out.write(html)
    return html
|
||||
|
||||
|
||||
def parse_waste_schedule_date(day_and_month: str) -> date:
|
||||
"""Parse waste schedule date."""
|
||||
today = date.today()
|
||||
fmt = "%A %d %B %Y"
|
||||
d = datetime.strptime(f"{day_and_month} {today.year}", fmt).date()
|
||||
if d < today:
|
||||
d = datetime.strptime(f"{day_and_month} {today.year + 1}", fmt).date()
|
||||
return d
|
||||
|
||||
|
||||
def parse(root: lxml.html.HtmlElement) -> list[Event]:
    """Build one event per collection date from the schedule table.

    Every row of the ``table/tbody`` found under *root* must hold exactly
    three cells: service name, next collection date and following collection
    date. Services collected on the same date are merged into a single event
    titled ``"Backwell: <service>, <service>, ..."`` timed at 06:30.
    """
    table_body = root.find(".//table/tbody")
    assert table_body is not None

    services_on = defaultdict(list)
    for row in table_body:
        name_cell, next_cell, following_cell = row
        assert name_cell.text and next_cell.text and following_cell.text
        # Resolve both dates before recording either of them.
        first_day = parse_waste_schedule_date(next_cell.text)
        second_day = parse_waste_schedule_date(following_cell.text)
        for collection_day in (first_day, second_day):
            services_on[collection_day].append(name_cell.text)

    events = []
    for collection_day, names in services_on.items():
        events.append(
            Event(
                name="waste_schedule",
                date=uk_time(collection_day, time(6, 30)),
                title="Backwell: " + ", ".join(names),
            )
        )
    return events
|
|
@ -77,3 +77,10 @@ def get_most_recent_file(directory: str, ext: str) -> str | None:
|
|||
return None
|
||||
existing.sort(reverse=True)
|
||||
return os.path.join(directory, existing[0][1])
|
||||
|
||||
|
||||
def make_waste_dir(data_dir: str) -> None:
    """Make the ``waste`` directory inside *data_dir* if it is missing.

    Only the final ``waste`` component is created; a missing *data_dir*
    still raises ``FileNotFoundError`` from ``os.mkdir``.
    """
    waste_dir = os.path.join(data_dir, "waste")
    # Try to create and tolerate "already there" rather than checking first:
    # a check-then-create sequence can fail if something else creates the
    # directory between the two steps.
    try:
        os.mkdir(waste_dir)
    except FileExistsError:
        pass
|
||||
|
|
|
@ -13,6 +13,7 @@ import deepdiff # type: ignore
|
|||
import flask
|
||||
import requests
|
||||
|
||||
import agenda.bristol_waste
|
||||
import agenda.fx
|
||||
import agenda.geomob
|
||||
import agenda.gwr
|
||||
|
@ -20,7 +21,6 @@ import agenda.mail
|
|||
import agenda.thespacedevs
|
||||
import agenda.types
|
||||
import agenda.uk_holiday
|
||||
import agenda.waste_schedule
|
||||
from agenda.types import StrDict
|
||||
from web_view import app
|
||||
|
||||
|
@ -39,7 +39,7 @@ async def update_bank_holidays(config: flask.config.Config) -> None:
|
|||
async def update_bristol_bins(config: flask.config.Config) -> None:
|
||||
"""Update waste schedule from Bristol City Council."""
|
||||
t0 = time()
|
||||
events = await agenda.waste_schedule.get_bristol_gov_uk(
|
||||
events = await agenda.bristol_waste.get_bristol_gov_uk(
|
||||
date.today(), config["DATA_DIR"], config["BRISTOL_UPRN"], refresh=True
|
||||
)
|
||||
time_taken = time() - t0
|
||||
|
|
Loading…
Reference in a new issue