agenda/agenda/data.py

510 lines
15 KiB
Python
Raw Normal View History

2023-11-07 15:55:05 +00:00
"""Agenda data."""
import asyncio
import collections
2023-12-28 20:32:55 +00:00
import itertools
2023-11-07 15:55:05 +00:00
import os
import typing
from datetime import date, datetime, timedelta
2023-11-07 15:55:05 +00:00
import dateutil.rrule
2023-11-07 15:55:05 +00:00
import dateutil.tz
import flask
import holidays
import isodate # type: ignore
2023-11-07 15:55:05 +00:00
import lxml
import pytz
2023-11-08 14:40:07 +00:00
import yaml
2023-11-07 15:55:05 +00:00
from . import (
accommodation,
birthday,
calendar,
conference,
domains,
2023-11-07 15:55:05 +00:00
economist,
fx,
gwr,
hn,
2023-11-21 08:15:16 +00:00
meetup,
2023-11-07 15:55:05 +00:00
stock_market,
subscription,
sun,
thespacedevs,
travel,
uk_holiday,
uk_tz,
2023-11-07 15:55:05 +00:00
waste_schedule,
)
from .types import Event, Holiday
2023-11-07 15:55:05 +00:00
2023-12-28 20:32:55 +00:00
# Shorthand for a dictionary with string keys and values of any type.
StrDict = dict[str, typing.Any]
2023-11-07 15:55:05 +00:00
# Local timezone object, as provided by dateutil's tzlocal().
here = dateutil.tz.tzlocal()
# deadline to file tax return
# credit card expiry dates
# morzine ski lifts
# chalet availability calendar
# starlink visible
def timezone_transition(
    start: datetime, end: datetime, key: str, tz_name: str
) -> list[Event]:
    """Return one event per clock change of `tz_name` between `start` and `end`.

    The transition instants are read from pytz's private
    ``_utc_transition_times`` table. Each instant inside the inclusive
    ``start``..``end`` window is localised to UTC and converted to the named
    timezone; the resulting event is named `key`.

    NOTE(review): `start` and `end` are compared directly with the values in
    that table, which are passed to ``pytz.utc.localize`` and so are expected
    to be naive; presumably the bounds must be naive too - confirm against
    callers.
    """
    zone = pytz.timezone(tz_name)
    transitions: list[Event] = []
    for moment in zone._utc_transition_times:  # type: ignore
        if not (start <= moment <= end):
            continue
        local_moment = pytz.utc.localize(moment).astimezone(zone)
        transitions.append(Event(name=key, date=local_moment))
    return transitions
def us_holidays(start_date: date, end_date: date) -> list[Holiday]:
    """Return US holidays that fall strictly between the two dates.

    Holidays are looked up year by year for every calendar year touched by
    the range. For each holiday named "Thanksgiving" two extra entries are
    appended after the regular ones: "Black Friday" (one day later) and
    "Cyber Monday" (four days later).
    """
    found: list[Holiday] = []
    for year in range(start_date.year, end_date.year + 1):
        year_hols = holidays.country_holidays("US", years=year, language="en")
        for hol_date, title in year_hols.items():
            if start_date < hol_date < end_date:
                found.append(Holiday(date=hol_date, name=title, country="us"))

    # Days after Thanksgiving -> name of the derived shopping day.
    follow_ups = ((1, "Black Friday"), (4, "Cyber Monday"))
    extra = []
    for thanksgiving in (h for h in found if h.name == "Thanksgiving"):
        for offset, label in follow_ups:
            extra.append(
                Holiday(
                    date=thanksgiving.date + timedelta(days=offset),
                    name=label,
                    country="us",
                )
            )
    return found + extra
def get_nyse_holidays(
    start_date: date, end_date: date, us_hols: list[Holiday]
) -> list[Event]:
    """Return NYSE holidays that are not already in `us_hols`.

    NYSE holidays strictly between `start_date` and `end_date` are collected,
    with "Thanksgiving Day" renamed to "Thanksgiving". Any whose
    (date, title) pair matches the (date, name) of an entry in `us_hols` is
    dropped; the remaining titles get an " (NYSE)" suffix.
    """
    already_listed = {(h.date, h.name) for h in us_hols}
    rename = {"Thanksgiving Day": "Thanksgiving"}

    candidates: list[Event] = []
    for year in range(start_date.year, end_date.year + 1):
        year_hols = holidays.financial_holidays("NYSE", years=year)
        for hol_date, title in year_hols.items():
            if not (start_date < hol_date < end_date):
                continue
            candidates.append(
                Event(name="holiday", date=hol_date, title=rename.get(title, title))
            )

    nyse_only: list[Event] = []
    for hol in candidates:
        if (hol.date, hol.title) in already_listed:
            continue
        assert hol.title
        hol.title += " (NYSE)"
        nyse_only.append(hol)
    return nyse_only
def get_holidays(country: str, start_date: date, end_date: date) -> list[Holiday]:
    """Return holidays for `country` strictly between the two dates.

    The country code is upper-cased for the lookup and stored lower-cased on
    each returned holiday. Holiday names are requested with the "en_US"
    language setting.
    """
    lookup_code = country.upper()
    stored_code = country.lower()

    found: list[Holiday] = []
    for year in range(start_date.year, end_date.year + 1):
        year_hols = holidays.country_holidays(
            lookup_code, years=year, language="en_US"
        )
        found.extend(
            Holiday(date=hol_date, name=title, country=stored_code)
            for hol_date, title in year_hols.items()
            if start_date < hol_date < end_date
        )
    return found
def midnight(d: date) -> datetime:
    """Return the naive datetime for 00:00:00 on the day of `d`."""
    return datetime(d.year, d.month, d.day)
def dates_from_rrule(
    rrule: str, start: date, end: date
) -> typing.Sequence[datetime | date]:
    """Expand an RRULE string into its occurrences between `start` and `end`.

    The rule is anchored at midnight on `start` and occurrences are taken
    between midnight on `start` and midnight on `end`. If the rule text
    contains none of BYHOUR, BYMINUTE or BYSECOND it is treated as all-day and
    plain dates are returned; otherwise each occurrence is localised with
    ``uk_tz``.
    """
    time_parts = ("BYHOUR", "BYMINUTE", "BYSECOND")
    all_day = all(part not in rrule for part in time_parts)

    window_start = midnight(start)
    window_end = midnight(end)
    rule = dateutil.rrule.rrulestr(rrule, dtstart=window_start)
    occurrences = rule.between(window_start, window_end)

    if all_day:
        return [moment.date() for moment in occurrences]
    return [uk_tz.localize(moment) for moment in occurrences]
2023-11-07 15:55:05 +00:00
async def waste_collection_events(data_dir: str) -> list[Event]:
    """Fetch and parse waste collection events for a hard-coded address.

    The schedule HTML is obtained via ``waste_schedule.get_html`` using a
    fixed postcode and UPRN, parsed into an lxml tree, and handed to
    ``waste_schedule.parse``, whose result is returned.
    """
    # `import lxml` on its own does not load the `lxml.html` submodule, so
    # import it explicitly rather than relying on another module having
    # imported it first.
    import lxml.html

    postcode = "BS48 3HG"
    uprn = "24071046"

    html = await waste_schedule.get_html(data_dir, postcode, uprn)
    root = lxml.html.fromstring(html)
    return waste_schedule.parse(root)
async def bristol_waste_collection_events(
    data_dir: str, start_date: date
) -> list[Event]:
    """Return waste collection events from the bristol.gov.uk helper.

    Passes `start_date`, `data_dir` and a fixed UPRN to
    ``waste_schedule.get_bristol_gov_uk`` and returns whatever it yields.
    """
    uprn = "358335"
    collections_found = await waste_schedule.get_bristol_gov_uk(
        start_date, data_dir, uprn
    )
    return collections_found
def combine_holidays(holidays: list[Holiday]) -> list[Event]:
    """Merge holidays that share a date and name into single events.

    A few fixed calendar days (for example 25 December) are given one
    standard name regardless of what each country calls them, so they merge.
    The event title is the holiday name followed by a country list in
    parentheses:

    * observed by every country present in `holidays`: no list;
    * observed by fewer than half of them: the observing countries;
    * otherwise: "not " followed by the countries that do not observe it.
    """
    standard_name = {
        (1, 1): "New Year's Day",
        (1, 6): "Epiphany",
        (12, 8): "Immaculate conception",
        (12, 25): "Christmas Day",
        (12, 26): "Boxing Day",
        (5, 1): "Labour Day",
    }
    all_countries = {h.country for h in holidays}
    total = len(all_countries)

    # (date, canonical name) -> set of country codes observing it.
    observed_in: collections.defaultdict[
        tuple[date, str], set[str]
    ] = collections.defaultdict(set)
    for h in holidays:
        assert isinstance(h.name, str) and isinstance(h.date, date)
        canonical = standard_name.get((h.date.month, h.date.day), h.name)
        observed_in[(h.date, canonical)].add(h.country)

    def country_label(countries: set[str]) -> str:
        """Build the parenthesised country list, or "" when all observe it."""
        if len(countries) == total:
            return ""
        if len(countries) < total / 2:
            prefix, shown = "", countries
        else:
            prefix, shown = "not ", all_countries - set(countries)
        return prefix + ", ".join(sorted(code.upper() for code in shown))

    events: list[Event] = []
    for (d, name), countries in observed_in.items():
        label = country_label(countries)
        title = f"{name} ({label})" if label else name
        events.append(Event(name="holiday", date=d, title=title))
    return events
def get_yaml_event_date_field(item: dict[str, str]) -> str:
    """Return the key of `item` that holds the event's date.

    Items named "travel_insurance" use "end_date"; other items use
    "start_date" when that key is present and "date" otherwise.
    """
    if item["name"] == "travel_insurance":
        return "end_date"
    if "start_date" in item:
        return "start_date"
    return "date"
def get_yaml_event_end_date_field(item: dict[str, str]) -> str:
    """Return a date field name for `item`.

    Items named "travel_insurance" give "end_date"; other items give
    "start_date" when that key is present and "date" otherwise.

    NOTE(review): despite the name, this applies exactly the same rules as
    get_yaml_event_date_field - confirm whether a different result was
    intended for the end date.
    """
    is_travel_insurance = item["name"] == "travel_insurance"
    fallback = "start_date" if "start_date" in item else "date"
    return "end_date" if is_travel_insurance else fallback
def read_events_yaml(data_dir: str, start: date, end: date) -> list[Event]:
    """Read events from ``events.yaml`` inside `data_dir`.

    Each YAML item produces one or more events:

    * with an "rrule" key, one event per occurrence of the rule between
      `start` and `end`;
    * otherwise a single event dated by the field chosen by
      get_yaml_event_date_field.

    The end date is the event date plus the item's ISO 8601 "duration" when
    one is given. Otherwise it is the item's "end_date" value, except for
    "travel_insurance" items, which get no end date.

    An empty file yields an empty list.
    """
    filename = os.path.join(data_dir, "events.yaml")
    # Use a context manager so the file handle is closed promptly instead of
    # being left for the garbage collector.
    with open(filename) as f:
        # safe_load returns None for an empty document; treat that as no items.
        items = yaml.safe_load(f) or []

    events: list[Event] = []
    for item in items:
        duration = (
            isodate.parse_duration(item["duration"]) if "duration" in item else None
        )
        dates = (
            dates_from_rrule(item["rrule"], start, end)
            if "rrule" in item
            else [item[get_yaml_event_date_field(item)]]
        )
        for dt in dates:
            if duration:
                end_date = dt + duration
            elif item["name"] != "travel_insurance":
                end_date = item.get("end_date")
            else:
                # For travel insurance the "end_date" field is the event date
                # itself, so no separate end date is set.
                end_date = None
            events.append(
                Event(
                    name=item["name"],
                    date=dt,
                    end_date=end_date,
                    title=item.get("title"),
                    url=item.get("url"),
                )
            )
    return events
def find_markets_during_stay(
    accommodation_events: list[Event], markets: list[Event]
) -> list[Event]:
    """Return the markets whose date falls within any of the given stays.

    A market matches when its ``as_date`` lies between a stay's ``as_date``
    and ``end_as_date``, both inclusive. Each market appears at most once and
    the order of `markets` is kept.
    """
    return [
        market
        for market in markets
        if any(
            stay.as_date <= market.as_date <= stay.end_as_date
            for stay in accommodation_events
        )
    ]
2023-12-28 20:32:55 +00:00
def find_gaps(events: list[Event], min_gap_days: int = 3) -> list[StrDict]:
    """Find free periods between events.

    Events are processed in order of their start date (``as_date``). A gap is
    reported when an event starts at least ``min_gap_days + 2`` days after the
    latest end date (``end_as_date``) seen so far. The gap runs from the day
    after that end date to the day before the next start.

    Returns a list of dicts with these keys:

    * "start", "end": first and last day of the gap;
    * "after": events starting on the day after the gap;
    * "before": events ending on the day before the gap.
    """
    # Sort events by start date so the sweep below sees them chronologically.
    ordered = sorted(events, key=lambda e: e.as_date)

    # Group with plain dictionaries rather than itertools.groupby: groupby
    # only merges *adjacent* items, so events sharing an end date but not
    # adjacent in start-date order would otherwise overwrite one another.
    by_start_date: collections.defaultdict[date, list[Event]] = (
        collections.defaultdict(list)
    )
    by_end_date: collections.defaultdict[date, list[Event]] = (
        collections.defaultdict(list)
    )
    for event in ordered:
        by_start_date[event.as_date].append(event)
        by_end_date[event.end_as_date].append(event)

    gaps: list[tuple[date, date]] = []
    previous_event_end: date | None = None
    for event in ordered:
        start_date = event.as_date

        # If a previous event exists, measure the distance to this one.
        if previous_event_end is not None:
            gap_days = (start_date - previous_event_end).days
            if gap_days >= (min_gap_days + 2):
                gaps.append(
                    (
                        previous_event_end + timedelta(days=1),
                        start_date - timedelta(days=1),
                    )
                )

        # Track the latest end date so overlapping or nested events do not
        # produce spurious gaps.
        end = event.end_as_date
        if previous_event_end is None or end > previous_event_end:
            previous_event_end = end

    return [
        {
            "start": gap_start,
            "end": gap_end,
            "after": by_start_date[gap_end + timedelta(days=1)],
            "before": by_end_date[gap_start - timedelta(days=1)],
        }
        for gap_start, gap_end in gaps
    ]
def busy_event(e: Event) -> bool:
    """Decide whether an event counts as busy time.

    Only certain event names can be busy. Of those, a handful of specific
    titles are excluded, conferences count only when ``going`` is truthy,
    and an event without a title is always busy.
    """
    busy_names = (
        "event",
        "accommodation",
        "conference",
        "dodainville",
        "transport",
        "meetup",
    )
    never_busy_titles = ("IA UK board meeting", "Mill Road Winter Fair")
    ignored_phrases = ("rebels", "south west data social")

    if e.name not in busy_names or e.title in never_busy_titles:
        return False
    if e.name == "conference" and not e.going:
        return False

    title = e.title
    if not title:
        return True
    if title == "LHG Run Club" or "Third Thursday Social" in title:
        return False

    lc_title = title.lower()
    return not any(phrase in lc_title for phrase in ignored_phrases)
async def get_data(
    now: datetime, config: flask.config.Config
) -> typing.Mapping[str, str | object]:
    """Get data to display on agenda dashboard.

    Reads the "DATA_DIR" and "PERSONAL_DATA" entries from `config`, fetches
    the remote data sources concurrently, then assembles one list of events
    from holidays, personal YAML files, subscriptions, meetups, rocket
    launches and so on.

    The returned mapping has the keys "now", "gbpusd", "stock_markets",
    "rockets", "gwr_advance_tickets", "gaps", "sunrise", "sunset", "events",
    "last_week", "two_weeks_ago" and "fullcalendar_events".
    """
    data_dir = config["DATA_DIR"]

    rocket_dir = os.path.join(data_dir, "thespacedevs")
    today = now.date()
    two_weeks_ago = today - timedelta(weeks=2)
    last_week = today - timedelta(weeks=1)
    last_year = today - timedelta(days=365)
    next_year = today + timedelta(days=365)

    # Datetime (rather than date) bounds, used for the clock-change lookups.
    minus_365 = now - timedelta(days=365)
    plus_365 = now + timedelta(days=365)

    # Run the awaitable data sources concurrently; results come back in the
    # same order as the arguments.
    (
        gbpusd,
        gwr_advance_tickets,
        bank_holiday,
        rockets,
        backwell_bins,
        bristol_bins,
    ) = await asyncio.gather(
        fx.get_gbpusd(config),
        gwr.advance_ticket_date(data_dir),
        uk_holiday.bank_holiday_list(last_year, next_year, data_dir),
        thespacedevs.get_launches(rocket_dir, limit=40),
        waste_collection_events(data_dir),
        bristol_waste_collection_events(data_dir, today),
    )

    reply: dict[str, typing.Any] = {
        "now": now,
        "gbpusd": gbpusd,
        "stock_markets": stock_market.open_and_close(),
        "rockets": rockets,
        "gwr_advance_tickets": gwr_advance_tickets,
    }

    my_data = config["PERSONAL_DATA"]
    events = (
        [
            Event(name="mothers_day", date=uk_holiday.get_mothers_day(today)),
        ]
        + timezone_transition(minus_365, plus_365, "uk_clock_change", "Europe/London")
        + timezone_transition(
            minus_365, plus_365, "us_clock_change", "America/New_York"
        )
    )

    if gwr_advance_tickets:
        events.append(Event(name="gwr_advance_tickets", date=gwr_advance_tickets))

    us_hols = us_holidays(last_year, next_year)

    # NOTE: within this function the local name `holidays` shadows the
    # imported `holidays` module.
    holidays: list[Holiday] = bank_holiday + us_hols
    for country in (
        "at",
        "be",
        "br",
        "ch",
        "cz",
        "de",
        "dk",
        "ee",
        "es",
        "fi",
        "fr",
        "gr",
        "it",
        "ke",
        "nl",
        "pl",
    ):
        holidays += get_holidays(country, last_year, next_year)

    events += get_nyse_holidays(last_year, next_year, us_hols)

    accommodation_events = accommodation.get_events(
        os.path.join(my_data, "accommodation.yaml")
    )

    events += combine_holidays(holidays)
    events += birthday.get_birthdays(last_year, os.path.join(my_data, "entities.yaml"))
    events += accommodation_events
    events += travel.all_events(my_data)
    events += conference.get_list(os.path.join(my_data, "conferences.yaml"))
    events += backwell_bins + bristol_bins
    events += read_events_yaml(my_data, last_year, next_year)
    events += subscription.get_events(os.path.join(my_data, "subscriptions.yaml"))
    events += economist.publication_dates(last_week, next_year)
    events += meetup.get_events(my_data)
    events += hn.whoishiring(last_year, next_year)

    events += domains.renewal_dates(my_data)

    # hide markets that happen while away
    markets = [e for e in events if e.name == "market"]
    going = [e for e in events if e.going]

    overlapping_markets = find_markets_during_stay(
        accommodation_events + going, markets
    )
    for market in overlapping_markets:
        events.remove(market)

    # Turn each rocket launch into an event, skipping launches for which no
    # date can be derived.
    for launch in rockets:
        dt = None

        if launch["net_precision"] == "Day":
            # Day precision: keep only the date part.
            dt = datetime.strptime(launch["net"], "%Y-%m-%dT00:00:00Z").date()
        elif launch["t0_time"]:
            # Otherwise use the full timestamp, marked as UTC.
            dt = pytz.utc.localize(
                datetime.strptime(launch["net"], "%Y-%m-%dT%H:%M:%SZ")
            )

        if not dt:
            continue

        rocket_name = f'🚀{launch["rocket"]}: {launch["mission_name"] or "[no mission]"}'
        e = Event(name="rocket", date=dt, title=rocket_name)
        events.append(e)

    events += [Event(name="today", date=today)]

    # Busy events strictly after today and less than two years (2 * 365
    # days) ahead, in start-date order, are used to look for free gaps.
    busy_events = [
        e
        for e in sorted(events, key=lambda e: e.as_date)
        if e.as_date > today
        and e.as_date < (today + timedelta(days=365 * 2))
        and busy_event(e)
    ]

    gaps = find_gaps(busy_events)

    events += [
        Event(name="gap", date=gap["start"], end_date=gap["end"]) for gap in gaps
    ]

    # Sort events by their datetime; the "today" event is prioritised
    # at the top of the list for today. This is achieved by sorting first by
    # the datetime attribute, and then ensuring that events with the name
    # "today" are ordered before others on the same date.
    events.sort(key=lambda e: (e.as_datetime, e.name != "today"))

    reply["gaps"] = gaps

    observer = sun.bristol()
    reply["sunrise"] = sun.sunrise(observer)
    reply["sunset"] = sun.sunset(observer)
    reply["events"] = events
    reply["last_week"] = last_week
    reply["two_weeks_ago"] = two_weeks_ago

    reply["fullcalendar_events"] = calendar.build_events(events)

    return reply