agenda/agenda/data.py

505 lines
14 KiB
Python

"""Agenda data."""
import asyncio
import collections
import itertools
import os
import typing
from datetime import date, datetime, timedelta
import dateutil.rrule
import dateutil.tz
import flask
import holidays
import isodate # type: ignore
import lxml
import pytz
import yaml
from . import (
accommodation,
birthday,
calendar,
conference,
domains,
economist,
fx,
gwr,
hn,
meetup,
stock_market,
subscription,
sun,
thespacedevs,
travel,
uk_holiday,
uk_tz,
waste_schedule,
)
from .types import Event, Holiday
StrDict = dict[str, typing.Any]
here = dateutil.tz.tzlocal()
# deadline to file tax return
# credit card expiry dates
# morzine ski lifts
# chalet availablity calendar
# starlink visible
def timezone_transition(
start: datetime, end: datetime, key: str, tz_name: str
) -> list[Event]:
"""Clocks changes."""
tz = pytz.timezone(tz_name)
return [
Event(name=key, date=pytz.utc.localize(t).astimezone(tz))
for t in tz._utc_transition_times # type: ignore
if start <= t <= end
]
def us_holidays(start_date: date, end_date: date) -> list[Holiday]:
"""Get US holidays."""
found: list[Holiday] = []
for year in range(start_date.year, end_date.year + 1):
hols = holidays.country_holidays("US", years=year, language="en")
found += [
Holiday(date=hol_date, name=title, country="us")
for hol_date, title in hols.items()
if start_date < hol_date < end_date
]
extra = []
for h in found:
if h.name != "Thanksgiving":
continue
extra += [
Holiday(date=h.date + timedelta(days=1), name="Black Friday", country="us"),
Holiday(date=h.date + timedelta(days=4), name="Cyber Monday", country="us"),
]
return found + extra
def get_nyse_holidays(
start_date: date, end_date: date, us_hols: list[Holiday]
) -> list[Event]:
"""NYSE holidays."""
known_us_hols = {(h.date, h.name) for h in us_hols}
found: list[Event] = []
rename = {"Thanksgiving Day": "Thanksgiving"}
for year in range(start_date.year, end_date.year + 1):
hols = holidays.financial_holidays("NYSE", years=year)
found += [
Event(
name="holiday",
date=hol_date,
title=rename.get(title, title),
)
for hol_date, title in hols.items()
if start_date < hol_date < end_date
]
found = [hol for hol in found if (hol.date, hol.title) not in known_us_hols]
for hol in found:
assert hol.title
hol.title += " (NYSE)"
return found
def get_holidays(country: str, start_date: date, end_date: date) -> list[Holiday]:
"""Get holidays."""
found: list[Holiday] = []
for year in range(start_date.year, end_date.year + 1):
hols = holidays.country_holidays(country.upper(), years=year, language="en_US")
found += [
Holiday(
date=hol_date,
name=title,
country=country.lower(),
)
for hol_date, title in hols.items()
if start_date < hol_date < end_date
]
return found
def midnight(d: date) -> datetime:
"""Convert from date to midnight on that day."""
return datetime.combine(d, datetime.min.time())
def dates_from_rrule(
rrule: str, start: date, end: date
) -> typing.Sequence[datetime | date]:
"""Generate events from an RRULE between start_date and end_date."""
all_day = not any(param in rrule for param in ["BYHOUR", "BYMINUTE", "BYSECOND"])
return [
i.date() if all_day else uk_tz.localize(i)
for i in dateutil.rrule.rrulestr(rrule, dtstart=midnight(start)).between(
midnight(start), midnight(end)
)
]
async def waste_collection_events(data_dir: str) -> list[Event]:
"""Waste colllection events."""
postcode = "BS48 3HG"
uprn = "24071046"
html = await waste_schedule.get_html(data_dir, postcode, uprn)
root = lxml.html.fromstring(html)
events = waste_schedule.parse(root)
return events
async def bristol_waste_collection_events(
data_dir: str, start_date: date
) -> list[Event]:
"""Waste colllection events."""
uprn = "358335"
return await waste_schedule.get_bristol_gov_uk(start_date, data_dir, uprn)
def combine_holidays(holidays: list[Holiday]) -> list[Event]:
"""Combine UK and US holidays with the same date and title."""
all_countries = {h.country for h in holidays}
standard_name = {
(1, 1): "New Year's Day",
(1, 6): "Epiphany",
(12, 8): "Immaculate conception",
(12, 25): "Christmas Day",
(12, 26): "Boxing Day",
(5, 1): "Labour Day",
}
combined: collections.defaultdict[
tuple[date, str], set[str]
] = collections.defaultdict(set)
for h in holidays:
assert isinstance(h.name, str) and isinstance(h.date, date)
event_key = (h.date, standard_name.get((h.date.month, h.date.day), h.name))
combined[event_key].add(h.country)
events: list[Event] = []
for (d, name), countries in combined.items():
if len(countries) == len(all_countries):
country_list = ""
elif len(countries) < len(all_countries) / 2:
country_list = ", ".join(sorted(country.upper() for country in countries))
else:
country_list = "not " + ", ".join(
sorted(country.upper() for country in all_countries - set(countries))
)
e = Event(
name="holiday",
date=d,
title=f"{name} ({country_list})" if country_list else name,
)
events.append(e)
return events
def get_yaml_event_date_field(item: dict[str, str]) -> str:
"""Event date field name."""
return (
"end_date"
if item["name"] == "travel_insurance"
else ("start_date" if "start_date" in item else "date")
)
def get_yaml_event_end_date_field(item: dict[str, str]) -> str:
"""Event date field name."""
return (
"end_date"
if item["name"] == "travel_insurance"
else ("start_date" if "start_date" in item else "date")
)
def read_events_yaml(data_dir: str, start: date, end: date) -> list[Event]:
"""Read eventes from YAML file."""
events: list[Event] = []
for item in yaml.safe_load(open(os.path.join(data_dir, "events.yaml"))):
duration = (
isodate.parse_duration(item["duration"]) if "duration" in item else None
)
dates = (
dates_from_rrule(item["rrule"], start, end)
if "rrule" in item
else [item[get_yaml_event_date_field(item)]]
)
for dt in dates:
e = Event(
name=item["name"],
date=dt,
end_date=(
dt + duration
if duration
else (
item.get("end_date")
if item["name"] != "travel_insurance"
else None
)
),
title=item.get("title"),
url=item.get("url"),
)
events.append(e)
return events
def find_markets_during_stay(
accommodation_events: list[Event], markets: list[Event]
) -> list[Event]:
"""Market events that happen during accommodation stays."""
overlapping_markets = []
for market in markets:
for e in accommodation_events:
# Check if the market date is within the accommodation dates.
if e.as_date <= market.as_date <= e.end_as_date:
overlapping_markets.append(market)
break # Breaks the inner loop if overlap is found.
return overlapping_markets
def find_gaps(events: list[Event], min_gap_days: int = 3) -> list[StrDict]:
"""Gaps of at least `min_gap_days` between events in a list of events."""
# Sort events by start date
gaps: list[tuple[date, date]] = []
previous_event_end = None
by_start_date = {
d: list(on_day)
for d, on_day in itertools.groupby(events, key=lambda e: e.as_date)
}
by_end_date = {
d: list(on_day)
for d, on_day in itertools.groupby(events, key=lambda e: e.end_as_date)
}
for event in events:
# Use start date for current event
start_date = event.as_date
# If previous event exists, calculate the gap
if previous_event_end:
gap_days = (start_date - previous_event_end).days
if gap_days >= (min_gap_days + 2):
start_end = (
previous_event_end + timedelta(days=1),
start_date - timedelta(days=1),
)
gaps.append(start_end)
# Update previous event end date
end = event.end_as_date
if not previous_event_end or end > previous_event_end:
previous_event_end = end
return [
{
"start": gap_start,
"end": gap_end,
"after": by_start_date[gap_end + timedelta(days=1)],
"before": by_end_date[gap_start - timedelta(days=1)],
}
for gap_start, gap_end in gaps
]
def busy_event(e: Event) -> bool:
"""Busy."""
if e.name not in {
"event",
"accommodation",
"conference",
"dodainville",
"transport",
"meetup",
}:
return False
if e.name == "conference" and not e.going:
return False
if not e.title:
return True
if e.title == "LHG Run Club" or "Third Thursday Social" in e.title:
return False
lc_title = e.title.lower()
return "rebels" not in lc_title and "south west data social" not in lc_title
async def get_data(
now: datetime, config: flask.config.Config
) -> typing.Mapping[str, str | object]:
"""Get data to display on agenda dashboard."""
data_dir = config["DATA_DIR"]
rocket_dir = os.path.join(data_dir, "thespacedevs")
today = now.date()
two_weeks_ago = today - timedelta(weeks=2)
last_week = today - timedelta(weeks=1)
last_year = today - timedelta(days=365)
next_year = today + timedelta(days=365)
minus_365 = now - timedelta(days=365)
plus_365 = now + timedelta(days=365)
(
gbpusd,
gwr_advance_tickets,
bank_holiday,
rockets,
backwell_bins,
bristol_bins,
) = await asyncio.gather(
fx.get_gbpusd(config),
gwr.advance_ticket_date(data_dir),
uk_holiday.bank_holiday_list(last_year, next_year, data_dir),
thespacedevs.get_launches(rocket_dir, limit=40),
waste_collection_events(data_dir),
bristol_waste_collection_events(data_dir, today),
)
reply: dict[str, typing.Any] = {
"now": now,
"gbpusd": gbpusd,
"stock_markets": stock_market.open_and_close(),
"rockets": rockets,
"gwr_advance_tickets": gwr_advance_tickets,
}
my_data = config["PERSONAL_DATA"]
events = (
[
Event(name="mothers_day", date=uk_holiday.get_mothers_day(today)),
]
+ timezone_transition(minus_365, plus_365, "uk_clock_change", "Europe/London")
+ timezone_transition(
minus_365, plus_365, "us_clock_change", "America/New_York"
)
)
if gwr_advance_tickets:
events.append(Event(name="gwr_advance_tickets", date=gwr_advance_tickets))
us_hols = us_holidays(last_year, next_year)
holidays: list[Holiday] = bank_holiday + us_hols
for country in (
"at",
"be",
"br",
"ch",
"cz",
"de",
"dk",
"ee",
"es",
"fi",
"fr",
"gr",
"it",
"ke",
"nl",
"pl",
):
holidays += get_holidays(country, last_year, next_year)
events += get_nyse_holidays(last_year, next_year, us_hols)
accommodation_events = accommodation.get_events(
os.path.join(my_data, "accommodation.yaml")
)
events += combine_holidays(holidays)
events += birthday.get_birthdays(last_year, os.path.join(my_data, "entities.yaml"))
events += accommodation_events
events += travel.all_events(my_data)
events += conference.get_list(os.path.join(my_data, "conferences.yaml"))
events += backwell_bins + bristol_bins
events += read_events_yaml(my_data, last_year, next_year)
events += subscription.get_events(os.path.join(my_data, "subscriptions.yaml"))
events += economist.publication_dates(last_week, next_year)
events += meetup.get_events(my_data)
events += hn.whoishiring(last_year, next_year)
events += domains.renewal_dates(my_data)
# hide markets that happen while away
markets = [e for e in events if e.name == "market"]
going = [e for e in events if e.going]
overlapping_markets = find_markets_during_stay(
accommodation_events + going, markets
)
for market in overlapping_markets:
events.remove(market)
for launch in rockets:
dt = None
if launch["net_precision"] == "Day":
dt = datetime.strptime(launch["net"], "%Y-%m-%dT00:00:00Z").date()
elif launch["t0_time"]:
dt = pytz.utc.localize(
datetime.strptime(launch["net"], "%Y-%m-%dT%H:%M:%SZ")
)
if not dt:
continue
rocket_name = f'🚀{launch["rocket"]}: {launch["mission_name"] or "[no mission]"}'
e = Event(name="rocket", date=dt, title=rocket_name)
events.append(e)
events += [Event(name="today", date=today)]
busy_events = [
e
for e in sorted(events, key=lambda e: e.as_date)
if e.as_date > today and e.as_date < next_year and busy_event(e)
]
gaps = find_gaps(busy_events)
events += [
Event(name="gap", date=gap["start"], end_date=gap["end"]) for gap in gaps
]
# Sort events by their datetime; the "today" event is prioritised
# at the top of the list for today. This is achieved by sorting first by
# the datetime attribute, and then ensuring that events with the name
# "today" are ordered before others on the same date.
events.sort(key=lambda e: (e.as_datetime, e.name != "today"))
reply["gaps"] = gaps
observer = sun.bristol()
reply["sunrise"] = sun.sunrise(observer)
reply["sunset"] = sun.sunset(observer)
reply["events"] = events
reply["last_week"] = last_week
reply["two_weeks_ago"] = two_weeks_ago
reply["fullcalendar_events"] = calendar.build_events(events)
return reply