agenda/update.py
Edward Betts 9f881d7177 Fix SpaceDevs rate-limiting and cache corruption from throttled responses
Don't write rate-limit/error responses to disk in next_launch_api_data,
so they can't become the "most recent" cache file and cause KeyError crashes
in read_cached_launches. Add defensive results-list checks in
read_cached_launches and get_launches to handle any existing bad files.

Drop refresh=True from the updater's get_active_crewed_flights call so the
2-hour TTL applies; the paginated spacecraft/flight crawl was running on
every hourly cron job and likely causing the burst that triggered throttling.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 20:50:05 +00:00

543 lines
17 KiB
Python
Executable file

#!/usr/bin/python3
"""Combined update script for various data sources."""
import asyncio
import os
import sys
import typing
from datetime import date, datetime
from time import time
import deepdiff
import flask
import requests
import agenda.birthday
import agenda.bristol_waste
import agenda.fx
import agenda.geomob
import agenda.gwr
import agenda.mail
import agenda.thespacedevs
import agenda.trip
import agenda.types
import agenda.uk_holiday
import agenda.uk_school_holiday
import agenda.weather
from agenda.event import Event
from agenda.types import StrDict
from web_view import app
async def update_bank_holidays(config: flask.config.Config) -> None:
    """Refresh the cached UK bank holiday list.

    When stdin is a terminal, also print how many holidays were returned and
    how long the refresh took.
    """
    started = time()
    holidays = await agenda.uk_holiday.get_holiday_list(config["DATA_DIR"])
    elapsed = time() - started

    # Stay silent unless a person is running the script interactively.
    if sys.stdin.isatty():
        print(len(holidays), "bank holidays in list")
        print(f"took {elapsed:.1f} seconds")
async def update_school_holidays(config: flask.config.Config) -> None:
    """Refresh the cached Bristol school holiday list.

    When stdin is a terminal, also print how many holidays were returned and
    how long the refresh took.
    """
    started = time()
    holidays = await agenda.uk_school_holiday.get_holiday_list(config["DATA_DIR"])
    elapsed = time() - started

    # Stay silent unless a person is running the script interactively.
    if sys.stdin.isatty():
        print(len(holidays), "school holidays in list")
        print(f"took {elapsed:.1f} seconds")
async def update_bristol_bins(config: flask.config.Config) -> None:
    """Refresh the Bristol City Council waste collection schedule.

    The schedule is requested with cache="refresh".  When stdin is a
    terminal, each returned event and the time taken are printed.
    """
    started = time()
    collections = await agenda.bristol_waste.get(
        date.today(),
        config["DATA_DIR"],
        config["BRISTOL_UPRN"],
        cache="refresh",
    )
    elapsed = time() - started

    # Stay silent unless a person is running the script interactively.
    if sys.stdin.isatty():
        for collection in collections:
            print(collection)
        print(f"took {elapsed:.1f} seconds")
def update_gwr_advance_ticket_date(config: flask.config.Config) -> None:
    """Update the cached GWR advance ticket page and email when dates change.

    The booking dates parsed from the cached page are compared with those
    parsed from a fresh download.  If the fresh page cannot be parsed, its
    HTML is mailed as an error report and the cache is left untouched.  If
    the dates differ, the cache file is overwritten and a notification email
    is sent.

    Raises:
        OSError: the cache file cannot be read or written.
        AssertionError: either page yields an unexpected set of date keys.
        requests.RequestException: the download fails or times out.
    """
    filename = os.path.join(config["DATA_DIR"], "advance-tickets.html")
    expected_keys = ["Weekdays", "Saturdays", "Sundays"]

    # Context managers ensure the file handles are closed promptly.
    with open(filename) as cached:
        existing_html = cached.read()
    existing_dates = agenda.gwr.extract_dates(existing_html)
    assert existing_dates
    assert list(existing_dates.keys()) == expected_keys

    # A timeout stops an unresponsive server from hanging the cron job.
    new_html = requests.get(agenda.gwr.url, timeout=30).text
    new_dates = agenda.gwr.extract_dates(new_html)

    if not new_dates:
        subject = "Error parsing GWR advance ticket booking dates"
        body = new_html
        agenda.mail.send_mail(config, subject, body)
        return

    assert list(new_dates.keys()) == expected_keys

    if existing_dates == new_dates:
        if sys.stdin.isatty():
            print(filename)
            print(agenda.gwr.url)
            print("dates haven't changed:", existing_dates)
        return

    with open(filename, "w") as out:
        out.write(new_html)

    subject = (
        "New GWR advance ticket booking date: "
        + f'{new_dates["Weekdays"].strftime("%d %b %Y")} (Weekdays)'
    )

    date_lines = "\n".join(
        f'{key}: {when.strftime("%d %b %Y")}' for key, when in new_dates.items()
    )
    body = f"""
{date_lines}
{agenda.gwr.url}
Agenda: https://edwardbetts.com/agenda/
"""

    if sys.stdin.isatty():
        print(filename)
        print(agenda.gwr.url)
        print()
        print("dates have changed")
        print("old:", existing_dates)
        print("new:", new_dates)
        print()
        print(subject)
        print(body)

    agenda.mail.send_mail(config, subject, body)
def _launch_location(launch: StrDict) -> typing.Any:
    """Return the pad location name of a launch, or "Unknown" if unavailable."""
    pad = launch.get("pad")
    if not isinstance(pad, dict):
        return "Unknown"
    location = pad.get("location")
    if not isinstance(location, dict):
        return "Unknown"
    return location.get("name", "Unknown")


def _format_launch_date(launch_date: typing.Any) -> typing.Any:
    """Format an ISO 8601 launch time for display.

    Returns "Unknown" for a missing value and the raw value unchanged when it
    cannot be parsed as an ISO 8601 string.
    """
    if not launch_date or launch_date == "Unknown":
        return "Unknown"
    try:
        # Map a trailing "Z" to an explicit UTC offset before parsing.
        parsed = datetime.fromisoformat(launch_date.replace("Z", "+00:00"))
        return parsed.strftime("%d %b %Y at %H:%M UTC")
    except (AttributeError, TypeError, ValueError):
        return launch_date


def report_space_launch_change(
    config: flask.config.Config, prev_launch: StrDict | None, cur_launch: StrDict | None
) -> None:
    """Send mail to announce a change to space launch data.

    Args:
        config: Flask configuration passed through to the mailer.
        prev_launch: the launch as previously cached, if it was present.
        cur_launch: the launch in the newly fetched data, if it is present.

    If only ``prev_launch`` is set, the launch has dropped out of the upcoming
    list and a "completed" notice is sent.  Otherwise a status update is sent
    with a summary of the differences between the two snapshots.

    Raises:
        AssertionError: neither ``prev_launch`` nor ``cur_launch`` is set.
    """
    # Handle case where launch disappeared from upcoming list
    if prev_launch and not cur_launch:
        name = prev_launch["name"]
        formatted_date = _format_launch_date(prev_launch.get("net", "Unknown"))
        location = _launch_location(prev_launch)

        # The launch is no longer upcoming, so it has most likely taken place;
        # the outcome cannot be determined from the previous data alone.
        subject = f"🚀 Space Launch Completed: {name}"
        body = f"""🚀 Space Launch Completed
Mission: {name}
Launch Date: {formatted_date}
Location: {location}
This launch is no longer appearing in the upcoming launches list, which typically means it has taken place.
To check if the launch was successful or failed, visit:
https://edwardbetts.com/agenda/launches
View all launches: https://edwardbetts.com/agenda/launches
"""
        agenda.mail.send_mail(config, subject, body)
        return

    if not cur_launch:
        # Both snapshots are empty, so there is nothing to describe.
        raise AssertionError("no previous or current launch to report")

    # Handle regular status updates
    name = cur_launch["name"]
    status_info = cur_launch.get("status")
    if isinstance(status_info, dict):
        status = status_info.get("name", "Unknown")
        status_id = status_info.get("id", 0)
    else:
        status = "Unknown"
        status_id = 0
    formatted_date = _format_launch_date(cur_launch.get("net", "Unknown"))
    location = _launch_location(cur_launch)

    # Customize subject based on the current status
    if status_id == 3:  # Launch Successful
        subject = f"🎉 Launch Successful: {name}"
    elif status_id == 4:  # Launch Failure
        subject = f"💥 Launch Failed: {name}"
    elif status_id == 7:  # Partial Failure
        subject = f"⚠️ Launch Partial Failure: {name}"
    elif status_id == 6:  # In Flight
        subject = f"🚀 Launch In Flight: {name}"
    elif status_id == 5:  # On Hold
        subject = f"⏸️ Launch On Hold: {name}"
    else:
        subject = f"Space Launch Update: {name}"

    differences = deepdiff.DeepDiff(prev_launch, cur_launch)
    changes_text = agenda.thespacedevs.format_launch_changes(differences)

    body = f"""🚀 Space Launch Update
Mission: {name}
Status: {status}
Launch Date: {formatted_date}
Location: {location}
Changes:
{changes_text}
View all launches: https://edwardbetts.com/agenda/launches
"""
    agenda.mail.send_mail(config, subject, body)
def is_test_flight(launch: StrDict) -> bool:
"""Return True if the launch is a test flight."""
mission = typing.cast(dict[str, typing.Any] | None, launch.get("mission"))
return bool(mission and mission.get("type") == "Test Flight")
def get_launch_by_slug(data: StrDict, slug: str) -> StrDict | None:
    """Return the last entry of data["results"] whose slug equals ``slug``.

    Returns None when "results" is not a list or no entry matches.  Entries
    that are not dicts, or whose slug is not a string, are ignored.
    """
    launches = data.get("results")
    if not isinstance(launches, list):
        return None

    # Scan from the end so that the final matching entry wins.
    for entry in reversed(launches):
        if not isinstance(entry, dict):
            continue
        entry_slug = entry.get("slug")
        if isinstance(entry_slug, str) and entry_slug == slug:
            return typing.cast("StrDict", entry)
    return None
def send_thespacedevs_payload_alert(
    config: flask.config.Config,
    reason: str,
    data: StrDict | None,
) -> None:
    """Mail the admin about a SpaceDevs payload without the expected fields.

    The alert is labelled "rate-limit" when the payload's status is 429 or
    its detail text mentions rate limiting or throttling, otherwise "error".
    """
    payload = data if data else {}
    detail = payload.get("detail")
    status_code = payload.get("status_code", payload.get("status"))

    # Only a string detail can be searched for throttling hints.
    lowered_detail = detail.lower() if isinstance(detail, str) else ""
    throttled = (
        status_code == 429 or "rate" in lowered_detail or "thrott" in lowered_detail
    )
    alert_type = "rate-limit" if throttled else "error"

    subject = f"⚠️ SpaceDevs {alert_type}: {reason}"
    body = f"""SpaceDevs update returned an unexpected payload.
Reason: {reason}
Type: {alert_type}
Status: {status_code!r}
Detail: {detail!r}
Payload keys: {sorted(payload.keys())}
Expected payload shape includes a top-level 'results' list.
Updater: /home/edward/src/agenda/update.py
"""
    agenda.mail.send_mail(config, subject, body)
def _test_flight_slugs(results: typing.Any) -> set[str]:
    """Return the slugs of test-flight launches in a SpaceDevs results list.

    Anything that is not a list yields an empty set; entries that are not
    dicts or that lack a string slug are skipped.
    """
    if not isinstance(results, list):
        return set()
    return {
        item["slug"]
        for item in results
        if isinstance(item, dict)
        and isinstance(item.get("slug"), str)
        and is_test_flight(typing.cast(StrDict, item))
    }


def update_thespacedevs(config: flask.config.Config) -> None:
    """Update cache of space launch API and send emails on relevant changes.

    In addition to the configured FOLLOW_LAUNCHES, also send emails for any
    launch whose mission.type == "Test Flight" even if its slug is not in
    FOLLOW_LAUNCHES.

    An alert email is sent, and no change reports are produced, when the API
    call returns nothing or a payload without a top-level results list.

    Raises:
        AssertionError: no previously cached launch data could be loaded.
    """
    rocket_dir = os.path.join(config["DATA_DIR"], "thespacedevs")
    existing_data = agenda.thespacedevs.load_cached_launches(rocket_dir)
    assert existing_data

    # Update active crewed mission cache used by the launches page.
    # Uses the 2-hour TTL; failures are handled internally with cache fallback.
    active_crewed = agenda.thespacedevs.get_active_crewed_flights(rocket_dir)

    # Always follow configured slugs
    follow_slugs: set[str] = set(config["FOLLOW_LAUNCHES"])

    # Identify test-flight slugs present in the previous cache.  The cached
    # file gets the same shape checks as the fresh payload, so a bad cache
    # file cannot crash the updater.
    prev_test_slugs = _test_flight_slugs(existing_data.get("results"))

    t0 = time()
    data = agenda.thespacedevs.next_launch_api_data(rocket_dir)
    if not data:
        send_thespacedevs_payload_alert(
            config,
            reason="API request failed or returned invalid JSON",
            data=None,
        )
        return  # thespacedevs API call failed

    data_results = data.get("results")
    if not isinstance(data_results, list):
        send_thespacedevs_payload_alert(
            config,
            reason="response missing top-level results list",
            data=data,
        )
        return

    # Identify test-flight slugs present in the current data
    cur_test_slugs = _test_flight_slugs(data_results)

    # Followed slugs plus every test flight seen in the old or new data
    slugs_to_check = follow_slugs | prev_test_slugs | cur_test_slugs

    # Emit reports when a launch appears/disappears or changes
    for slug in slugs_to_check:
        prev = get_launch_by_slug(existing_data, slug)
        cur = get_launch_by_slug(data, slug)
        if prev is None and cur is None:
            continue
        # An unchanged last_updated stamp means there is nothing to report.
        if prev and cur and prev.get("last_updated") == cur.get("last_updated"):
            continue
        report_space_launch_change(config, prev, cur)

    time_taken = time() - t0
    if not sys.stdin.isatty():
        return

    rockets = [agenda.thespacedevs.summarize_launch(item) for item in data_results]
    print(len(rockets), "launches")
    print(len(active_crewed or []), "active crewed missions")
    print(f"took {time_taken:.1f} seconds")
def update_gandi(config: flask.config.Config) -> None:
    """Fetch the gandi.net domain list and save the raw response to disk.

    The response must be a JSON list in which every item has a truthy
    "fqdn" and a truthy dates["registry_ends_at"]; otherwise an
    AssertionError is raised and nothing is written.
    """
    endpoint = "https://api.gandi.net/v5/domain/domains"
    auth_headers = {"authorization": "Bearer " + config["GANDI_TOKEN"]}
    target = os.path.join(config["DATA_DIR"], "gandi_domains.json")

    response = requests.request("GET", endpoint, headers=auth_headers)
    domains = response.json()

    # Validate the payload before replacing the saved copy.
    assert isinstance(domains, list)
    for domain in domains:
        assert domain["fqdn"] and domain["dates"]["registry_ends_at"]

    with open(target, "w") as out:
        out.write(response.text)
def update_weather(config: flask.config.Config) -> None:
    """Refresh the weather cache for trips overlapping the next 8 days.

    A forecast is requested once per distinct trip location.  A failure for
    one location is printed and does not stop the remaining locations.
    """
    from datetime import date, timedelta

    today = date.today()
    window_end = today + timedelta(days=8)

    def in_forecast_window(trip: typing.Any) -> bool:
        """Return True if the trip has not ended and starts within the window."""
        return bool((trip.end or trip.start) >= today and trip.start <= window_end)

    upcoming_trips = list(filter(in_forecast_window, agenda.trip.build_trip_list()))

    fetched = 0
    visited: set[tuple[float, float]] = set()
    for trip in upcoming_trips:
        latlon = agenda.weather.trip_latlon(trip)
        # Skip trips without coordinates and locations already handled.
        if not latlon or latlon in visited:
            continue
        visited.add(latlon)
        lat, lon = latlon
        try:
            agenda.weather.get_forecast(
                config["DATA_DIR"], config["OPENWEATHERMAP_API_KEY"], lat, lon
            )
            fetched += 1
        except Exception as exc:
            print(f"weather update failed for {lat},{lon}: {exc}")

    if sys.stdin.isatty():
        print(f"updated weather for {fetched} location(s)")
def check_birthday_reminders(config: flask.config.Config) -> None:
    """Send one grouped email covering birthdays in the next 7 days.

    Birthdays are read from entities.yaml in the PERSONAL_DATA directory and
    grouped into sections (Today/Tomorrow/In N days).  Nothing is sent when
    the file does not exist or no birthday falls within the window.
    """
    today = date.today()
    entities_file = os.path.join(config["PERSONAL_DATA"], "entities.yaml")
    if not os.path.exists(entities_file):
        return

    birthdays = agenda.birthday.get_birthdays(today, entities_file)

    # Collect next 7 days into a dict keyed by days-until
    by_days: dict[int, list[Event]] = {}
    for ev in birthdays:
        days_until = (ev.as_date - today).days
        if 0 <= days_until <= 7:
            by_days.setdefault(days_until, []).append(ev)
    if not by_days:
        return

    # Build subject.  by_days is non-empty here and every key is 0, 1 or
    # something else, so at least one heading is always produced.
    headings: list[str] = []
    if 0 in by_days:
        headings.append("today")
    if 1 in by_days:
        headings.append("tomorrow")
    others = sum(1 for days in by_days if days not in (0, 1))
    if others:
        plural = "s" if others != 1 else ""
        headings.append(f"{others} other{plural}")
    subject = f"🎂 Birthday reminders ({', '.join(headings)})"

    # Build body (UK style dates)
    lines: list[str] = ["Upcoming birthdays (next 7 days):", ""]
    for delta in sorted(by_days):
        if delta == 0:
            lines.append("Today")
        elif delta == 1:
            lines.append("Tomorrow")
        else:
            lines.append(f"In {delta} days")

        entries = sorted(
            by_days[delta],
            key=lambda e: (e.as_date, (e.title or e.name or "")),
        )
        for ev in entries:
            d = ev.as_date
            # Portable UK-style date: weekday, D Month YYYY
            date_str = f"{d:%A}, {d.day} {d:%B %Y}"
            label = ev.title or ev.name
            # Separate the name from the date so they do not run together.
            lines.append(f"{label} — {date_str}")
        lines.append("")

    lines.append("View all birthdays: https://edwardbetts.com/agenda/birthdays")
    body = "\n".join(lines)

    if sys.stdin.isatty():
        print(f"Birthday reminder: {subject}\n{body}")

    agenda.mail.send_mail(config, subject, body)
def main() -> None:
    """Update caches.

    Runs the update jobs inside the Flask application context.  Which jobs
    run depends on the hour of the current local time.
    """
    # NOTE(review): the nesting below was reconstructed from a copy of this
    # file without indentation; it assumes only the space launch update and
    # the birthday check sit outside the three-hourly block - confirm.
    now = datetime.now()
    hour = now.hour
    with app.app_context():
        # Jobs in this block run only when the hour is a multiple of three.
        if hour % 3 == 0:
            asyncio.run(update_bank_holidays(app.config))
            asyncio.run(update_school_holidays(app.config))
            asyncio.run(update_bristol_bins(app.config))
            update_gwr_advance_ticket_date(app.config)
            # TODO: debug why update gandi fails
            # update_gandi(app.config)
            agenda.geomob.update(app.config)
            agenda.fx.get_rates(app.config)
            update_weather(app.config)
        # Runs on every invocation of the script.
        update_thespacedevs(app.config)
        # Check for birthday reminders daily at 9 AM
        if hour == 9:
            check_birthday_reminders(app.config)


# Run the updates only when executed as a script, not when imported.
if __name__ == "__main__":
    main()