agenda/agenda/geomob.py
2024-07-08 11:58:12 +01:00

108 lines
2.9 KiB
Python

"""Geomob events."""
import os
from dataclasses import dataclass
from datetime import date, datetime
from typing import List
import dateutil.parser
import flask
import lxml.html
import requests
import agenda.mail
import agenda.utils
@dataclass(frozen=True)
class GeomobEvent:
    """A single event scraped from the Geomob event list.

    Instances are frozen, so they are hashable and can be stored in sets
    or compared field by field.
    """

    # Calendar day of the event.
    date: date
    # Value of the event link's ``href`` attribute.
    href: str
    # Last space-separated token of the event link's text.
    hashtag: str
def extract_events(
    tree: lxml.html.HtmlElement,
) -> List[GeomobEvent]:
    """Extract upcoming events from the HTML content.

    Every ``<a>`` directly inside an ``<li>`` of ``<ol class="event-list">``
    becomes one event, in document order.

    Args:
        tree: Parsed HTML document to search.

    Returns:
        One ``GeomobEvent`` per matching link.
    """
    upcoming: List[GeomobEvent] = []
    for link in tree.xpath('//ol[@class="event-list"]/li/a'):
        label = link.text_content().strip()
        # The text after the final space is the hashtag; everything before
        # it is handed to dateutil as the date.
        when, _sep, tag = label.rpartition(" ")
        day = dateutil.parser.parse(when).date()
        upcoming.append(GeomobEvent(date=day, href=link.get("href"), hashtag=tag))
    return upcoming
def find_new_events(
    prev: list[GeomobEvent], cur: list[GeomobEvent]
) -> list[GeomobEvent]:
    """Find new events that appear in cur but not in prev.

    Args:
        prev: Previously known events.
        cur: Events currently listed.

    Returns:
        The events of ``cur`` that are absent from ``prev``, each reported
        once, in the order they first appear in ``cur``.
    """
    # A plain set difference would return the events in arbitrary
    # (hash-dependent) order; filtering ``cur`` keeps the result stable.
    seen = set(prev)
    fresh = []
    for event in cur:
        if event not in seen:
            seen.add(event)  # also drops repeats within ``cur``
            fresh.append(event)
    return fresh
def geomob_email(new_events: list[GeomobEvent], base_url: str) -> tuple[str, str]:
    """Generate email subject and body for new events.

    Args:
        new_events: Events to announce; must not be empty.
        base_url: URL of the website; each event's ``href`` is resolved
            against it.

    Returns:
        The email subject and the plain-text body.
    """
    from urllib.parse import urljoin

    assert new_events

    subject = f"{len(new_events)} New Geomob Event(s) Announced"

    body_lines = ["Hello,\n", "Here are the new Geomob events:\n"]
    for event in new_events:
        # urljoin instead of string concatenation: a root-relative href
        # ("/post/x") no longer yields a double slash after the host, and
        # an absolute href is used as it is.
        event_details = (
            f"Date: {event.date}\n"
            f"URL: {urljoin(base_url, event.href)}\n"
            f"Hashtag: {event.hashtag}\n"
        )
        body_lines.append(event_details)
        body_lines.append("-" * 40)

    body = "\n".join(body_lines)
    return (subject, body)
def get_cached_upcoming_events_list(geomob_dir: str) -> list[GeomobEvent]:
    """Get known geomob events.

    Args:
        geomob_dir: Directory searched for saved ``html`` files.

    Returns:
        The events extracted from the file chosen by
        ``agenda.utils.get_most_recent_file``, or an empty list when that
        helper returns nothing.
    """
    latest = agenda.utils.get_most_recent_file(geomob_dir, "html")
    if not latest:
        return []
    document = lxml.html.parse(latest)
    return extract_events(document.getroot())
def update(config: flask.config.Config) -> None:
    """Get upcoming Geomob events and report new ones.

    Downloads the Geomob home page and compares its event list with the
    one from the most recently saved page. When the lists differ the page
    is saved under ``<DATA_DIR>/geomob``, and any events not seen before
    are announced by email.

    Args:
        config: Application config; ``DATA_DIR`` is read here and the whole
            object is passed on to ``agenda.mail.send_mail``.

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an error status.
    """
    base_url = "https://thegeomob.com/"
    geomob_dir = os.path.join(config["DATA_DIR"], "geomob")

    prev_events = get_cached_upcoming_events_list(geomob_dir)

    # Bound the request so a stalled server cannot hang the caller forever.
    r = requests.get(base_url, timeout=30)
    # Without this an error page parses as "no events", gets stored as the
    # newest snapshot, and the next good run re-announces every event.
    r.raise_for_status()
    cur_events = extract_events(lxml.html.fromstring(r.content))

    if cur_events == prev_events:
        return  # no change

    now = datetime.now()
    new_filename = os.path.join(geomob_dir, now.strftime("%Y-%m-%d_%H:%M:%S.html"))
    # Store the raw bytes that were just parsed, so re-reading the snapshot
    # yields the same events regardless of locale or guessed text encoding.
    with open(new_filename, "wb") as snapshot:
        snapshot.write(r.content)

    new_events = find_new_events(prev_events, cur_events)
    if not new_events:
        return

    subject, body = geomob_email(new_events, base_url)
    agenda.mail.send_mail(config, subject, body)