2024-07-08 11:48:56 +01:00
|
|
|
"""Geomob events."""
|
|
|
|
|
|
|
|
import os
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from datetime import date, datetime
|
|
|
|
from typing import List
|
|
|
|
|
|
|
|
import dateutil.parser
|
|
|
|
import flask
|
|
|
|
import lxml.html
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import agenda.mail
|
|
|
|
import agenda.utils
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class GeomobEvent:
    """One entry from the Geomob event list.

    The dataclass is frozen, so instances are immutable and hashable and can
    be compared with ``==`` or collected into sets.
    """

    # Calendar day of the event.
    date: date
    # Value of the event link's ``href`` attribute.
    href: str
    # Hashtag associated with the event.
    hashtag: str
|
|
|
|
|
|
|
|
|
2024-07-08 11:58:12 +01:00
|
|
|
def extract_events(
    tree: lxml.html.HtmlElement,
) -> List[GeomobEvent]:
    """Build a GeomobEvent for every link in the page's event list.

    Every ``<a>`` found under ``//ol[@class="event-list"]/li`` is read as
    ``"<date text> <hashtag>"``: the text after the last space becomes the
    hashtag and everything before it is parsed as a date.
    """

    def to_event(link: lxml.html.HtmlElement) -> GeomobEvent:
        """Convert one event-list anchor element into a GeomobEvent."""
        label = link.text_content().strip()
        when, _, tag = label.rpartition(" ")
        return GeomobEvent(
            date=dateutil.parser.parse(when).date(),
            href=link.get("href"),
            hashtag=tag,
        )

    return [to_event(link) for link in tree.xpath('//ol[@class="event-list"]/li/a')]
|
|
|
|
|
|
|
|
|
|
|
|
def find_new_events(
    prev: list[GeomobEvent], cur: list[GeomobEvent]
) -> list[GeomobEvent]:
    """Find new events that appear in cur but not in prev.

    Args:
        prev: Previously known events.
        cur: Events from the current listing.

    Returns:
        The events of ``cur`` that are absent from ``prev``, without
        duplicates, in the order in which they first appear in ``cur``.
    """
    known = set(prev)
    # dict.fromkeys() drops duplicates while keeping first-seen order, so the
    # result is deterministic (a plain set difference has arbitrary order).
    return [event for event in dict.fromkeys(cur) if event not in known]
|
|
|
|
|
|
|
|
|
|
|
|
def geomob_email(new_events: list[GeomobEvent], base_url: str) -> tuple[str, str]:
    """Generate email subject and body for new events.

    Args:
        new_events: Events to announce; must not be empty.
        base_url: The base URL of the website, used to resolve each event's
            ``href`` into a full link.

    Returns:
        A ``(subject, body)`` tuple. The body lists the date, URL and hashtag
        of each event, with a line of dashes after every event.

    Raises:
        AssertionError: If ``new_events`` is empty.
    """
    from urllib.parse import urljoin

    assert new_events

    subject = f"{len(new_events)} New Geomob Event(s) Announced"

    body_lines = ["Hello,\n", "Here are the new Geomob events:\n"]
    for event in new_events:
        # urljoin avoids the double slash produced by plain concatenation when
        # base_url ends with "/" and href starts with "/", and leaves hrefs
        # that are already absolute untouched.
        event_details = (
            f"Date: {event.date}\n"
            f"URL: {urljoin(base_url, event.href)}\n"
            f"Hashtag: {event.hashtag}\n"
        )
        body_lines.append(event_details)
        body_lines.append("-" * 40)

    body = "\n".join(body_lines)
    return (subject, body)
|
|
|
|
|
|
|
|
|
|
|
|
def get_cached_upcoming_events_list(geomob_dir: str) -> list[GeomobEvent]:
    """Return the events parsed from the most recent saved HTML file.

    The file is chosen by ``agenda.utils.get_most_recent_file(geomob_dir,
    "html")``; when that returns a falsy value, an empty list is returned.
    """
    latest = agenda.utils.get_most_recent_file(geomob_dir, "html")
    if not latest:
        return []
    document = lxml.html.parse(latest)
    return extract_events(document.getroot())
|
2024-07-08 11:48:56 +01:00
|
|
|
|
|
|
|
|
|
|
|
def update(config: flask.config.Config) -> None:
    """Get upcoming Geomob events and report new ones.

    Downloads the Geomob home page and compares its event list with the one
    parsed from the most recently saved copy. When the list has changed, the
    new page is saved under ``<DATA_DIR>/geomob`` with a timestamped name, and
    an email is sent if there are events that were not known before.

    Args:
        config: Application configuration; ``config["DATA_DIR"]`` is read
            here and the whole object is passed to ``agenda.mail.send_mail``.

    Raises:
        requests.RequestException: If the page cannot be fetched, the request
            times out, or the server answers with an HTTP error status.
    """
    base_url = "https://thegeomob.com/"
    geomob_dir = os.path.join(config["DATA_DIR"], "geomob")

    prev_events = get_cached_upcoming_events_list(geomob_dir)

    # Without a timeout a stalled connection would block forever.
    r = requests.get(base_url, timeout=30)
    # Never treat an error page as the current listing: saving it would wipe
    # the cached baseline and re-announce every event on the next run.
    r.raise_for_status()
    cur_events = extract_events(lxml.html.fromstring(r.content))

    if cur_events == prev_events:
        return  # no change

    now = datetime.now()
    new_filename = os.path.join(geomob_dir, now.strftime("%Y-%m-%d_%H:%M:%S.html"))
    # Context manager guarantees the snapshot is flushed and closed.
    with open(new_filename, "w", encoding="utf-8") as out:
        out.write(r.text)

    new_events = find_new_events(prev_events, cur_events)
    if not new_events:
        return  # events were only removed or reordered; nothing to announce

    subject, body = geomob_email(new_events, base_url)
    agenda.mail.send_mail(config, subject, body)
|