agenda/agenda/geomob.py
Edward Betts fac73962b2 Add comprehensive tests and fix geomob URL bug
- Add complete test suite for geomob module (19 tests)
- Add comprehensive Bristol waste collection tests
- Fix geomob_email() double slash assertion bug for HTTPS URLs
- Fix utils.py human_readable_delta days pluralization
- Update agenda tests with better coverage

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-20 01:31:19 +02:00

110 lines
3.1 KiB
Python

"""Geomob events."""
import os
from dataclasses import dataclass
from datetime import date, datetime
from typing import List
import dateutil.parser
import flask
import lxml.html
import requests
import agenda.mail
import agenda.utils
@dataclass(frozen=True)
class GeomobEvent:
    """One event entry from the Geomob event list.

    The class is a frozen dataclass: instances cannot be modified after
    creation, compare by field values and are hashable, so they can be
    placed in sets.
    """

    # Day of the event, parsed from the entry's link text.
    date: date
    # Value of the entry link's ``href`` attribute.
    href: str
    # Final space-separated token of the entry's link text.
    hashtag: str
def extract_events(
    tree: lxml.html.HtmlElement,
) -> List[GeomobEvent]:
    """Collect the events listed in a parsed Geomob page.

    Every ``<a>`` that is a child of an ``<li>`` inside
    ``<ol class="event-list">`` is treated as one event.  The link text is
    stripped and split at its last space: the part before the space is
    parsed as the date and the final token is kept as the hashtag.  The
    link's ``href`` attribute is stored as-is.

    Args:
        tree: Parsed HTML document to search.

    Returns:
        One ``GeomobEvent`` per matching link, in the order the XPath query
        returns them.
    """

    def to_event(link) -> GeomobEvent:
        # Link text looks like "<date text> <hashtag>"; split on the last space.
        label = link.text_content().strip()
        when, _, tag = label.rpartition(" ")
        return GeomobEvent(
            date=dateutil.parser.parse(when).date(),
            href=link.get("href"),
            hashtag=tag,
        )

    links = tree.xpath('//ol[@class="event-list"]/li/a')
    return [to_event(link) for link in links]
def find_new_events(
    prev: list[GeomobEvent], cur: list[GeomobEvent]
) -> list[GeomobEvent]:
    """Return the events that appear in ``cur`` but not in ``prev``.

    The result keeps the order in which events first appear in ``cur`` and
    contains each event at most once.  A plain ``set`` difference yields the
    same events but in an arbitrary order that can differ between runs;
    walking ``cur`` keeps the output deterministic.

    Args:
        prev: Previously known events.
        cur: Events currently listed.

    Returns:
        Events from ``cur`` that are absent from ``prev``, without
        duplicates, in ``cur`` order.
    """
    seen = set(prev)
    fresh = []
    for event in cur:
        if event in seen:
            continue
        # Remember the event so a repeat later in ``cur`` is not reported twice.
        seen.add(event)
        fresh.append(event)
    return fresh
def geomob_email(new_events: list[GeomobEvent], base_url: str) -> tuple[str, str]:
    """Generate email subject and body for new events.

    Each event's URL is formed by appending its ``href`` to ``base_url``.

    Args:
        new_events: Events to announce; must not be empty.
        base_url: The base URL of the website, prepended to each ``href``.

    Returns:
        The email subject and body.

    Raises:
        AssertionError: If ``new_events`` is empty, or if a combined URL
            contains ``//`` outside its first ``://`` separator.
    """
    # The checks raise AssertionError explicitly instead of using ``assert``
    # so they are still enforced when Python runs with ``-O``; the exception
    # type seen by callers is unchanged.
    if not new_events:
        raise AssertionError

    subject = f"{len(new_events)} New Geomob Event(s) Announced"
    body_lines = ["Hello,\n", "Here are the new Geomob events:\n"]

    for event in new_events:
        url = base_url + event.href

        # Only look for a double slash after the "://" separator, so the
        # separator itself is not reported.
        _, scheme_sep, after_scheme = url.partition("://")
        if scheme_sep:
            if "//" in after_scheme:
                raise AssertionError(f"Double slash found in URL path: {url}")
        elif "//" in url:
            raise AssertionError(f"Double slash found in URL: {url}")

        body_lines.append(
            f"Date: {event.date}\nURL: {url}\nHashtag: {event.hashtag}\n"
        )
        body_lines.append("-" * 40)

    return (subject, "\n".join(body_lines))
def get_cached_upcoming_events_list(geomob_dir: str) -> list[GeomobEvent]:
    """Load the events from the most recent cached Geomob page.

    Asks ``agenda.utils.get_most_recent_file`` for an ``"html"`` file in
    ``geomob_dir`` and parses it with ``extract_events``.

    Args:
        geomob_dir: Directory holding the cached pages.

    Returns:
        The events found in the cached page, or an empty list when the
        lookup returns a falsy value (presumably meaning no cached file
        exists; confirm against ``get_most_recent_file``).
    """
    latest = agenda.utils.get_most_recent_file(geomob_dir, "html")
    if not latest:
        return []
    document = lxml.html.parse(latest)
    return extract_events(document.getroot())
def update(config: flask.config.Config) -> None:
    """Get upcoming Geomob events and report new ones.

    Downloads the Geomob home page and compares its event list with the one
    from the most recent cached page.  When the lists differ, the page is
    saved under ``<DATA_DIR>/geomob`` with a timestamped name, and any events
    not seen before are announced by email.

    Args:
        config: Application configuration; ``config["DATA_DIR"]`` is read
            here and the whole object is passed to ``agenda.mail.send_mail``.
    """
    geomob_dir = os.path.join(config["DATA_DIR"], "geomob")
    prev_events = get_cached_upcoming_events_list(geomob_dir)

    # requests has no default timeout, so bound the wait to avoid hanging
    # forever on a stalled connection.
    r = requests.get("https://thegeomob.com/", timeout=30)
    cur_events = extract_events(lxml.html.fromstring(r.content))

    if cur_events == prev_events:
        return  # no change

    now = datetime.now()
    new_filename = os.path.join(geomob_dir, now.strftime("%Y-%m-%d_%H:%M:%S.html"))
    # Use a context manager so the file is closed (and flushed) right away.
    with open(new_filename, "w") as out:
        out.write(r.text)

    # The list can change without gaining events (e.g. one was removed);
    # only send mail when something was actually added.
    new_events = find_new_events(prev_events, cur_events)
    if not new_events:
        return

    base_url = "https://thegeomob.com"
    subject, body = geomob_email(new_events, base_url)
    agenda.mail.send_mail(config, subject, body)