conference-check/check.py

111 lines
3.4 KiB
Python
Executable file

#!/usr/bin/python3
"""Check if conference websites are live."""
import re
import smtplib
import warnings
from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
import requests
from urllib3.exceptions import InsecureRequestWarning
# Suppress only the single InsecureRequestWarning from urllib3.
# check_conference() deliberately fetches pages with verify=False, so this
# warning is expected here and would only add noise to the output.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
# Captures the text of the first <title> element in a page.
# HTML tag names are case-insensitive and the opening tag may carry
# attributes (e.g. <TITLE> or <title lang="en">), so both are accepted.
# DOTALL lets the captured title text span several lines.
re_title = re.compile(
    r"<title\b[^>]*>(.*?)</title\s*>",
    re.IGNORECASE | re.DOTALL,
)
# Browser-like User-Agent string sent with every request made via the session.
AGENT = "Mozilla/5.0 (Windows NT 6.1) Gecko/20100101 Firefox/29.0"
# Default headers for the session: identify as AGENT and ask for HTML.
headers = {"User-Agent": AGENT, "Accept": "text/html"}
# Shared HTTP session used by check_conference() for all page fetches.
s = requests.Session()
s.headers.update(headers)
# Envelope sender and the address placed in the From header by send_mail().
MAIL_FROM = "edward@4angle.com"
# Display name and address of the single notification recipient.
MAIL_TO_NAME = "Edward Betts"
MAIL_TO_ADDRESS = "edward@4angle.com"
# SMTP server that send_mail() connects to.
SMTP_HOST = "4angle.com"
# Marker strings that identify a fetched page as an error or placeholder page
# rather than a real conference site.  find_not_here_message() looks for each
# one as a case-sensitive substring of the page HTML, in the order listed.
not_here_list = [
"The specified URL was not found.",
"There is currently no text in this page.",
"This page does not exist yet",
"404 Not Found",
"500 Internal Server Error",
"Test Page for the Apache HTTP Server",
"Site not found &middot; GitHub Pages",
]
# Conferences to check, as (name, URL) pairs; main() processes them in order.
# Commented-out entries are skipped -- presumably sites that have already been
# reported as live; confirm with the author before removing them.
conferences = [
# ("FOSDEM", "https://fosdem.org/2024"),
# ("PyCascades", "https://2024.pycascades.com/"),
("foss-north", "https://foss-north.se/2024"),
("Wikimedia Hackathon", "https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2024"),
("FOSS4G", "https://2024.foss4g.org/"),
("FOSS4G Europe", "https://2024.europe.foss4g.org/"),
("FOSSY", "https://2024.fossy.us/"),
# ("North Bay Python", "https://2024.northbaypython.org/"),
# ("DebConf", "https://wiki.debian.org/DebConf/24"),
("State of the Map US", "https://2024.stateofthemap.us/"),
("WikiConference North America", "https://wikiconference.org/wiki/2024/Main_Page"),
("PyCon DE", "https://2024.pycon.de/"),
("PyData London", "https://pydata.org/london2024"),
("Pass the SALT", "https://2024.pass-the-salt.org/"),
("SotM Baltics", "https://2024.sotm-baltics.org/"),
]
def find_not_here_message(html: str) -> str | None:
    """Return the first known placeholder message found in a page.

    Each entry of ``not_here_list`` is tried, in list order, as a plain
    case-sensitive substring of ``html``.  ``None`` means none of them
    occurs in the page.
    """
    for marker in not_here_list:
        if marker in html:
            return marker
    return None
def get_title(html: str) -> str:
    """Return the page title, or ``"no title"`` when the page has none.

    The title is the first ``re_title`` match in ``html`` with surrounding
    whitespace removed.  A missing, empty or whitespace-only title yields
    the placeholder ``"no title"``.
    """
    m = re_title.search(html)
    # Strip before testing for emptiness so that a whitespace-only <title>
    # falls back to the placeholder instead of producing an empty string.
    title = m.group(1).strip() if m else ""
    return title or "no title"
def check_conference(
    name: str, url: str, timeout: float = 30.0
) -> tuple[bool, str]:
    """Check if conference is live.

    Fetch ``url`` through the shared session and decide whether the page
    looks like a real conference site.

    Args:
        name: Conference name (not used by the check itself).
        url: Address of the conference web page.
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive site cannot stall the whole run.

    Returns:
        ``(True, title)`` when the page contains none of the known
        placeholder messages, otherwise ``(False, reason)`` where ``reason``
        is the placeholder message found or a short description of the
        network failure.
    """
    try:
        # SotM Baltics has an invalid TLS certificate, but we don't care
        r = s.get(url, verify=False, timeout=timeout)
    except requests.exceptions.Timeout:
        # Must precede ConnectionError: ConnectTimeout derives from both.
        return (False, "timed out")
    except requests.exceptions.ConnectionError:
        return (False, "connection refused")

    not_here = find_not_here_message(r.text)
    if not_here:
        return (False, not_here)
    return (True, get_title(r.text))
def send_mail(subject: str, body: str) -> None:
    """Send a plain-text UTF-8 e-mail to the configured recipient.

    The message goes from ``MAIL_FROM`` to ``MAIL_TO_ADDRESS`` through the
    server named by ``SMTP_HOST``.  Exceptions raised by ``smtplib`` while
    connecting or sending are not caught and propagate to the caller.

    Args:
        subject: Value of the Subject header.
        body: Text of the message.
    """
    msg = MIMEText(body, "plain", "UTF-8")
    msg["Subject"] = subject
    msg["To"] = f"{MAIL_TO_NAME} <{MAIL_TO_ADDRESS}>"
    msg["From"] = f"Edward Betts <{MAIL_FROM}>"
    msg["Date"] = formatdate()
    msg["Message-ID"] = make_msgid()

    # The context manager closes the connection even when sendmail() raises.
    # The name ``smtp`` also avoids shadowing the module-level requests
    # session ``s``.
    with smtplib.SMTP(SMTP_HOST) as smtp:
        smtp.sendmail(MAIL_FROM, [MAIL_TO_ADDRESS], msg.as_string())
def main(show_not_live: bool = False) -> None:
    """Check each conference and e-mail a notice for every live site.

    Args:
        show_not_live: When true, print one line for each conference that
            is not live, including the reason given by
            ``check_conference``.  When false, such conferences are skipped
            silently.
    """
    for name, url in conferences:
        live, msg = check_conference(name, url)
        if not live:
            # For a site that is not live, ``msg`` holds the failure reason.
            if show_not_live:
                print(f"{name}: not live ({msg})")
            continue
        # For a live site, ``msg`` holds the page title.
        body = f"{name}\n{url}\nWeb page title: {msg}"
        send_mail(f"Conference site live: {name}", body)


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()