conference-check/check.py

118 lines
3.8 KiB
Python
Raw Normal View History

2023-09-06 07:57:12 +01:00
#!/usr/bin/python3
"""Check if conference websites are live."""
import re
import smtplib
import warnings
2023-09-06 07:57:12 +01:00
from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
import requests
from urllib3.exceptions import InsecureRequestWarning
# check_conference() fetches pages with verify=False, and urllib3 warns about
# unverified HTTPS requests. Suppress only that single InsecureRequestWarning
# category so every other warning still surfaces.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
2023-09-06 07:57:12 +01:00
2023-09-11 07:40:16 +01:00
# Captures the text of the first <title> element. The tag match is
# case-insensitive and tolerates attributes (e.g. <title lang="en">);
# DOTALL lets the captured text span several lines.
re_title = re.compile(
    r"<title\b[^>]*>(.*?)</title\s*>", re.DOTALL | re.IGNORECASE
)
2023-09-06 07:57:12 +01:00
# Browser-style User-Agent sent with every request made through the session.
# NOTE(review): presumably chosen because some sites reject the default
# python-requests agent -- confirm.
AGENT = "Mozilla/5.0 (Windows NT 6.1) Gecko/20100101 Firefox/29.0"
headers = {"User-Agent": AGENT, "Accept": "text/html"}
# Shared session used by check_conference(); carries the headers above.
s = requests.Session()
s.headers.update(headers)
# Notification e-mail settings, read by send_mail().
# Sender address: used as the SMTP envelope sender and in the From header.
MAIL_FROM = "edward@4angle.com"
# Recipient display name and address: combined into the To header; the
# address is also the SMTP envelope recipient.
MAIL_TO_NAME = "Edward Betts"
MAIL_TO_ADDRESS = "edward@4angle.com"
# Host name handed to smtplib.SMTP() to deliver the message.
SMTP_HOST = "4angle.com"
# Substrings whose presence in a fetched page means the conference site is
# not up yet (placeholder, wiki stub or server error page). They are checked
# in this order by find_not_here_message(), which returns the first match.
not_here_list = [
    "The specified URL was not found.",
    "There is currently no text in this page.",
    "This page does not exist yet",
    "404 Not Found",
    "500 Internal Server Error",
    "Test Page for the Apache HTTP Server",
    "Site not found &middot; GitHub Pages",
    "504: Gateway time-out",
]
# (name, URL) pairs for the conference sites that main() checks.
# Commented-out entries are kept in place but are not checked.
conferences: list[tuple[str, str]] = [
    # ("FOSDEM", "https://fosdem.org/2024"),
    # ("PyCascades", "https://2024.pycascades.com/"),
    ("foss-north", "https://foss-north.se/2024"),
    # ("Wikimedia Hackathon", "https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2024"),
    ("FOSS4G", "https://2024.foss4g.org/"),
    # ("FOSS4G Europe", "https://2024.europe.foss4g.org/"),
    ("FOSSY", "https://2024.fossy.us/"),
    # ("North Bay Python", "https://2024.northbaypython.org/"),
    # ("DebConf", "https://wiki.debian.org/DebConf/24"),
    ("State of the Map US", "https://2024.stateofthemap.us/"),
    ("WikiConference North America", "https://wikiconference.org/wiki/2024/Main_Page"),
    # ("PyCon DE", "https://2024.pycon.de/"),
    # ("PyData London", "https://pydata.org/london2024"),
    ("Pass the SALT", "https://2024.pass-the-salt.org/"),
    ("SotM Baltics", "https://2024.sotm-baltics.org/"),
    ("EuroSciPy", "https://www.euroscipy.org/2024/"),
    ("EuroPython", "https://ep2024.europython.eu/"),
    ("Semantic Web in Libraries", "https://swib.org/swib24/"),
    ("SotM Africa", "https://2024.stateofthemap.africa/"),
    ("FOSS4G Oceania", "https://2024.foss4g-oceania.org/"),
    ("All Things Open", "https://2024.allthingsopen.org/"),
]
def find_not_here_message(html: str) -> str | None:
    """Return the first "not here" marker found in the page, if any.

    The markers in ``not_here_list`` are tried in list order; the first one
    that occurs as a substring of *html* is returned. ``None`` means none of
    them occurs in the page.
    """
    for marker in not_here_list:
        if marker in html:
            return marker
    return None
def get_title(html: str) -> str:
    """Return the page title extracted from *html*.

    The text captured by ``re_title`` is returned with surrounding
    whitespace removed. ``"no title"`` is returned when the pattern does not
    match, or when the captured title is empty or only whitespace.
    """
    m = re_title.search(html)
    # Strip before testing for emptiness so a whitespace-only title falls
    # back to "no title" instead of being returned as an empty string.
    title = m.group(1).strip() if m else ""
    return title or "no title"
2023-09-06 07:57:12 +01:00
def check_conference(
    name: str, url: str, *, timeout: float = 30.0
) -> tuple[bool, str]:
    """Check if conference is live.

    Fetches *url* through the shared session and decides whether the page
    looks like a real conference site.

    Args:
        name: Conference name. Not used by the check itself; kept so callers
            can pass the (name, url) pair unchanged.
        url: Address of the conference web site.
        timeout: Seconds to wait for the server before giving up, so a
            stalled host cannot hang the whole run.

    Returns:
        ``(True, title)`` when the site is considered live, where *title* is
        the page title from ``get_title``. Otherwise ``(False, reason)``,
        where *reason* is ``"timed out"``, ``"connection refused"``,
        ``"HTTP <status>"`` or the matching entry of ``not_here_list``.
    """
    try:
        # SotM Baltics has an invalid TLS certificate, but we don't care
        r = s.get(url, verify=False, timeout=timeout)
    except requests.exceptions.Timeout:
        # Caught first: ConnectTimeout is also a ConnectionError subclass.
        return (False, "timed out")
    except requests.exceptions.ConnectionError:
        return (False, "connection refused")

    # An error status means the site is not up, whatever the page says.
    if not r.ok:
        return (False, f"HTTP {r.status_code}")

    not_here = find_not_here_message(r.text)
    return (False, not_here) if not_here else (True, get_title(r.text))
def send_mail(subject: str, body: str) -> None:
    """Send a plain-text UTF-8 e-mail via ``SMTP_HOST``.

    The message goes from ``MAIL_FROM`` to ``MAIL_TO_ADDRESS`` and gets
    freshly generated Date and Message-ID headers.

    Args:
        subject: Value of the Subject header.
        body: Message text.

    Raises:
        smtplib.SMTPException: If the server rejects the message.
        OSError: If the connection to the SMTP host fails.
    """
    msg = MIMEText(body, "plain", "UTF-8")
    msg["Subject"] = subject
    msg["To"] = f"{MAIL_TO_NAME} <{MAIL_TO_ADDRESS}>"
    msg["From"] = f"Edward Betts <{MAIL_FROM}>"
    msg["Date"] = formatdate()
    msg["Message-ID"] = make_msgid()

    # The context manager issues QUIT and closes the socket even when
    # sendmail() raises. The name `smtp` avoids shadowing the module-level
    # requests session `s`.
    with smtplib.SMTP(SMTP_HOST) as smtp:
        smtp.sendmail(MAIL_FROM, [MAIL_TO_ADDRESS], msg.as_string())
def main(show_not_live: bool = False) -> None:
    """Check each conference and e-mail a notice for every live site.

    Args:
        show_not_live: When true, print ``"<name>: <reason>"`` for each
            conference that is not live. When false (the default) such
            conferences are skipped silently.
    """
    for name, url in conferences:
        live, msg = check_conference(name, url)
        if not live:
            # For a site that is not live, msg holds the reason.
            if show_not_live:
                print(f"{name}: {msg}")
            continue
        # For a live site, msg holds the page title.
        body = f"{name}\n{url}\nWeb page title: {msg}"
        send_mail(f"Conference site live: {name}", body)
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()