conference-check/check.py

170 lines
5.5 KiB
Python
Raw Normal View History

2023-09-06 07:57:12 +01:00
#!/usr/bin/python3
"""Check if conference websites are live."""
import configparser
import os
2023-09-06 07:57:12 +01:00
import re
import smtplib
import warnings
2023-09-06 07:57:12 +01:00
from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
import requests
2024-02-25 15:15:38 +00:00
from requests.adapters import HTTPAdapter
from urllib3.exceptions import InsecureRequestWarning # type: ignore
2024-02-25 15:15:38 +00:00
from urllib3.util.url import parse_url # type: ignore
class AbsoluteDNSAdapter(HTTPAdapter):
    """A requests adapter that makes sure hostnames are treated as absolute.

    Every outgoing request has a trailing dot appended to its hostname so the
    name is handled as an absolute domain name.
    """

    @staticmethod
    def _is_ip_literal(hostname: str) -> bool:
        """Return True if *hostname* is an IPv4 address or bracketed IPv6 literal."""
        import ipaddress  # local import: only this helper needs it

        if hostname.startswith("["):
            # urllib3 keeps the square brackets around IPv6 literals.
            return True
        try:
            ipaddress.ip_address(hostname)
        except ValueError:
            return False
        return True

    def add_dot_to_hostname(self, url: str) -> str:
        """Append a dot to the hostname to treat it as an absolute domain name.

        The URL is returned unchanged when it has no host component.  Hosts
        that already end in a dot, and IP address literals (where a trailing
        dot would make the address invalid), keep their hostname as-is.
        """
        parsed_url = parse_url(url)
        hostname = parsed_url.host
        if not hostname:
            # Nothing to qualify (e.g. a relative URL); avoid None.endswith().
            return url

        if not hostname.endswith(".") and not self._is_ip_literal(hostname):
            hostname += "."

        # Reconstruct the URL with the (possibly) modified hostname.
        new_url: str = parsed_url._replace(host=hostname).url
        return new_url

    def send(self, request, **kwargs):  # type: ignore
        """Rewrite the request URL to use an absolute hostname, then send it."""
        request.url = self.add_dot_to_hostname(request.url)
        return super().send(request, **kwargs)
# Per-user configuration file: $XDG_CONFIG_HOME/conference-check/config.
# The XDG Base Directory specification treats an unset *or empty*
# XDG_CONFIG_HOME as ~/.config, hence `or` instead of a getenv() default
# (an empty value would otherwise produce a path relative to the cwd).
config_file_path = os.path.expanduser(
    os.path.join(
        os.getenv("XDG_CONFIG_HOME") or "~/.config", "conference-check", "config"
    )
)
config = configparser.ConfigParser()
# The path is already expanded above; read() skips files that do not exist,
# leaving the parser empty.
config.read(config_file_path)
# Pages are fetched with certificate verification disabled (verify=False),
# which makes urllib3 emit an InsecureRequestWarning for each HTTPS request.
# Suppress only that single warning category; everything else stays visible.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
2023-09-06 07:57:12 +01:00
2023-09-11 07:40:16 +01:00
# Captures the text of the first <title> element.  DOTALL lets the title span
# several lines; IGNORECASE and the optional attribute run also match forms
# such as <TITLE> or <title lang="en">.
re_title = re.compile(r"<title\b[^>]*>(.*?)</title>", re.DOTALL | re.IGNORECASE)
2023-09-06 07:57:12 +01:00
# Browser-like identity sent with every request.
AGENT = "Mozilla/5.0 (Windows NT 6.1) Gecko/20100101 Firefox/29.0"
headers = {"User-Agent": AGENT, "Accept": "text/html"}

# One shared session for all checks.  The custom adapter is mounted for both
# schemes so every request goes out with an absolute hostname.
adapter = AbsoluteDNSAdapter()
s = requests.Session()
s.mount("https://", adapter)
s.mount("http://", adapter)
s.headers.update(headers)
2023-09-06 07:57:12 +01:00
# Notification e-mail settings: SMTP server, sender address and recipient.
SMTP_HOST = "4angle.com"
MAIL_FROM = "edward@4angle.com"
MAIL_TO_ADDRESS = "edward@4angle.com"
MAIL_TO_NAME = "Edward Betts"
# Phrases whose presence in a fetched page means the conference site is not
# really up yet (error pages, wiki stubs, hosting placeholders).
not_here_list: list[str] = [
    "The specified URL was not found.",
    "There is currently no text in this page.",
    "This page does not exist yet",
    "404 Not Found",
    "500 Internal Server Error",
    "Test Page for the Apache HTTP Server",
    "Site not found &middot; GitHub Pages",
    "504: Gateway time-out",
    "This page doesn't exist (404)",
    "Coming soon",
    "NOT_FOUND",
]
# (name, URL) pairs to check.  Entries that are commented out are not checked.
conferences: list[tuple[str, str]] = [
    # ("FOSDEM", "https://fosdem.org/2024"),
    # ("PyCascades", "https://2024.pycascades.com/"),
    # ("foss-north", "https://foss-north.se/2024"),
    # ("Wikimedia Hackathon", "https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2024"),
    ("FOSS4G", "https://2024.foss4g.org/"),
    # ("FOSS4G Europe", "https://2024.europe.foss4g.org/"),
    ("FOSSY", "https://2024.fossy.us/"),
    # ("North Bay Python", "https://2024.northbaypython.org/"),
    # ("DebConf", "https://wiki.debian.org/DebConf/24"),
    # ("State of the Map US", "https://2024.stateofthemap.us/"),
    # ("WikiConference North America", "https://wikiconference.org/wiki/2024/Main_Page"),
    # ("PyCon DE", "https://2024.pycon.de/"),
    # ("PyData London", "https://pydata.org/london2024"),
    # ("Pass the SALT", "https://2024.pass-the-salt.org/"),
    ("SotM Baltics", "https://2024.sotm-baltics.org/"),
    ("EuroSciPy", "https://www.euroscipy.org/2024/"),
    # ("EuroPython", "https://ep2024.europython.eu/"),
    ("Semantic Web in Libraries", "https://swib.org/swib24/"),
    ("SotM Africa", "https://2024.stateofthemap.africa/"),
    ("FOSS4G Oceania", "https://2024.foss4g-oceania.org/"),
    # ("All Things Open", "https://2024.allthingsopen.org/"),
    ("GLAMhack24", "https://opendata.ch/events/glamhack2024/"),
]
def find_not_here_message(html: str) -> str | None:
    """Return the first entry of ``not_here_list`` contained in *html*.

    Entries are tried in list order using a plain, case-sensitive substring
    test.  Returns None when no entry occurs in the page.
    """
    for phrase in not_here_list:
        if phrase in html:
            return phrase
    return None
def get_title(html: str) -> str:
    """Return the page title with surrounding whitespace stripped.

    Falls back to "no title" when the page has no <title> element or when
    the title is empty or consists only of whitespace.
    """
    m = re_title.search(html)
    # Strip before testing for emptiness so a blank title such as
    # "<title>  </title>" yields the fallback rather than an empty string.
    title = m.group(1).strip() if m else ""
    return title or "no title"
2023-09-06 07:57:12 +01:00
def check_conference(name: str, url: str, timeout: float = 30.0) -> tuple[bool, str]:
    """Check if conference is live.

    Args:
        name: Conference name (not used by the check itself).
        url: Address of the conference website.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout requests can block indefinitely on an unresponsive host.

    Returns:
        ``(True, page_title)`` when the page was fetched and contains none of
        the known "not here" phrases, otherwise ``(False, reason)`` where the
        reason is the matched phrase, "connection refused" or "timed out".
    """
    try:
        # SotM Baltics has an invalid TLS certificate, but we don't care
        r = s.get(url, verify=False, timeout=timeout)
    except requests.exceptions.Timeout:
        # Checked first: ConnectTimeout is both a Timeout and a ConnectionError.
        return (False, "timed out")
    except requests.exceptions.ConnectionError:
        return (False, "connection refused")

    not_here = find_not_here_message(r.text)
    return (False, not_here) if not_here else (True, get_title(r.text))
def send_mail(subject: str, body: str) -> None:
    """Send a plain-text UTF-8 e-mail to the configured recipient.

    Args:
        subject: Value for the Subject header.
        body: Message text.

    Any key/value pairs in the optional ``[mail_headers]`` config section are
    added to the message as extra headers.
    """
    mail_from = MAIL_FROM
    msg = MIMEText(body, "plain", "UTF-8")

    msg["Subject"] = subject
    msg["To"] = f"{MAIL_TO_NAME} <{MAIL_TO_ADDRESS}>"
    msg["From"] = f"Edward Betts <{mail_from}>"
    msg["Date"] = formatdate()
    msg["Message-ID"] = make_msgid()

    # extra mail headers from config; the section is optional, so a missing
    # config file or section must not raise KeyError
    if config.has_section("mail_headers"):
        for header_name, value in config["mail_headers"].items():
            msg[header_name] = value

    # The context manager issues QUIT and closes the connection even if
    # sendmail() raises.  Named `smtp` to avoid shadowing the HTTP session `s`.
    with smtplib.SMTP(SMTP_HOST) as smtp:
        smtp.sendmail(mail_from, [MAIL_TO_ADDRESS], msg.as_string())
def main(show_not_live: bool = False) -> None:
    """Check each conference and e-mail a notification for every live site.

    Args:
        show_not_live: When true, print a line for each conference that is
            not live, including the reason.  The default stays silent.
    """
    for name, url in conferences:
        live, msg = check_conference(name, url)
        if not live:
            if show_not_live:
                # For a site that is not live, msg holds the reason.
                print(f"{name}: not live ({msg})")
            continue
        body = f"{name}\n{url}\nWeb page title: {msg}"
        send_mail(f"Conference site live: {name}", body)


if __name__ == "__main__":
    main()