2023-09-06 07:57:12 +01:00
|
|
|
#!/usr/bin/python3
|
|
|
|
|
|
|
|
"""Check if conference websites are live."""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import smtplib
|
2023-10-29 11:52:15 +00:00
|
|
|
import warnings
|
2023-09-06 07:57:12 +01:00
|
|
|
from email.mime.text import MIMEText
|
|
|
|
from email.utils import formatdate, make_msgid
|
|
|
|
|
|
|
|
import requests
|
2023-10-29 11:52:15 +00:00
|
|
|
from urllib3.exceptions import InsecureRequestWarning
|
|
|
|
|
|
|
|
# Requests in this script are made with certificate verification disabled
# (verify=False), which makes urllib3 emit an InsecureRequestWarning for each
# request. Suppress only that single warning category.
warnings.filterwarnings("ignore", category=InsecureRequestWarning)
|
|
|
|
|
2023-09-06 07:57:12 +01:00
|
|
|
|
2023-09-11 07:40:16 +01:00
|
|
|
# Matches the contents of the first <title> element. The tag name is matched
# case-insensitively and may carry attributes (e.g. <title lang="en">);
# DOTALL lets the title text span several lines. Group 1 is the raw text.
re_title = re.compile(r"<title\b[^>]*>(.*?)</title>", re.DOTALL | re.IGNORECASE)
|
2023-09-06 07:57:12 +01:00
|
|
|
|
|
|
|
# User-Agent string sent with every request; it identifies the client as a
# desktop Firefox browser.
AGENT = "Mozilla/5.0 (Windows NT 6.1) Gecko/20100101 Firefox/29.0"
headers = {"User-Agent": AGENT, "Accept": "text/html"}

# Shared HTTP session so that every request carries the headers above.
s = requests.Session()
s.headers.update(headers)
|
|
|
|
|
|
|
|
# Envelope/header sender address for notification e-mails.
MAIL_FROM = "edward@4angle.com"
# Display name and address of the notification recipient.
MAIL_TO_NAME = "Edward Betts"
MAIL_TO_ADDRESS = "edward@4angle.com"
# Host of the SMTP server used to send notifications.
SMTP_HOST = "4angle.com"
|
|
|
|
|
|
|
|
# Phrases whose presence anywhere in a fetched page means the conference site
# is not live yet (error pages, empty wiki pages, server placeholder pages).
not_here_list = [
    "The specified URL was not found.",
    "There is currently no text in this page.",
    "This page does not exist yet",
    "404 Not Found",
    "500 Internal Server Error",
    "Test Page for the Apache HTTP Server",
    "Site not found · GitHub Pages",
    "504: Gateway time-out",
]
|
|
|
|
|
|
|
|
# (name, URL) pairs of conference sites to check. Commented-out entries are
# kept in place so they can be re-enabled without retyping the URL.
conferences = [
    # ("FOSDEM", "https://fosdem.org/2024"),
    # ("PyCascades", "https://2024.pycascades.com/"),
    ("foss-north", "https://foss-north.se/2024"),
    ("Wikimedia Hackathon", "https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2024"),
    ("FOSS4G", "https://2024.foss4g.org/"),
    # ("FOSS4G Europe", "https://2024.europe.foss4g.org/"),
    ("FOSSY", "https://2024.fossy.us/"),
    # ("North Bay Python", "https://2024.northbaypython.org/"),
    # ("DebConf", "https://wiki.debian.org/DebConf/24"),
    ("State of the Map US", "https://2024.stateofthemap.us/"),
    ("WikiConference North America", "https://wikiconference.org/wiki/2024/Main_Page"),
    ("PyCon DE", "https://2024.pycon.de/"),
    ("PyData London", "https://pydata.org/london2024"),
    ("Pass the SALT", "https://2024.pass-the-salt.org/"),
    ("SotM Baltics", "https://2024.sotm-baltics.org/"),
    ("EuroSciPy", "https://www.euroscipy.org/2024/"),
    ("EuroPython", "https://ep2024.europython.eu/"),
    ("Semantic Web in Libraries", "https://swib.org/swib24/"),
    ("SotM Africa", "https://2024.stateofthemap.africa/"),
]
|
|
|
|
|
|
|
|
|
|
|
|
def find_not_here_message(html: str) -> str | None:
    """Find not here message in web page.

    Return the first entry of ``not_here_list`` that occurs as a substring of
    *html*, or ``None`` when none of them occurs.
    """
    return next((not_here for not_here in not_here_list if not_here in html), None)
|
|
|
|
|
|
|
|
|
|
|
|
def get_title(html: str) -> str:
    """Title from web page.

    Return the text of the first ``<title>`` element found by ``re_title``,
    stripped of surrounding whitespace. Return ``"no title"`` when there is
    no title element or when its text is empty or whitespace-only.
    """
    m = re_title.search(html)
    title = m.group(1).strip() if m else ""
    # Strip before testing so a whitespace-only title also gets the fallback.
    return title or "no title"
|
2023-09-06 07:57:12 +01:00
|
|
|
|
|
|
|
|
|
|
|
def check_conference(name: str, url: str, timeout: float = 30.0) -> tuple[bool, str]:
    """Check if conference is live.

    Fetch *url* with the shared session and decide whether the page looks
    like a real conference site.

    :param name: conference name (not used for the check itself).
    :param url: address of the conference web site.
    :param timeout: seconds to wait for the server before giving up.
    :return: ``(True, page title)`` when the site is live, otherwise
        ``(False, reason)`` where reason is a short explanation.
    """
    try:
        # SotM Baltics has an invalid TLS certificate, but we don't care
        r = s.get(url, verify=False, timeout=timeout)
    except requests.exceptions.ConnectionError:
        return (False, "connection refused")
    except requests.exceptions.Timeout:
        return (False, "timed out")
    except requests.exceptions.RequestException as e:
        # Any other request failure (e.g. too many redirects) means the site
        # cannot be considered live; report it instead of aborting the run.
        return (False, f"request failed: {e}")

    not_here = find_not_here_message(r.text)
    if not_here:
        return (False, not_here)

    # An HTTP error status is not a live site even when the body contains
    # none of the known "not here" phrases.
    if not r.ok:
        return (False, f"HTTP status {r.status_code}")

    return (True, get_title(r.text))
|
|
|
|
|
|
|
|
|
|
|
|
def send_mail(subject: str, body: str) -> None:
    """Send an e-mail.

    Build a UTF-8 plain-text message from ``MAIL_FROM`` to
    ``MAIL_TO_ADDRESS`` and deliver it through the SMTP server at
    ``SMTP_HOST``.

    :param subject: value of the Subject header.
    :param body: plain-text message body.
    """
    mail_from = MAIL_FROM
    msg = MIMEText(body, "plain", "UTF-8")

    msg["Subject"] = subject
    msg["To"] = f"{MAIL_TO_NAME} <{MAIL_TO_ADDRESS}>"
    msg["From"] = f"Edward Betts <{mail_from}>"
    msg["Date"] = formatdate()
    msg["Message-ID"] = make_msgid()

    # The context manager issues QUIT and closes the connection even when
    # sendmail() raises.
    with smtplib.SMTP(SMTP_HOST) as smtp:
        smtp.sendmail(mail_from, [MAIL_TO_ADDRESS], msg.as_string())
|
|
|
|
|
|
|
|
|
|
|
|
def main(show_not_live: bool = False) -> None:
    """Check each conference.

    For every entry in ``conferences``, send a notification e-mail when the
    site is live. Sites that are not live are skipped.

    :param show_not_live: when true, print each skipped conference together
        with the reason it was considered not live.
    """
    for name, url in conferences:
        live, msg = check_conference(name, url)
        if not live:
            if show_not_live:
                print(f"{name}: not live ({msg})")
            continue
        body = f"{name}\n{url}\nWeb page title: {msg}"
        send_mail(f"Conference site live: {name}", body)
|
|
|
|
|
|
|
|
|
|
|
|
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|