From d707eef267d96871eb045bd3ad0bd7c61b3a14e8 Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Sun, 25 Feb 2024 15:15:38 +0000
Subject: [PATCH 1/3] Absolute DNS lookups

---
 check.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/check.py b/check.py
index 55a5581..fd3ae0e 100755
--- a/check.py
+++ b/check.py
@@ -11,7 +11,33 @@ from email.mime.text import MIMEText
 from email.utils import formatdate, make_msgid
 
 import requests
+from requests.adapters import HTTPAdapter
 from urllib3.exceptions import InsecureRequestWarning  # type: ignore
+from urllib3.util.url import parse_url  # type: ignore
+
+
+class AbsoluteDNSAdapter(HTTPAdapter):
+    """A custom adapter for requests to ensure hostnames are treated as absolute."""
+
+    def add_dot_to_hostname(self, url: str) -> str:
+        """Append a dot to the hostname to treat it as an absolute domain name."""
+        parsed_url = parse_url(url)
+
+        # Append a dot unless the URL has no host or it already ends with one.
+        hostname = parsed_url.host
+        if hostname and not hostname.endswith("."):
+            hostname += "."
+
+        # Reconstruct the URL with the modified hostname.
+        new_url: str = parsed_url._replace(host=hostname).url
+        return new_url
+
+    def send(self, request, **kwargs):  # type: ignore
+        """Override the send method to modify the request URL before sending."""
+        # Modify the request URL to ensure the hostname is treated as absolute.
+        request.url = self.add_dot_to_hostname(request.url)
+        return super().send(request, **kwargs)
+
 
 config_file_path = os.path.expanduser(
     os.path.join(
@@ -35,6 +61,12 @@ headers = {"User-Agent": AGENT, "Accept": "text/html"}
 s = requests.Session()
 s.headers.update(headers)
 
+# Create a session and mount the custom adapter for both HTTP and HTTPS requests.
+adapter = AbsoluteDNSAdapter()
+s.mount("http://", adapter)
+s.mount("https://", adapter)
+
+
 MAIL_FROM = "edward@4angle.com"
 MAIL_TO_NAME = "Edward Betts"
 MAIL_TO_ADDRESS = "edward@4angle.com"

From 30a5847320a467a49fb5cac25d9382f69b5dcb72 Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Sun, 25 Feb 2024 17:20:26 +0000
Subject: [PATCH 2/3] Move mail settings to config file

---
 check.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/check.py b/check.py
index fd3ae0e..53615c5 100755
--- a/check.py
+++ b/check.py
@@ -67,11 +67,6 @@ s.mount("http://", adapter)
 s.mount("https://", adapter)
 
 
-MAIL_FROM = "edward@4angle.com"
-MAIL_TO_NAME = "Edward Betts"
-MAIL_TO_ADDRESS = "edward@4angle.com"
-SMTP_HOST = "4angle.com"
-
 not_here_list = [
     "The specified URL was not found.",
     "There is currently no text in this page.",
@@ -137,12 +132,15 @@ def check_conference(name: str, url: str) -> tuple[bool, str]:
 
 def send_mail(subject: str, body: str) -> None:
     """Send an e-mail."""
-    mail_from = MAIL_FROM
+    mail_from_address = config["mail"]["from_address"]
+    mail_from_name = config["mail"]["from_name"]
+    mail_to_address = config["mail"]["to_address"]
+    mail_to_name = config["mail"]["to_name"]
 
     msg = MIMEText(body, "plain", "UTF-8")
     msg["Subject"] = subject
-    msg["To"] = f"{MAIL_TO_NAME} <{MAIL_TO_ADDRESS}>"
-    msg["From"] = f"Edward Betts <{mail_from}>"
+    msg["To"] = f"{mail_to_name} <{mail_to_address}>"
+    msg["From"] = f"{mail_from_name} <{mail_from_address}>"
     msg["Date"] = formatdate()
     msg["Message-ID"] = make_msgid()
 

From 97267c5f294893b3d72b44ea4f450ab21da6fee2 Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Sun, 25 Feb 2024 17:23:38 +0000
Subject: [PATCH 3/3] Record conference state in YAML file

Closes: #2, Closes: #3
---
 check.py | 81 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 33 deletions(-)

diff --git a/check.py b/check.py
index 53615c5..d7a4108 100755
--- a/check.py
+++ b/check.py
@@ -6,11 +6,14 @@ import configparser
 import os
 import re
 import smtplib
+import typing
 import warnings
+from datetime import date
 from email.mime.text import MIMEText
 from email.utils import formatdate, make_msgid
 
 import requests
+import yaml
 from requests.adapters import HTTPAdapter
 from urllib3.exceptions import InsecureRequestWarning  # type: ignore
 from urllib3.util.url import parse_url  # type: ignore
@@ -79,31 +82,7 @@ not_here_list = [
     "This page doesn't exist (404)",
     "Coming soon",
     "NOT_FOUND",
-]
-
-conferences = [
-    # ("FOSDEM", "https://fosdem.org/2024"),
-    # ("PyCascades", "https://2024.pycascades.com/"),
-    # ("foss-north", "https://foss-north.se/2024"),
-    # ("Wikimedia Hackathon", "https://www.mediawiki.org/wiki/Wikimedia_Hackathon_2024"),
-    ("FOSS4G", "https://2024.foss4g.org/"),
-    # ("FOSS4G Europe", "https://2024.europe.foss4g.org/"),
-    ("FOSSY", "https://2024.fossy.us/"),
-    # ("North Bay Python", "https://2024.northbaypython.org/"),
-    # ("DebConf", "https://wiki.debian.org/DebConf/24"),
-    # ("State of the Map US", "https://2024.stateofthemap.us/"),
-    # ("WikiConference North America", "https://wikiconference.org/wiki/2024/Main_Page"),
-    # ("PyCon DE", "https://2024.pycon.de/"),
-    # ("PyData London", "https://pydata.org/london2024"),
-    # ("Pass the SALT", "https://2024.pass-the-salt.org/"),
-    ("SotM Baltics", "https://2024.sotm-baltics.org/"),
-    ("EuroSciPy", "https://www.euroscipy.org/2024/"),
-    # ("EuroPython", "https://ep2024.europython.eu/"),
-    ("Semantic Web in Libraries", "https://swib.org/swib24/"),
-    ("SotM Africa", "https://2024.stateofthemap.africa/"),
-    ("FOSS4G Oceania", "https://2024.foss4g-oceania.org/"),
-    # ("All Things Open", "https://2024.allthingsopen.org/"),
-    ("GLAMhack24", "https://opendata.ch/events/glamhack2024/"),
+    "Resource Not Found",
 ]
 
 
@@ -148,19 +127,55 @@ def send_mail(subject: str, body: str) -> None:
     for header_name, value in config["mail_headers"].items():
         msg[header_name] = value
 
-    s = smtplib.SMTP(SMTP_HOST)
-    s.sendmail(mail_from, [MAIL_TO_ADDRESS], msg.as_string())
+    s = smtplib.SMTP(config["mail"]["smtp_host"])
+    s.sendmail(mail_from_address, [mail_to_address], msg.as_string())
     s.quit()
 
 
+def load_yaml(name: str) -> typing.Any:
+    """Load the YAML file whose path is stored in config["data"][name].
+
+    Raises FileNotFoundError (from open) when the file does not exist.
+    """
+    filename = os.path.expanduser(config["data"][name])
+    # Use a context manager so the file handle is closed after parsing.
+    with open(filename, encoding="utf-8") as fh:
+        return yaml.safe_load(fh)
+
+
 def main(show_not_live: bool = False) -> None:
     """Check each conference."""
-    for name, url in conferences:
-        live, msg = check_conference(name, url)
-        if not live:
-            continue
-        body = f"{name}\n{url}\nWeb page title: {msg}"
-        send_mail(f"Conference site live: {name}", body)
+    today = date.today()
+    this_year = today.year
+
+    # An empty YAML file parses as None; fall back to empty containers.
+    conferences = load_yaml("conferences") or {}
+    live_conferences = load_yaml("live") or []
+    live_set = {(c["conference"], c["year"]) for c in live_conferences}
+
+    new_live = False
+
+    for name, src_url in conferences.items():
+        for year in this_year, this_year + 1:
+            if (name, year) in live_set:
+                continue
+            # Raise instead of assert: asserts are stripped under "python -O".
+            if "{year}" not in src_url:
+                raise ValueError(f"no {{year}} placeholder in URL for {name}")
+            url = src_url.format(year=year)
+            live, msg = check_conference(name, url)
+            if not live:
+                continue
+            body = f"{name}\n{url}\nWeb page title: {msg}"
+            send_mail(f"Conference site live: {name}", body)
+
+            new_live = True
+            live_conferences.append({"conference": name, "year": year, "live": today})
+
+    if new_live:
+        live_filename = os.path.expanduser(config["data"]["live"])
+        with open(live_filename, "w", encoding="utf-8") as out:
+            yaml.dump(live_conferences, stream=out, sort_keys=False)
 
 
 if __name__ == "__main__":