diff --git a/check.py b/check.py index efcfa08..9cf0ce1 100755 --- a/check.py +++ b/check.py @@ -9,7 +9,6 @@ import warnings from datetime import date from email.mime.text import MIMEText from email.utils import formatdate, make_msgid -from urllib.parse import urlparse, urlunparse import requests import yaml @@ -71,7 +70,6 @@ not_here_list = [ "Test Page for the Apache HTTP Server", "Site not found · GitHub Pages", "504: Gateway time-out", - "504 Gateway Time-out", "502 Bad Gateway", "This page doesn't exist (404)", "Coming soon", @@ -90,8 +88,6 @@ not_here_list = [ "looks like there's no page here", "404 page", "Database Error", - "You are not authorized to access this page", - "Attention Required! | Cloudflare", ] @@ -106,25 +102,8 @@ def get_title(html: str) -> str: return m.group(1).strip() if m and m.group(1) else "no title" -def normalize_url(url: str) -> str: - """ - Normalize the URL by parsing and reconstructing to ensure uniformity. - - This handles cases like differing schemes, casing in the domain - and trailing slashes. - """ - # Parse the URL into components - parsed_url = urlparse(url) - # Normalize the domain to lowercase and remove any trailing dot - normalized_netloc = parsed_url.netloc.lower().rstrip(".") - # Reconstruct the URL with normalized components - return urlunparse(parsed_url._replace(netloc=normalized_netloc)) - - -def check_conference(name: str, src_url: str, year: int) -> tuple[bool, str]: +def check_conference(name: str, url: str) -> tuple[bool, str]: """Check if conference is live.""" - url = src_url.format(year=year) - past_url = src_url.format(year=year - 1) try: # SotM Baltics has an invalid TLS certificate, but we don't care r = s.get(url, verify=False) @@ -132,19 +111,6 @@ def check_conference(name: str, src_url: str, year: int) -> tuple[bool, str]: return (False, "connection refused") not_here = find_not_here_message(r.text) - if ( - len(r.text) < 2048 - and 'http-equiv="refresh"' in r.text - and str(year) not in r.text - ): - return (False, "redirect to URL without year") - - if str(year) not in r.url: - return (False, "redirect to URL without year") - - if normalize_url(r.url) == normalize_url(past_url): - return (False, "redirect to previous year") - return (False, not_here) if not_here else (True, get_title(r.text)) @@ -174,8 +140,7 @@ def send_mail(subject: str, body: str) -> None: def check_conference_web_site(name: str, src_url: str, year: int) -> bool: """Check if an individual web site is live.""" assert "{year}" in src_url - live, msg = check_conference(name, src_url, year) - url = src_url.format(year=year) + live, msg = check_conference(name, url := src_url.format(year=year)) if live: body = f"{name}\n{url}\nWeb page title: {msg}" send_mail(f"Conference site live: {name} - {year}", body) diff --git a/conference/__init__.py b/conference/__init__.py index 415e3d2..6891012 100644 --- a/conference/__init__.py +++ b/conference/__init__.py @@ -6,9 +6,6 @@ import typing from datetime import date import yaml -from dotenv import load_dotenv - -load_dotenv() config_file_path = os.path.expanduser( os.path.join(