diff --git a/check.py b/check.py index 61b46b1..62349ec 100755 --- a/check.py +++ b/check.py @@ -6,6 +6,7 @@ import html import os import re import smtplib +import sys import warnings from dataclasses import dataclass from datetime import date @@ -22,6 +23,8 @@ from urllib3.util.url import parse_url from conference import LiveConference, config, load_yaml +IS_TTY = sys.stdout.isatty() + class AbsoluteDNSAdapter(HTTPAdapter): """A custom adapter for requests to ensure hostnames are treated as absolute.""" @@ -105,9 +108,17 @@ not_here_list = [ "This page doesn't currently exist", "ERROR 503 - Service Unavailable", "ERROR 503", + "503 Service Unavailable", "401 Authorization Required", "Authorization Required", "used Cloudflare to restrict access", + "Error 1014", + "CNAME Cross-User Banned", + "looks like there's no page here", + "404 page can’t be found either", + "503 self-signed certificate", + "504 Gateway Timeout", + "

Pages

", ] @@ -205,6 +216,12 @@ class Conference: self.response = r + if r.url.endswith("404.html") or r.url.endswith("404.htm"): + return (False, "URL ends with 404.html/404.htm", r.url) + + if not r.text: + return (False, "empty response", r.url) + not_here = find_not_here_message(r.text) if ( len(r.text) < 2048 @@ -228,10 +245,17 @@ class Conference: def check_web_site(self) -> bool: """Check if an individual web site is live.""" assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url + if IS_TTY: + print(f"Checking {self.name} {self.year}: {self.url}") live, msg, redirect_to_url = self.check() if not live: + if IS_TTY: + print(f" Not live: {msg}") return False + if IS_TTY: + print(f" Live! Title: {msg}") + og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items()) if og: @@ -247,6 +271,8 @@ class Conference: body += f"Web page title: {msg}{og}" "" send_mail(f"Conference site live: {self.name} - {self.year}", body) + if IS_TTY: + print(f" Email sent") return True @@ -304,6 +330,7 @@ def find_new_conference_web_sites( class NoAliasDumper(yaml.SafeDumper): """Dumper that disables YAML anchors and aliases.""" + def ignore_aliases(self, data): """Skip alias generation.""" return True # disables anchors and aliases @@ -311,10 +338,19 @@ class NoAliasDumper(yaml.SafeDumper): def main(show_not_live: bool = False) -> None: """Check fow new conference web sites.""" + if IS_TTY: + print("Loading existing live conferences...") live: list[LiveConference] = load_yaml("live") + if IS_TTY: + print(f"Found {len(live)} existing live conferences") + print("\nChecking for new conference websites...") if not (new := find_new_conference_web_sites(date.today(), live)): + if IS_TTY: + print("\nNo new conference websites found") return + if IS_TTY: + print(f"\n{len(new)} new conference(s) found! Updating live.yaml...") live_filename = os.path.expanduser(config["data"]["live"]) with open(live_filename, "w") as out: yaml.dump( @@ -325,6 +361,8 @@ def main(show_not_live: bool = False) -> None: allow_unicode=True, default_flow_style=False, ) + if IS_TTY: + print(f"Updated {live_filename}") if __name__ == "__main__":