diff --git a/check.py b/check.py
index 61b46b1..62349ec 100755
--- a/check.py
+++ b/check.py
@@ -6,6 +6,7 @@ import html
import os
import re
import smtplib
+import sys
import warnings
from dataclasses import dataclass
from datetime import date
@@ -22,6 +23,8 @@ from urllib3.util.url import parse_url
from conference import LiveConference, config, load_yaml
+IS_TTY = sys.stdout.isatty()
+
class AbsoluteDNSAdapter(HTTPAdapter):
"""A custom adapter for requests to ensure hostnames are treated as absolute."""
@@ -105,9 +108,17 @@ not_here_list = [
"This page doesn't currently exist",
"ERROR 503 - Service Unavailable",
"ERROR 503",
+ "503 Service Unavailable",
"401 Authorization Required",
"Authorization Required",
"used Cloudflare to restrict access",
+ "Error 1014",
+ "CNAME Cross-User Banned",
+ "looks like there's no page here",
+ "404 page can’t be found either",
+ "503 self-signed certificate",
+ "504 Gateway Timeout",
+ "
Pages
",
]
@@ -205,6 +216,12 @@ class Conference:
self.response = r
+ if r.url.endswith("404.html") or r.url.endswith("404.htm"):
+ return (False, "URL ends with 404.html/404.htm", r.url)
+
+ if not r.text:
+ return (False, "empty response", r.url)
+
not_here = find_not_here_message(r.text)
if (
len(r.text) < 2048
@@ -228,10 +245,17 @@ class Conference:
def check_web_site(self) -> bool:
"""Check if an individual web site is live."""
assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url
+ if IS_TTY:
+ print(f"Checking {self.name} {self.year}: {self.url}")
live, msg, redirect_to_url = self.check()
if not live:
+ if IS_TTY:
+ print(f" Not live: {msg}")
return False
+ if IS_TTY:
+ print(f" Live! Title: {msg}")
+
og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items())
if og:
@@ -247,6 +271,8 @@ class Conference:
body += f"Web page title: {msg}{og}" ""
send_mail(f"Conference site live: {self.name} - {self.year}", body)
+ if IS_TTY:
+ print(f" Email sent")
return True
@@ -304,6 +330,7 @@ def find_new_conference_web_sites(
class NoAliasDumper(yaml.SafeDumper):
"""Dumper that disables YAML anchors and aliases."""
+
def ignore_aliases(self, data):
"""Skip alias generation."""
return True # disables anchors and aliases
@@ -311,10 +338,19 @@ class NoAliasDumper(yaml.SafeDumper):
def main(show_not_live: bool = False) -> None:
"""Check fow new conference web sites."""
+ if IS_TTY:
+ print("Loading existing live conferences...")
live: list[LiveConference] = load_yaml("live")
+ if IS_TTY:
+ print(f"Found {len(live)} existing live conferences")
+ print("\nChecking for new conference websites...")
if not (new := find_new_conference_web_sites(date.today(), live)):
+ if IS_TTY:
+ print("\nNo new conference websites found")
return
+ if IS_TTY:
+ print(f"\n{len(new)} new conference(s) found! Updating live.yaml...")
live_filename = os.path.expanduser(config["data"]["live"])
with open(live_filename, "w") as out:
yaml.dump(
@@ -325,6 +361,8 @@ def main(show_not_live: bool = False) -> None:
allow_unicode=True,
default_flow_style=False,
)
+ if IS_TTY:
+ print(f"Updated {live_filename}")
if __name__ == "__main__":