Various improvements

This commit is contained in:
Edward Betts 2026-02-03 20:51:43 +00:00
parent b856d90d14
commit 92243863d7

View file

@ -6,6 +6,7 @@ import html
import os import os
import re import re
import smtplib import smtplib
import sys
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from datetime import date from datetime import date
@ -22,6 +23,8 @@ from urllib3.util.url import parse_url
from conference import LiveConference, config, load_yaml from conference import LiveConference, config, load_yaml
IS_TTY = sys.stdout.isatty()
class AbsoluteDNSAdapter(HTTPAdapter): class AbsoluteDNSAdapter(HTTPAdapter):
"""A custom adapter for requests to ensure hostnames are treated as absolute.""" """A custom adapter for requests to ensure hostnames are treated as absolute."""
@ -105,9 +108,17 @@ not_here_list = [
"This page doesn't currently exist", "This page doesn't currently exist",
"ERROR 503 - Service Unavailable", "ERROR 503 - Service Unavailable",
"ERROR 503", "ERROR 503",
"503 Service Unavailable",
"401 Authorization Required", "401 Authorization Required",
"Authorization Required", "Authorization Required",
"used Cloudflare to restrict access", "used Cloudflare to restrict access",
"Error 1014",
"CNAME Cross-User Banned",
"looks like there's no page here",
"404 page cant be found either",
"503 self-signed certificate",
"504 Gateway Timeout",
"<h2>Pages</h2>",
] ]
@ -205,6 +216,12 @@ class Conference:
self.response = r self.response = r
if r.url.endswith("404.html") or r.url.endswith("404.htm"):
return (False, "URL ends with 404.html/404.htm", r.url)
if not r.text:
return (False, "empty response", r.url)
not_here = find_not_here_message(r.text) not_here = find_not_here_message(r.text)
if ( if (
len(r.text) < 2048 len(r.text) < 2048
@ -228,10 +245,17 @@ class Conference:
def check_web_site(self) -> bool: def check_web_site(self) -> bool:
"""Check if an individual web site is live.""" """Check if an individual web site is live."""
assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url
if IS_TTY:
print(f"Checking {self.name} {self.year}: {self.url}")
live, msg, redirect_to_url = self.check() live, msg, redirect_to_url = self.check()
if not live: if not live:
if IS_TTY:
print(f" Not live: {msg}")
return False return False
if IS_TTY:
print(f" Live! Title: {msg}")
og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items()) og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items())
if og: if og:
@ -247,6 +271,8 @@ class Conference:
body += f"Web page title: {msg}{og}" "" body += f"Web page title: {msg}{og}" ""
send_mail(f"Conference site live: {self.name} - {self.year}", body) send_mail(f"Conference site live: {self.name} - {self.year}", body)
if IS_TTY:
print(f" Email sent")
return True return True
@ -304,6 +330,7 @@ def find_new_conference_web_sites(
class NoAliasDumper(yaml.SafeDumper): class NoAliasDumper(yaml.SafeDumper):
"""Dumper that disables YAML anchors and aliases.""" """Dumper that disables YAML anchors and aliases."""
def ignore_aliases(self, data): def ignore_aliases(self, data):
"""Skip alias generation.""" """Skip alias generation."""
return True # disables anchors and aliases return True # disables anchors and aliases
@ -311,10 +338,19 @@ class NoAliasDumper(yaml.SafeDumper):
def main(show_not_live: bool = False) -> None: def main(show_not_live: bool = False) -> None:
"""Check fow new conference web sites.""" """Check fow new conference web sites."""
if IS_TTY:
print("Loading existing live conferences...")
live: list[LiveConference] = load_yaml("live") live: list[LiveConference] = load_yaml("live")
if IS_TTY:
print(f"Found {len(live)} existing live conferences")
print("\nChecking for new conference websites...")
if not (new := find_new_conference_web_sites(date.today(), live)): if not (new := find_new_conference_web_sites(date.today(), live)):
if IS_TTY:
print("\nNo new conference websites found")
return return
if IS_TTY:
print(f"\n{len(new)} new conference(s) found! Updating live.yaml...")
live_filename = os.path.expanduser(config["data"]["live"]) live_filename = os.path.expanduser(config["data"]["live"])
with open(live_filename, "w") as out: with open(live_filename, "w") as out:
yaml.dump( yaml.dump(
@ -325,6 +361,8 @@ def main(show_not_live: bool = False) -> None:
allow_unicode=True, allow_unicode=True,
default_flow_style=False, default_flow_style=False,
) )
if IS_TTY:
print(f"Updated {live_filename}")
if __name__ == "__main__": if __name__ == "__main__":