Various improvements

This commit is contained in:
Edward Betts 2026-02-03 20:51:43 +00:00
parent b856d90d14
commit 92243863d7

View file

@ -6,6 +6,7 @@ import html
import os
import re
import smtplib
import sys
import warnings
from dataclasses import dataclass
from datetime import date
@ -22,6 +23,8 @@ from urllib3.util.url import parse_url
from conference import LiveConference, config, load_yaml
# True when stdout is attached to a terminal. The progress messages in this
# module are printed only when this flag is set.
IS_TTY = sys.stdout.isatty()
class AbsoluteDNSAdapter(HTTPAdapter):
"""A custom adapter for requests to ensure hostnames are treated as absolute."""
@ -105,9 +108,17 @@ not_here_list = [
"This page doesn't currently exist",
"ERROR 503 - Service Unavailable",
"ERROR 503",
"503 Service Unavailable",
"401 Authorization Required",
"Authorization Required",
"used Cloudflare to restrict access",
"Error 1014",
"CNAME Cross-User Banned",
"looks like there's no page here",
"404 page cant be found either",
"503 self-signed certificate",
"504 Gateway Timeout",
"<h2>Pages</h2>",
]
@ -205,6 +216,12 @@ class Conference:
self.response = r
if r.url.endswith("404.html") or r.url.endswith("404.htm"):
return (False, "URL ends with 404.html/404.htm", r.url)
if not r.text:
return (False, "empty response", r.url)
not_here = find_not_here_message(r.text)
if (
len(r.text) < 2048
@ -228,10 +245,17 @@ class Conference:
def check_web_site(self) -> bool:
"""Check if an individual web site is live."""
assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url
if IS_TTY:
print(f"Checking {self.name} {self.year}: {self.url}")
live, msg, redirect_to_url = self.check()
if not live:
if IS_TTY:
print(f" Not live: {msg}")
return False
if IS_TTY:
print(f" Live! Title: {msg}")
og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items())
if og:
@ -247,6 +271,8 @@ class Conference:
body += f"Web page title: {msg}{og}" ""
send_mail(f"Conference site live: {self.name} - {self.year}", body)
if IS_TTY:
print(f" Email sent")
return True
@ -304,6 +330,7 @@ def find_new_conference_web_sites(
class NoAliasDumper(yaml.SafeDumper):
    """Safe YAML dumper that never emits anchors or aliases."""

    def ignore_aliases(self, data: object) -> bool:
        """Tell the dumper to ignore aliases for ``data``.

        Returns True unconditionally, which disables anchors and aliases
        for every object this dumper writes.
        """
        return True
@ -311,10 +338,19 @@ class NoAliasDumper(yaml.SafeDumper):
def main(show_not_live: bool = False) -> None:
"""Check for new conference web sites."""
if IS_TTY:
print("Loading existing live conferences...")
live: list[LiveConference] = load_yaml("live")
if IS_TTY:
print(f"Found {len(live)} existing live conferences")
print("\nChecking for new conference websites...")
if not (new := find_new_conference_web_sites(date.today(), live)):
if IS_TTY:
print("\nNo new conference websites found")
return
if IS_TTY:
print(f"\n{len(new)} new conference(s) found! Updating live.yaml...")
live_filename = os.path.expanduser(config["data"]["live"])
with open(live_filename, "w") as out:
yaml.dump(
@ -325,6 +361,8 @@ def main(show_not_live: bool = False) -> None:
allow_unicode=True,
default_flow_style=False,
)
if IS_TTY:
print(f"Updated {live_filename}")
if __name__ == "__main__":