Various improvements
This commit is contained in:
parent
b856d90d14
commit
92243863d7
1 changed files with 38 additions and 0 deletions
38
check.py
38
check.py
|
|
@ -6,6 +6,7 @@ import html
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import smtplib
|
import smtplib
|
||||||
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
|
@ -22,6 +23,8 @@ from urllib3.util.url import parse_url
|
||||||
|
|
||||||
from conference import LiveConference, config, load_yaml
|
from conference import LiveConference, config, load_yaml
|
||||||
|
|
||||||
|
IS_TTY = sys.stdout.isatty()
|
||||||
|
|
||||||
|
|
||||||
class AbsoluteDNSAdapter(HTTPAdapter):
|
class AbsoluteDNSAdapter(HTTPAdapter):
|
||||||
"""A custom adapter for requests to ensure hostnames are treated as absolute."""
|
"""A custom adapter for requests to ensure hostnames are treated as absolute."""
|
||||||
|
|
@ -105,9 +108,17 @@ not_here_list = [
|
||||||
"This page doesn't currently exist",
|
"This page doesn't currently exist",
|
||||||
"ERROR 503 - Service Unavailable",
|
"ERROR 503 - Service Unavailable",
|
||||||
"ERROR 503",
|
"ERROR 503",
|
||||||
|
"503 Service Unavailable",
|
||||||
"401 Authorization Required",
|
"401 Authorization Required",
|
||||||
"Authorization Required",
|
"Authorization Required",
|
||||||
"used Cloudflare to restrict access",
|
"used Cloudflare to restrict access",
|
||||||
|
"Error 1014",
|
||||||
|
"CNAME Cross-User Banned",
|
||||||
|
"looks like there's no page here",
|
||||||
|
"404 page can’t be found either",
|
||||||
|
"503 self-signed certificate",
|
||||||
|
"504 Gateway Timeout",
|
||||||
|
"<h2>Pages</h2>",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -205,6 +216,12 @@ class Conference:
|
||||||
|
|
||||||
self.response = r
|
self.response = r
|
||||||
|
|
||||||
|
if r.url.endswith("404.html") or r.url.endswith("404.htm"):
|
||||||
|
return (False, "URL ends with 404.html/404.htm", r.url)
|
||||||
|
|
||||||
|
if not r.text:
|
||||||
|
return (False, "empty response", r.url)
|
||||||
|
|
||||||
not_here = find_not_here_message(r.text)
|
not_here = find_not_here_message(r.text)
|
||||||
if (
|
if (
|
||||||
len(r.text) < 2048
|
len(r.text) < 2048
|
||||||
|
|
@ -228,10 +245,17 @@ class Conference:
|
||||||
def check_web_site(self) -> bool:
|
def check_web_site(self) -> bool:
|
||||||
"""Check if an individual web site is live."""
|
"""Check if an individual web site is live."""
|
||||||
assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url
|
assert "{year}" in self.src_url or "{two_digit_year}" in self.src_url
|
||||||
|
if IS_TTY:
|
||||||
|
print(f"Checking {self.name} {self.year}: {self.url}")
|
||||||
live, msg, redirect_to_url = self.check()
|
live, msg, redirect_to_url = self.check()
|
||||||
if not live:
|
if not live:
|
||||||
|
if IS_TTY:
|
||||||
|
print(f" Not live: {msg}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
if IS_TTY:
|
||||||
|
print(f" Live! Title: {msg}")
|
||||||
|
|
||||||
og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items())
|
og = "".join(f"\n{key}: {value}" for key, value in self.og_tags().items())
|
||||||
|
|
||||||
if og:
|
if og:
|
||||||
|
|
@ -247,6 +271,8 @@ class Conference:
|
||||||
|
|
||||||
body += f"Web page title: {msg}{og}" ""
|
body += f"Web page title: {msg}{og}" ""
|
||||||
send_mail(f"Conference site live: {self.name} - {self.year}", body)
|
send_mail(f"Conference site live: {self.name} - {self.year}", body)
|
||||||
|
if IS_TTY:
|
||||||
|
print(f" Email sent")
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
@ -304,6 +330,7 @@ def find_new_conference_web_sites(
|
||||||
|
|
||||||
class NoAliasDumper(yaml.SafeDumper):
|
class NoAliasDumper(yaml.SafeDumper):
|
||||||
"""Dumper that disables YAML anchors and aliases."""
|
"""Dumper that disables YAML anchors and aliases."""
|
||||||
|
|
||||||
def ignore_aliases(self, data):
|
def ignore_aliases(self, data):
|
||||||
"""Skip alias generation."""
|
"""Skip alias generation."""
|
||||||
return True # disables anchors and aliases
|
return True # disables anchors and aliases
|
||||||
|
|
@ -311,10 +338,19 @@ class NoAliasDumper(yaml.SafeDumper):
|
||||||
|
|
||||||
def main(show_not_live: bool = False) -> None:
|
def main(show_not_live: bool = False) -> None:
|
||||||
"""Check for new conference web sites."""
|
"""Check for new conference web sites."""
|
||||||
|
if IS_TTY:
|
||||||
|
print("Loading existing live conferences...")
|
||||||
live: list[LiveConference] = load_yaml("live")
|
live: list[LiveConference] = load_yaml("live")
|
||||||
|
if IS_TTY:
|
||||||
|
print(f"Found {len(live)} existing live conferences")
|
||||||
|
print("\nChecking for new conference websites...")
|
||||||
if not (new := find_new_conference_web_sites(date.today(), live)):
|
if not (new := find_new_conference_web_sites(date.today(), live)):
|
||||||
|
if IS_TTY:
|
||||||
|
print("\nNo new conference websites found")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if IS_TTY:
|
||||||
|
print(f"\n{len(new)} new conference(s) found! Updating live.yaml...")
|
||||||
live_filename = os.path.expanduser(config["data"]["live"])
|
live_filename = os.path.expanduser(config["data"]["live"])
|
||||||
with open(live_filename, "w") as out:
|
with open(live_filename, "w") as out:
|
||||||
yaml.dump(
|
yaml.dump(
|
||||||
|
|
@ -325,6 +361,8 @@ def main(show_not_live: bool = False) -> None:
|
||||||
allow_unicode=True,
|
allow_unicode=True,
|
||||||
default_flow_style=False,
|
default_flow_style=False,
|
||||||
)
|
)
|
||||||
|
if IS_TTY:
|
||||||
|
print(f"Updated {live_filename}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue