Compare commits
No commits in common. "28865b078363c1023fe272c8caa99b39c32a80a1" and "43f8b6faa94b412b1d6bf8e37effa27c91180787" have entirely different histories.
28865b0783
...
43f8b6faa9
39
check.py
39
check.py
|
@ -9,7 +9,6 @@ import warnings
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from email.utils import formatdate, make_msgid
|
from email.utils import formatdate, make_msgid
|
||||||
from urllib.parse import urlparse, urlunparse
|
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
import yaml
|
import yaml
|
||||||
|
@ -71,7 +70,6 @@ not_here_list = [
|
||||||
"Test Page for the Apache HTTP Server",
|
"Test Page for the Apache HTTP Server",
|
||||||
"Site not found · GitHub Pages",
|
"Site not found · GitHub Pages",
|
||||||
"504: Gateway time-out",
|
"504: Gateway time-out",
|
||||||
"504 Gateway Time-out",
|
|
||||||
"502 Bad Gateway",
|
"502 Bad Gateway",
|
||||||
"This page doesn't exist (404)",
|
"This page doesn't exist (404)",
|
||||||
"Coming soon",
|
"Coming soon",
|
||||||
|
@ -90,8 +88,6 @@ not_here_list = [
|
||||||
"looks like there's no page here",
|
"looks like there's no page here",
|
||||||
"404 page",
|
"404 page",
|
||||||
"Database Error",
|
"Database Error",
|
||||||
"You are not authorized to access this page",
|
|
||||||
"Attention Required! | Cloudflare",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@ -106,25 +102,8 @@ def get_title(html: str) -> str:
|
||||||
return m.group(1).strip() if m and m.group(1) else "no title"
|
return m.group(1).strip() if m and m.group(1) else "no title"
|
||||||
|
|
||||||
|
|
||||||
def normalize_url(url: str) -> str:
|
def check_conference(name: str, url: str) -> tuple[bool, str]:
|
||||||
"""
|
|
||||||
Normalize the URL by parsing and reconstructing to ensure uniformity.
|
|
||||||
|
|
||||||
This handles cases like differing schemes, casing in the domain
|
|
||||||
and trailing slashes.
|
|
||||||
"""
|
|
||||||
# Parse the URL into components
|
|
||||||
parsed_url = urlparse(url)
|
|
||||||
# Normalize the domain to lowercase and remove any trailing dot
|
|
||||||
normalized_netloc = parsed_url.netloc.lower().rstrip(".")
|
|
||||||
# Reconstruct the URL with normalized components
|
|
||||||
return urlunparse(parsed_url._replace(netloc=normalized_netloc))
|
|
||||||
|
|
||||||
|
|
||||||
def check_conference(name: str, src_url: str, year: int) -> tuple[bool, str]:
|
|
||||||
"""Check if conference is live."""
|
"""Check if conference is live."""
|
||||||
url = src_url.format(year=year)
|
|
||||||
past_url = src_url.format(year=year - 1)
|
|
||||||
try:
|
try:
|
||||||
# SotM Baltics has an invalid TLS certificate, but we don't care
|
# SotM Baltics has an invalid TLS certificate, but we don't care
|
||||||
r = s.get(url, verify=False)
|
r = s.get(url, verify=False)
|
||||||
|
@ -132,19 +111,6 @@ def check_conference(name: str, src_url: str, year: int) -> tuple[bool, str]:
|
||||||
return (False, "connection refused")
|
return (False, "connection refused")
|
||||||
|
|
||||||
not_here = find_not_here_message(r.text)
|
not_here = find_not_here_message(r.text)
|
||||||
if (
|
|
||||||
len(r.text) < 2048
|
|
||||||
and 'http-equiv="refresh"' in r.text
|
|
||||||
and str(year) not in r.text
|
|
||||||
):
|
|
||||||
return (False, "redirect to URL without year")
|
|
||||||
|
|
||||||
if str(year) not in r.url:
|
|
||||||
return (False, "redirect to URL without year")
|
|
||||||
|
|
||||||
if normalize_url(r.url) == normalize_url(past_url):
|
|
||||||
return (False, "redirect to previous year")
|
|
||||||
|
|
||||||
return (False, not_here) if not_here else (True, get_title(r.text))
|
return (False, not_here) if not_here else (True, get_title(r.text))
|
||||||
|
|
||||||
|
|
||||||
|
@ -174,8 +140,7 @@ def send_mail(subject: str, body: str) -> None:
|
||||||
def check_conference_web_site(name: str, src_url: str, year: int) -> bool:
|
def check_conference_web_site(name: str, src_url: str, year: int) -> bool:
|
||||||
"""Check if an individual web site is live."""
|
"""Check if an individual web site is live."""
|
||||||
assert "{year}" in src_url
|
assert "{year}" in src_url
|
||||||
live, msg = check_conference(name, src_url, year)
|
live, msg = check_conference(name, url := src_url.format(year=year))
|
||||||
url = src_url.format(year=year)
|
|
||||||
if live:
|
if live:
|
||||||
body = f"{name}\n{url}\nWeb page title: {msg}"
|
body = f"{name}\n{url}\nWeb page title: {msg}"
|
||||||
send_mail(f"Conference site live: {name} - {year}", body)
|
send_mail(f"Conference site live: {name} - {year}", body)
|
||||||
|
|
|
@ -6,9 +6,6 @@ import typing
|
||||||
from datetime import date
|
from datetime import date
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
config_file_path = os.path.expanduser(
|
config_file_path = os.path.expanduser(
|
||||||
os.path.join(
|
os.path.join(
|
||||||
|
|
Loading…
Reference in a new issue