diff --git a/check.py b/check.py
index 5d3bb74..81e028f 100755
--- a/check.py
+++ b/check.py
@@ -2,7 +2,6 @@
"""Check if conference websites are live."""
-import html
import os
import re
import smtplib
@@ -12,7 +11,6 @@ from email.mime.text import MIMEText
from email.utils import formatdate, make_msgid
from urllib.parse import urlparse, urlunparse
-import cloudscraper
import requests
import yaml
from requests.adapters import HTTPAdapter
@@ -55,14 +53,8 @@ re_title = re.compile("<title>(.*?)</title>", re.DOTALL)
AGENT = config["browser"]["User-Agent"]
headers = {"User-Agent": AGENT, "Accept": "text/html"}
-s = cloudscraper.CloudScraper()
-s_no_dot = cloudscraper.CloudScraper()
-
-# s = requests.Session()
-# s.headers.update(headers)
-
-# s_no_dot = requests.Session()
-# s_no_dot.headers.update(headers)
+s = requests.Session()
+s.headers.update(headers)
# Create a session and mount the custom adapter for both HTTP and HTTPS requests.
adapter = AbsoluteDNSAdapter()
@@ -90,7 +82,7 @@ not_here_list = [
"Ooops! Could Not Find It",
"OpenStreetMap Authentication Proxy",
"Error 404",
- # "Under Construction",
+ "Under Construction",
"Page not found",
"Error 404: Page not found",
"Barcamptools",
@@ -101,23 +93,18 @@ not_here_list = [
"You are not authorized to access this page",
"Attention Required! | Cloudflare",
"This page doesn't currently exist",
- "ERROR 503 - Service Unavailable",
- "ERROR 503",
- "401 Authorization Required",
- "Authorization Required",
- "used Cloudflare to restrict access",
]
-def find_not_here_message(page_html: str) -> str | None:
+def find_not_here_message(html: str) -> str | None:
"""Find not here message in web page."""
- return next((not_here for not_here in not_here_list if not_here in page_html), None)
+ return next((not_here for not_here in not_here_list if not_here in html), None)
-def get_title(page_html: str) -> str:
+def get_title(html: str) -> str:
"""Title from web page."""
- m = re_title.search(page_html)
- return html.unescape(m.group(1).strip()) if m and m.group(1) else "no title"
+ m = re_title.search(html)
+ return m.group(1).strip() if m and m.group(1) else "no title"
def normalize_url(url: str) -> str:
@@ -141,10 +128,9 @@ def check_conference(
"""Check if conference is live."""
url = src_url.format(year=year)
past_url = src_url.format(year=year - 1)
- no_dot = {"bsideskbh.dk", "pif.camp"}
- session = s if all(hostname not in url for hostname in no_dot) else s_no_dot
try:
- r = session.get(url)
+ # SotM Baltics has an invalid TLS certificate, so certificate verification is disabled; NOTE: verify=False applies to every site checked, not just that one
+ r = s.get(url, verify=False)
except requests.exceptions.ConnectionError:
return (False, "connection refused", None)