agenda/agenda/gwr.py

49 lines
1.5 KiB
Python

"""Check GWR for advance ticket booking date."""
import os
import re
from datetime import date, datetime
from time import time
import httpx
# GWR web page that advance_tickets_page_html() downloads and caches.
url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets"
def extract_weekday_date(html: str) -> date | None:
"""Furthest date of GWR advance ticket booking."""
# Compile a regular expression pattern to match the relevant table row
pattern = re.compile(
r"<tr>\s*<td>Weekdays</td>\s*<td>(.*?)(?:\*\*)?</td>\s*</tr>", re.DOTALL
)
# Search the HTML for the pattern
if not (match := pattern.search(html)):
return None
date_str = match.group(1)
# If the year is missing, use the current year
if not date_str[-1].isdigit():
date_str += f" {date.today().year}"
return datetime.strptime(date_str, "%A %d %B %Y").date()
async def advance_tickets_page_html(data_dir: str, ttl: int = 60 * 60 * 6) -> str:
    """Get advance-tickets web page HTML with cache.

    The page is stored as ``advance-tickets.html`` inside ``data_dir``. While
    that file is younger than ``ttl`` seconds its content is returned without
    any network access; otherwise the page is downloaded again.

    Args:
        data_dir: Directory that holds the cache file.
        ttl: Maximum age of the cache file in seconds (default six hours).

    Returns:
        The HTML text of the page.
    """
    filename = os.path.join(data_dir, "advance-tickets.html")
    try:
        mtime = os.path.getmtime(filename)
    except OSError:
        mtime = 0  # no readable cache file: treat it as expired

    if (time() - mtime) < ttl:  # use cache
        with open(filename, encoding="utf-8") as cache_file:
            return cache_file.read()

    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        html = r.text

    # Only cache a successful response, so an error page is not served from
    # the cache for the whole TTL; the body is still returned to the caller.
    if r.status_code == 200:
        with open(filename, "w", encoding="utf-8") as cache_file:
            cache_file.write(html)
    return html
async def advance_ticket_date(data_dir: str) -> date | None:
    """Return the GWR advance ticket date, using the cached page when possible.

    Fetches the advance-tickets page HTML via ``advance_tickets_page_html``
    (with ``data_dir`` as the cache directory) and hands it to
    ``extract_weekday_date``; the result is whatever that parser returns,
    including ``None``.
    """
    return extract_weekday_date(await advance_tickets_page_html(data_dir))