70 lines
2.1 KiB
Python
70 lines
2.1 KiB
Python
"""Check GWR for advance ticket booking date."""
|
|
|
|
import os
|
|
import re
|
|
from datetime import date, datetime
|
|
from time import time
|
|
|
|
import httpx
|
|
|
|
# GWR web page requested by advance_tickets_page_html().
url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets"
|
|
|
|
|
|
def parse_date_string(date_str: str) -> date:
    """Parse a date string taken from the HTML into a ``date``.

    The text must read as weekday name, day, month name and year, for
    example ``"Friday 7 March 2025"``. If the text does not end in a digit
    the year is treated as missing and the current year is appended before
    parsing.

    Args:
        date_str: Date text; surrounding whitespace is ignored.

    Returns:
        The parsed calendar date.

    Raises:
        ValueError: If the text is empty or does not match the format.
    """
    # Table cells may carry stray spaces or newlines. Left in place, trailing
    # whitespace would be mistaken for a missing year and leading whitespace
    # would make strptime reject the string.
    cleaned = date_str.strip()
    if not cleaned:
        raise ValueError("empty date string")

    if not cleaned[-1].isdigit():  # If the year is missing, use the current year
        # NOTE(review): a year-less date is always placed in the current year,
        # even if the page means next year (e.g. viewed in late December).
        cleaned += f" {date.today().year}"

    return datetime.strptime(cleaned, "%A %d %B %Y").date()
|
|
|
|
|
|
def extract_dates(html: str) -> None | dict[str, date]:
|
|
"""Extract dates from HTML."""
|
|
pattern = re.compile(
|
|
r"<tr>\s*<td>(Weekdays|Saturdays|Sundays)</td>*"
|
|
+ r"\s*<td>(.*?)(?:\*\*)?</td>\s*</tr>",
|
|
)
|
|
|
|
if not pattern.search(html):
|
|
return None
|
|
|
|
return {
|
|
match.group(1): parse_date_string(match.group(2))
|
|
for match in pattern.finditer(html)
|
|
}
|
|
|
|
|
|
def extract_weekday_date(html: str) -> date | None:
|
|
"""Furthest date of GWR advance ticket booking."""
|
|
# Compile a regular expression pattern to match the relevant table row
|
|
pattern = re.compile(
|
|
r"<tr>\s*<td>Weekdays</td>\s*<td>(.*?)(?:\*\*)?</td>\s*</tr>", re.DOTALL
|
|
)
|
|
|
|
# Search the HTML for the pattern
|
|
if match := pattern.search(html):
|
|
return parse_date_string(match.group(1))
|
|
else:
|
|
return None
|
|
|
|
|
|
async def advance_tickets_page_html(
    data_dir: str, ttl: int = 60 * 60 * 6, force_cache: bool = False
) -> str:
    """Get advance-tickets web page HTML with cache.

    The page is kept as ``advance-tickets.html`` inside ``data_dir``. The
    cached copy is returned when it is younger than ``ttl`` seconds or when
    ``force_cache`` is set; otherwise the page is downloaded from ``url`` and
    the cache file is rewritten.

    Args:
        data_dir: Directory holding the cache file.
        ttl: Maximum age of the cache file in seconds (default six hours).
        force_cache: Return the cached copy regardless of its age.

    Returns:
        The page HTML.

    Raises:
        FileNotFoundError: If ``force_cache`` is set but no cache file exists.
    """
    filename = os.path.join(data_dir, "advance-tickets.html")

    # A missing/unreadable file counts as infinitely old (mtime 0); asking
    # for the mtime directly avoids an exists-then-stat race.
    try:
        mtime = os.path.getmtime(filename)
    except OSError:
        mtime = 0

    if force_cache or (time() - mtime) < ttl:  # use cache
        with open(filename, encoding="utf-8") as cache_file:
            return cache_file.read()

    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        html = r.text

    # Fixed encoding so the cache round-trips regardless of the locale, and
    # a context manager so the handle is closed (and flushed) promptly.
    with open(filename, "w", encoding="utf-8") as cache_file:
        cache_file.write(html)
    return html
|
|
|
|
|
|
async def advance_ticket_date(data_dir: str, force_cache: bool = False) -> date | None:
    """Get GWR advance tickets date with cache.

    Args:
        data_dir: Directory passed to ``advance_tickets_page_html`` for its
            cache file.
        force_cache: Passed through to ``advance_tickets_page_html``.

    Returns:
        Whatever ``extract_weekday_date`` finds in the page HTML: a date, or
        ``None`` when the Weekdays row is absent.
    """
    page = await advance_tickets_page_html(data_dir, force_cache=force_cache)
    weekday_date = extract_weekday_date(page)
    return weekday_date
|