diff --git a/agenda/gwr.py b/agenda/gwr.py
index 5806939..750af90 100644
--- a/agenda/gwr.py
+++ b/agenda/gwr.py
@@ -10,6 +10,46 @@ import httpx
 url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets"
 
 
+def parse_date_string(date_str: str) -> date:
+    """Parse a date string taken from the HTML.
+
+    The expected format is "%A %d %B %Y", for example "Friday 5 July 2024".
+    When the string does not end in a digit the year is taken to be missing
+    and the current year is appended before parsing.
+
+    Raises ValueError if the string is empty or does not match the format.
+    """
+    date_str = date_str.strip()
+    if not date_str:
+        raise ValueError("empty date string")
+
+    if not date_str[-1].isdigit():  # If the year is missing, use the current year
+        date_str += f" {date.today().year}"
+
+    return datetime.strptime(date_str, "%A %d %B %Y").date()
+
+
+def extract_dates(html: str) -> None | dict[str, date]:
+    """Extract dates from HTML, keyed by the captured day-type label.
+
+    Returns None when the pattern is not found in the HTML.
+    """
+    # NOTE(review): every element of this pattern is optional, so as written
+    # it can match the empty string; confirm no literal markup is missing.
+    pattern = re.compile(
+        r"\s*(Weekdays|Saturdays|Sundays)*"
+        + r"\s*(.*?)(?:\*\*)?\s*",
+    )
+
+    if not pattern.search(html):
+        return None
+
+    return {
+        match.group(1): parse_date_string(match.group(2))
+        for match in pattern.finditer(html)
+    }
+
+
 def extract_weekday_date(html: str) -> date | None:
     """Furthest date of GWR advance ticket booking."""
     # Compile a regular expression pattern to match the relevant table row
@@ -18,15 +58,9 @@ def extract_weekday_date(html: str) -> date | None:
     )
 
     # Search the HTML for the pattern
     if not (match := pattern.search(html)):
         return None
-    date_str = match.group(1)
-
-    # If the year is missing, use the current year
-    if not date_str[-1].isdigit():
-        date_str += f" {date.today().year}"
-
-    return datetime.strptime(date_str, "%A %d %B %Y").date()
+    return parse_date_string(match.group(1))
 
 
 async def advance_tickets_page_html(data_dir: str, ttl: int = 60 * 60 * 6) -> str:
diff --git a/validate_yaml.py b/validate_yaml.py
index ec34a52..4533034 100755
--- a/validate_yaml.py
+++ b/validate_yaml.py
@@ -5,6 +5,9 @@ import os
 import typing
 from datetime import date, timedelta
 
+import yaml
+from rich.pretty import pprint
+
 import agenda
 import agenda.conference
 import agenda.data
@@ -54,6 +57,46 @@ def check_events() -> None:
     print(len(events), "events")
 
 
+def check_accommodation() -> None:
+    """Check accommodation.
+
+    Every stay must have the fields type, name, country, location, trip,
+    from and to. latitude and longitude are optional, but must be given
+    together and be numbers. A stay that fails a check is pretty-printed
+    before the AssertionError is re-raised.
+    """
+    filepath = os.path.join(data_dir, "accommodation.yaml")
+    # Close the file promptly instead of leaving the handle to the GC.
+    with open(filepath) as f:
+        accommodation_list = yaml.safe_load(f)
+
+    for stay in accommodation_list:
+        try:
+            assert all(
+                field in stay
+                for field in (
+                    "type",
+                    "name",
+                    "country",
+                    "location",
+                    "trip",
+                    "from",
+                    "to",
+                )
+            )
+            if "latitude" in stay or "longitude" in stay:
+                assert "latitude" in stay and "longitude" in stay
+                assert all(
+                    isinstance(stay[key], (int, float))
+                    for key in ("latitude", "longitude")
+                )
+        except AssertionError:
+            pprint(stay)
+            raise
+
+    print(len(accommodation_list), "stays")
+
+
 def check_airports() -> None:
     """Check airports."""
     airports = typing.cast(
@@ -89,6 +132,7 @@ def check() -> None:
     check_trains()
     check_conferences()
     check_events()
+    check_accommodation()
     check_airports()
     check_stations()
     check_airlines()