Validate accommodation YAML

This commit is contained in:
Edward Betts 2024-09-23 09:18:35 +01:00
parent 20e8515bb7
commit 9f6ed9c372
2 changed files with 63 additions and 8 deletions

View file

@ -10,6 +10,30 @@ import httpx
url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets" url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets"
def parse_date_string(date_str: str) -> date:
    """Parse a date string taken from the HTML, e.g. "Monday 23 September 2024".

    Leading and trailing whitespace is ignored. If the text does not end
    with a digit, the year is taken to be missing and the current year is
    appended before parsing.

    Raises:
        ValueError: if the text does not match the ``%A %d %B %Y`` format
            (this includes an empty or whitespace-only string).
    """
    # Table cells can carry stray whitespace; without stripping, a value such
    # as "... 2024 " would look like it has no year and get a second one.
    date_str = date_str.strip()
    # Slice rather than index so an empty string falls through to strptime
    # and fails with ValueError like any other malformed input.
    if not date_str[-1:].isdigit():
        date_str += f" {date.today().year}"
    return datetime.strptime(date_str, "%A %d %B %Y").date()
def extract_dates(html: str) -> None | dict[str, date]:
"""Extract dates from HTML."""
pattern = re.compile(
r"<tr>\s*<td>(Weekdays|Saturdays|Sundays)</td>*"
+ r"\s*<td>(.*?)(?:\*\*)?</td>\s*</tr>",
)
if not pattern.search(html):
return None
return {
match.group(1): parse_date_string(match.group(2))
for match in pattern.finditer(html)
}
def extract_weekday_date(html: str) -> date | None: def extract_weekday_date(html: str) -> date | None:
"""Furthest date of GWR advance ticket booking.""" """Furthest date of GWR advance ticket booking."""
# Compile a regular expression pattern to match the relevant table row # Compile a regular expression pattern to match the relevant table row
@ -18,15 +42,10 @@ def extract_weekday_date(html: str) -> date | None:
) )
# Search the HTML for the pattern # Search the HTML for the pattern
if not (match := pattern.search(html)): if match := pattern.search(html):
return parse_date_string(match.group(1))
else:
return None return None
date_str = match.group(1)
# If the year is missing, use the current year
if not date_str[-1].isdigit():
date_str += f" {date.today().year}"
return datetime.strptime(date_str, "%A %d %B %Y").date()
async def advance_tickets_page_html(data_dir: str, ttl: int = 60 * 60 * 6) -> str: async def advance_tickets_page_html(data_dir: str, ttl: int = 60 * 60 * 6) -> str:

View file

@ -5,6 +5,9 @@ import os
import typing import typing
from datetime import date, timedelta from datetime import date, timedelta
import yaml
from rich.pretty import pprint
import agenda import agenda
import agenda.conference import agenda.conference
import agenda.data import agenda.data
@ -54,6 +57,38 @@ def check_events() -> None:
print(len(events), "events") print(len(events), "events")
def check_accommodation() -> None:
    """Check accommodation.

    Loads ``accommodation.yaml`` from ``data_dir`` and asserts that every
    stay has all required fields. If a stay has either ``latitude`` or
    ``longitude`` it must have both, and both must be int or float.

    On the first failing stay the entry is pretty-printed and the
    AssertionError is re-raised. On success the number of stays is printed.
    """
    required_fields = (
        "type",
        "name",
        "country",
        "location",
        "trip",
        "from",
        "to",
    )
    coordinate_keys = ("latitude", "longitude")

    filepath = os.path.join(data_dir, "accommodation.yaml")
    # Use a context manager so the file handle is closed once parsed.
    with open(filepath) as f:
        accommodation_list = yaml.safe_load(f)

    for stay in accommodation_list:
        try:
            missing = [field for field in required_fields if field not in stay]
            assert not missing, f"missing fields: {missing}"

            if "latitude" in stay or "longitude" in stay:
                # Coordinates are optional, but only as a complete pair.
                assert (
                    "latitude" in stay and "longitude" in stay
                ), "latitude and longitude must be given together"
                assert all(
                    isinstance(stay[key], (int, float)) for key in coordinate_keys
                ), "latitude and longitude must be numbers"
        except AssertionError:
            # Show the offending entry before the failure propagates.
            pprint(stay)
            raise

    print(len(accommodation_list), "stays")
def check_airports() -> None: def check_airports() -> None:
"""Check airports.""" """Check airports."""
airports = typing.cast( airports = typing.cast(
@ -89,6 +124,7 @@ def check() -> None:
check_trains() check_trains()
check_conferences() check_conferences()
check_events() check_events()
check_accommodation()
check_airports() check_airports()
check_stations() check_stations()
check_airlines() check_airlines()