Validate accommodation YAML

This commit is contained in:
Edward Betts 2024-09-23 09:18:35 +01:00
parent 20e8515bb7
commit 9f6ed9c372
2 changed files with 63 additions and 8 deletions

View file

@ -10,6 +10,30 @@ import httpx
url = "https://www.gwr.com/your-tickets/choosing-your-ticket/advance-tickets"
def parse_date_string(date_str: str) -> date:
    """Parse date string from HTML.

    The text is expected to look like ``"Friday 27 September 2024"`` (full
    weekday name, day of month, full month name, four-digit year). The year
    may be omitted, in which case the current year is assumed.

    Args:
        date_str: Date text, possibly padded with whitespace.

    Returns:
        The parsed calendar date.

    Raises:
        ValueError: If the text (including an empty string) does not match
            the expected format.
    """
    # Cell text taken from HTML can carry stray whitespace; without stripping,
    # a trailing space would make the year check below fail and a second year
    # would be appended.
    date_str = date_str.strip()
    # Slice instead of index so an empty string falls through to strptime and
    # is reported as a ValueError like any other malformed input.
    if not date_str[-1:].isdigit():  # If the year is missing, use the current year
        date_str += f" {date.today().year}"
    return datetime.strptime(date_str, "%A %d %B %Y").date()
def extract_dates(html: str) -> None | dict[str, date]:
"""Extract dates from HTML."""
pattern = re.compile(
r"<tr>\s*<td>(Weekdays|Saturdays|Sundays)</td>*"
+ r"\s*<td>(.*?)(?:\*\*)?</td>\s*</tr>",
)
if not pattern.search(html):
return None
return {
match.group(1): parse_date_string(match.group(2))
for match in pattern.finditer(html)
}
def extract_weekday_date(html: str) -> date | None:
"""Furthest date of GWR advance ticket booking."""
# Compile a regular expression pattern to match the relevant table row
@ -18,15 +42,10 @@ def extract_weekday_date(html: str) -> date | None:
)
# Search the HTML for the pattern
if not (match := pattern.search(html)):
if match := pattern.search(html):
return parse_date_string(match.group(1))
else:
return None
date_str = match.group(1)
# If the year is missing, use the current year
if not date_str[-1].isdigit():
date_str += f" {date.today().year}"
return datetime.strptime(date_str, "%A %d %B %Y").date()
async def advance_tickets_page_html(data_dir: str, ttl: int = 60 * 60 * 6) -> str:

View file

@ -5,6 +5,9 @@ import os
import typing
from datetime import date, timedelta
import yaml
from rich.pretty import pprint
import agenda
import agenda.conference
import agenda.data
@ -54,6 +57,38 @@ def check_events() -> None:
print(len(events), "events")
def check_accommodation() -> None:
    """Check accommodation.

    Loads ``accommodation.yaml`` from ``data_dir`` and validates every stay:

    * the fields ``type``, ``name``, ``country``, ``location``, ``trip``,
      ``from`` and ``to`` must all be present;
    * ``latitude`` and ``longitude`` are optional, but if either is present
      both must be, and both must be ``int`` or ``float``.

    On success the number of stays is printed.

    Raises:
        AssertionError: If a stay fails validation. The offending stay is
            pretty-printed before the exception propagates.
    """
    required_fields = (
        "type",
        "name",
        "country",
        "location",
        "trip",
        "from",
        "to",
    )
    coordinate_fields = ("latitude", "longitude")

    filepath = os.path.join(data_dir, "accommodation.yaml")
    # Use a context manager so the file handle is closed deterministically.
    with open(filepath) as f:
        accommodation_list = yaml.safe_load(f)

    for stay in accommodation_list:
        try:
            # Raise explicitly rather than using ``assert`` so the checks
            # still run when Python is started with -O.
            missing = [field for field in required_fields if field not in stay]
            if missing:
                raise AssertionError(f"missing fields: {', '.join(missing)}")

            present = [key for key in coordinate_fields if key in stay]
            if present:
                if len(present) != len(coordinate_fields):
                    raise AssertionError(
                        "latitude and longitude must be given together"
                    )
                if not all(
                    isinstance(stay[key], (int, float)) for key in coordinate_fields
                ):
                    raise AssertionError("latitude and longitude must be numbers")
        except AssertionError:
            # Show which entry is broken before failing.
            pprint(stay)
            raise

    print(len(accommodation_list), "stays")
def check_airports() -> None:
"""Check airports."""
airports = typing.cast(
@ -89,6 +124,7 @@ def check() -> None:
check_trains()
check_conferences()
check_events()
check_accommodation()
check_airports()
check_stations()
check_airlines()