211 lines
6 KiB
Python
Executable file
211 lines
6 KiB
Python
Executable file
#!/usr/bin/python3
|
|
|
|
import json
|
|
import re
|
|
import sys
|
|
import typing
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from zoneinfo import ZoneInfo
|
|
|
|
import lxml.html
|
|
import pycountry
|
|
import yaml
|
|
|
|
# Shorthand for the loosely-typed, string-keyed dicts (decoded JSON and the
# resulting booking record) that the functions in this file pass around.
StrDict = dict[str, typing.Any]
|
|
|
|
|
|
def build_datetime(date_str: str, time_str: str, tz_name: str) -> datetime:
    """
    Build a timezone-aware datetime from separate date, time and zone strings.

    The date and time are joined with a "T" and parsed by
    datetime.fromisoformat(); the parsed wall-clock value is then tagged with
    ZoneInfo(tz_name).  The clock reading itself is not shifted.
    """
    wall_clock = datetime.fromisoformat(f"{date_str}T{time_str}")
    zone = ZoneInfo(tz_name)
    return wall_clock.replace(tzinfo=zone)
|
|
|
|
|
|
def list_to_dict(items: list) -> dict[str, Any]:
    """
    Convert a flat list of alternating keys and values into a dict.

    ``[k0, v0, k1, v1, ...]`` becomes ``{k0: v0, k1: v1, ...}``.  When a key
    occurs more than once, the value paired with its last occurrence wins.

    Raises:
        IndexError: if ``items`` has an odd length, i.e. the final key has no
            value to pair with.
    """
    if len(items) % 2:
        # Same exception type the bare indexing used to produce, but with a
        # message that says what is actually wrong with the input.
        raise IndexError(
            "list_to_dict() needs alternating keys and values, but got an odd "
            f"number of items ({len(items)}); last key {items[-1]!r} has no value"
        )
    # Even positions are keys, odd positions are the matching values.
    return dict(zip(items[::2], items[1::2]))
|
|
|
|
|
|
def extract_country_code(address: str) -> str | None:
    """
    Return the lower-cased ISO 3166-1 alpha-2 code of a country named in *address*.

    Each pycountry entry's ``name`` and, when present, ``official_name`` is
    searched for case-insensitively as a whole word or phrase.  When several
    countries match, the one with the longest matching name wins, so an
    address in "Nigeria" is not attributed to "Niger", nor "Papua New Guinea"
    to "Guinea".

    Returns:
        The two-letter code in lower case, or None when no country name is
        found in the address.
    """
    address_lower = address.lower()
    best_code: str | None = None
    best_length = 0
    for country in pycountry.countries:
        candidates = (country.name, getattr(country, "official_name", None))
        for candidate in candidates:
            if not candidate:
                continue
            needle = candidate.lower()
            if len(needle) <= best_length:
                # Cannot beat the current best match; skip the search.
                continue
            # Lookarounds rather than \b so that names ending in punctuation
            # (for example "U.S.") can still match at the end of the address.
            pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
            if re.search(pattern, address_lower):
                best_code = str(country.alpha_2.lower())
                best_length = len(needle)
    return best_code
|
|
|
|
|
|
def get_json_blob(tree) -> str:
    """
    Return the first text node of the element with id "data-injector-instances".

    Raises IndexError when the XPath query yields no text nodes.
    """
    element_id = "data-injector-instances"
    text_nodes = tree.xpath(f'//*[@id="{element_id}"]/text()')
    return text_nodes[0]
|
|
|
|
|
|
def get_ui_state(tree) -> StrDict:
    """
    Pull the "uiState" data out of the page's embedded JSON.

    The text of the ``data-injector-instances`` element is decoded as JSON,
    walk_tree() locates the first "uiState" value in it, and the first element
    of that value (a flat list of alternating keys and values) is turned into
    a dict by list_to_dict().
    """
    payload = json.loads(get_json_blob(tree))
    ui_state_entries = walk_tree(payload, "uiState")
    return list_to_dict(ui_state_entries[0])
|
|
|
|
|
|
def get_reservation_data(ui_state: StrDict) -> StrDict:
    """
    Index the reservation's scheduled-event rows by each row's "id".

    Reads ``ui_state["reservation"]["scheduled_event"]["rows"]``; if two rows
    share an id, the later row replaces the earlier one.
    """
    rows = ui_state["reservation"]["scheduled_event"]["rows"]
    rows_by_id = {}
    for row in rows:
        rows_by_id[row["id"]] = row
    return rows_by_id
|
|
|
|
|
|
def get_room_url(tree) -> str:
|
|
for e in tree.xpath('//a[@data-testid="reservation-destination-link"]'):
|
|
href = e.get("href")
|
|
assert isinstance(href, str)
|
|
if not href.startswith("/room"):
|
|
continue
|
|
return "https://www.airbnb.co.uk" + href
|
|
|
|
|
|
def get_price_from_reservation(reservation: StrDict) -> str:
    """
    Return the total price of a reservation as a string without the "£" sign.

    The price is read from ``reservation["payment_summary"]["subtitle"]``,
    which must look like "Total cost: £<amount>".

    Raises:
        KeyError: if the payment summary or its subtitle is missing.
        TypeError: if the subtitle is not a string.
        ValueError: if the subtitle lacks the "Total cost: " prefix or the
            amount does not start with "£".
    """
    price_string = reservation["payment_summary"]["subtitle"]
    # Explicit checks rather than assert: assertions are removed under
    # "python -O", which would let malformed input produce a bogus price.
    if not isinstance(price_string, str):
        raise TypeError(
            f"payment summary subtitle must be a string, got {type(price_string).__name__}"
        )

    prefix = "Total cost: "
    if not price_string.startswith(prefix):
        raise ValueError(f"unexpected payment summary subtitle: {price_string!r}")

    price = price_string[len(prefix) :]
    if not price.startswith("£"):
        raise ValueError(f"price is not in pounds sterling: {price_string!r}")
    return price[1:]
|
|
|
|
|
|
def extract_booking_from_html(html_file: str) -> StrDict:
    """
    Extract booking information from a saved Airbnb reservation HTML page.

    The confirmation code is taken from the reservation-details URL found in
    the page text; everything else comes from the page's "uiState" JSON (via
    get_ui_state) and its reservation rows (via get_reservation_data).

    Args:
        html_file: Path of the HTML file to read.

    Returns:
        A dict describing the stay.  "from" and "to" are timezone-aware
        datetimes in the listing's timezone, "price" is the amount as a
        string without the currency sign, and "currency" is always "GBP".
        "url" is the listing URL as returned by get_room_url().

    Raises:
        ValueError: if no confirmation code is present in the page, or the
            guests subtitle is not of the form "<n> guest" / "<n> guests".
    """
    with open(html_file, "r", encoding="utf-8") as f:
        text_content = f.read()

    m_code = re.search(
        r"/trips/v1/reservation-details/ro/RESERVATION2_CHECKIN/([A-Z0-9]+)",
        text_content,
    )
    if m_code is None:
        raise ValueError(f"{html_file}: no reservation confirmation code found")
    confirmation_code = m_code.group(1)

    tree = lxml.html.parse(html_file)
    root = tree.getroot()
    try:
        ui_state = get_ui_state(tree)
    except Exception:
        # Name the offending file next to the traceback (both on stderr),
        # then let the original error propagate.
        print(html_file, file=sys.stderr)
        raise

    reservation = get_reservation_data(ui_state)

    guests_subtitle = reservation["guests"]["subtitle"]
    m_guests = re.match(r"^(\d+) guests?$", guests_subtitle)
    if m_guests is None:
        raise ValueError(
            f"{html_file}: unrecognised guests subtitle {guests_subtitle!r}"
        )
    # NOTE(review): the subtitle counts guests; it is reported as the number
    # of adults. Confirm that this is intended.
    number_of_adults = int(m_guests.group(1))

    price = get_price_from_reservation(reservation)

    metadata = ui_state["reservation"]["metadata"]
    country_code = metadata["country"].lower()

    # The location is whatever follows the last " in " of the marquee title
    # (the whole title when it contains no " in ").
    title = reservation["dynamic_marquee_title_image_v3"]["title"]
    location = title.rpartition(" in ")[2]

    checkin_checkout = reservation["checkin_checkout_arrival_guide"]
    check_in_time = checkin_checkout["leading_subtitle"]
    check_out_time = checkin_checkout["trailing_subtitle"]

    check_in = build_datetime(
        metadata["check_in_date"], check_in_time, metadata["timezone"]
    )
    check_out = build_datetime(
        metadata["check_out_date"], check_out_time, metadata["timezone"]
    )

    address = reservation["map"]["address"]

    # Prefer the listing name from the reservation rows; fall back to the
    # text of the page's first <h1> when that row is absent.
    if "header_action.pdp" in reservation:
        name = reservation["header_action.pdp"]["subtitle"]
    else:
        name = root.findtext(".//h1")

    booking = {
        "type": "apartment",
        "operator": "airbnb",
        "name": name,
        "location": location,
        "booking_reference": confirmation_code,
        "booking_url": f"https://www.airbnb.co.uk/trips/v1/reservation-details/ro/RESERVATION2_CHECKIN/{confirmation_code}",
        "address": address,
        "country": country_code,
        "latitude": metadata["lat"],
        "longitude": metadata["lng"],
        "timezone": metadata["timezone"],
        "from": check_in,
        "to": check_out,
        "price": price,
        "currency": "GBP",
        "number_of_adults": number_of_adults,
    }

    booking["url"] = get_room_url(tree)

    return booking
|
|
|
|
|
|
def walk_tree(data: Any, want_key: str) -> Any:
    """
    Depth-first search of nested dicts and lists for the key *want_key*.

    A dict that directly contains the key yields its value immediately and is
    not searched any deeper.  Otherwise the dict's values (or a list's items)
    are searched in order, and the first result that is not None is returned.

    Returns None when the key is not found, or only found with a None value.
    """
    if isinstance(data, dict):
        if want_key in data:
            return data[want_key]
        children = data.values()
    elif isinstance(data, list):
        children = data
    else:
        # Scalars and any other types have nothing to descend into.
        return None

    for child in children:
        found = walk_tree(child, want_key)
        if found is not None:
            return found
    return None
|
|
|
|
|
|
def main() -> None:
    """
    Convert the HTML files named on the command line into a YAML list.

    The file names are processed in sorted order and the resulting bookings
    are printed to standard output as one YAML document, keeping each
    booking's key order.
    """
    bookings = []
    for path in sorted(sys.argv[1:]):
        parsed = extract_booking_from_html(path)
        assert parsed
        bookings.append(parsed)

    print(yaml.dump(bookings, sort_keys=False))
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|