eurotunnel-scrape/web_view.py

105 lines
2.8 KiB
Python
Executable file

#!/usr/bin/python3
import os
from dataclasses import dataclass
from datetime import UTC, datetime, date
from decimal import Decimal
import flask
import lxml.html
import pytz
# Flask application object; the "/" route defined in this module is registered on it.
app = flask.Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally at import time, and
# the __main__ entry point binds to 0.0.0.0 -- confirm this is never exposed
# to an untrusted network.
app.debug = True
# Directory that get_filename() searches for saved "<timestamp>_<direction>.html"
# pages. "~edward" expands to the home directory of the user "edward".
data_loc = os.path.expanduser("~edward/lib/data/eurotunnel")
def get_filename(direction: str) -> tuple[datetime, str]:
    """Locate the most recent saved price page for one direction of travel.

    Candidate files are those in ``data_loc`` whose name ends with
    ``_<direction>.html``. The part of the name before that suffix is parsed
    as a ``%Y-%m-%d_%H%M%S`` timestamp; because that format sorts
    lexicographically in chronological order, the largest file name is the
    newest page.

    Args:
        direction: Suffix selecting the set of pages, e.g. ``"outbound"`` or
            ``"return"``.

    Returns:
        A ``(dt, filename)`` tuple. ``dt`` is the timestamp taken from the
        file name, interpreted as UTC and converted to Europe/London;
        ``filename`` is the full path of the file inside ``data_loc``.

    Raises:
        FileNotFoundError: If ``data_loc`` holds no file for ``direction``
            (or the directory itself is missing).
        ValueError: If the newest file's name does not start with a
            timestamp in the expected format.
    """
    end = f"_{direction}.html"
    most_recent = max(
        (f for f in os.listdir(data_loc) if f.endswith(end)),
        default=None,
    )
    if most_recent is None:
        # Without this check max() fails with an unhelpful
        # "max() arg is an empty sequence" error.
        raise FileNotFoundError(f"no file ending in {end!r} found in {data_loc}")

    filename = os.path.join(data_loc, most_recent)
    timestamp = most_recent.removesuffix(end)
    # The file name carries no zone information; it is treated as UTC here.
    dt_utc = datetime.strptime(timestamp, "%Y-%m-%d_%H%M%S").replace(tzinfo=UTC)
    dt = dt_utc.astimezone(pytz.timezone("Europe/London"))
    return (dt, filename)
@dataclass
class Train:
    """One train departure parsed from a saved results page.

    Instances are built by ``get_tickets``, which fills in ``dep`` and ``arr``
    first and attaches ``price`` afterwards when it finds a matching ticket.
    """

    # Departure time as text with the colon removed (get_tickets strips ":").
    dep: str
    # Arrival time as text with the "Arrive " prefix and the colon removed.
    arr: str
    # Price taken from a "standard" ticket; stays None when get_tickets finds
    # no such ticket for this departure.
    price: Decimal | None = None
def get_tickets(filename: str) -> tuple[date, list[Train]]:
    """Parse a saved results page into its travel date and its trains.

    The travel date comes from the ``data-id`` attribute of the selected day
    ``div`` (its first character is skipped and the rest is read as an ISO
    date). Every ``div`` carrying a ``data-mission`` attribute becomes one
    ``Train``; its departure is the text of the first ``<b>`` and its arrival
    the text of the first ``<small>``, both with colons removed. Tickets
    whose last CSS class is ``standard`` then supply the price of the train
    identified by the ``data-mission-time`` attribute two levels above the
    ticket element.

    Args:
        filename: Path of the saved HTML page.

    Returns:
        ``(d, trains)``: the travel date and the trains in document order.
        Trains without a standard ticket keep ``price`` as ``None``.

    Raises:
        ValueError: If the selected-day element, its ``data-id``, or a
            departure/arrival text is missing, or the date is malformed.
        KeyError: If a standard ticket refers to a mission time for which no
            train was found.
    """
    tree = lxml.html.parse(filename)
    root = tree.getroot()

    day_div = root.find(".//div[@class='col-md-1 als-item selected']")
    # Compare with None explicitly: an lxml element is falsy when it has no
    # child elements, so a plain truth test can reject a div that was found.
    if day_div is None:
        raise ValueError(f"selected day element not found in {filename}")
    day_id = day_div.get("data-id")
    if not day_id:
        raise ValueError(f"selected day element has no data-id in {filename}")
    # The first character of data-id is a prefix, not part of the ISO date.
    d = date.fromisoformat(day_id[1:])

    trains: list[Train] = []
    by_time: dict[str, Train] = {}
    for mission in root.findall(".//div[@data-mission]"):
        dep_text = mission.findtext(".//b")
        if not dep_text:
            raise ValueError(f"train without departure time in {filename}")
        arr_text = mission.findtext(".//small")
        if not arr_text:
            raise ValueError(f"train without arrival time in {filename}")

        dep = dep_text.replace(":", "")
        arr = arr_text.removeprefix("Arrive ").replace(":", "")
        item = Train(dep=dep, arr=arr)
        trains.append(item)
        by_time[dep] = item

    for ticket in root.xpath("//div[contains(@class, 'ticket')]"):
        # contains() also matches class names that merely include "ticket",
        # so confirm it is present as a whole class token. split() without
        # an argument tolerates repeated or trailing whitespace.
        classes = ticket.get("class").split()
        if "ticket" not in classes:
            continue
        # The final class names the fare type; only "standard" is used.
        if classes[-1] != "standard":
            continue

        # NOTE(review): assumes data-mission-time uses the same colon-less
        # form as the departure keys in by_time -- confirm against the page.
        mission_time = ticket.getparent().getparent().get("data-mission-time")
        # NOTE(review): assumes every standard ticket has an onclick whose
        # third comma-separated argument is the price wrapped in one
        # character on each side (presumably quotes) -- confirm.
        onclick = ticket.get("onclick").split(",")
        price = Decimal(onclick[2][1:-1])
        by_time[mission_time].price = price

    return (d, trains)
@app.route("/")
def index() -> str:
    """Render the index page from the newest outbound and return pages.

    For each direction the newest saved page is located and parsed, and the
    trains are narrowed to those departing after a direction-specific time
    and arriving strictly between 07:00 and 22:00. Times are compared as
    strings, which assumes zero-padded, colon-less "HHMM" values.
    """
    scraped_out, path_out = get_filename("outbound")
    day_out, trains_out = get_tickets(path_out)
    shown_out = []
    for train in trains_out:
        # Outbound: leave after 08:00.
        if train.dep > "0800" and "0700" < train.arr < "2200":
            shown_out.append(train)

    scraped_back, path_back = get_filename("return")
    day_back, trains_back = get_tickets(path_back)
    shown_back = []
    for train in trains_back:
        # Return: leave after 11:00.
        if train.dep > "1100" and "0700" < train.arr < "2200":
            shown_back.append(train)

    context = {
        "out_ts": scraped_out,
        "out_date": day_out,
        "out": shown_out,
        "back_ts": scraped_back,
        "back_date": day_back,
        "back": shown_back,
    }
    return flask.render_template("index.html", **context)
# Start the server only when this file is executed directly as a script.
# host="0.0.0.0" makes it listen on every network interface.
# NOTE(review): app.debug is set to True at module level, so this serves the
# app in debug mode on all interfaces -- confirm that is intended.
if __name__ == "__main__":
    app.run(host="0.0.0.0")