Initial commit.

This commit is contained in:
Edward Betts 2026-03-30 19:34:46 +01:00
commit a8e0bd39e5
16 changed files with 981 additions and 0 deletions

22
.gitignore vendored Normal file
View file

@ -0,0 +1,22 @@
# Python
__pycache__/
*.pyc
*.pyo
.mypy_cache/
.ruff_cache/
*.egg-info/
dist/
build/
.venv/
venv/
# App
cache/
# Pytest
.pytest_cache/
# Editors
.idea/
.vscode/
*.swp

53
README.md Normal file
View file

@ -0,0 +1,53 @@
# Bristol Eurostar
Plan a trip from Bristol Temple Meads to Europe on Eurostar.
Combines GWR trains (Bristol → Paddington) with Eurostar services (St Pancras → destination) and shows all valid same-day connections, filtering by journey time and minimum/maximum transfer window at Paddington/St Pancras.
## Destinations
- Paris Gare du Nord
- Brussels Midi
- Lille Europe
- Amsterdam Centraal
## How it works
Train times are fetched from two sources simultaneously:
- **GWR** — scraped from [Realtime Trains](https://www.realtimetrains.co.uk/) using httpx
- **Eurostar** — scraped from the Eurostar timetable pages via the embedded `__NEXT_DATA__` JSON (no browser required)
Results are cached to disk by date and destination.
## Connection constraints
| | |
|---|---|
| Minimum Paddington → St Pancras | 75 min |
| Maximum Paddington → St Pancras | 2h 20m |
| Maximum Bristol → Paddington | 1h 50m |
## Setup
```bash
pip install -e ".[dev]"
```
## Running
```bash
flask --app app run
```
Then open http://localhost:5000.
## Tests
```bash
pytest
```
## License
MIT

96
app.py Normal file
View file

@ -0,0 +1,96 @@
import asyncio
from flask import Flask, render_template, redirect, url_for, request
from datetime import date, timedelta
from cache import get_cached, set_cached
import scraper.eurostar as eurostar_scraper
import scraper.realtime_trains as rtt_scraper
from trip_planner import combine_trips
# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Maps the URL slug used in /results/<slug>/<date> to the station name that is
# shown in the templates and handed to the Eurostar scraper.
DESTINATIONS = {
    'paris': 'Paris Gare du Nord',
    'brussels': 'Brussels Midi',
    'lille': 'Lille Europe',
    'amsterdam': 'Amsterdam Centraal',
}
async def _fetch_both(destination: str, travel_date: str, user_agent: str):
    """Run the GWR and Eurostar scrapers concurrently for one travel date.

    Returns a ``(gwr, eurostar)`` pair.  ``return_exceptions=True`` is passed
    to ``asyncio.gather``, so either element may be the exception raised by
    its scraper rather than a list of services; the caller must check.
    """
    pending = (
        rtt_scraper.fetch(travel_date, user_agent),
        eurostar_scraper.fetch(destination, travel_date, user_agent),
    )
    gwr_outcome, eurostar_outcome = await asyncio.gather(
        *pending, return_exceptions=True
    )
    return gwr_outcome, eurostar_outcome
@app.route('/')
def index():
    """Render the search form, with today's date as the default and minimum."""
    context = {
        'destinations': DESTINATIONS,
        'today': date.today().isoformat(),
    }
    return render_template('index.html', **context)
@app.route('/results/<slug>/<travel_date>')
def results(slug, travel_date):
    """Render the Bristol -> destination journeys for one date.

    Args:
        slug: Key into ``DESTINATIONS``, taken from the URL.
        travel_date: ISO date string, taken from the URL.

    Returns:
        The rendered ``results.html`` page, or a redirect to the index page
        when the slug is unknown or the date cannot be parsed.
    """
    destination = DESTINATIONS.get(slug)
    if not destination or not travel_date:
        return redirect(url_for('index'))

    # Parse the date before it is used for a cache filename or sent to the
    # scrapers.  A malformed date is treated like an unknown destination
    # instead of surfacing later as an unhandled ValueError.
    try:
        dt = date.fromisoformat(travel_date)
    except ValueError:
        return redirect(url_for('index'))
    # Use the canonical spelling so that the cache key and the scraper URLs
    # do not depend on how the date was written in the request URL.
    travel_date = dt.isoformat()

    user_agent = request.headers.get('User-Agent', rtt_scraper.DEFAULT_UA)

    cache_key = f"{travel_date}_{destination}"
    cached = get_cached(cache_key)
    error = None

    if cached:
        gwr_trains = cached['gwr']
        eurostar_trains = cached['eurostar']
        from_cache = True
    else:
        from_cache = False
        gwr_result, es_result = asyncio.run(
            _fetch_both(destination, travel_date, user_agent)
        )

        # _fetch_both returns the exception object in place of the result
        # when a scraper fails, so each side is checked separately.
        if isinstance(gwr_result, Exception):
            gwr_trains = []
            error = f"Could not fetch GWR trains: {gwr_result}"
        else:
            gwr_trains = gwr_result

        if isinstance(es_result, Exception):
            eurostar_trains = []
            msg = f"Could not fetch Eurostar times: {es_result}"
            error = f"{error}; {msg}" if error else msg
        else:
            eurostar_trains = es_result

        # Only cache complete results.  Cache entries are never expired, so
        # storing the empty list left behind by a failed fetch would hide
        # that operator's trains for this date until the file is deleted.
        if error is None and (gwr_trains or eurostar_trains):
            set_cached(cache_key, {'gwr': gwr_trains, 'eurostar': eurostar_trains})

    trips = combine_trips(gwr_trains, eurostar_trains, travel_date)

    prev_date = (dt - timedelta(days=1)).isoformat()
    next_date = (dt + timedelta(days=1)).isoformat()
    # dt.day gives the day without a leading zero on every platform; the
    # '%-d' strftime directive is a glibc extension.
    travel_date_display = f"{dt:%A} {dt.day} {dt:%B %Y}"

    return render_template(
        'results.html',
        trips=trips,
        destination=destination,
        travel_date=travel_date,
        slug=slug,
        prev_date=prev_date,
        next_date=next_date,
        travel_date_display=travel_date_display,
        gwr_count=len(gwr_trains),
        eurostar_count=len(eurostar_trains),
        from_cache=from_cache,
        error=error,
    )
# Development entry point: running this file directly starts Flask's built-in
# server with debug mode enabled.
if __name__ == '__main__':
    app.run(debug=True)

23
cache.py Normal file
View file

@ -0,0 +1,23 @@
import json
import os
# Directory holding the JSON cache files: a 'cache' folder next to this module.
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')
def _cache_path(key: str) -> str:
    """Return the path of the JSON file inside ``CACHE_DIR`` for *key*.

    Slashes and spaces in the key are replaced by underscores to form the
    file name.
    """
    filename = key.translate(str.maketrans('/ ', '__')) + '.json'
    return os.path.join(CACHE_DIR, filename)
def get_cached(key: str):
    """Return the cached value for *key*, or ``None`` on a cache miss.

    A missing file and an unreadable (truncated or corrupt) file are both
    reported as a miss, so the caller re-fetches and overwrites the entry
    rather than failing on every request for that key.
    """
    path = _cache_path(key)
    # Open directly instead of checking os.path.exists first: the file may
    # disappear between the check and the open.
    try:
        with open(path, encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        return None
def set_cached(key: str, data) -> None:
    """Write *data* as indented JSON to the cache file for *key*.

    The cache directory is created if it does not exist yet.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
    target = _cache_path(key)
    with open(target, 'w') as handle:
        json.dump(data, handle, indent=2)

28
pyproject.toml Normal file
View file

@ -0,0 +1,28 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"
[project]
name = "bristol-eurostar"
version = "0.1.0"
description = "Plan Bristol → Europe trips combining GWR and Eurostar services"
readme = "README.md"
license = "MIT"
authors = [
{ name = "Edward Betts" }
]
requires-python = ">=3.11"
dependencies = [
"flask",
"httpx",
"lxml",
"cssselect",
]
[project.optional-dependencies]
dev = [
"pytest",
]
[tool.pytest.ini_options]
testpaths = ["tests"]

5
requirements.txt Normal file
View file

@ -0,0 +1,5 @@
flask
httpx
lxml
cssselect
pytest

0
scraper/__init__.py Normal file
View file

90
scraper/eurostar.py Normal file
View file

@ -0,0 +1,90 @@
"""
Scrape Eurostar timetable via httpx.
The route-specific timetable pages are Next.js SSR — all departure data is
embedded in <script id="__NEXT_DATA__"> as JSON, so no browser / JS is needed.
URL pattern:
https://www.eurostar.com/uk-en/travel-info/timetable/
{origin_id}/{dest_id}/{origin_slug}/{dest_slug}?date=YYYY-MM-DD
Data path: props.pageProps.pageData.liveDepartures[]
    .origin.model.scheduledDepartureDateTime      → London departure
    .destination.model.scheduledArrivalDateTime   → destination arrival
(already filtered to the requested stop, not the final stop)
"""
import asyncio
import json
import re
import httpx
# User-Agent sent when the caller of fetch() does not supply one.
DEFAULT_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
# Timetable page for each supported destination, keyed by the destination
# name that fetch() receives.  Each URL follows the
# {origin_id}/{dest_id}/{origin_slug}/{dest_slug} pattern described in the
# module docstring; fetch() adds the date as a ?date= query parameter.
ROUTE_URLS = {
    'Paris Gare du Nord': (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8727100/london-st-pancras-intl/paris-gare-du-nord'
    ),
    'Brussels Midi': (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8814001/london-st-pancras-intl/brussels-midi'
    ),
    'Lille Europe': (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8722326/london-st-pancras-intl/lille-europe'
    ),
    'Amsterdam Centraal': (
        'https://www.eurostar.com/uk-en/travel-info/timetable/'
        '7015400/8400058/london-st-pancras-intl/amsterdam-centraal'
    ),
}
def _hhmm(dt_str: str | None) -> str | None:
"""'2026-03-30 09:34:00''09:34'"""
if not dt_str:
return None
m = re.search(r'(\d{2}):(\d{2}):\d{2}$', dt_str)
return f"{m.group(1)}:{m.group(2)}" if m else None
def _parse(html: str, destination: str) -> list[dict]:
    """Extract the day's services from a Eurostar timetable page.

    Reads the JSON inside the ``__NEXT_DATA__`` script tag and returns one
    dict per live departure with keys ``depart_st_pancras``,
    ``arrive_destination`` (both ``HH:MM``) and ``destination``, ordered by
    departure time.  Departures lacking either time are left out.  Returns an
    empty list when the page has no ``__NEXT_DATA__`` script tag.
    """
    script = re.search(r'<script id="__NEXT_DATA__"[^>]*>(.*?)</script>', html, re.DOTALL)
    if script is None:
        return []

    payload = json.loads(script.group(1))
    live = payload['props']['pageProps']['pageData']['liveDepartures']

    found = []
    for entry in live:
        departs = _hhmm(entry['origin']['model']['scheduledDepartureDateTime'])
        arrives = _hhmm(entry['destination']['model']['scheduledArrivalDateTime'])
        if not (departs and arrives):
            continue
        found.append({
            'depart_st_pancras': departs,
            'arrive_destination': arrives,
            'destination': destination,
        })

    found.sort(key=lambda svc: svc['depart_st_pancras'])
    return found
async def fetch(destination: str, travel_date: str,
                user_agent: str = DEFAULT_UA) -> list[dict]:
    """Download and parse the Eurostar timetable for one destination and date.

    *destination* must be a key of ``ROUTE_URLS``.  An HTTP error status
    raises via ``raise_for_status()``.
    """
    timetable_url = ROUTE_URLS[destination]
    request_headers = {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-GB,en;q=0.9',
    }
    client_options = {
        'headers': request_headers,
        'follow_redirects': True,
        'timeout': 20,
    }
    async with httpx.AsyncClient(**client_options) as client:
        response = await client.get(timetable_url, params={'date': travel_date})
        response.raise_for_status()
        return _parse(response.text, destination)
def get_eurostar_times(destination: str, travel_date: str,
                       user_agent: str = DEFAULT_UA) -> list[dict]:
    """Blocking wrapper around ``fetch()`` for CLI and test use."""
    coroutine = fetch(destination, travel_date, user_agent)
    return asyncio.run(coroutine)

View file

@ -0,0 +1,96 @@
"""
Scrape GWR trains from Bristol Temple Meads to London Paddington using Realtime Trains.
Uses httpx (not Playwright) with browser-like headers.
Two fetches run concurrently:
  BRI/to/PAD   → departure times from Bristol (div.time.plan.d)
  PAD/from/BRI → arrival times at Paddington (div.time.plan.a)
Matched by train ID (div.tid).
"""
import asyncio
import re
import httpx
import lxml.html
# Realtime Trains detailed-search URL templates.  fetch() fills in {date};
# the 0000-2359 segment asks for the whole day.
# Services from Bristol Temple Meads towards Paddington (read for departure times).
BRI_TO_PAD = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:BRI/to/gb-nr:PAD/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)
# Services at Paddington that came from Bristol (read for arrival times).
PAD_FROM_BRI = (
    "https://www.realtimetrains.co.uk/search/detailed/"
    "gb-nr:PAD/from/gb-nr:BRI/{date}/0000-2359"
    "?stp=WVS&show=pax-calls&order=wtt"
)
# User-Agent sent when the caller of fetch() does not supply one.
DEFAULT_UA = (
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
)
def _browser_headers(user_agent: str) -> dict:
return {
"User-Agent": user_agent,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
"Accept-Language": "en-GB,en;q=0.9",
"Accept-Encoding": "gzip, deflate, br",
"Connection": "keep-alive",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
}
def _fmt(hhmm: str) -> str:
"""Convert '0830''08:30'."""
hhmm = re.sub(r'[^0-9]', '', hhmm)
if len(hhmm) == 4:
return f"{hhmm[:2]}:{hhmm[2:]}"
return hhmm
def _parse_services(html: str, time_selector: str) -> dict[str, str]:
    """Map train ID to formatted time for each service on a servicelist page.

    Only the first ``div.servicelist`` is read.  A service is included when
    it has both a ``div.tid`` and an element matching *time_selector* whose
    text is non-blank.  Returns an empty dict when there is no servicelist.
    """
    doc = lxml.html.fromstring(html)
    lists = doc.cssselect('div.servicelist')
    if not lists:
        return {}

    times_by_tid = {}
    for link in lists[0].cssselect('a.service'):
        id_nodes = link.cssselect('div.tid')
        time_nodes = link.cssselect(time_selector)
        if not (id_nodes and time_nodes):
            continue
        raw_time = time_nodes[0].text_content().strip()
        if not raw_time:
            continue
        train_id = id_nodes[0].text_content().strip()
        times_by_tid[train_id] = _fmt(raw_time)
    return times_by_tid
async def fetch(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
    """Fetch GWR trains concurrently; returns [{'depart_bristol', 'arrive_paddington'}].

    Two Realtime Trains pages are requested in parallel: one for departure
    times at Bristol, one for arrival times at Paddington.  They are joined
    on train ID, and a train present on only one page is dropped.  The
    result is ordered by Bristol departure time.

    Raises:
        httpx.HTTPStatusError: if either page returns an HTTP error status.
    """
    headers = _browser_headers(user_agent)
    async with httpx.AsyncClient(headers=headers, follow_redirects=True, timeout=30) as client:
        r_bri, r_pad = await asyncio.gather(
            client.get(BRI_TO_PAD.format(date=date)),
            client.get(PAD_FROM_BRI.format(date=date)),
        )

    # Fail loudly on an error page (blocked, rate-limited, server error).
    # Otherwise it parses as "no servicelist" and is indistinguishable from
    # a day with no trains.
    for response in (r_bri, r_pad):
        response.raise_for_status()

    departures = _parse_services(r_bri.text, 'div.time.plan.d')
    arrivals = _parse_services(r_pad.text, 'div.time.plan.a')

    trains = [
        {'depart_bristol': dep, 'arrive_paddington': arr}
        for tid, dep in departures.items()
        if (arr := arrivals.get(tid))
    ]
    return sorted(trains, key=lambda t: t['depart_bristol'])
def get_gwr_trains(date: str, user_agent: str = DEFAULT_UA) -> list[dict]:
    """Blocking wrapper around ``fetch()`` for CLI and test use."""
    coroutine = fetch(date, user_agent)
    return asyncio.run(coroutine)

61
templates/base.html Normal file
View file

@ -0,0 +1,61 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Bristol to Europe via Eurostar</title>
<style>
*, *::before, *::after { box-sizing: border-box; }
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
background: #f0f4f8;
color: #1a202c;
margin: 0;
padding: 0;
}
header {
background: #00539f;
color: #fff;
padding: 1rem 2rem;
}
header h1 {
margin: 0;
font-size: 1.4rem;
font-weight: 600;
}
header p {
margin: 0.2rem 0 0;
font-size: 0.85rem;
opacity: 0.85;
}
main {
max-width: 960px;
margin: 2rem auto;
padding: 0 1rem;
}
.card {
background: #fff;
border-radius: 8px;
box-shadow: 0 1px 4px rgba(0,0,0,0.12);
padding: 2rem;
}
a { color: #00539f; }
</style>
</head>
<body>
<header>
<h1>Bristol to Europe via Eurostar</h1>
<p>GWR to Paddington &rarr; St Pancras &rarr; Eurostar</p>
</header>
<main>
{% block content %}{% endblock %}
</main>
</body>
</html>

43
templates/index.html Normal file
View file

@ -0,0 +1,43 @@
{% extends "base.html" %}
{% block content %}
<div class="card">
<h2 style="margin-top:0">Plan your journey</h2>
<form id="journey-form">
<div style="margin-bottom:1.2rem">
<label for="destination" style="display:block;font-weight:600;margin-bottom:0.4rem">
Eurostar destination
</label>
<select id="destination" name="destination" required
style="width:100%;padding:0.6rem 0.8rem;font-size:1rem;border:1px solid #cbd5e0;border-radius:4px">
<option value="" disabled selected>Select destination&hellip;</option>
{% for slug, name in destinations.items() %}
<option value="{{ slug }}">{{ name }}</option>
{% endfor %}
</select>
</div>
<div style="margin-bottom:1.5rem">
<label for="travel_date" style="display:block;font-weight:600;margin-bottom:0.4rem">
Travel date
</label>
<input type="date" id="travel_date" name="travel_date" required
min="{{ today }}" value="{{ today }}"
style="width:100%;padding:0.6rem 0.8rem;font-size:1rem;border:1px solid #cbd5e0;border-radius:4px">
</div>
<button type="submit"
style="background:#00539f;color:#fff;border:none;padding:0.75rem 2rem;
font-size:1rem;font-weight:600;border-radius:4px;cursor:pointer">
Search journeys
</button>
</form>
</div>
<script>
document.getElementById('journey-form').addEventListener('submit', function(e) {
e.preventDefault();
const slug = this.querySelector('[name="destination"]').value;
const date = this.querySelector('[name="travel_date"]').value;
if (slug && date) window.location.href = '/results/' + slug + '/' + date;
});
</script>
{% endblock %}

92
templates/results.html Normal file
View file

@ -0,0 +1,92 @@
{% extends "base.html" %}
{% block content %}
<p style="margin-bottom:1rem">
<a href="/">&larr; New search</a>
</p>
<div class="card" style="margin-bottom:1.5rem">
<h2 style="margin-top:0">
Bristol Temple Meads &rarr; {{ destination }}
</h2>
<div style="display:flex;align-items:center;gap:0.75rem;margin-bottom:0.5rem">
<a href="/results/{{ slug }}/{{ prev_date }}"
style="padding:0.3rem 0.75rem;border:1px solid #cbd5e0;border-radius:4px;
text-decoration:none;color:#00539f;font-size:0.9rem">&larr; Prev</a>
<strong>{{ travel_date_display }}</strong>
<a href="/results/{{ slug }}/{{ next_date }}"
style="padding:0.3rem 0.75rem;border:1px solid #cbd5e0;border-radius:4px;
text-decoration:none;color:#00539f;font-size:0.9rem">Next &rarr;</a>
</div>
<p style="color:#4a5568;margin:0">
{{ gwr_count }} GWR service{{ 's' if gwr_count != 1 }}
&nbsp;&middot;&nbsp;
{{ eurostar_count }} Eurostar service{{ 's' if eurostar_count != 1 }}
{% if from_cache %}
&nbsp;&middot;&nbsp; <span style="color:#718096;font-size:0.85rem">(cached)</span>
{% endif %}
</p>
{% if error %}
<div style="margin-top:1rem;padding:0.75rem 1rem;background:#fff5f5;border:1px solid #fc8181;border-radius:4px;color:#c53030">
<strong>Warning:</strong> {{ error }}
</div>
{% endif %}
</div>
{% if trips %}
<div class="card" style="overflow-x:auto">
<table style="width:100%;border-collapse:collapse;font-size:0.95rem">
<thead>
<tr style="border-bottom:2px solid #e2e8f0;text-align:left">
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart Bristol</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive Paddington</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Transfer</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Depart St&nbsp;Pancras</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Arrive {{ destination }}</th>
<th style="padding:0.6rem 0.8rem;white-space:nowrap">Total</th>
</tr>
</thead>
<tbody>
{% for trip in trips %}
<tr style="border-bottom:1px solid #e2e8f0{% if loop.index is odd %};background:#f7fafc{% endif %}">
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_bristol }}</td>
<td style="padding:0.6rem 0.8rem">
{{ trip.arrive_paddington }}
<span style="font-size:0.8rem;color:#718096">({{ trip.gwr_duration }})</span>
</td>
<td style="padding:0.6rem 0.8rem;color:#4a5568">
{{ trip.connection_duration }}
</td>
<td style="padding:0.6rem 0.8rem;font-weight:600">{{ trip.depart_st_pancras }}</td>
<td style="padding:0.6rem 0.8rem">{{ trip.arrive_destination }}</td>
<td style="padding:0.6rem 0.8rem;font-weight:600;color:#00539f">{{ trip.total_duration }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<p style="margin-top:1rem;font-size:0.82rem;color:#718096">
Paddington &rarr; St&nbsp;Pancras connection: 75&nbsp;min minimum, 2h&nbsp;20m maximum.
Eurostar times are from the general timetable and may vary; always check
<a href="https://www.eurostar.com" target="_blank" rel="noopener">eurostar.com</a> to book.
</p>
{% else %}
<div class="card" style="color:#4a5568;text-align:center;padding:3rem 2rem">
<p style="font-size:1.1rem;margin:0 0 0.5rem">No valid journeys found.</p>
<p style="font-size:0.9rem;margin:0">
{% if gwr_count == 0 and eurostar_count == 0 %}
Could not retrieve train data. Check your network connection or try again.
{% elif gwr_count == 0 %}
No GWR trains found for this date.
{% elif eurostar_count == 0 %}
No Eurostar services found for {{ destination }} on this date.
{% else %}
No GWR&nbsp;+&nbsp;Eurostar combination gives a connection of between 75&nbsp;minutes and 2h&nbsp;20m at Paddington/St&nbsp;Pancras.
{% endif %}
</p>
</div>
{% endif %}
{% endblock %}

View file

@ -0,0 +1,82 @@
import json
import pytest
from scraper.eurostar import _hhmm, _parse
# ---------------------------------------------------------------------------
# _hhmm
# ---------------------------------------------------------------------------
def test_hhmm_parses_datetime_string():
    """A 'date time' string is reduced to its HH:MM part."""
    result = _hhmm('2026-03-30 09:34:00')
    assert result == '09:34'


def test_hhmm_none_input():
    """None in, None out."""
    result = _hhmm(None)
    assert result is None


def test_hhmm_empty_string():
    """An empty string is treated like a missing value."""
    result = _hhmm('')
    assert result is None
# ---------------------------------------------------------------------------
# _parse
# ---------------------------------------------------------------------------
def _make_next_data(departures: list) -> str:
data = {
'props': {
'pageProps': {
'pageData': {
'liveDepartures': departures
}
}
}
}
return f'<script id="__NEXT_DATA__" type="application/json">{json.dumps(data)}</script>'
def _departure(dep_dt: str, arr_dt: str) -> dict:
return {
'origin': {'model': {'scheduledDepartureDateTime': dep_dt}},
'destination': {'model': {'scheduledArrivalDateTime': arr_dt}},
}
def test_parse_single_departure():
    """One departure yields one service dict with HH:MM times."""
    page = _make_next_data([_departure('2026-03-30 06:01:00', '2026-03-30 09:34:00')])
    expected = {
        'depart_st_pancras': '06:01',
        'arrive_destination': '09:34',
        'destination': 'Paris Gare du Nord',
    }
    assert _parse(page, 'Paris Gare du Nord') == [expected]


def test_parse_results_sorted_by_departure():
    """Services come back ordered by St Pancras departure time."""
    page = _make_next_data([
        _departure('2026-03-30 10:00:00', '2026-03-30 13:00:00'),
        _departure('2026-03-30 07:00:00', '2026-03-30 10:00:00'),
    ])
    first, second = _parse(page, 'Paris Gare du Nord')[:2]
    assert first['depart_st_pancras'] == '07:00'
    assert second['depart_st_pancras'] == '10:00'


def test_parse_skips_entries_with_missing_times():
    """Entries lacking a departure or an arrival time are dropped."""
    page = _make_next_data([
        _departure(None, '2026-03-30 09:34:00'),
        _departure('2026-03-30 08:00:00', None),
        _departure('2026-03-30 09:00:00', '2026-03-30 12:00:00'),
    ])
    parsed = _parse(page, 'Paris Gare du Nord')
    assert len(parsed) == 1
    assert parsed[0]['depart_st_pancras'] == '09:00'


def test_parse_no_next_data_returns_empty():
    """A page without the __NEXT_DATA__ script gives an empty list."""
    page = '<html><body>nothing here</body></html>'
    assert _parse(page, 'Paris Gare du Nord') == []


def test_parse_empty_departures():
    """An empty liveDepartures list gives an empty result."""
    page = _make_next_data([])
    assert _parse(page, 'Paris Gare du Nord') == []

71
tests/test_rtt_scraper.py Normal file
View file

@ -0,0 +1,71 @@
import pytest
from scraper.realtime_trains import _fmt, _parse_services
# ---------------------------------------------------------------------------
# _fmt
# ---------------------------------------------------------------------------
def test_fmt_four_digits():
    """Four bare digits gain a colon."""
    formatted = _fmt('0830')
    assert formatted == '08:30'


def test_fmt_already_colon():
    """An already formatted time is returned unchanged."""
    formatted = _fmt('08:30')
    assert formatted == '08:30'


def test_fmt_strips_non_digits():
    """Other separators are removed before formatting."""
    formatted = _fmt('08h30')
    assert formatted == '08:30'
# ---------------------------------------------------------------------------
# _parse_services
# ---------------------------------------------------------------------------
def _make_html(services: list[tuple[str, str]], time_class: str) -> str:
    """Build a minimal servicelist HTML with (train_id, time) pairs.

    *time_class* is added after ``time plan`` in each time div's class
    attribute, so 'd' or 'a' makes the div match the selectors used by the
    tests in this file.
    """
    items = ''
    for tid, time in services:
        items += f'''
<a class="service">
<div class="tid">{tid}</div>
<div class="time plan {time_class}">{time}</div>
</a>'''
    return f'<div class="servicelist">{items}</div>'
def test_parse_services_departures():
    """Departure times are keyed by train ID and formatted as HH:MM."""
    page = _make_html([('1A23', '0700'), ('2B45', '0830')], 'd')
    expected = {'1A23': '07:00', '2B45': '08:30'}
    assert _parse_services(page, 'div.time.plan.d') == expected


def test_parse_services_arrivals():
    """The arrival selector reads the 'a' time div."""
    page = _make_html([('1A23', '0845')], 'a')
    expected = {'1A23': '08:45'}
    assert _parse_services(page, 'div.time.plan.a') == expected


def test_parse_services_no_servicelist():
    """A page without a servicelist div gives an empty mapping."""
    parsed = _parse_services('<html></html>', 'div.time.plan.d')
    assert parsed == {}
def test_parse_services_skips_missing_time():
    """A service with no time div is left out; the other service is kept."""
    html = '''
<div class="servicelist">
<a class="service"><div class="tid">1A23</div></a>
<a class="service"><div class="tid">2B45</div><div class="time plan d">0900</div></a>
</div>'''
    result = _parse_services(html, 'div.time.plan.d')
    assert '1A23' not in result
    assert result == {'2B45': '09:00'}
def test_parse_services_skips_empty_time():
    """A time div containing only whitespace counts as no time."""
    html = '''
<div class="servicelist">
<a class="service">
<div class="tid">1A23</div>
<div class="time plan d"> </div>
</a>
</div>'''
    result = _parse_services(html, 'div.time.plan.d')
    assert result == {}

131
tests/test_trip_planner.py Normal file
View file

@ -0,0 +1,131 @@
import pytest
from trip_planner import combine_trips, _fmt_duration
# Travel date passed to combine_trips() by every test in this module.
DATE = '2026-03-30'
# ---------------------------------------------------------------------------
# _fmt_duration
# ---------------------------------------------------------------------------
def test_fmt_duration_hours_and_minutes():
    """Both parts are shown when both are non-zero."""
    text = _fmt_duration(95)
    assert text == '1h 35m'


def test_fmt_duration_exact_hours():
    """A whole number of hours omits the minutes part."""
    text = _fmt_duration(120)
    assert text == '2h'


def test_fmt_duration_minutes_only():
    """Under an hour, only minutes are shown."""
    text = _fmt_duration(45)
    assert text == '45m'
# ---------------------------------------------------------------------------
# combine_trips — basic pairing
# ---------------------------------------------------------------------------
# Shared fixtures: two GWR trains with the same departure but different
# journey times, and two Eurostar services to Paris.
GWR_FAST = {'depart_bristol': '07:00', 'arrive_paddington': '08:45'} # 1h 45m
GWR_SLOW = {'depart_bristol': '07:00', 'arrive_paddington': '09:26'} # 2h 26m — over limit
# 76 minutes after GWR_FAST reaches Paddington.
ES_PARIS = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
# Only 15 minutes after GWR_FAST reaches Paddington.
ES_EARLY = {'depart_st_pancras': '09:00', 'arrive_destination': '12:00', 'destination': 'Paris Gare du Nord'}
def test_valid_trip_is_returned():
    """A GWR train and a Eurostar inside the window combine into one trip."""
    trips = combine_trips([GWR_FAST], [ES_PARIS], DATE)
    assert len(trips) == 1
    expected = {
        'depart_bristol': '07:00',
        'arrive_paddington': '08:45',
        'depart_st_pancras': '10:01',
        'arrive_destination': '13:34',
        'destination': 'Paris Gare du Nord',
    }
    actual = {field: trips[0][field] for field in expected}
    assert actual == expected


def test_gwr_too_slow_excluded():
    """A 2h 26m GWR journey exceeds MAX_GWR_MINUTES (110) and is dropped."""
    found = combine_trips([GWR_SLOW], [ES_PARIS], DATE)
    assert found == []


def test_eurostar_too_early_excluded():
    """A Eurostar leaving before the minimum connection has elapsed is dropped."""
    found = combine_trips([GWR_FAST], [ES_EARLY], DATE)
    assert found == []


def test_no_trains_returns_empty():
    """No input on either side gives no trips."""
    found = combine_trips([], [], DATE)
    assert found == []


def test_no_gwr_returns_empty():
    """Eurostar services alone give no trips."""
    found = combine_trips([], [ES_PARIS], DATE)
    assert found == []


def test_no_eurostar_returns_empty():
    """GWR trains alone give no trips."""
    found = combine_trips([GWR_FAST], [], DATE)
    assert found == []
# ---------------------------------------------------------------------------
# Connection window constraints
# ---------------------------------------------------------------------------
def test_min_connection_enforced():
    """The 75-minute minimum is inclusive.

    Arrive Paddington 08:45, so the earliest acceptable St Pancras departure
    is 10:00; 09:59 must be rejected and 10:00 accepted.
    """
    too_close = {'depart_st_pancras': '09:59', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
    just_ok = {'depart_st_pancras': '10:00', 'arrive_destination': '13:00', 'destination': 'Paris Gare du Nord'}
    assert combine_trips([GWR_FAST], [too_close], DATE) == []
    accepted = combine_trips([GWR_FAST], [just_ok], DATE)
    assert len(accepted) == 1


def test_max_connection_enforced():
    """The 140-minute maximum is inclusive.

    Arrive Paddington 08:45, so the latest acceptable St Pancras departure is
    11:05; 11:06 must be rejected.
    """
    just_ok = {'depart_st_pancras': '11:05', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
    too_late = {'depart_st_pancras': '11:06', 'arrive_destination': '14:00', 'destination': 'Paris Gare du Nord'}
    accepted = combine_trips([GWR_FAST], [just_ok], DATE)
    assert len(accepted) == 1
    assert combine_trips([GWR_FAST], [too_late], DATE) == []
# ---------------------------------------------------------------------------
# Only earliest valid Eurostar per GWR departure
# ---------------------------------------------------------------------------
def test_only_earliest_eurostar_per_gwr():
    """With two viable Eurostars, only the earlier one is paired."""
    earlier = {'depart_st_pancras': '10:01', 'arrive_destination': '13:34', 'destination': 'Paris Gare du Nord'}
    later = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
    found = combine_trips([GWR_FAST], [earlier, later], DATE)
    assert len(found) == 1
    assert found[0]['depart_st_pancras'] == '10:01'
# ---------------------------------------------------------------------------
# Multiple GWR trains → multiple trips
# ---------------------------------------------------------------------------
def test_multiple_gwr_trains():
    """Each GWR train that can reach the Eurostar gets its own trip, in order."""
    second_gwr = {'depart_bristol': '08:00', 'arrive_paddington': '09:45'}
    eurostar = {'depart_st_pancras': '11:01', 'arrive_destination': '14:34', 'destination': 'Paris Gare du Nord'}
    found = combine_trips([GWR_FAST, second_gwr], [eurostar], DATE)
    assert len(found) == 2
    first, second = found
    assert first['depart_bristol'] == '07:00'
    assert second['depart_bristol'] == '08:00'
# ---------------------------------------------------------------------------
# Duration fields
# ---------------------------------------------------------------------------
def test_gwr_duration_in_trip():
    """Depart 07:00, arrive Paddington 08:45 gives 1h 45m."""
    trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert trip['gwr_duration'] == '1h 45m'


def test_total_duration_in_trip():
    """Depart 07:00, arrive 13:34 gives 6h 34m."""
    trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert trip['total_duration'] == '6h 34m'


def test_connection_duration_in_trip():
    """Arrive Paddington 08:45, depart St Pancras 10:01 gives 1h 16m."""
    trip = combine_trips([GWR_FAST], [ES_PARIS], DATE)[0]
    assert trip['connection_duration'] == '1h 16m'

88
trip_planner.py Normal file
View file

@ -0,0 +1,88 @@
"""
Combine GWR Bristol→Paddington trains with Eurostar St Pancras→destination trains.
"""
from datetime import datetime, timedelta
# Allowed gap between arriving at Paddington and departing St Pancras.
MIN_CONNECTION_MINUTES = 75
MAX_CONNECTION_MINUTES = 140
# GWR trains that take longer than this from Bristol to Paddington are skipped.
MAX_GWR_MINUTES = 110
# strptime formats for the travel date and for the HH:MM train times.
DATE_FMT = '%Y-%m-%d'
TIME_FMT = '%H:%M'
def _parse_dt(date: str, time: str) -> datetime:
    """Combine a date string and a time string into a naive ``datetime``.

    The two parts are parsed with ``DATE_FMT`` and ``TIME_FMT``;
    ``ValueError`` is raised when the text does not match.
    """
    combined_format = f"{DATE_FMT} {TIME_FMT}"
    stamp = f"{date} {time}"
    return datetime.strptime(stamp, combined_format)
def _fmt_duration(minutes: int) -> str:
h, m = divmod(minutes, 60)
if h and m:
return f"{h}h {m}m"
if h:
return f"{h}h"
return f"{m}m"
def combine_trips(
    gwr_trains: list[dict],
    eurostar_trains: list[dict],
    travel_date: str,
) -> list[dict]:
    """
    Return a list of valid combined trips, sorted by Bristol departure time.

    Each GWR train is paired with at most one Eurostar: the earliest one
    whose St Pancras departure is between MIN_CONNECTION_MINUTES and
    MAX_CONNECTION_MINUTES after the Paddington arrival.  GWR trains taking
    longer than MAX_GWR_MINUTES, and entries whose times are missing or
    unparseable, are skipped.

    Each trip dict:
        depart_bristol      HH:MM
        arrive_paddington   HH:MM
        gwr_duration        str  (e.g. "1h 45m")
        connection_duration str
        depart_st_pancras   HH:MM
        arrive_destination  HH:MM
        total_duration      str  (e.g. "5h 30m")
        destination         str
    """
    # Parse every Eurostar service once instead of once per GWR train, and
    # order by departure so "first viable" really is "earliest viable" even
    # if the caller passes an unsorted list.
    eurostars = []
    for es in eurostar_trains:
        try:
            dep_stp = _parse_dt(travel_date, es['depart_st_pancras'])
            arr_dest = _parse_dt(travel_date, es['arrive_destination'])
        except (ValueError, KeyError):
            continue
        # An arrival clock time before the departure means the train
        # arrives the following day.
        if arr_dest < dep_stp:
            arr_dest += timedelta(days=1)
        eurostars.append((dep_stp, arr_dest, es))
    eurostars.sort(key=lambda item: item[0])

    min_wait = timedelta(minutes=MIN_CONNECTION_MINUTES)
    max_wait = timedelta(minutes=MAX_CONNECTION_MINUTES)

    trips = []
    for gwr in gwr_trains:
        try:
            arr_pad = _parse_dt(travel_date, gwr['arrive_paddington'])
            dep_bri = _parse_dt(travel_date, gwr['depart_bristol'])
        except (ValueError, KeyError):
            continue

        gwr_minutes = int((arr_pad - dep_bri).total_seconds() / 60)
        # A negative duration means the train reaches Paddington after
        # midnight, so it cannot connect with a Eurostar on travel_date.
        if gwr_minutes < 0 or gwr_minutes > MAX_GWR_MINUTES:
            continue

        for dep_stp, arr_dest, es in eurostars:
            wait = dep_stp - arr_pad
            if wait < min_wait:
                continue
            if wait > max_wait:
                # Sorted by departure: every later service waits even longer.
                break

            # NOTE(review): arrive_destination is subtracted from a UK
            # departure time as-is.  If the scraped arrival is in the
            # destination's local time, total_duration is overstated by the
            # timezone offset — confirm against the Eurostar data.
            total_minutes = int((arr_dest - dep_bri).total_seconds() / 60)
            trips.append({
                'depart_bristol': gwr['depart_bristol'],
                'arrive_paddington': gwr['arrive_paddington'],
                'gwr_duration': _fmt_duration(gwr_minutes),
                'connection_duration': _fmt_duration(int(wait.total_seconds() / 60)),
                'depart_st_pancras': es['depart_st_pancras'],
                'arrive_destination': es['arrive_destination'],
                'total_duration': _fmt_duration(total_minutes),
                'destination': es['destination'],
            })
            break  # Only the earliest valid Eurostar per GWR departure

    trips.sort(key=lambda t: (t['depart_bristol'], t['depart_st_pancras']))
    return trips