Initial commit.

This commit is contained in:
Edward Betts 2026-02-27 10:59:27 +00:00
commit 2a2a42fe5d
5 changed files with 5628 additions and 0 deletions

90
AGENTS.md Normal file
View file

@ -0,0 +1,90 @@
# AGENTS.md
Guidelines for AI coding agents working in this repository.
## Project overview
A collection of Python CLI tools for working with OpenStreetMap data. Each
tool is a standalone Python script using Click for the CLI interface and
Requests for HTTP calls.
## Repository layout
```
osm-pt-geojson - Fetch a public transport route relation, list stops, export GeoJSON
README.md - User-facing documentation
AGENTS.md - This file
```
New tools are added as individual scripts at the repo root.
## Code conventions
- Python 3, shebang `#!/usr/bin/python3`
- CLI via [Click](https://click.palletsprojects.com/)
- HTTP via [Requests](https://requests.readthedocs.io/)
- Parse XML with lxml if needed; prefer the OSM JSON API where possible
- Scripts have no `.py` extension and are executable
- Errors go to stderr; data output goes to stdout
- GeoJSON output uses `ensure_ascii=False`
- All modules, functions, and test functions must have docstrings
## OSM API
Tools fetch data from the public OSM API:
```
https://www.openstreetmap.org/api/0.6/
```
No authentication is required for read-only access. Include a descriptive
`User-Agent` header in all requests.
## Type checking
All scripts use type hints. Run mypy in strict mode to check:
```
mypy --strict osm-pt-geojson
```
## Testing
Run the test suite with:
```
pytest tests/
```
Tests use the `responses` library to mock HTTP calls and never hit the live OSM
API. Fixture data is stored in `tests/fixtures/` as saved API responses.
Because tool scripts have hyphens in their names and no `.py` extension, they
cannot be loaded with a plain `import` statement, and
`importlib.util.spec_from_file_location` cannot infer a loader for them.
Use `importlib.machinery.SourceFileLoader` instead:
```python
import importlib.machinery
import importlib.util
_loader = importlib.machinery.SourceFileLoader("osm_pt_geojson", "osm-pt-geojson")
_spec = importlib.util.spec_from_loader("osm_pt_geojson", _loader)
assert _spec
osm = importlib.util.module_from_spec(_spec)
_loader.exec_module(osm)
```
### Example relations used during development
| ID | Description |
|----------|------------------------------------------|
| 15083963 | M11 Istanbul Metro (subway) |
| 18892969 | Bus A1: Bristol Airport → Bus Station |
## Dependencies
Install with pip:
```
pip install click requests
```

70
README.md Normal file
View file

@ -0,0 +1,70 @@
# openstreetmap-tools
A collection of command-line tools for working with OpenStreetMap data.
## Tools
### osm-pt-geojson
Fetch an OSM public transport route relation and list its stops, or export the
route as GeoJSON.
#### Usage
```
osm-pt-geojson list-stations <relation_id>
osm-pt-geojson route-between <relation_id> <from_station> <to_station> [-o FILE] [--no-stops]
osm-pt-geojson full-route <relation_id> [-o FILE] [--no-stops]
```
**List stops on a route:**
```
$ osm-pt-geojson list-stations 18892969
Route: Bus A1: Bristol Airport → Bristol Bus Station
Stops (21):
1. Airport Terminal
2. Airport Tavern
...
21. Bus Station
```
**Export a segment between two stops as GeoJSON:**
```
$ osm-pt-geojson route-between 18892969 "Airport Terminal" "East Street" -o segment.geojson
```
**Export the full route as GeoJSON:**
```
$ osm-pt-geojson full-route 18892969 -o route.geojson
```
**Omit stop points from GeoJSON output:**
```
$ osm-pt-geojson full-route 18892969 --no-stops -o route.geojson
```
GeoJSON is written to stdout if `-o` is not given.
#### Output format
A GeoJSON `FeatureCollection` containing:
- A `LineString` feature for the route geometry, with `name`, `ref`, `from`,
`to`, and `route` properties from the OSM relation tags.
- A `Point` feature for each stop, with a `name` property (omitted with
`--no-stops`).
#### Requirements
- Python 3
- [Click](https://click.palletsprojects.com/)
- [Requests](https://requests.readthedocs.io/)
Install dependencies:
```
pip install click requests
```
## Licence
MIT License. Copyright (c) 2026 Edward Betts.

274
osm-pt-geojson Executable file
View file

@ -0,0 +1,274 @@
#!/usr/bin/python3
"""Fetch an OSM public transport route relation and export it as GeoJSON."""
import json
import math
import sys
from typing import Any

import click
import requests
# Base URL of the OSM API (version 0.6); endpoint paths are appended to it.
OSM_API = "https://www.openstreetmap.org/api/0.6"
# Type aliases
# A single position, stored in GeoJSON order.
Coord = list[float] # [lon, lat]
# Tag key -> tag value, as read from an element's "tags" entry.
OsmTags = dict[str, str]
# One entry of the API response's "elements" list (node, way or relation).
OsmElement = dict[str, Any]
# Any GeoJSON object (Feature, FeatureCollection, ...) as a plain dict.
GeoJson = dict[str, Any]
def fetch_relation_full(relation_id: int) -> dict[str, Any]:
    """Fetch the full OSM API response for a relation, including all member ways and nodes.

    Requests ``<OSM_API>/relation/<relation_id>/full.json`` and returns the
    decoded JSON document.

    Every failure mode (network error, non-200 status, or a response body that
    is not valid JSON) is reported on stderr and ends the process with exit
    status 1, so callers only ever receive a successfully decoded response.
    """
    url = f"{OSM_API}/relation/{relation_id}/full.json"
    try:
        resp = requests.get(url, headers={"User-Agent": "osm-pt-geojson/1.0"}, timeout=30)
    except requests.RequestException as e:
        click.echo(f"Error: {e}", err=True)
        sys.exit(1)
    if resp.status_code != 200:
        click.echo(f"Error: HTTP {resp.status_code} fetching relation {relation_id}", err=True)
        sys.exit(1)
    try:
        result: dict[str, Any] = resp.json()
    except ValueError as e:
        # A 200 response may still carry a non-JSON body; the decode error
        # raised by resp.json() is a ValueError subclass.
        click.echo(f"Error: invalid JSON in response for relation {relation_id}: {e}", err=True)
        sys.exit(1)
    return result
def parse_elements(
    data: dict[str, Any],
    relation_id: int,
) -> tuple[dict[int, OsmElement], dict[int, OsmElement], list[int], list[int], OsmTags]:
    """Split an API response into lookup tables and the target relation's members.

    Returns a 5-tuple ``(nodes, ways, stop_ids, way_ids, tags)``:

    - ``nodes`` / ``ways``: every node and way element in ``data``, keyed by ID.
    - ``stop_ids``: refs of node members whose role is ``stop``,
      ``stop_entry_only`` or ``stop_exit_only``, in relation order.
    - ``way_ids``: refs of way members with an empty role, in relation order.
    - ``tags``: the relation's tags (empty dict when it has none).

    Prints an error and exits with status 1 when ``relation_id`` is not among
    the elements.
    """
    nodes_by_id: dict[int, OsmElement] = {}
    ways_by_id: dict[int, OsmElement] = {}
    target: OsmElement | None = None
    for element in data["elements"]:
        kind = element["type"]
        if kind == "node":
            nodes_by_id[element["id"]] = element
        elif kind == "way":
            ways_by_id[element["id"]] = element
        elif kind == "relation" and element["id"] == relation_id:
            target = element

    if target is None:
        click.echo(f"Error: relation {relation_id} not found in API response", err=True)
        sys.exit(1)

    stop_roles = ("stop", "stop_entry_only", "stop_exit_only")
    members = target["members"]
    # The two selections are mutually exclusive, so filtering twice yields the
    # same ordered lists as a single if/elif pass over the members.
    stop_ids: list[int] = [
        m["ref"] for m in members if m["role"] in stop_roles and m["type"] == "node"
    ]
    way_ids: list[int] = [
        m["ref"] for m in members if m["role"] == "" and m["type"] == "way"
    ]
    tags: OsmTags = target.get("tags", {})
    return nodes_by_id, ways_by_id, stop_ids, way_ids, tags
def build_route_coords(
    way_ids: list[int],
    ways: dict[int, OsmElement],
    nodes: dict[int, OsmElement],
) -> list[Coord]:
    """Chain ways into a single ordered list of [lon, lat] coordinates.

    Ways are joined in the order given by ``way_ids``. Each way is attached to
    whichever end of the chain shares a node with one of its own ends, and is
    reversed when needed. A way that shares no end node with the chain is
    appended as-is after a warning on stderr.

    Ways that are absent from ``ways`` or have no nodes are skipped. The chain
    is seeded from the first usable way, so a missing or empty leading way no
    longer causes an ``IndexError``. Node IDs that are absent from ``nodes``
    are dropped from the result.
    """
    chain: list[int] = []
    for way_id in way_ids:
        way = ways.get(way_id)
        wn: list[int] = way["nodes"] if way is not None else []
        if not wn:
            continue
        if not chain:
            # First usable way: nothing to join to yet, so it seeds the chain.
            chain = list(wn)
        elif chain[-1] == wn[0]:
            # Way continues from the chain's tail in its own direction.
            chain.extend(wn[1:])
        elif chain[-1] == wn[-1]:
            # Way continues from the chain's tail but is stored reversed.
            chain.extend(reversed(wn[:-1]))
        elif chain[0] == wn[-1]:
            # Way ends where the chain starts: put it in front.
            chain = list(wn) + chain[1:]
        elif chain[0] == wn[0]:
            # Way starts where the chain starts: put it in front, reversed.
            chain = list(reversed(wn)) + chain[1:]
        else:
            click.echo(f"Warning: gap before way {way_id}", err=True)
            chain.extend(wn)
    return [[nodes[nid]["lon"], nodes[nid]["lat"]] for nid in chain if nid in nodes]
def nearest_coord_index(lon: float, lat: float, route_coords: list[Coord]) -> int:
    """Return the index in route_coords nearest to (lon, lat).

    Distances are compared using an equirectangular approximation: the
    longitude difference is scaled by ``cos(lat)`` so that east-west and
    north-south offsets are weighed on the same scale away from the equator.
    Comparing raw degree differences would over-weight longitude and could
    select the wrong point.

    Ties go to the earliest index. An empty ``route_coords`` yields 0.
    Wrap-around at the antimeridian is not handled.
    """
    # Constant for all candidates, so compute it once outside the loop.
    lon_scale = math.cos(math.radians(lat))
    best_i = 0
    best_d = float("inf")
    for i, (rlon, rlat) in enumerate(route_coords):
        # Squared distance is enough for ranking; no square root needed.
        d = ((rlon - lon) * lon_scale) ** 2 + (rlat - lat) ** 2
        if d < best_d:
            best_d = d
            best_i = i
    return best_i
def node_name(node: OsmElement) -> str:
    """Pick a display label for a node.

    Uses the first non-empty value among the ``name`` tag and the ``ref`` tag,
    and falls back to the node's ID rendered as a string.
    """
    tags: OsmTags = node.get("tags", {})
    for key in ("name", "ref"):
        label = tags.get(key)
        if label:
            return label
    return str(node["id"])
def make_geojson(
    route_coords: list[Coord],
    stop_ids: list[int],
    nodes: dict[int, OsmElement],
    route_tags: OsmTags,
    idx_from: int | None = None,
    idx_to: int | None = None,
    no_stops: bool = False,
) -> GeoJson:
    """Assemble the route line and its stops into a GeoJSON FeatureCollection.

    The first feature is always a LineString carrying the ``name``, ``ref``,
    ``from``, ``to`` and ``route`` tags of the relation. It is followed by one
    Point feature per stop unless ``no_stops`` is true.

    When both ``idx_from`` and ``idx_to`` are given, the line is cut to that
    inclusive index range (the two are swapped if given in reverse order), and
    only stops whose nearest route index lies inside the range are kept. Stop
    IDs absent from ``nodes`` are ignored.
    """
    # Pair every known stop with the index of its nearest route coordinate.
    located: list[tuple[int, OsmElement]] = [
        (nearest_coord_index(nodes[sid]["lon"], nodes[sid]["lat"], route_coords), nodes[sid])
        for sid in stop_ids
        if sid in nodes
    ]

    if idx_from is None or idx_to is None:
        line_coords = route_coords
        kept = located
    else:
        lo, hi = min(idx_from, idx_to), max(idx_from, idx_to)
        line_coords = route_coords[lo : hi + 1]
        kept = [(pos, stop) for pos, stop in located if lo <= pos <= hi]

    line_feature: GeoJson = {
        "type": "Feature",
        "geometry": {"type": "LineString", "coordinates": line_coords},
        "properties": {
            "name": route_tags.get("name"),
            "ref": route_tags.get("ref"),
            "from": route_tags.get("from"),
            "to": route_tags.get("to"),
            "route": route_tags.get("route"),
        },
    }

    stop_features: list[GeoJson] = []
    if not no_stops:
        stop_features = [
            {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [stop["lon"], stop["lat"]]},
                "properties": {"name": node_name(stop)},
            }
            for _, stop in kept
        ]

    return {"type": "FeatureCollection", "features": [line_feature, *stop_features]}
def output_geojson(geojson: GeoJson, output_path: str | None) -> None:
    """Serialise GeoJSON and send it to a file or to stdout.

    The document is rendered with two-space indentation and non-ASCII
    characters left unescaped. With a non-empty ``output_path`` the text is
    written there as UTF-8 and a confirmation goes to stderr. Otherwise the
    text is printed on stdout.
    """
    rendered = json.dumps(geojson, ensure_ascii=False, indent=2)
    if not output_path:
        click.echo(rendered)
        return
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(rendered)
    click.echo(f"Wrote {output_path}", err=True)
# Root command group; the subcommands below attach themselves via @cli.command.
# The docstring doubles as the help text Click shows for the group.
@click.group()
def cli() -> None:
    """OSM public transport route → GeoJSON tool."""
@cli.command("list-stations")
@click.argument("relation_id", type=int)
def list_stations(relation_id: int) -> None:
    """List all stations in an OSM public transport route relation."""
    # Only the node table, the ordered stop IDs and the relation tags are
    # needed here; the way data is fetched but unused.
    nodes, _ways, stop_ids, _way_ids, tags = parse_elements(
        fetch_relation_full(relation_id), relation_id
    )
    click.echo(f"Route: {tags.get('name', relation_id)}")
    click.echo(f"Stops ({len(stop_ids)}):")
    for position, stop_id in enumerate(stop_ids, start=1):
        # A stop listed in the relation may be absent from the response.
        if stop_id in nodes:
            label = node_name(nodes[stop_id])
        else:
            label = f"(node {stop_id} not in response)"
        click.echo(f" {position:2}. {label}")
@cli.command("route-between")
@click.argument("relation_id", type=int)
@click.argument("from_station")
@click.argument("to_station")
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file (default: stdout)")
@click.option("--no-stops", is_flag=True, default=False, help="Omit stop points from output.")
def route_between(
    relation_id: int,
    from_station: str,
    to_station: str,
    output: str | None,
    no_stops: bool,
) -> None:
    """Output GeoJSON for the route segment between two named stations."""
    # Station names are matched case-insensitively against the relation's
    # stops. If either is unknown, the errors and the list of available
    # stations go to stderr and the command exits with status 1.
    nodes, ways, stop_ids, way_ids, tags = parse_elements(
        fetch_relation_full(relation_id), relation_id
    )
    route_coords = build_route_coords(way_ids, ways, nodes)

    def find_stop(name: str) -> int | None:
        """Return the ID of the first stop whose label equals name, ignoring case."""
        wanted = name.lower()
        return next(
            (sid for sid in stop_ids if sid in nodes and node_name(nodes[sid]).lower() == wanted),
            None,
        )

    sid_from = find_stop(from_station)
    sid_to = find_stop(to_station)

    lookups = ((from_station, sid_from), (to_station, sid_to))
    unresolved = [label for label, found in lookups if found is None]
    if unresolved:
        for label in unresolved:
            click.echo(f"Error: Station not found: {label!r}", err=True)
        click.echo("Available stations:", err=True)
        for sid in stop_ids:
            if sid in nodes:
                click.echo(f" {node_name(nodes[sid])}", err=True)
        sys.exit(1)

    # Both lookups succeeded; the assert narrows the Optional types.
    assert sid_from is not None and sid_to is not None
    start, end = nodes[sid_from], nodes[sid_to]
    idx_from = nearest_coord_index(start["lon"], start["lat"], route_coords)
    idx_to = nearest_coord_index(end["lon"], end["lat"], route_coords)
    output_geojson(
        make_geojson(
            route_coords, stop_ids, nodes, tags, idx_from=idx_from, idx_to=idx_to, no_stops=no_stops
        ),
        output,
    )
@cli.command("full-route")
@click.argument("relation_id", type=int)
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file (default: stdout)")
@click.option("--no-stops", is_flag=True, default=False, help="Omit stop points from output.")
def full_route(relation_id: int, output: str | None, no_stops: bool) -> None:
    """Output GeoJSON for the entire route, end to end."""
    # Fetch, parse, chain the ways, then emit the unsliced route.
    nodes, ways, stop_ids, way_ids, tags = parse_elements(
        fetch_relation_full(relation_id), relation_id
    )
    coords = build_route_coords(way_ids, ways, nodes)
    output_geojson(make_geojson(coords, stop_ids, nodes, tags, no_stops=no_stops), output)
# Run the Click group only when executed as a script, so the file can also be
# loaded as a module without side effects.
if __name__ == "__main__":
    cli()

4877
tests/fixtures/15083963-full.json vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,317 @@
"""Tests for osm-pt-geojson."""
import json
from pathlib import Path
import pytest
import responses as responses_lib
from click.testing import CliRunner
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
# Import the script as a module. The filename has hyphens so we use importlib.
import importlib.machinery
import importlib.util
# Load the extension-less script by explicit path under the module name
# "osm_pt_geojson", and expose it to the tests as `osm`.
_loader = importlib.machinery.SourceFileLoader(
    "osm_pt_geojson", str(Path(__file__).parent.parent / "osm-pt-geojson")
)
_spec = importlib.util.spec_from_loader("osm_pt_geojson", _loader)
# Presumably guards against a None spec before it is used below.
assert _spec
osm = importlib.util.module_from_spec(_spec)
_loader.exec_module(osm)
# Directory holding saved API responses.
FIXTURES = Path(__file__).parent / "fixtures"
# URL the script requests for RELATION_ID; the CLI tests register mocks for it.
FULL_URL = "https://www.openstreetmap.org/api/0.6/relation/15083963/full.json"
RELATION_ID = 15083963
@pytest.fixture()
def full_data() -> dict:
    """Load the saved full API response for relation 15083963 (M11 Istanbul Metro).

    The file is read explicitly as UTF-8. The tests assert non-ASCII station
    names from it, so relying on the platform's locale encoding could corrupt
    them or fail to decode.
    """
    return json.loads((FIXTURES / "15083963-full.json").read_text(encoding="utf-8"))
@pytest.fixture()
def parsed(full_data: dict) -> tuple:
    """Run parse_elements on the M11 fixture.

    Yields the tuple (nodes, ways, stop_ids, way_ids, tags) for relation 15083963.
    """
    elements = osm.parse_elements(full_data, RELATION_ID)
    return elements
# ---------------------------------------------------------------------------
# parse_elements
# ---------------------------------------------------------------------------
def test_parse_elements_stop_count(parsed: tuple) -> None:
    """The M11 fixture yields exactly ten stop node IDs."""
    _, _, stop_ids, _, _ = parsed
    stop_total = len(stop_ids)
    assert stop_total == 10
def test_parse_elements_first_and_last_stop(parsed: tuple) -> None:
    """Stop order is preserved: the termini appear first and last."""
    nodes, _, stop_ids, _, _ = parsed
    first_stop = nodes[stop_ids[0]]
    last_stop = nodes[stop_ids[-1]]
    assert osm.node_name(first_stop) == "Arnavutköy Hastane"
    assert osm.node_name(last_stop) == "Gayrettepe"
def test_parse_elements_way_count(parsed: tuple) -> None:
    """The M11 fixture yields exactly ten member way IDs."""
    _, _, _, way_ids, _ = parsed
    way_total = len(way_ids)
    assert way_total == 10
def test_parse_elements_tags(parsed: tuple) -> None:
    """The relation's ref and route tags come through unchanged."""
    _, _, _, _, route_tags = parsed
    assert (route_tags["ref"], route_tags["route"]) == ("M11", "subway")
def test_parse_elements_unknown_relation(full_data: dict) -> None:
    """Requesting a relation ID not present in the response exits with status 1."""
    with pytest.raises(SystemExit) as excinfo:
        osm.parse_elements(full_data, 9999999)
    # parse_elements reports the problem and calls sys.exit(1).
    assert excinfo.value.code == 1
# ---------------------------------------------------------------------------
# build_route_coords
# ---------------------------------------------------------------------------
def test_build_route_coords_returns_coords(parsed: tuple) -> None:
    """The chained route is non-empty, made of pairs, and lies around Istanbul."""
    nodes, ways, _, way_ids, _ = parsed
    route = osm.build_route_coords(way_ids, ways, nodes)
    assert route
    assert all(len(point) == 2 for point in route)
    assert all(28.0 < lon < 29.1 and 40.0 < lat < 42.0 for lon, lat in route)
def test_build_route_coords_empty_ways() -> None:
    """With no way IDs at all, the result is an empty list."""
    route = osm.build_route_coords([], {}, {})
    assert route == []
# ---------------------------------------------------------------------------
# nearest_coord_index
# ---------------------------------------------------------------------------
def test_nearest_coord_index_exact() -> None:
    """A query equal to one of the coordinates returns that coordinate's index."""
    route = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
    lon, lat = route[1]
    assert osm.nearest_coord_index(lon, lat, route) == 1
def test_nearest_coord_index_approximate() -> None:
    """A query between coordinates resolves to the closest one."""
    route = [[0.0, 0.0], [10.0, 0.0], [20.0, 0.0]]
    found = osm.nearest_coord_index(9.0, 0.0, route)
    assert found == 1
# ---------------------------------------------------------------------------
# node_name
# ---------------------------------------------------------------------------
def test_node_name_uses_name_tag() -> None:
    """The name tag wins over the ref tag when both are set."""
    tags = {"name": "Central", "ref": "C1"}
    node = {"id": 1, "lat": 0.0, "lon": 0.0, "tags": tags}
    assert osm.node_name(node) == "Central"
def test_node_name_falls_back_to_ref() -> None:
    """Without a name tag, the ref tag is used."""
    tags = {"ref": "C1"}
    node = {"id": 1, "lat": 0.0, "lon": 0.0, "tags": tags}
    assert osm.node_name(node) == "C1"
def test_node_name_falls_back_to_id() -> None:
    """With an empty tags dict, the stringified node ID is used."""
    node = {"id": 42, "lat": 0.0, "lon": 0.0, "tags": {}}
    label = osm.node_name(node)
    assert label == "42"
def test_node_name_no_tags() -> None:
    """With no tags key at all, the stringified node ID is used."""
    node = {"id": 99, "lat": 0.0, "lon": 0.0}
    label = osm.node_name(node)
    assert label == "99"
# ---------------------------------------------------------------------------
# make_geojson
# ---------------------------------------------------------------------------
def test_make_geojson_full(parsed: tuple) -> None:
    """The unsliced output holds one LineString plus a Point for each of the ten stops."""
    nodes, ways, stop_ids, way_ids, tags = parsed
    route = osm.build_route_coords(way_ids, ways, nodes)
    collection = osm.make_geojson(route, stop_ids, nodes, tags)
    assert collection["type"] == "FeatureCollection"
    geometry_types = [feature["geometry"]["type"] for feature in collection["features"]]
    assert geometry_types.count("LineString") == 1
    assert geometry_types.count("Point") == 10
def test_make_geojson_no_stops(parsed: tuple) -> None:
    """Passing no_stops=True leaves no feature other than LineStrings."""
    nodes, ways, stop_ids, way_ids, tags = parsed
    route = osm.build_route_coords(way_ids, ways, nodes)
    collection = osm.make_geojson(route, stop_ids, nodes, tags, no_stops=True)
    geometry_types = {feature["geometry"]["type"] for feature in collection["features"]}
    assert geometry_types <= {"LineString"}
def test_make_geojson_slice(parsed: tuple) -> None:
    """An index range of 10..50 yields a 41-point line, shorter than the full one."""
    nodes, ways, stop_ids, way_ids, tags = parsed
    route = osm.build_route_coords(way_ids, ways, nodes)

    def line_length(collection: dict) -> int:
        """Count the coordinates of the collection's first (LineString) feature."""
        return len(collection["features"][0]["geometry"]["coordinates"])

    whole = line_length(osm.make_geojson(route, stop_ids, nodes, tags))
    part = line_length(osm.make_geojson(route, stop_ids, nodes, tags, idx_from=10, idx_to=50))
    assert part == 41  # 50 - 10 + 1
    assert part < whole
def test_make_geojson_linestring_properties(parsed: tuple) -> None:
    """The first feature's properties mirror the relation's ref and route tags."""
    nodes, ways, stop_ids, way_ids, tags = parsed
    route = osm.build_route_coords(way_ids, ways, nodes)
    line_feature = osm.make_geojson(route, stop_ids, nodes, tags)["features"][0]
    properties = line_feature["properties"]
    assert (properties["ref"], properties["route"]) == ("M11", "subway")
# ---------------------------------------------------------------------------
# CLI — list-stations
# ---------------------------------------------------------------------------
@responses_lib.activate
def test_cli_list_stations(full_data: dict) -> None:
    """list-stations succeeds and its output mentions the route and both termini."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    result = CliRunner().invoke(osm.cli, ["list-stations", str(RELATION_ID)])
    assert result.exit_code == 0
    for expected in ("M11", "Arnavutköy Hastane", "Gayrettepe"):
        assert expected in result.output
@responses_lib.activate
def test_cli_list_stations_http_error() -> None:
    """A 503 from the API makes list-stations exit with status 1."""
    responses_lib.add(responses_lib.GET, FULL_URL, status=503)
    args = ["list-stations", str(RELATION_ID)]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 1
# ---------------------------------------------------------------------------
# CLI — full-route
# ---------------------------------------------------------------------------
@responses_lib.activate
def test_cli_full_route_geojson(full_data: dict) -> None:
    """full-route prints a parseable FeatureCollection on stdout."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    args = ["full-route", str(RELATION_ID)]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 0
    collection = json.loads(outcome.output)
    assert collection["type"] == "FeatureCollection"
@responses_lib.activate
def test_cli_full_route_no_stops(full_data: dict) -> None:
    """With --no-stops, full-route emits no Point features."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    args = ["full-route", str(RELATION_ID), "--no-stops"]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 0
    collection = json.loads(outcome.output)
    geometry_types = {feature["geometry"]["type"] for feature in collection["features"]}
    assert "Point" not in geometry_types
@responses_lib.activate
def test_cli_full_route_output_file(full_data: dict, tmp_path: Path) -> None:
    """full-route -o writes valid GeoJSON to the specified file.

    The script writes the file as UTF-8 with non-ASCII characters unescaped,
    so it is read back as UTF-8 here rather than with the locale encoding.
    """
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    out = tmp_path / "route.geojson"
    runner = CliRunner()
    result = runner.invoke(osm.cli, ["full-route", str(RELATION_ID), "-o", str(out)])
    assert result.exit_code == 0
    assert out.exists()
    geojson = json.loads(out.read_text(encoding="utf-8"))
    assert geojson["type"] == "FeatureCollection"
# ---------------------------------------------------------------------------
# CLI — route-between
# ---------------------------------------------------------------------------
@responses_lib.activate
def test_cli_route_between(full_data: dict) -> None:
    """Both requested endpoint stations appear as Point features in the output."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    args = ["route-between", str(RELATION_ID), "Arnavutköy Hastane", "Gayrettepe"]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 0
    collection = json.loads(outcome.output)
    point_names = {
        feature["properties"]["name"]
        for feature in collection["features"]
        if feature["geometry"]["type"] == "Point"
    }
    assert {"Arnavutköy Hastane", "Gayrettepe"} <= point_names
@responses_lib.activate
def test_cli_route_between_unknown_station(full_data: dict) -> None:
    """An unrecognised station name makes route-between exit with status 1."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    args = ["route-between", str(RELATION_ID), "Nonexistent", "Gayrettepe"]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 1
@responses_lib.activate
def test_cli_route_between_stops_subset(full_data: dict) -> None:
    """Stops outside the requested segment are left out of the output."""
    responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
    args = ["route-between", str(RELATION_ID), "İstanbul Havalimanı", "Hasdal"]
    outcome = CliRunner().invoke(osm.cli, args)
    assert outcome.exit_code == 0
    collection = json.loads(outcome.output)
    point_names = {
        feature["properties"]["name"]
        for feature in collection["features"]
        if feature["geometry"]["type"] == "Point"
    }
    # The segment's own endpoints are present...
    assert {"İstanbul Havalimanı", "Hasdal"} <= point_names
    # ...while the line's termini, which lie outside it, are not.
    assert point_names.isdisjoint({"Arnavutköy Hastane", "Gayrettepe"})