Reorganise

This commit is contained in:
Edward Betts 2026-02-27 14:27:38 +00:00
parent 2a2a42fe5d
commit d3e6d7ac42
12 changed files with 206 additions and 178 deletions

View file

@ -11,20 +11,31 @@ Requests for HTTP calls.
## Repository layout
```
osm-pt-geojson - Fetch a public transport route relation, list stops, export GeoJSON
README.md - User-facing documentation
AGENTS.md - This file
pyproject.toml - Package metadata and build configuration
src/
osm_geojson/
__init__.py - Top-level package marker
pt/
__init__.py - Public transport subpackage
core.py - Data fetching and processing functions
cli.py - Click CLI commands
tests/
fixtures/ - Saved OSM API responses for offline testing
test_osm_pt_geojson.py - Test suite
README.md - User-facing documentation
AGENTS.md - This file
```
New tools are added as individual scripts at the repo root.
`osm_geojson` is the top-level namespace for all tools in this collection. Each
tool lives in its own subpackage (e.g. `osm_geojson.pt`), with its CLI entry
point registered in `pyproject.toml` under `[project.scripts]`.
## Code conventions
- Python 3, shebang `#!/usr/bin/python3`
- Python 3.11+
- CLI via [Click](https://click.palletsprojects.com/)
- HTTP via [Requests](https://requests.readthedocs.io/)
- Parse XML with lxml if needed; prefer the OSM JSON API where possible
- Scripts have no `.py` extension and are executable
- Errors go to stderr; data output goes to stdout
- GeoJSON output uses `ensure_ascii=False`
- All modules, functions, and test functions must have docstrings
@ -42,10 +53,10 @@ No authentication is required for read-only access. Include a descriptive
## Type checking
All scripts use type hints. Run mypy in strict mode to check:
All code uses type hints. Run mypy in strict mode to check:
```
mypy --strict osm-pt-geojson
mypy --strict src/osm_geojson/
```
## Testing
@ -59,21 +70,6 @@ pytest tests/
Tests use the `responses` library to mock HTTP calls and never hit the live OSM
API. Fixture data is stored in `tests/fixtures/` as saved API responses.
Because tool scripts have hyphens in their names and no `.py` extension, they
cannot be imported with the normal `importlib.util.spec_from_file_location`.
Use `importlib.machinery.SourceFileLoader` instead:
```python
import importlib.machinery
import importlib.util
_loader = importlib.machinery.SourceFileLoader("osm_pt_geojson", "osm-pt-geojson")
_spec = importlib.util.spec_from_loader("osm_pt_geojson", _loader)
assert _spec
osm = importlib.util.module_from_spec(_spec)
_loader.exec_module(osm)
```
### Example relations used during development
| ID | Description |
@ -83,8 +79,9 @@ _loader.exec_module(osm)
## Dependencies
Install with pip:
Use a virtual environment. Install the package and test dependencies with:
```
pip install click requests
python3 -m venv .venv
.venv/bin/pip install -e ".[dev]"
```

View file

@ -54,15 +54,17 @@ A GeoJSON `FeatureCollection` containing:
- A `Point` feature for each stop, with a `name` property (omitted with
`--no-stops`).
#### Requirements
#### Installation
- Python 3
- [Click](https://click.palletsprojects.com/)
- [Requests](https://requests.readthedocs.io/)
Install dependencies:
```
pip install click requests
pip install osm-geojson
```
Or from source:
```
git clone https://git.4angle.com/edward/openstreetmap-tools
cd openstreetmap-tools
pip install -e .
```
## Licence

29
pyproject.toml Normal file
View file

@ -0,0 +1,29 @@
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"
[project]
name = "osm-geojson"
version = "0.1.0"
description = "Fetch OSM public transport route relations and export as GeoJSON"
license = {text = "MIT"}
authors = [{name = "Edward Betts"}]
requires-python = ">=3.11"
dependencies = [
"click",
"requests",
]
[project.optional-dependencies]
dev = [
"mypy",
"pytest",
"responses",
"types-requests",
]
[project.scripts]
osm-pt-geojson = "osm_geojson.pt.cli:cli"
[tool.setuptools.packages.find]
where = ["src"]

View file

@ -0,0 +1 @@
"""Fetch OSM public transport route relations and export as GeoJSON."""

Binary file not shown.

View file

@ -0,0 +1 @@
"""Public transport route tools for osm_geojson."""

Binary file not shown.

Binary file not shown.

111
src/osm_geojson/pt/cli.py Normal file
View file

@ -0,0 +1,111 @@
"""Click CLI commands for osm-pt-geojson."""
import json
import sys
import click
from osm_geojson.pt.core import (
GeoJson,
build_route_coords,
fetch_relation_full,
make_geojson,
node_name,
parse_elements,
nearest_coord_index,
)
def output_geojson(geojson: GeoJson, output_path: str | None) -> None:
    """Serialise *geojson* and send it to *output_path*, or to stdout.

    The document is pretty-printed with a two-space indent and non-ASCII
    characters are written as-is rather than escaped. A falsy ``output_path``
    (``None`` or an empty string) selects stdout. Otherwise the text is written
    to that path as UTF-8 and a confirmation line goes to stderr.
    """
    serialised = json.dumps(geojson, ensure_ascii=False, indent=2)

    if not output_path:
        click.echo(serialised)
        return

    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(serialised)
    # Status goes to stderr so it never mixes with data written to stdout.
    click.echo(f"Wrote {output_path}", err=True)
# Root command group for the tool. The subcommands in this module attach to it
# through the @cli.command(...) decorator. The docstring is kept short because
# Click shows it as the group's --help text.
@click.group()
def cli() -> None:
    """OSM public transport route → GeoJSON tool."""
@cli.command("list-stations")
@click.argument("relation_id", type=int)
def list_stations(relation_id: int) -> None:
    """List all stations in an OSM public transport route relation."""
    # Only the nodes, the ordered stop IDs and the relation tags are needed
    # here; the way data returned by parse_elements is ignored.
    payload = fetch_relation_full(relation_id)
    nodes, _ways, stop_ids, _way_ids, tags = parse_elements(payload, relation_id)

    # Fall back to the numeric relation ID when the relation has no name tag.
    route_label = tags.get("name", relation_id)
    click.echo(f"Route: {route_label}")
    click.echo(f"Stops ({len(stop_ids)}):")

    for position, stop_id in enumerate(stop_ids, start=1):
        # A stop listed by the relation may be absent from the nodes mapping.
        if stop_id in nodes:
            label = node_name(nodes[stop_id])
        else:
            label = f"(node {stop_id} not in response)"
        click.echo(f" {position:2}. {label}")
@cli.command("route-between")
@click.argument("relation_id", type=int)
@click.argument("from_station")
@click.argument("to_station")
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file (default: stdout)")
@click.option("--no-stops", is_flag=True, default=False, help="Omit stop points from output.")
def route_between(
    relation_id: int,
    from_station: str,
    to_station: str,
    output: str | None,
    no_stops: bool,
) -> None:
    """Output GeoJSON for the route segment between two named stations."""
    # The docstring above doubles as the command's --help text, so the
    # implementation notes are kept in comments.
    #
    # Flow: fetch the relation, resolve both station names to stop node IDs,
    # find the route coordinate nearest to each stop, and pass the two indices
    # to make_geojson. If either name cannot be resolved, the errors and the
    # list of available station names are printed to stderr and the process
    # exits with status 1.
    data = fetch_relation_full(relation_id)
    nodes, ways, stop_ids, way_ids, tags = parse_elements(data, relation_id)
    route_coords = build_route_coords(way_ids, ways, nodes)

    def find_stop(name: str) -> int | None:
        """Return the node ID of the first stop whose name matches, or None.

        Names are compared with ``str.casefold()``, which is intended for
        caseless matching and handles more scripts than ``str.lower()``.
        Stops missing from ``nodes`` are skipped.
        """
        wanted = name.casefold()  # fold the query once, not per stop
        for sid in stop_ids:
            if sid in nodes and node_name(nodes[sid]).casefold() == wanted:
                return sid
        return None

    sid_from = find_stop(from_station)
    sid_to = find_stop(to_station)

    # An explicit None check (rather than an assert) narrows both IDs for the
    # type checker and still holds when Python runs with -O.
    if sid_from is None or sid_to is None:
        for requested, found in ((from_station, sid_from), (to_station, sid_to)):
            if found is None:
                click.echo(f"Error: Station not found: {requested!r}", err=True)
        click.echo("Available stations:", err=True)
        for sid in stop_ids:
            if sid in nodes:
                click.echo(f" {node_name(nodes[sid])}", err=True)
        sys.exit(1)

    idx_from = nearest_coord_index(nodes[sid_from]["lon"], nodes[sid_from]["lat"], route_coords)
    idx_to = nearest_coord_index(nodes[sid_to]["lon"], nodes[sid_to]["lat"], route_coords)

    geojson = make_geojson(
        route_coords, stop_ids, nodes, tags, idx_from=idx_from, idx_to=idx_to, no_stops=no_stops
    )
    output_geojson(geojson, output)
@cli.command("full-route")
@click.argument("relation_id", type=int)
@click.option(
    "--output",
    "-o",
    type=click.Path(),
    default=None,
    help="Output file (default: stdout)",
)
@click.option(
    "--no-stops",
    is_flag=True,
    default=False,
    help="Omit stop points from output.",
)
def full_route(relation_id: int, output: str | None, no_stops: bool) -> None:
    """Output GeoJSON for the entire route, end to end."""
    nodes, ways, stop_ids, way_ids, tags = parse_elements(
        fetch_relation_full(relation_id), relation_id
    )
    # No idx_from / idx_to are supplied, so make_geojson uses its own defaults.
    collection = make_geojson(
        build_route_coords(way_ids, ways, nodes),
        stop_ids,
        nodes,
        tags,
        no_stops=no_stops,
    )
    output_geojson(collection, output)

104
osm-pt-geojson → src/osm_geojson/pt/core.py Executable file → Normal file
View file

@ -1,6 +1,4 @@
#!/usr/bin/python3
"""Fetch an OSM public transport route relation and export it as GeoJSON."""
import json
"""Core data-fetching and processing functions for osm-pt-geojson."""
import sys
from typing import Any
@ -76,6 +74,7 @@ def build_route_coords(
return []
def way_node_ids(way_id: int) -> list[int]:
"""Return the ordered node IDs for a way, or an empty list if not found."""
return ways[way_id]["nodes"] if way_id in ways else []
chain: list[int] = list(way_node_ids(way_ids[0]))
@ -173,102 +172,3 @@ def make_geojson(
)
return {"type": "FeatureCollection", "features": features}
def output_geojson(geojson: GeoJson, output_path: str | None) -> None:
    """Dump *geojson* as indented JSON to a file or, with no path, to stdout.

    Non-ASCII characters are kept unescaped. When ``output_path`` is truthy the
    text is written there as UTF-8 and a "Wrote ..." notice is sent to stderr.
    Otherwise the text is echoed to stdout.
    """
    rendered = json.dumps(geojson, ensure_ascii=False, indent=2)

    if output_path:
        with open(output_path, "w", encoding="utf-8") as out_file:
            out_file.write(rendered)
        click.echo(f"Wrote {output_path}", err=True)
        return

    click.echo(rendered)
# Top-level Click group. Each subcommand registers itself on it with
# @cli.command(...). The docstring is what Click prints as the --help summary.
@click.group()
def cli() -> None:
    """OSM public transport route → GeoJSON tool."""
@cli.command("list-stations")
@click.argument("relation_id", type=int)
def list_stations(relation_id: int) -> None:
    """List all stations in an OSM public transport route relation."""
    # The way mapping and way ID list returned by parse_elements are unused.
    nodes, _, stop_ids, _, tags = parse_elements(
        fetch_relation_full(relation_id), relation_id
    )

    # The relation ID stands in for the route name when no name tag is present.
    click.echo(f"Route: {tags.get('name', relation_id)}")
    click.echo(f"Stops ({len(stop_ids)}):")

    for number, member_id in enumerate(stop_ids, 1):
        # Stops whose node is missing from the mapping get a placeholder line.
        detail = (
            node_name(nodes[member_id])
            if member_id in nodes
            else f"(node {member_id} not in response)"
        )
        click.echo(f" {number:2}. {detail}")
@cli.command("route-between")
@click.argument("relation_id", type=int)
@click.argument("from_station")
@click.argument("to_station")
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file (default: stdout)")
@click.option("--no-stops", is_flag=True, default=False, help="Omit stop points from output.")
def route_between(
    relation_id: int,
    from_station: str,
    to_station: str,
    output: str | None,
    no_stops: bool,
) -> None:
    """Output GeoJSON for the route segment between two named stations."""
    # The docstring is also the --help text, so details are kept in comments.
    # Both station names are resolved to stop node IDs. If either is unknown,
    # the errors and the known station names go to stderr and the command
    # exits with status 1.
    data = fetch_relation_full(relation_id)
    nodes, ways, stop_ids, way_ids, tags = parse_elements(data, relation_id)
    route_coords = build_route_coords(way_ids, ways, nodes)

    def find_stop(name: str) -> int | None:
        """Return the node ID of the stop matching name (caseless), or None."""
        # casefold() is the str method intended for caseless comparison; the
        # query is folded once instead of on every iteration.
        target = name.casefold()
        for sid in stop_ids:
            if sid in nodes and node_name(nodes[sid]).casefold() == target:
                return sid
        return None

    sid_from = find_stop(from_station)
    sid_to = find_stop(to_station)

    errors: list[str] = []
    if sid_from is None:
        errors.append(f"Station not found: {from_station!r}")
    if sid_to is None:
        errors.append(f"Station not found: {to_station!r}")

    if errors:
        for e in errors:
            click.echo(f"Error: {e}", err=True)
        click.echo("Available stations:", err=True)
        for sid in stop_ids:
            if sid in nodes:
                click.echo(f" {node_name(nodes[sid])}", err=True)
        sys.exit(1)

    # Type narrowing only: reaching this line means neither lookup failed.
    assert sid_from is not None and sid_to is not None

    # Map each stop to the index of its nearest point on the route line.
    idx_from = nearest_coord_index(nodes[sid_from]["lon"], nodes[sid_from]["lat"], route_coords)
    idx_to = nearest_coord_index(nodes[sid_to]["lon"], nodes[sid_to]["lat"], route_coords)

    geojson = make_geojson(
        route_coords, stop_ids, nodes, tags, idx_from=idx_from, idx_to=idx_to, no_stops=no_stops
    )
    output_geojson(geojson, output)
@cli.command("full-route")
@click.argument("relation_id", type=int)
@click.option("--output", "-o", type=click.Path(), default=None, help="Output file (default: stdout)")
@click.option("--no-stops", is_flag=True, default=False, help="Omit stop points from output.")
def full_route(relation_id: int, output: str | None, no_stops: bool) -> None:
    """Output GeoJSON for the entire route, end to end."""
    relation_data = fetch_relation_full(relation_id)
    nodes, ways, stop_ids, way_ids, tags = parse_elements(relation_data, relation_id)

    # Chain the member ways into one coordinate list, then wrap it (and the
    # stops, unless suppressed) into a FeatureCollection.
    line_coords = build_route_coords(way_ids, ways, nodes)
    feature_collection = make_geojson(
        line_coords, stop_ids, nodes, tags, no_stops=no_stops
    )

    output_geojson(feature_collection, output)
# Run the Click command group when this file is executed directly as a script
# rather than imported as a module.
if __name__ == "__main__":
    cli()

View file

@ -6,20 +6,8 @@ import pytest
import responses as responses_lib
from click.testing import CliRunner
import sys
sys.path.insert(0, str(Path(__file__).parent.parent))
# Import the script as a module. The filename has hyphens so we use importlib.
import importlib.machinery
import importlib.util
_loader = importlib.machinery.SourceFileLoader(
"osm_pt_geojson", str(Path(__file__).parent.parent / "osm-pt-geojson")
)
_spec = importlib.util.spec_from_loader("osm_pt_geojson", _loader)
assert _spec
osm = importlib.util.module_from_spec(_spec)
_loader.exec_module(osm)
from osm_geojson.pt import core
from osm_geojson.pt.cli import cli, output_geojson
FIXTURES = Path(__file__).parent / "fixtures"
FULL_URL = "https://www.openstreetmap.org/api/0.6/relation/15083963/full.json"
@ -35,7 +23,7 @@ def full_data() -> dict:
@pytest.fixture()
def parsed(full_data: dict) -> tuple:
"""Return parsed elements (nodes, ways, stop_ids, way_ids, tags) for relation 15083963."""
return osm.parse_elements(full_data, RELATION_ID)
return core.parse_elements(full_data, RELATION_ID)
# ---------------------------------------------------------------------------
@ -51,8 +39,8 @@ def test_parse_elements_stop_count(parsed: tuple) -> None:
def test_parse_elements_first_and_last_stop(parsed: tuple) -> None:
"""The first and last stops match the route terminus names."""
nodes, ways, stop_ids, way_ids, tags = parsed
assert osm.node_name(nodes[stop_ids[0]]) == "Arnavutköy Hastane"
assert osm.node_name(nodes[stop_ids[-1]]) == "Gayrettepe"
assert core.node_name(nodes[stop_ids[0]]) == "Arnavutköy Hastane"
assert core.node_name(nodes[stop_ids[-1]]) == "Gayrettepe"
def test_parse_elements_way_count(parsed: tuple) -> None:
@ -70,9 +58,8 @@ def test_parse_elements_tags(parsed: tuple) -> None:
def test_parse_elements_unknown_relation(full_data: dict) -> None:
"""Requesting a relation ID not present in the response exits with an error."""
runner = CliRunner()
with pytest.raises(SystemExit):
osm.parse_elements(full_data, 9999999)
core.parse_elements(full_data, 9999999)
# ---------------------------------------------------------------------------
@ -82,7 +69,7 @@ def test_parse_elements_unknown_relation(full_data: dict) -> None:
def test_build_route_coords_returns_coords(parsed: tuple) -> None:
"""Chained coordinates are non-empty and fall within the Istanbul bounding box."""
nodes, ways, stop_ids, way_ids, tags = parsed
coords = osm.build_route_coords(way_ids, ways, nodes)
coords = core.build_route_coords(way_ids, ways, nodes)
assert len(coords) > 0
for coord in coords:
assert len(coord) == 2
@ -93,7 +80,7 @@ def test_build_route_coords_returns_coords(parsed: tuple) -> None:
def test_build_route_coords_empty_ways() -> None:
"""An empty way list returns an empty coordinate list."""
assert osm.build_route_coords([], {}, {}) == []
assert core.build_route_coords([], {}, {}) == []
# ---------------------------------------------------------------------------
@ -103,13 +90,13 @@ def test_build_route_coords_empty_ways() -> None:
def test_nearest_coord_index_exact() -> None:
"""Returns the index of an exact coordinate match."""
coords = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]
assert osm.nearest_coord_index(3.0, 4.0, coords) == 1
assert core.nearest_coord_index(3.0, 4.0, coords) == 1
def test_nearest_coord_index_approximate() -> None:
"""Returns the index of the closest coordinate when there is no exact match."""
coords = [[0.0, 0.0], [10.0, 0.0], [20.0, 0.0]]
assert osm.nearest_coord_index(9.0, 0.0, coords) == 1
assert core.nearest_coord_index(9.0, 0.0, coords) == 1
# ---------------------------------------------------------------------------
@ -119,25 +106,25 @@ def test_nearest_coord_index_approximate() -> None:
def test_node_name_uses_name_tag() -> None:
"""Prefers the name tag when present."""
node = {"id": 1, "lat": 0.0, "lon": 0.0, "tags": {"name": "Central", "ref": "C1"}}
assert osm.node_name(node) == "Central"
assert core.node_name(node) == "Central"
def test_node_name_falls_back_to_ref() -> None:
"""Falls back to the ref tag when there is no name tag."""
node = {"id": 1, "lat": 0.0, "lon": 0.0, "tags": {"ref": "C1"}}
assert osm.node_name(node) == "C1"
assert core.node_name(node) == "C1"
def test_node_name_falls_back_to_id() -> None:
"""Falls back to the node ID when tags are present but empty."""
node = {"id": 42, "lat": 0.0, "lon": 0.0, "tags": {}}
assert osm.node_name(node) == "42"
assert core.node_name(node) == "42"
def test_node_name_no_tags() -> None:
"""Falls back to the node ID when the tags key is absent."""
node = {"id": 99, "lat": 0.0, "lon": 0.0}
assert osm.node_name(node) == "99"
assert core.node_name(node) == "99"
# ---------------------------------------------------------------------------
@ -147,8 +134,8 @@ def test_node_name_no_tags() -> None:
def test_make_geojson_full(parsed: tuple) -> None:
"""Full output contains one LineString and one Point per stop."""
nodes, ways, stop_ids, way_ids, tags = parsed
coords = osm.build_route_coords(way_ids, ways, nodes)
geojson = osm.make_geojson(coords, stop_ids, nodes, tags)
coords = core.build_route_coords(way_ids, ways, nodes)
geojson = core.make_geojson(coords, stop_ids, nodes, tags)
assert geojson["type"] == "FeatureCollection"
features = geojson["features"]
@ -162,8 +149,8 @@ def test_make_geojson_full(parsed: tuple) -> None:
def test_make_geojson_no_stops(parsed: tuple) -> None:
"""With no_stops=True, only the LineString feature is included."""
nodes, ways, stop_ids, way_ids, tags = parsed
coords = osm.build_route_coords(way_ids, ways, nodes)
geojson = osm.make_geojson(coords, stop_ids, nodes, tags, no_stops=True)
coords = core.build_route_coords(way_ids, ways, nodes)
geojson = core.make_geojson(coords, stop_ids, nodes, tags, no_stops=True)
features = geojson["features"]
assert all(f["geometry"]["type"] == "LineString" for f in features)
@ -172,11 +159,11 @@ def test_make_geojson_no_stops(parsed: tuple) -> None:
def test_make_geojson_slice(parsed: tuple) -> None:
"""Slicing by coord index produces a shorter LineString with the correct length."""
nodes, ways, stop_ids, way_ids, tags = parsed
coords = osm.build_route_coords(way_ids, ways, nodes)
full = osm.make_geojson(coords, stop_ids, nodes, tags)
coords = core.build_route_coords(way_ids, ways, nodes)
full = core.make_geojson(coords, stop_ids, nodes, tags)
full_line_len = len(full["features"][0]["geometry"]["coordinates"])
sliced = osm.make_geojson(coords, stop_ids, nodes, tags, idx_from=10, idx_to=50)
sliced = core.make_geojson(coords, stop_ids, nodes, tags, idx_from=10, idx_to=50)
sliced_line_len = len(sliced["features"][0]["geometry"]["coordinates"])
assert sliced_line_len == 41 # 50 - 10 + 1
@ -186,8 +173,8 @@ def test_make_geojson_slice(parsed: tuple) -> None:
def test_make_geojson_linestring_properties(parsed: tuple) -> None:
"""The LineString feature carries route properties from the OSM relation tags."""
nodes, ways, stop_ids, way_ids, tags = parsed
coords = osm.build_route_coords(way_ids, ways, nodes)
geojson = osm.make_geojson(coords, stop_ids, nodes, tags)
coords = core.build_route_coords(way_ids, ways, nodes)
geojson = core.make_geojson(coords, stop_ids, nodes, tags)
props = geojson["features"][0]["properties"]
assert props["ref"] == "M11"
@ -203,7 +190,7 @@ def test_cli_list_stations(full_data: dict) -> None:
"""list-stations prints the route name and all stop names."""
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(osm.cli, ["list-stations", str(RELATION_ID)])
result = runner.invoke(cli, ["list-stations", str(RELATION_ID)])
assert result.exit_code == 0
assert "M11" in result.output
assert "Arnavutköy Hastane" in result.output
@ -215,7 +202,7 @@ def test_cli_list_stations_http_error() -> None:
"""list-stations exits with code 1 on an HTTP error response."""
responses_lib.add(responses_lib.GET, FULL_URL, status=503)
runner = CliRunner()
result = runner.invoke(osm.cli, ["list-stations", str(RELATION_ID)])
result = runner.invoke(cli, ["list-stations", str(RELATION_ID)])
assert result.exit_code == 1
@ -228,7 +215,7 @@ def test_cli_full_route_geojson(full_data: dict) -> None:
"""full-route outputs a valid GeoJSON FeatureCollection to stdout."""
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(osm.cli, ["full-route", str(RELATION_ID)])
result = runner.invoke(cli, ["full-route", str(RELATION_ID)])
assert result.exit_code == 0
geojson = json.loads(result.output)
assert geojson["type"] == "FeatureCollection"
@ -239,7 +226,7 @@ def test_cli_full_route_no_stops(full_data: dict) -> None:
"""full-route --no-stops omits Point features from the output."""
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(osm.cli, ["full-route", str(RELATION_ID), "--no-stops"])
result = runner.invoke(cli, ["full-route", str(RELATION_ID), "--no-stops"])
assert result.exit_code == 0
geojson = json.loads(result.output)
types = [f["geometry"]["type"] for f in geojson["features"]]
@ -252,7 +239,7 @@ def test_cli_full_route_output_file(full_data: dict, tmp_path) -> None:
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
out = tmp_path / "route.geojson"
runner = CliRunner()
result = runner.invoke(osm.cli, ["full-route", str(RELATION_ID), "-o", str(out)])
result = runner.invoke(cli, ["full-route", str(RELATION_ID), "-o", str(out)])
assert result.exit_code == 0
assert out.exists()
geojson = json.loads(out.read_text())
@ -269,7 +256,7 @@ def test_cli_route_between(full_data: dict) -> None:
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(
osm.cli,
cli,
["route-between", str(RELATION_ID), "Arnavutköy Hastane", "Gayrettepe"],
)
assert result.exit_code == 0
@ -289,7 +276,7 @@ def test_cli_route_between_unknown_station(full_data: dict) -> None:
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(
osm.cli,
cli,
["route-between", str(RELATION_ID), "Nonexistent", "Gayrettepe"],
)
assert result.exit_code == 1
@ -301,7 +288,7 @@ def test_cli_route_between_stops_subset(full_data: dict) -> None:
responses_lib.add(responses_lib.GET, FULL_URL, json=full_data)
runner = CliRunner()
result = runner.invoke(
osm.cli,
cli,
["route-between", str(RELATION_ID), "İstanbul Havalimanı", "Hasdal"],
)
assert result.exit_code == 0