Add pytest suite for sample locations with recorded fixtures

Tests replay pre-recorded WDQS and Wikidata API responses so they run offline against only the local PostGIS database. capture_fixtures.py records live responses into tests/fixtures/ for later replay.
2026-04-19 14:07:51 +01:00 · 2026-04-19 14:07:51 +01:00 · 7790d10f08
commit 7790d10f08
parent 6a5d5d0c2f
3 changed files with 214 additions and 0 deletions
--- a/tests/capture_fixtures.py
+++ b/tests/capture_fixtures.py
@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+Download and save fixture data for the sample locations.
+
+Run from the project root directory:
+
+    python tests/capture_fixtures.py
+
+This makes real HTTP requests to WDQS and the Wikidata API and uses the
+local PostGIS database.  The output is saved to tests/fixtures/ and is
+used by test_examples.py so those tests run without any network access.
+
+Re-run this script whenever the expected results change (e.g. after an OSM
+or Wikidata edit that affects a sample location).
+"""
+
+import json
+import sys
+from pathlib import Path
+from unittest.mock import patch
+
+sys.path.insert(0, str(Path(__file__).parent.parent))  # import from project root
+
+import geocode.wikidata as wikidata_module
+from geocode import samples
+from lookup import app, lat_lon_to_wikidata
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures"  # one sample_NN.json per sample
+
+
+def capture_sample(i: int, lat: float, lon: float, name: str) -> None:
+    """Capture and save fixture data for one sample location.
+
+    Calls lat_lon_to_wikidata with recording wrappers around the real wdqs and
+    api_call, then writes the responses and expected result to sample_<i>.json.
+    """
+    wdqs_calls: dict[str, list] = {}
+    api_calls: dict[str, dict] = {}
+
+    # Hold references to the real functions before patching.
+    real_wdqs = wikidata_module.wdqs
+    real_api_call = wikidata_module.api_call
+
+    def recording_wdqs(query: str) -> list:
+        result = wdqs_calls[query] = real_wdqs(query)
+        return result
+
+    def recording_api_call(params: dict) -> dict:
+        key = json.dumps(params, sort_keys=True)
+        result = api_calls[key] = real_api_call(params)
+        return result
+
+    with patch("geocode.wikidata.wdqs", side_effect=recording_wdqs), patch(
+        "geocode.wikidata.api_call", side_effect=recording_api_call
+    ):
+        reply = lat_lon_to_wikidata(lat, lon)
+
+    result = reply["result"]
+    qid = result.get("wikidata")
+    commons_cat = result.get("commons_cat")
+    cat_title = commons_cat["title"] if isinstance(commons_cat, dict) else None
+    fixture: dict = {
+        "lat": lat,
+        "lon": lon,
+        "name": name,
+        "wdqs": wdqs_calls,
+        "api": api_calls,
+        "expected_wikidata": qid,
+        "expected_commons_cat": cat_title,
+    }
+
+    # ensure_ascii=False emits raw non-ASCII text; write UTF-8, not the locale default.
+    payload = json.dumps(fixture, indent=2, ensure_ascii=False)
+    (FIXTURES_DIR / f"sample_{i:02d}.json").write_text(payload, encoding="utf-8")
+
+    print(f"  [{i:02d}] {name}: {qid or 'none'} / {cat_title or 'no commons cat'}")
+
+
+def main() -> None:
+    """Capture fixtures for all samples.
+
+    Integer command-line arguments, if any, restrict the run to those indices.
+    """
+    FIXTURES_DIR.mkdir(exist_ok=True)
+
+    # Optional sample indices on the command line, e.g.:
+    #   python tests/capture_fixtures.py 0 5 12
+    wanted = {int(arg) for arg in sys.argv[1:]}  # empty: capture everything
+    total = len(samples)
+    print(f"Capturing fixtures for {total} samples...")
+    with app.app_context():
+        for idx, (lat, lon, name) in enumerate(samples):
+            if not wanted or idx in wanted:
+                print(f"[{idx:02d}/{total - 1}] {name} ({lat}, {lon})")
+                capture_sample(idx, lat, lon, name)
+
+    print(f"\nDone. Fixtures saved to {FIXTURES_DIR}/")
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -0,0 +1,17 @@
+"""Shared pytest fixtures."""
+
+import pytest
+
+
+@pytest.fixture(scope="session")
+def flask_app():
+    """Provide the Flask app object; `lookup` is imported lazily on first use."""
+    import lookup
+    return lookup.app
+
+
+@pytest.fixture(scope="session")
+def app_ctx(flask_app):
+    """Keep one Flask application context active for the whole test session."""
+    with flask_app.app_context():
+        yield  # yields no value; the context exits when the session fixture ends
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@ -0,0 +1,96 @@
+"""
+Tests for the sample locations listed in geocode.samples.
+
+Fixtures are pre-recorded snapshots of WDQS and Wikidata API responses.
+Generate or refresh them (requires network + DB) with:
+
+    python tests/capture_fixtures.py
+
+The tests themselves only need the local PostGIS database.
+"""
+
+import json
+from pathlib import Path
+
+import pytest
+
+from geocode import samples
+from lookup import lat_lon_to_wikidata
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures"  # recorded sample_NN.json files
+
+
+def _load_fixture(i: int) -> dict:
+    """Load the UTF-8 JSON fixture for sample *i*, skipping the test if absent."""
+    if not (path := FIXTURES_DIR / f"sample_{i:02d}.json").exists():
+        pytest.skip(f"No fixture file — run: python tests/capture_fixtures.py {i}")
+    return json.loads(path.read_text(encoding="utf-8"))
+
+
+def _make_mock_wdqs(fixture: dict, name: str):
+    """Return a replay stub for geocode.wikidata.wdqs built from fixture["wdqs"]."""
+    recorded = fixture["wdqs"]
+
+    def mock_wdqs(query: str) -> list:
+        # A query absent from the fixture fails the test instead of being answered.
+        if query in recorded:
+            return recorded[query]
+        preview = query[:120].replace("\n", " ")
+        message = f"[{name}] Unexpected WDQS query (not in fixture):\n  {preview}…"
+        raise AssertionError(message)
+
+    return mock_wdqs
+
+
+def _make_mock_api_call(fixture: dict, name: str):
+    """Return a replay stub for geocode.wikidata.api_call built from fixture["api"]."""
+    recorded = fixture["api"]
+
+    def mock_api_call(params: dict) -> dict:
+        # Responses are keyed by the params serialised as JSON with sorted keys.
+        lookup_key = json.dumps(params, sort_keys=True)
+        if lookup_key in recorded:
+            return recorded[lookup_key]
+        message = f"[{name}] Unexpected api_call (not in fixture): {lookup_key}"
+        raise AssertionError(message)
+
+    return mock_api_call
+
+
+@pytest.mark.parametrize(
+    "i,lat,lon,name",
+    [(idx, lat, lon, name) for idx, (lat, lon, name) in enumerate(samples)],
+    ids=[name for _, _, name in samples],
+)
+def test_example(app_ctx, mocker, i: int, lat: float, lon: float, name: str) -> None:
+    """Replay the recorded responses and check the lookup result for one sample."""
+    fixture = _load_fixture(i)
+
+    # Route both Wikidata entry points to the recorded responses.
+    mocker.patch("geocode.wikidata.wdqs", side_effect=_make_mock_wdqs(fixture, name))
+    replay_api = _make_mock_api_call(fixture, name)
+    mocker.patch("geocode.wikidata.api_call", side_effect=replay_api)
+
+    result = lat_lon_to_wikidata(lat, lon)["result"]
+
+    want_qid = fixture["expected_wikidata"]
+    got_qid = result.get("wikidata")
+    assert got_qid == want_qid, (
+        f"{name}: wikidata mismatch — expected {want_qid!r}, got {got_qid!r}"
+    )
+
+    want_cat = fixture["expected_commons_cat"]
+    got_cat = result.get("commons_cat")
+    if not want_cat:
+        assert not got_cat, (
+            f"{name}: expected no commons_cat but got {got_cat!r}"
+        )
+        return
+
+    assert isinstance(got_cat, dict), (
+        f"{name}: expected commons_cat={want_cat!r} but result has none"
+    )
+    assert got_cat["title"] == want_cat, (
+        f"{name}: commons_cat mismatch — expected {want_cat!r}, "
+        f"got {got_cat['title']!r}"
+    )