From cd9d8779d3425faff7a32b7fdb7d514987cc1167 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Sat, 18 Apr 2026 20:22:09 +0100 Subject: [PATCH] Add needs_commons=false option and redesign detail and index pages Add a needs_commons parameter (default true) to both the API endpoint and the detail page. When needs_commons=false, look up Wikidata items by OSM relation ID (P402) via WDQS to return the most specific matching item even if it has no Wikimedia Commons category. Only activate this path when the matched item has no Commons category, so that locations with a Commons cat always get the same result regardless of the parameter. Remove the nearest-polygon fallback that was returning incorrect results for inland points in broad admin areas (e.g. returning Falmer for a point in Brighton). That fallback found the nearest polygon by boundary distance without requiring containment, so the pin would appear outside the polygon. The geosearch handles these cases correctly. Redesign the detail page: place name as heading, result card, collapsible API response and SPARQL query, improved OSM element cards with left-border highlight on the matched element, and a toggle button between modes. Redesign the index page: two-column layout with numbered steps and API documentation including the needs_commons parameter, Bootstrap form, and examples as a table. 
Closes #28 (Add support for returning Wikidata item instead of commons category) Co-Authored-By: Claude Sonnet 4.6 --- geocode/wikidata.py | 36 ++- lookup.py | 77 +++++-- templates/detail.html | 213 ++++++++++++------ templates/index.html | 183 ++++++++++++--- templates/sparql/geosearch.sparql | 4 +- .../sparql/lookup_by_osm_relation.sparql | 10 + 6 files changed, 385 insertions(+), 138 deletions(-) create mode 100644 templates/sparql/lookup_by_osm_relation.sparql diff --git a/geocode/wikidata.py b/geocode/wikidata.py index ec87711..14ea217 100644 --- a/geocode/wikidata.py +++ b/geocode/wikidata.py @@ -104,6 +104,7 @@ def qid_to_commons_category(qid: str, check_p910: bool = True) -> str | None: Row = dict[str, dict[str, typing.Any]] +Hit = dict[str, str | int | None] @backoff.on_exception(backoff.expo, QueryError, max_tries=5) @@ -139,10 +140,9 @@ def geosearch_query(lat: float, lon: float) -> str: return query -def geosearch(lat: float, lon: float) -> Row | None: - """Geosearch.""" +def filter_geosearch_row(rows: list[Row]) -> Row | None: + """Filter geosearch rows for the best match that has a Commons category.""" default_max_dist = 1 - rows = wdqs(geosearch_query(lat, lon)) max_dist = { "Q188509": 1, # suburb "Q3957": 2, # town @@ -173,6 +173,33 @@ def geosearch(lat: float, lon: float) -> Row | None: return None +def geosearch(lat: float, lon: float) -> Row | None: + """Geosearch.""" + rows = wdqs(geosearch_query(lat, lon)) + return filter_geosearch_row(rows) + + +def hit_from_row(row: Row) -> Hit: + """Build a Hit from a geosearch row (commons cat is optional).""" + qid = wd_to_qid(row["item"]) + commons_cat: str | None = None + if "commonsCat" in row: + commons_cat = row["commonsCat"]["value"] + elif "commonsSiteLink" in row: + site_link = row["commonsSiteLink"]["value"] + commons_cat = unescape_title(site_link[len(commons_cat_start):]) + return {"wikidata": qid, "commons_cat": commons_cat} + + +def lookup_wikidata_by_osm_relation_ids(relation_ids: list[int]) 
-> list[Row]: + """Look up Wikidata items that reference the given OSM relation IDs via P402.""" + query = render_template( + "sparql/lookup_by_osm_relation.sparql", + relation_ids=[str(r) for r in relation_ids], + ) + return wdqs(query) + + def lookup_scottish_parish_in_wikidata(code: str) -> list[Row]: """Lookup scottish parish in Wikidata.""" return wdqs(render_template("sparql/scottish_parish.sparql", code=code)) @@ -196,9 +223,6 @@ def unescape_title(t: str) -> str: return urllib.parse.unquote(t.replace("_", " ")) -Hit = dict[str, str | int | None] - - def commons_from_rows(rows: list[Row]) -> Hit | None: """Commons from rows.""" for row in rows: diff --git a/lookup.py b/lookup.py index 70d618b..8344c98 100755 --- a/lookup.py +++ b/lookup.py @@ -106,7 +106,9 @@ def add_missing_commons_cat(rows: list[StrDict]) -> None: row["commonsCat"] = {"type": "literal", "value": commons_cat} -def lat_lon_to_wikidata(lat: float, lon: float) -> dict[str, typing.Any]: +def lat_lon_to_wikidata( + lat: float, lon: float, needs_commons: bool = True +) -> dict[str, typing.Any]: """Lookup lat/lon and find most appropriate Wikidata item.""" scotland_code = scotland.get_scotland_code(lat, lon) @@ -148,32 +150,54 @@ def lat_lon_to_wikidata(lat: float, lon: float) -> dict[str, typing.Any]: if not nearby_result.get("missing"): return {"elements": elements, "result": nearby_result} - # Point is in a broad area (e.g. 
country) — try nearest specific polygon - nearby = model.Polygon.nearest(lat, lon) - if nearby and nearby.tags: - tags: typing.Mapping[str, str] = nearby.tags - al = get_admin_level(tags) - hit = ( - hit_from_wikidata_tag(tags) - or hit_from_ref_gss_tag(tags) - or hit_from_name(tags, lat, lon) - ) - if hit: - hit["admin_level"] = al - hit["element"] = nearby.osm_id - hit["geojson"] = typing.cast(str, nearby.geojson_str) - nearby_result = wikidata.build_dict(hit, lat, lon) - if not nearby_result.get("missing"): - return {"elements": elements, "result": nearby_result} + if not needs_commons: + # Direct lookup: find Wikidata items whose P402 (OSM relation ID) matches + # one of the OSM polygons that contain this point. + osm_id_to_element: dict[int, model.Polygon] = {} + relation_ids_for_lookup: list[int] = [] + for e in elements: + if e.osm_id < 0: + rel_id = abs(e.osm_id) + relation_ids_for_lookup.append(rel_id) + osm_id_to_element[e.osm_id] = e - row = wikidata.geosearch(lat, lon) + if relation_ids_for_lookup: + lookup_rows = wikidata.lookup_wikidata_by_osm_relation_ids( + relation_ids_for_lookup + ) + rel_to_hit: dict[int, wikidata.Hit] = {} + for row in lookup_rows: + rel_id = int(row["osmRelation"]["value"]) + if rel_id not in rel_to_hit: + rel_to_hit[rel_id] = wikidata.hit_from_row(row) + + # Iterate elements in specificity order (smallest area first, from coords_within). + # Only use hits without a Commons category — if there's a Commons cat, + # the geosearch path will find it (or something more specific). 
+ for e in elements: + if e.osm_id >= 0: + continue + rel_id = abs(e.osm_id) + if rel_id not in rel_to_hit: + continue + hit = rel_to_hit[rel_id] + if hit.get("commons_cat"): + continue + if e.tags: + hit["admin_level"] = get_admin_level(e.tags) + hit["element"] = e.osm_id + hit["geojson"] = typing.cast(str, e.geojson_str) + result = wikidata.build_dict(hit, lat, lon) + return {"elements": elements, "result": result} + + query = wikidata.geosearch_query(lat, lon) + geo_rows = wikidata.wdqs(query) + row = wikidata.filter_geosearch_row(geo_rows) if row: hit = wikidata.commons_from_rows([row]) elements = [] result = wikidata.build_dict(hit, lat, lon) - query = wikidata.geosearch_query(lat, lon) - return {"elements": elements, "result": result, "query": query} @@ -343,7 +367,8 @@ def index() -> str | Response: if error_msg := validate_coordinates(lat, lon): return jsonify(coords={"lat": lat, "lon": lon}, error=error_msg) - result = lat_lon_to_wikidata(lat, lon)["result"] + needs_commons = request.args.get("needs_commons", "true").lower() != "false" + result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"] result.pop("element", None) result.pop("geojson", None) if logging_enabled: @@ -401,7 +426,7 @@ def highlight_sparql(query: str) -> str: app.jinja_env.filters["highlight_sparql"] = highlight_sparql -def build_detail_page(lat: float, lon: float) -> str: +def build_detail_page(lat: float, lon: float, needs_commons: bool = True) -> str: """Run lookup and build detail page.""" if lat < -90 or lat > 90 or lon < -180 or lon > 180: error = ( @@ -411,7 +436,7 @@ def build_detail_page(lat: float, lon: float) -> str: return render_template("query_error.html", lat=lat, lon=lon, error=error) try: - reply = lat_lon_to_wikidata(lat, lon) + reply = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons) except wikidata.QueryError as e: query, r = e.args return render_template("query_error.html", lat=lat, lon=lon, query=query, r=r) @@ -429,6 +454,7 @@ def 
build_detail_page(lat: float, lon: float) -> str: element_id=element, geojson=geojson, css=css, + needs_commons=needs_commons, **reply, ) @@ -453,7 +479,8 @@ def detail_page() -> Response | str: "coordinate_error.html", lat_str=lat_str, lon_str=lon_str, error=error ) - return build_detail_page(lat, lon) + needs_commons = request.args.get("needs_commons", "true").lower() != "false" + return build_detail_page(lat, lon, needs_commons=needs_commons) @app.route("/reports") diff --git a/templates/detail.html b/templates/detail.html index 0b33d62..970b678 100644 --- a/templates/detail.html +++ b/templates/detail.html @@ -1,6 +1,11 @@ {% extends "base.html" %} -{% block title %}Geocode to Commons{% endblock %} +{% block title %} + {%- if result.commons_cat %}{{ result.commons_cat.title }} + {%- elif result.wikidata %}{{ result.wikidata }} + {%- else %}{{ lat }}, {{ lon }} + {%- endif %} — Geocode +{% endblock %} {% block link %} var map = L.map('map').setView([{{ lat }}, {{ lon }}], 13); -L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', { + L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', { maxZoom: 19, attribution: '© OpenStreetMap' -}).addTo(map); + }).addTo(map); - var marker = L.marker([{{ lat }} , {{ lon }}]).addTo(map); + var marker = L.marker([{{ lat }}, {{ lon }}]).addTo(map); {% if geojson %} - - L.geoJSON({{ geojson | safe }}).addTo(map); - - {% endif %} - + L.geoJSON({{ geojson | safe }}).addTo(map); +{% endif %} - {% endblock %} {% block style %} {% endblock %} {% block content %}
-
-

Geocode coordinates to Commons Category

+
-

-home -| -visit endpoint -| view in OSM -{% if result.commons_cat %} -| Commons category -{% endif %} +

+ {%- if result.commons_cat %}{{ result.commons_cat.title }} + {%- elif result.wikidata %}{{ result.wikidata }} + {%- else %}No result + {%- endif %} +

+

{{ "%.5f"|format(lat) }}, {{ "%.5f"|format(lon) }}

-{% if result.wikidata %} - | {{ result.wikidata }} -{% endif %} - -| # - -

- -

API returns

-
{{ result | tojson(indent=2) }}
- -{% if result.wikidata %} -

Wikidata item: {{ result.wikidata }}

-{% endif %} - -{% if result.commons_cat %} -

Commons category: {{result.commons_cat.title }}

-{% endif %} - -{% if elements %} -

{{ elements.count() }} surrounding elements found

-{% else %} -

No elements found

-{% endif %} - -{% if query %} -

Searching for Wikimedia Commons categories using this SPARQL query. Wikidata Query service

-
-{{ query | highlight_sparql | safe }} -
-{% endif %} - -{% for element in elements %} -{% set tags = element.tags %} -
- {% for key, value in element.tags.items() if not (key == "way_area" or "name:" in key or key.startswith("source")) %} -
{{ key }}: {{ value }}
- {% endfor %} +
+
+
+ {% if result.wikidata %} +
+ Wikidata + {{ result.wikidata }} +
+ {% endif %} + {% if result.commons_cat %} +
+ Commons category + {{ result.commons_cat.title }} +
+ {% endif %} + {% if result.admin_level %} +
+ Admin level + {{ result.admin_level }} +
+ {% endif %} +
+
-{% endfor %} + +
+ Home + API endpoint + OpenStreetMap + {% if result.commons_cat %} + Commons category + {% endif %} + {% if result.wikidata %} + {{ result.wikidata }} + {% endif %} + # + {% if needs_commons %} + Try without Commons Category + {% else %} + Require Commons Category + {% endif %} +
+ +
+ API response +
{{ result | tojson(indent=2) }}
+
+ + {% if elements %} + {% set elem_count = elements.count() %} +
{{ elem_count }} surrounding OSM element{{ 's' if elem_count != 1 }}
+ {% for element in elements %} + {% set tags = element.tags %} +
+
+
+ + {{ tags.name or ('relation' if element.osm_id < 0 else 'way') ~ ' ' ~ element.osm_id|abs }} + +
+ {% if tags.wikidata %} + {{ tags.wikidata }} + {% endif %} + OSM +
+
+
+ {% for key, value in tags.items() if not (key == "way_area" or "name:" in key or key.startswith("source") or key == "name" or key == "wikidata") %} +
+ {{ key }}{{ value }} +
+ {% endfor %} +
+
+
+ {% endfor %} + {% else %} +

No surrounding elements found.

+ {% endif %} + + {% if query %} +
+ SPARQL geosearch query + run on Wikidata ↗ + +
{{ query | highlight_sparql | safe }}
+
+ {% endif %}
{% endblock %} diff --git a/templates/index.html b/templates/index.html index 544e7ef..c65c911 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,39 +2,158 @@ {% block title %}Geocode to Commons{% endblock %} -{% block content %} -
-

Geocode coordinates to Commons Category

+{% block style %} + +{% endblock %} + +{% block content %} +
+ +
+
+

Geocode to Commons Category

+

Convert latitude/longitude to a Wikidata item and Wikimedia Commons category.

+ +
+ + + +
+ +

Interactive map ↗ — click any location to geocode it.

+
+
+ +
+
+
How it works
+
+ {% for step in [ + ("Find OSM polygons", "Query PostGIS for all polygons that contain the given point."), + ("Sort by admin level", "Order polygons by admin_level descending — most specific first."), + ("Check wikidata tag", "For each polygon, look for a wikidata tag and resolve the Commons category."), + ("Check ref:gss tag", "If no wikidata tag, try ref:gss and look up the matching Wikidata item via WDQS."), + ("Match by name", "Try finding a nearby Wikidata item with the same name using WDQS."), + ("Return result", "Return the most specific Wikidata QID and Commons category found.") + ] %} +
+ {{ loop.index }} +
+
{{ step[0] }}
+
{{ step[1] | safe }}
+
+
+ {% endfor %} +
+
+ +
+
API
+
+
+

GET /?lat=<lat>&lon=<lon>

+

Returns JSON with the Wikidata item and Commons category for the given coordinates.

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterValuesDescription
latdecimal degreesLatitude
londecimal degreesLongitude
needs_commons + true (default)
+ false +
+ When true, only returns a result if a Wikimedia Commons + category can be found. When false, returns the best + matching Wikidata item even if it has no Commons category — matched by OSM relation + ID (P402) via the Wikidata Query Service. 
+
+
+ +
Example response
+
{
+  "wikidata": "Q184618",
+  "commons_cat": {
+    "title": "County Tipperary",
+    "url": "https://commons.wikimedia.org/wiki/Category:County_Tipperary"
+  },
+  "admin_level": 6,
+  "coords": { "lat": 52.41037, "lon": -7.84651 }
+}
+
+
+ +
Examples
+ + + + + + + + + + {% for lat, lon, name in samples %} + + + + + + + {% endfor %} + +
PlaceCoordinatesLinks
{{ name }}{{ lat }}, {{ lon }}detailAPI
+ +

+ Source code: https://git.4angle.com/edward/geocode +

+ +
{% endblock %} diff --git a/templates/sparql/geosearch.sparql b/templates/sparql/geosearch.sparql index c3bc88c..962b5f8 100644 --- a/templates/sparql/geosearch.sparql +++ b/templates/sparql/geosearch.sparql @@ -1,4 +1,4 @@ -SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink WHERE { +SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink ?osmRelation ?osmWay WHERE { { SELECT DISTINCT ?item ?location ?distance ?isa WHERE { VALUES ?want { wd:Q486972 wd:Q56061 } @@ -20,5 +20,7 @@ SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSi OPTIONAL { ?item wdt:P373 ?commonsCat. } OPTIONAL { ?commonsSiteLink schema:about ?item; schema:isPartOf . } + OPTIONAL { ?item wdt:P402 ?osmRelation. } + OPTIONAL { ?item wdt:P10689 ?osmWay. } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } ORDER BY (?distance) diff --git a/templates/sparql/lookup_by_osm_relation.sparql b/templates/sparql/lookup_by_osm_relation.sparql new file mode 100644 index 0000000..11f04ca --- /dev/null +++ b/templates/sparql/lookup_by_osm_relation.sparql @@ -0,0 +1,10 @@ +SELECT ?item ?itemLabel ?commonsCat ?commonsSiteLink ?osmRelation WHERE { + VALUES ?osmRelation { + {% for id in relation_ids %}"{{ id }}" {% endfor %} + } + ?item wdt:P402 ?osmRelation . + OPTIONAL { ?item wdt:P373 ?commonsCat. } + OPTIONAL { ?commonsSiteLink schema:about ?item ; + schema:isPartOf . } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } +}