diff --git a/geocode/wikidata.py b/geocode/wikidata.py index ec87711..14ea217 100644 --- a/geocode/wikidata.py +++ b/geocode/wikidata.py @@ -104,6 +104,7 @@ def qid_to_commons_category(qid: str, check_p910: bool = True) -> str | None: Row = dict[str, dict[str, typing.Any]] +Hit = dict[str, str | int | None] @backoff.on_exception(backoff.expo, QueryError, max_tries=5) @@ -139,10 +140,9 @@ def geosearch_query(lat: float, lon: float) -> str: return query -def geosearch(lat: float, lon: float) -> Row | None: - """Geosearch.""" +def filter_geosearch_row(rows: list[Row]) -> Row | None: + """Filter geosearch rows for the best match that has a Commons category.""" default_max_dist = 1 - rows = wdqs(geosearch_query(lat, lon)) max_dist = { "Q188509": 1, # suburb "Q3957": 2, # town @@ -173,6 +173,33 @@ def geosearch(lat: float, lon: float) -> Row | None: return None +def geosearch(lat: float, lon: float) -> Row | None: + """Geosearch.""" + rows = wdqs(geosearch_query(lat, lon)) + return filter_geosearch_row(rows) + + +def hit_from_row(row: Row) -> Hit: + """Build a Hit from a geosearch row (commons cat is optional).""" + qid = wd_to_qid(row["item"]) + commons_cat: str | None = None + if "commonsCat" in row: + commons_cat = row["commonsCat"]["value"] + elif "commonsSiteLink" in row: + site_link = row["commonsSiteLink"]["value"] + commons_cat = unescape_title(site_link[len(commons_cat_start):]) + return {"wikidata": qid, "commons_cat": commons_cat} + + +def lookup_wikidata_by_osm_relation_ids(relation_ids: list[int]) -> list[Row]: + """Look up Wikidata items that reference the given OSM relation IDs via P402.""" + query = render_template( + "sparql/lookup_by_osm_relation.sparql", + relation_ids=[str(r) for r in relation_ids], + ) + return wdqs(query) + + def lookup_scottish_parish_in_wikidata(code: str) -> list[Row]: """Lookup scottish parish in Wikidata.""" return wdqs(render_template("sparql/scottish_parish.sparql", code=code)) @@ -196,9 +223,6 @@ def unescape_title(t: str) 
-> str: return urllib.parse.unquote(t.replace("_", " ")) -Hit = dict[str, str | int | None] - - def commons_from_rows(rows: list[Row]) -> Hit | None: """Commons from rows.""" for row in rows: diff --git a/lookup.py b/lookup.py index 70d618b..8344c98 100755 --- a/lookup.py +++ b/lookup.py @@ -106,7 +106,9 @@ def add_missing_commons_cat(rows: list[StrDict]) -> None: row["commonsCat"] = {"type": "literal", "value": commons_cat} -def lat_lon_to_wikidata(lat: float, lon: float) -> dict[str, typing.Any]: +def lat_lon_to_wikidata( + lat: float, lon: float, needs_commons: bool = True +) -> dict[str, typing.Any]: """Lookup lat/lon and find most appropriate Wikidata item.""" scotland_code = scotland.get_scotland_code(lat, lon) @@ -148,32 +150,54 @@ def lat_lon_to_wikidata(lat: float, lon: float) -> dict[str, typing.Any]: if not nearby_result.get("missing"): return {"elements": elements, "result": nearby_result} - # Point is in a broad area (e.g. country) — try nearest specific polygon - nearby = model.Polygon.nearest(lat, lon) - if nearby and nearby.tags: - tags: typing.Mapping[str, str] = nearby.tags - al = get_admin_level(tags) - hit = ( - hit_from_wikidata_tag(tags) - or hit_from_ref_gss_tag(tags) - or hit_from_name(tags, lat, lon) - ) - if hit: - hit["admin_level"] = al - hit["element"] = nearby.osm_id - hit["geojson"] = typing.cast(str, nearby.geojson_str) - nearby_result = wikidata.build_dict(hit, lat, lon) - if not nearby_result.get("missing"): - return {"elements": elements, "result": nearby_result} + if not needs_commons: + # Direct lookup: find Wikidata items whose P402 (OSM relation ID) matches + # one of the OSM polygons that contain this point. 
+ osm_id_to_element: dict[int, model.Polygon] = {} + relation_ids_for_lookup: list[int] = [] + for e in elements: + if e.osm_id < 0: + rel_id = abs(e.osm_id) + relation_ids_for_lookup.append(rel_id) + osm_id_to_element[e.osm_id] = e - row = wikidata.geosearch(lat, lon) + if relation_ids_for_lookup: + lookup_rows = wikidata.lookup_wikidata_by_osm_relation_ids( + relation_ids_for_lookup + ) + rel_to_hit: dict[int, wikidata.Hit] = {} + for row in lookup_rows: + rel_id = int(row["osmRelation"]["value"]) + if rel_id not in rel_to_hit: + rel_to_hit[rel_id] = wikidata.hit_from_row(row) + + # Iterate elements in specificity order (smallest area first, from coords_within). + # Only use hits without a Commons category — if there's a Commons cat, + # the geosearch path will find it (or something more specific). + for e in elements: + if e.osm_id >= 0: + continue + rel_id = abs(e.osm_id) + if rel_id not in rel_to_hit: + continue + hit = rel_to_hit[rel_id] + if hit.get("commons_cat"): + continue + if e.tags: + hit["admin_level"] = get_admin_level(e.tags) + hit["element"] = e.osm_id + hit["geojson"] = typing.cast(str, e.geojson_str) + result = wikidata.build_dict(hit, lat, lon) + return {"elements": elements, "result": result} + + query = wikidata.geosearch_query(lat, lon) + geo_rows = wikidata.wdqs(query) + row = wikidata.filter_geosearch_row(geo_rows) if row: hit = wikidata.commons_from_rows([row]) elements = [] result = wikidata.build_dict(hit, lat, lon) - query = wikidata.geosearch_query(lat, lon) - return {"elements": elements, "result": result, "query": query} @@ -343,7 +367,8 @@ def index() -> str | Response: if error_msg := validate_coordinates(lat, lon): return jsonify(coords={"lat": lat, "lon": lon}, error=error_msg) - result = lat_lon_to_wikidata(lat, lon)["result"] + needs_commons = request.args.get("needs_commons", "true").lower() != "false" + result = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons)["result"] result.pop("element", None) 
result.pop("geojson", None) if logging_enabled: @@ -401,7 +426,7 @@ def highlight_sparql(query: str) -> str: app.jinja_env.filters["highlight_sparql"] = highlight_sparql -def build_detail_page(lat: float, lon: float) -> str: +def build_detail_page(lat: float, lon: float, needs_commons: bool = True) -> str: """Run lookup and build detail page.""" if lat < -90 or lat > 90 or lon < -180 or lon > 180: error = ( @@ -411,7 +436,7 @@ def build_detail_page(lat: float, lon: float) -> str: return render_template("query_error.html", lat=lat, lon=lon, error=error) try: - reply = lat_lon_to_wikidata(lat, lon) + reply = lat_lon_to_wikidata(lat, lon, needs_commons=needs_commons) except wikidata.QueryError as e: query, r = e.args return render_template("query_error.html", lat=lat, lon=lon, query=query, r=r) @@ -429,6 +454,7 @@ def build_detail_page(lat: float, lon: float) -> str: element_id=element, geojson=geojson, css=css, + needs_commons=needs_commons, **reply, ) @@ -453,7 +479,8 @@ def detail_page() -> Response | str: "coordinate_error.html", lat_str=lat_str, lon_str=lon_str, error=error ) - return build_detail_page(lat, lon) + needs_commons = request.args.get("needs_commons", "true").lower() != "false" + return build_detail_page(lat, lon, needs_commons=needs_commons) @app.route("/reports") diff --git a/templates/detail.html b/templates/detail.html index 0b33d62..970b678 100644 --- a/templates/detail.html +++ b/templates/detail.html @@ -1,6 +1,11 @@ {% extends "base.html" %} -{% block title %}Geocode to Commons{% endblock %} +{% block title %} + {%- if result.commons_cat %}{{ result.commons_cat.title }} + {%- elif result.wikidata %}{{ result.wikidata }} + {%- else %}{{ lat }}, {{ lon }} + {%- endif %} — Geocode +{% endblock %} {% block link %} var map = L.map('map').setView([{{ lat }}, {{ lon }}], 13); -L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', { + L.tileLayer('https://tile.openstreetmap.org/{z}/{x}/{y}.png', { maxZoom: 19, attribution: '© OpenStreetMap' 
-}).addTo(map); + }).addTo(map); - var marker = L.marker([{{ lat }} , {{ lon }}]).addTo(map); + var marker = L.marker([{{ lat }}, {{ lon }}]).addTo(map); {% if geojson %} - - L.geoJSON({{ geojson | safe }}).addTo(map); - - {% endif %} - + L.geoJSON({{ geojson | safe }}).addTo(map); +{% endif %} - {% endblock %} {% block style %} {% endblock %} {% block content %}
-
-

Geocode coordinates to Commons Category

+
-

-home -| -visit endpoint -| view in OSM -{% if result.commons_cat %} -| Commons category -{% endif %} +

+ {%- if result.commons_cat %}{{ result.commons_cat.title }} + {%- elif result.wikidata %}{{ result.wikidata }} + {%- else %}No result + {%- endif %} +

+

{{ "%.5f"|format(lat) }}, {{ "%.5f"|format(lon) }}

-{% if result.wikidata %} - | {{ result.wikidata }} -{% endif %} - -| # - -

- -

API returns

-
{{ result | tojson(indent=2) }}
- -{% if result.wikidata %} -

Wikidata item: {{ result.wikidata }}

-{% endif %} - -{% if result.commons_cat %} -

Commons category: {{result.commons_cat.title }}

-{% endif %} - -{% if elements %} -

{{ elements.count() }} surrounding elements found

-{% else %} -

No elements found

-{% endif %} - -{% if query %} -

Searching for Wikimedia Commons categories using this SPARQL query. Wikidata Query service

-
-{{ query | highlight_sparql | safe }} -
-{% endif %} - -{% for element in elements %} -{% set tags = element.tags %} -
- {% for key, value in element.tags.items() if not (key == "way_area" or "name:" in key or key.startswith("source")) %} -
{{ key }}: {{ value }}
- {% endfor %} +
+
+
+ {% if result.wikidata %} +
+ Wikidata + {{ result.wikidata }} +
+ {% endif %} + {% if result.commons_cat %} +
+ Commons category + {{ result.commons_cat.title }} +
+ {% endif %} + {% if result.admin_level %} +
+ Admin level + {{ result.admin_level }} +
+ {% endif %} +
+
-{% endfor %} + +
+ Home + API endpoint + OpenStreetMap + {% if result.commons_cat %} + Commons category + {% endif %} + {% if result.wikidata %} + {{ result.wikidata }} + {% endif %} + # + {% if needs_commons %} + Try without Commons Category + {% else %} + Require Commons Category + {% endif %} +
+ +
+ API response +
{{ result | tojson(indent=2) }}
+
+ + {% if elements %} + {% set elem_count = elements.count() %} +
{{ elem_count }} surrounding OSM element{{ 's' if elem_count != 1 }}
+ {% for element in elements %} + {% set tags = element.tags %} +
+
+
+ + {{ tags.name or ('relation' if element.osm_id < 0 else 'way') ~ ' ' ~ element.osm_id|abs }} + +
+ {% if tags.wikidata %} + {{ tags.wikidata }} + {% endif %} + OSM +
+
+
+ {% for key, value in tags.items() if not (key == "way_area" or "name:" in key or key.startswith("source") or key == "name" or key == "wikidata") %} +
+ {{ key }}{{ value }} +
+ {% endfor %} +
+
+
+ {% endfor %} + {% else %} +

No surrounding elements found.

+ {% endif %} + + {% if query %} +
+ SPARQL geosearch query + run on Wikidata ↗ + +
{{ query | highlight_sparql | safe }}
+
+ {% endif %}
{% endblock %} diff --git a/templates/index.html b/templates/index.html index 544e7ef..c65c911 100644 --- a/templates/index.html +++ b/templates/index.html @@ -2,39 +2,158 @@ {% block title %}Geocode to Commons{% endblock %} -{% block content %} -
-

Geocode coordinates to Commons Category

+{% block style %} + +{% endblock %} + +{% block content %} +
+ +
+
+

Geocode to Commons Category

+

Convert latitude/longitude to a Wikidata item and Wikimedia Commons category.

+ +
+ + + +
+ +

Interactive map ↗ — click any location to geocode it.

+
+
+ +
+
+
How it works
+
+ {% for step in [ + ("Find OSM polygons", "Query PostGIS for all polygons that contain the given point."), + ("Sort by admin level", "Order polygons by admin_level descending — most specific first."), + ("Check wikidata tag", "For each polygon, look for a wikidata tag and resolve the Commons category."), + ("Check ref:gss tag", "If no wikidata tag, try ref:gss and look up the matching Wikidata item via WDQS."), + ("Match by name", "Try finding a nearby Wikidata item with the same name using WDQS."), + ("Return result", "Return the most specific Wikidata QID and Commons category found.") + ] %} +
+ {{ loop.index }} +
+
{{ step[0] }}
+
{{ step[1] | safe }}
+
+
+ {% endfor %} +
+
+ +
+
API
+
+
+

GET /?lat=<lat>&lon=<lon>

+

Returns JSON with the Wikidata item and Commons category for the given coordinates.

+ + + + + + + + + + + + + + + + + + + + + + + + + + +
ParameterValuesDescription
latdecimal degreesLatitude
londecimal degreesLongitude
needs_commons + true (default)
+ false +
+ When true, returns a result only if a Wikimedia Commons + category can be found. When false, returns the best + matching Wikidata item even if it has no Commons category — matched by OSM relation + ID (Wikidata property P402) via the Wikidata Query Service. + 
+
+
+ +
Example response
+
{
+  "wikidata": "Q184618",
+  "commons_cat": {
+    "title": "County Tipperary",
+    "url": "https://commons.wikimedia.org/wiki/Category:County_Tipperary"
+  },
+  "admin_level": 6,
+  "coords": { "lat": 52.41037, "lon": -7.84651 }
+}
+
+
+ +
Examples
+ + + + + + + + + + {% for lat, lon, name in samples %} + + + + + + + {% endfor %} + +
PlaceCoordinatesLinks
{{ name }}{{ lat }}, {{ lon }}detailAPI
+ +

+ Source code: https://git.4angle.com/edward/geocode +

+ +
{% endblock %} diff --git a/templates/sparql/geosearch.sparql b/templates/sparql/geosearch.sparql index c3bc88c..962b5f8 100644 --- a/templates/sparql/geosearch.sparql +++ b/templates/sparql/geosearch.sparql @@ -1,4 +1,4 @@ -SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink WHERE { +SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSiteLink ?osmRelation ?osmWay WHERE { { SELECT DISTINCT ?item ?location ?distance ?isa WHERE { VALUES ?want { wd:Q486972 wd:Q56061 } @@ -20,5 +20,7 @@ SELECT DISTINCT ?item ?distance ?itemLabel ?isa ?isaLabel ?commonsCat ?commonsSi OPTIONAL { ?item wdt:P373 ?commonsCat. } OPTIONAL { ?commonsSiteLink schema:about ?item; schema:isPartOf . } + OPTIONAL { ?item wdt:P402 ?osmRelation. } + OPTIONAL { ?item wdt:P10689 ?osmWay. } SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } } ORDER BY (?distance) diff --git a/templates/sparql/lookup_by_osm_relation.sparql b/templates/sparql/lookup_by_osm_relation.sparql new file mode 100644 index 0000000..11f04ca --- /dev/null +++ b/templates/sparql/lookup_by_osm_relation.sparql @@ -0,0 +1,10 @@ +SELECT ?item ?itemLabel ?commonsCat ?commonsSiteLink ?osmRelation WHERE { + VALUES ?osmRelation { + {% for id in relation_ids %}"{{ id }}" {% endfor %} + } + ?item wdt:P402 ?osmRelation . + OPTIONAL { ?item wdt:P373 ?commonsCat. } + OPTIONAL { ?commonsSiteLink schema:about ?item ; + schema:isPartOf . } + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } +}