From e3cefcfcbdba3df317b17ba20696d24a1ee90eac Mon Sep 17 00:00:00 2001
From: Edward Betts
Date: Sat, 8 May 2021 10:39:06 +0200
Subject: [PATCH] Show images in popups

---
Review notes (ignored by git; not part of the commit message):
 * matcher/utils.py: file_missing_or_empty() now negates the existence
   check, and load_from_cache() closes the cache file after reading it.
 * web_view.py: get_commons_image() answers 404 instead of raising when
   the lookup yields no thumbnail for the requested name.
 * static/js/map.js: the filename is passed through encodeURIComponent().
 * Text inside angle brackets (route variables, popup HTML) and the
   indentation of context lines were reconstructed; compare them with
   the target tree before applying (git apply --ignore-whitespace may
   be needed).
 * The blob ids on the index lines predate the edits listed above.

 matcher/commons.py |  54 ++++++++++++++
 matcher/utils.py   | 173 +++++++++++++++++++++++++++++++++++++++++++++
 static/js/map.js   |   3 +
 web_view.py        |  15 +++-
 4 files changed, 244 insertions(+), 1 deletion(-)
 create mode 100644 matcher/commons.py
 create mode 100644 matcher/utils.py

diff --git a/matcher/commons.py b/matcher/commons.py
new file mode 100644
index 0000000..b65658c
--- /dev/null
+++ b/matcher/commons.py
@@ -0,0 +1,54 @@
+import requests
+import urllib.parse
+from . import utils
+
+commons_start = "http://commons.wikimedia.org/wiki/Special:FilePath/"
+commons_url = "https://www.wikidata.org/w/api.php"
+page_size = 50
+
+
+def commons_uri_to_filename(uri):
+    """Return the URL-decoded filename part of a Special:FilePath URI."""
+    return urllib.parse.unquote(utils.drop_start(uri, commons_start))
+
+
+def api_call(params):
+    """GET commons_url with params plus the JSON format defaults."""
+    call_params = {
+        "format": "json",
+        "formatversion": 2,
+        **params,
+    }
+
+    return requests.get(commons_url, params=call_params, timeout=5)
+
+
+def image_detail(filenames, thumbheight=None, thumbwidth=None):
+    """Look up imageinfo for each filename, page_size titles per request.
+
+    Returns a dict keyed by the title the API reports (without "File:");
+    the value is the first imageinfo entry, or None when there is none.
+    """
+    params = {
+        "action": "query",
+        "prop": "imageinfo",
+        "iiprop": "url",
+    }
+    if thumbheight is not None:
+        params["iiurlheight"] = thumbheight
+    if thumbwidth is not None:
+        params["iiurlwidth"] = thumbwidth
+
+    images = {}
+
+    for cur in utils.chunk(filenames, page_size):
+        call_params = params.copy()
+        call_params["titles"] = "|".join(f"File:{f}" for f in cur)
+
+        r = api_call(call_params)
+
+        for image in r.json()["query"]["pages"]:
+            filename = utils.drop_start(image["title"], "File:")
+            images[filename] = image["imageinfo"][0] if "imageinfo" in image else None
+
+    return images
diff --git a/matcher/utils.py b/matcher/utils.py
new file mode 100644
index 0000000..b9d50ec
--- /dev/null
+++ b/matcher/utils.py
@@ -0,0 +1,173 @@
+from flask import current_app, request
+from itertools import islice
+import os.path
+import json
+import math
+import user_agents
+import re
+import pattern.en
+
+metres_per_mile = 1609.344
+feet_per_metre = 3.28084
+feet_per_mile = 5280
+
+
+def chunk(it, size):
+    it = iter(it)
+    return iter(lambda: tuple(islice(it, size)), ())
+
+
+def flatten(l):
+    return [item for sublist in l for item in sublist]
+
+
+def drop_start(s, start):
+    assert s.startswith(start)
+    return s[len(start) :]
+
+
+def remove_start(s, start):
+    return s[len(start) :] if s.startswith(start) else s
+
+
+def normalize_url(url):
+    for start in "http://", "https://", "www.":
+        url = remove_start(url, start)
+    return url.rstrip("/")
+
+
+def contains_digit(s):
+    return any(c.isdigit() for c in s)
+
+
+def cache_dir():
+    return current_app.config["CACHE_DIR"]
+
+
+def cache_filename(filename):
+    return os.path.join(cache_dir(), filename)
+
+
+def load_from_cache(filename):
+    """Load JSON from the named file in the cache directory."""
+    with open(cache_filename(filename)) as f:
+        return json.load(f)
+
+
+def get_radius(default=1000):
+    arg_radius = request.args.get("radius")
+    return int(arg_radius) if arg_radius and arg_radius.isdigit() else default
+
+
+def get_int_arg(name):
+    if name in request.args and request.args[name].isdigit():
+        return int(request.args[name])
+
+
+def calc_chunk_size(area_in_sq_km, size=22):
+    side = math.sqrt(area_in_sq_km)
+    return max(1, math.ceil(side / size))
+
+
+def file_missing_or_empty(filename):
+    """Return True if filename does not exist or has a size of zero."""
+    return not os.path.exists(filename) or os.stat(filename).st_size == 0
+
+
+def is_bot():
+    """ Is the current request from a web robot? """
+    ua = request.headers.get("User-Agent")
+    return ua and user_agents.parse(ua).is_bot
+
+
+def log_location():
+    return current_app.config["LOG_DIR"]
+
+
+def good_location():
+    return os.path.join(log_location(), "complete")
+
+
+def capfirst(value):
+    """ Uppercase first letter of string, leave rest as is. """
+    return value[0].upper() + value[1:] if value else value
+
+
+def any_upper(value):
+    return any(c.isupper() for c in value)
+
+
+def find_log_file(place):
+    start = f"{place.place_id}_"
+    for f in os.scandir(good_location()):
+        if f.name.startswith(start):
+            return f.path
+
+
+def get_free_space(config):
+    s = os.statvfs(config["FREE_SPACE_PATH"])
+    return s.f_bsize * s.f_bavail
+
+
+def display_distance(units, dist):
+    if units in ("miles_and_feet", "miles_and_yards"):
+        total_feet = dist * feet_per_metre
+        miles = total_feet / feet_per_mile
+
+        if miles > 0.5:
+            return f"{miles:,.2f} miles"
+        else:
+            return {
+                "miles_and_feet": f"{total_feet:,.0f} feet",
+                "miles_and_yards": f"{total_feet / 3:,.0f} yards",
+            }[units]
+
+    if units == "miles_and_metres":
+        miles = dist / metres_per_mile
+        return f"{miles:,.2f} miles" if miles > 0.5 else f"{dist:,.0f} metres"
+
+    if units == "km_and_metres":
+        units = "km" if dist > 500 else "metres"
+    if units == "metres":
+        return f"{dist:,.0f} m"
+    if units == "km":
+        return f"{dist / 1000:,.2f} km"
+
+
+re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I)
+re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I)
+re_number = re.compile(r"^(?:No\.?|Number)? ?(\d+)\b")
+
+
+def is_in_range(address_range, address):
+    m_number = re_number.match(address)
+    if not m_number:
+        return False
+
+    m_range = re_range.search(address_range)
+    if m_range:
+        start, end = int(m_range.group(1)), int(m_range.group(2))
+        if re_range.search(address):
+            return False
+        return start <= int(m_number.group(1)) <= end
+
+    m_list = re_number_list.search(address_range)
+    if m_list:
+        numbers = {n.strip() for n in m_list.group(1).split(",")} | {m_list.group(2)}
+        if re_number_list.search(address):
+            return False
+        return m_number.group(1) in numbers
+
+    return False
+
+
+def pluralize_label(label):
+    text = label["value"]
+    if label["language"] != "en":
+        return text
+
+    # pattern.en.pluralize has the plural of 'mine' as 'ours'
+    if text == "mine":
+        return "mines"
+
+    return pattern.en.pluralize(text)
diff --git a/static/js/map.js b/static/js/map.js
index 154da69..4747cf1 100644
--- a/static/js/map.js
+++ b/static/js/map.js
@@ -297,6 +297,9 @@ function load_wikidata_items() {
             popup += `<br>${isa_label} (${isa_qid})`;
           }
         }
+        if (item.image_list && item.image_list.length) {
+          popup += `<br><img src="/commons/${encodeURIComponent(item.image_list[0])}">`;
+        }
         popup += '</p>';
         marker.bindPopup(popup);
         marker.addTo(group);
diff --git a/web_view.py b/web_view.py
index fb6fd8f..a925333 100755
--- a/web_view.py
+++ b/web_view.py
@@ -3,7 +3,7 @@
 from flask import Flask, render_template, request, jsonify, redirect, url_for
 from sqlalchemy import func
 from sqlalchemy.orm import selectinload
-from matcher import nominatim, model, database
+from matcher import nominatim, model, database, commons
 from collections import Counter
 from time import time
 import GeoIP
@@ -162,11 +162,13 @@ def get_markers(all_items):
         if "en" not in item.labels:
             continue
         locations = [list(i.get_lat_lon()) for i in item.locations]
+        image_filenames = item.get_claim("P18")
         item = {
             "qid": item.qid,
             "label": item.label(),
             "description": item.description(),
             "markers": locations,
+            "image_list": image_filenames,
             "isa_list": [v["id"] for v in item.get_claim("P31")],
         }
         items.append(item)
@@ -200,6 +202,17 @@ def identifier_index():
     return render_template("identifier_index.html", property_map=property_map)
 
 
+@app.route("/commons/<filename>")
+def get_commons_image(filename):
+    """Redirect to a thumbnail (at most 250x250) of the named file."""
+    detail = commons.image_detail([filename], thumbheight=250, thumbwidth=250)
+    # The API may report a different title or return no imageinfo at all.
+    image = detail.get(filename)
+    if not image or "thumburl" not in image:
+        return "image not found", 404
+    return redirect(image["thumburl"])
+
+
 @app.route("/identifier/<pid>")
 def identifier_page(pid):
     per_page = 10