2023-05-14 10:06:34 +01:00
|
|
|
"""Utility functions."""
|
|
|
|
|
2021-05-08 09:39:06 +01:00
|
|
|
import json
|
|
|
|
import math
|
2023-05-13 20:57:58 +01:00
|
|
|
import os.path
|
2021-05-08 09:39:06 +01:00
|
|
|
import re
|
2023-05-13 20:57:58 +01:00
|
|
|
import typing
|
2021-07-02 10:08:53 +01:00
|
|
|
from datetime import date
|
2023-05-13 20:57:58 +01:00
|
|
|
from itertools import islice
|
|
|
|
from typing import Any, cast
|
|
|
|
|
|
|
|
import flask
|
|
|
|
import user_agents
|
2021-11-13 16:27:46 +00:00
|
|
|
from num2words import num2words
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
metres_per_mile = 1609.344
|
|
|
|
feet_per_metre = 3.28084
|
|
|
|
feet_per_mile = 5280
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
T = typing.TypeVar("T")
|
|
|
|
|
2021-05-08 09:39:06 +01:00
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def chunk(it: typing.Iterable[T], size: int) -> typing.Iterator[tuple[T, ...]]:
|
|
|
|
"""Split an iterable into chunks of the given size."""
|
2021-05-08 09:39:06 +01:00
|
|
|
it = iter(it)
|
|
|
|
return iter(lambda: tuple(islice(it, size)), ())
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def flatten(top_list: list[list[T]]) -> list[T]:
|
|
|
|
"""Flatten a list."""
|
|
|
|
return [item for sub_list in top_list for item in sub_list]
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def drop_start(s: str, start: str) -> str:
|
|
|
|
"""Remove string prefix, otherwise throw an error."""
|
2021-05-08 09:39:06 +01:00
|
|
|
assert s.startswith(start)
|
|
|
|
return s[len(start) :]
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def remove_start(s: str, start: str) -> str:
|
|
|
|
"""Remove a string prefix, if present."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return s[len(start) :] if s.startswith(start) else s
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def normalize_url(url: str) -> str:
|
|
|
|
"""Standardize URLs to help in comparison."""
|
2021-05-08 09:39:06 +01:00
|
|
|
for start in "http://", "https://", "www.":
|
|
|
|
url = remove_start(url, start)
|
|
|
|
return url.rstrip("/")
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def contains_digit(s: str) -> bool:
|
|
|
|
"""Check if string contains a digit."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return any(c.isdigit() for c in s)
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def cache_dir() -> str:
|
|
|
|
"""Get cache dir location."""
|
|
|
|
d: str = flask.current_app.config["CACHE_DIR"]
|
|
|
|
return d
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def cache_filename(filename: str) -> str:
|
|
|
|
"""Get absolute path for cache file."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return os.path.join(cache_dir(), filename)
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def load_from_cache(filename: str) -> Any:
|
|
|
|
"""Load JSON data from cache."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return json.load(open(cache_filename(filename)))
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def get_radius(default: int = 1000) -> int | None:
|
|
|
|
"""Get radius request argument with default."""
|
|
|
|
arg_radius = flask.request.args.get("radius")
|
2021-05-08 09:39:06 +01:00
|
|
|
return int(arg_radius) if arg_radius and arg_radius.isdigit() else default
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def get_int_arg(name: str) -> int | None:
|
|
|
|
"""Get an request arg and convert to integer."""
|
|
|
|
v = flask.request.args.get(name)
|
|
|
|
return int(v) if v and v.isdigit() else None
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def calc_chunk_size(area_in_sq_km: float, size: int = 22) -> int:
|
|
|
|
"""Work out the size of a chunk."""
|
2021-05-08 09:39:06 +01:00
|
|
|
side = math.sqrt(area_in_sq_km)
|
|
|
|
return max(1, math.ceil(side / size))
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def file_missing_or_empty(filename: str) -> bool:
|
|
|
|
"""Check if a file is missing or empty."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return os.path.exists(filename) or os.stat(filename).st_size == 0
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def is_bot() -> bool:
|
|
|
|
"""Is the current request from a web robot."""
|
|
|
|
ua = flask.request.headers.get("User-Agent")
|
|
|
|
return bool(ua and user_agents.parse(ua).is_bot)
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def log_location() -> str:
|
|
|
|
"""Get log location from Flask config."""
|
|
|
|
return cast(str, flask.current_app.config["LOG_DIR"])
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def capfirst(value: str) -> str:
|
|
|
|
"""Uppercase first letter of string, leave rest as is."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return value[0].upper() + value[1:] if value else value
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def any_upper(value: str) -> bool:
|
|
|
|
"""Check if string contains any uppercase characters."""
|
2021-05-08 09:39:06 +01:00
|
|
|
return any(c.isupper() for c in value)
|
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def get_free_space(config: flask.config.Config) -> int:
|
|
|
|
"""Return the amount of available free space."""
|
2021-05-08 09:39:06 +01:00
|
|
|
s = os.statvfs(config["FREE_SPACE_PATH"])
|
|
|
|
return s.f_bsize * s.f_bavail
|
|
|
|
|
|
|
|
|
2023-05-14 10:06:34 +01:00
|
|
|
def metric_display_distance(units: str, dist: float) -> str | None:
|
|
|
|
"""Convert distance from metres to the specified metric units."""
|
|
|
|
if units == "km_and_metres":
|
|
|
|
units = "km" if dist > 500 else "metres"
|
|
|
|
if units == "metres":
|
|
|
|
return f"{dist:,.0f} m"
|
|
|
|
if units == "km":
|
|
|
|
return f"{dist / 1000:,.2f} km"
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def display_distance(units: str, dist: float) -> str | None:
|
|
|
|
"""Convert distance from metres to the specified units."""
|
2021-05-08 09:39:06 +01:00
|
|
|
if units in ("miles_and_feet", "miles_and_yards"):
|
|
|
|
total_feet = dist * feet_per_metre
|
|
|
|
miles = total_feet / feet_per_mile
|
|
|
|
|
|
|
|
if miles > 0.5:
|
|
|
|
return f"{miles:,.2f} miles"
|
|
|
|
else:
|
|
|
|
return {
|
|
|
|
"miles_and_feet": f"{total_feet:,.0f} feet",
|
|
|
|
"miles_and_yards": f"{total_feet / 3:,.0f} yards",
|
|
|
|
}[units]
|
|
|
|
|
|
|
|
if units == "miles_and_metres":
|
|
|
|
miles = dist / metres_per_mile
|
|
|
|
return f"{miles:,.2f} miles" if miles > 0.5 else f"{dist:,.0f} metres"
|
|
|
|
|
2023-05-14 10:06:34 +01:00
|
|
|
return metric_display_distance(units, dist)
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
|
2023-05-13 20:57:58 +01:00
|
|
|
def is_in_range(address_range: str, address: str) -> bool:
|
|
|
|
"""Check if an address is within a range."""
|
|
|
|
re_range = re.compile(r"\b(\d+) ?(?:to|-) ?(\d+)\b", re.I)
|
|
|
|
re_number_list = re.compile(r"\b([\d, ]+) (?:and|&) (\d+)\b", re.I)
|
|
|
|
re_number = re.compile(r"^(?:No\.?|Number)? ?(\d+)\b")
|
2021-05-08 09:39:06 +01:00
|
|
|
|
|
|
|
m_number = re_number.match(address)
|
|
|
|
if not m_number:
|
|
|
|
return False
|
|
|
|
|
|
|
|
m_range = re_range.search(address_range)
|
|
|
|
if m_range:
|
|
|
|
start, end = int(m_range.group(1)), int(m_range.group(2))
|
|
|
|
if re_range.search(address):
|
|
|
|
return False
|
|
|
|
return start <= int(m_number.group(1)) <= end
|
|
|
|
|
|
|
|
m_list = re_number_list.search(address_range)
|
|
|
|
if m_list:
|
|
|
|
numbers = {n.strip() for n in m_list.group(1).split(",")} | {m_list.group(2)}
|
|
|
|
if re_number_list.search(address):
|
|
|
|
return False
|
|
|
|
return m_number.group(1) in numbers
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
2023-05-14 10:06:34 +01:00
|
|
|
class WikibaseTime(typing.TypedDict):
|
|
|
|
"""Wikibase Time dict."""
|
|
|
|
|
|
|
|
precision: int
|
|
|
|
time: str
|
|
|
|
|
|
|
|
|
|
|
|
def format_wikibase_time(v: WikibaseTime) -> str | None:
|
|
|
|
"""Format wikibase time value into human readable string."""
|
2021-07-02 10:08:53 +01:00
|
|
|
t = v["time"]
|
|
|
|
|
2023-05-14 10:06:34 +01:00
|
|
|
match v["precision"]:
|
|
|
|
case 11: # year, month and day
|
|
|
|
return date.fromisoformat(t[1:11]).strftime("%-d %B %Y")
|
|
|
|
case 10: # year and month
|
|
|
|
return date.fromisoformat(t[1:8] + "-01").strftime("%B %Y")
|
|
|
|
case 9: # year
|
|
|
|
return t[1:5]
|
2023-11-02 15:18:57 +00:00
|
|
|
case 8: # decade
|
|
|
|
return f"{t[1:4]}0s"
|
2023-05-14 10:06:34 +01:00
|
|
|
case 7: # century
|
|
|
|
century = ((int(t[:5]) - 1) // 100) + 1
|
|
|
|
ordinal_num: str = num2words(abs(century), to="ordinal_num")
|
|
|
|
return f"{ordinal_num} {century}{' BC' if century < 0 else ''}"
|
|
|
|
case _: # not handled
|
|
|
|
return None
|