Add integrated Flickr search with photo selection
Instead of showing a link to search Flickr, the app now performs the search directly and displays the results as a grid of thumbnails. Each photo shows the photographer's name and license (with Wikipedia-compatible licenses highlighted in green). Clicking a photo takes the user to the message composition page with the selected image displayed alongside. Includes validation to ensure image URLs are from Flickr's static servers.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
6f43cee91b
commit
08d1f9b6c4
2 changed files with 245 additions and 24 deletions
201
main.py
201
main.py
|
|
@ -2,23 +2,208 @@
|
||||||
"""Find photos on flickr for Wikipedia articles and contact the photographer."""
|
"""Find photos on flickr for Wikipedia articles and contact the photographer."""
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
import dataclasses
|
||||||
import inspect
|
import inspect
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
import typing
|
import typing
|
||||||
from urllib.parse import unquote
|
from urllib.parse import quote, unquote
|
||||||
|
|
||||||
import flask
|
import flask
|
||||||
import requests
|
import requests
|
||||||
import werkzeug
|
import werkzeug
|
||||||
from werkzeug.debug.tbtools import DebugTraceback
|
from werkzeug.debug.tbtools import DebugTraceback
|
||||||
|
|
||||||
|
|
||||||
app = flask.Flask(__name__)
|
app = flask.Flask(__name__)
|
||||||
app.debug = False
|
app.debug = False
|
||||||
|
|
||||||
enwiki = "en.wikipedia.org/wiki/"
|
enwiki = "en.wikipedia.org/wiki/"
|
||||||
|
|
||||||
|
# Browser-like headers sent with every request this module makes to Flickr.
# The values imitate a desktop Chrome page navigation.
BROWSER_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
    # "br" is deliberately not advertised: requests only decodes Brotli
    # responses when an optional Brotli package is installed, so offering it
    # unconditionally can leave response.text as undecodable compressed bytes.
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Cache-Control": "max-age=0",
}
|
||||||
|
|
||||||
|
|
||||||
|
# Human-readable names for Flickr's numeric license codes.
# A name's position in the tuple is the license code it belongs to, so the
# order of the entries must not change.
FLICKR_LICENSES = dict(
    enumerate(
        (
            "All Rights Reserved",  # 0
            "CC BY-NC-SA",  # 1
            "CC BY-NC",  # 2
            "CC BY-NC-ND",  # 3
            "CC BY",  # 4
            "CC BY-SA",  # 5
            "CC BY-ND",  # 6
            "No known copyright",  # 7
            "US Government",  # 8
            "CC0",  # 9
            "Public Domain",  # 10
        )
    )
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
class FlickrPhoto:
    """Represents a Flickr photo from search results."""

    # Photo identifier, used as the last path segment of the photo page URL.
    id: str
    title: str
    # Owner's path alias; may be empty, in which case flickr_url falls back
    # to owner_nsid.
    path_alias: str
    owner_nsid: str
    username: str
    realname: str
    # Numeric license code; see FLICKR_LICENSES for the display names.
    license: int
    thumb_url: str
    medium_url: str

    @property
    def flickr_url(self) -> str:
        """URL to the photo page on Flickr.

        The owner segment is the path alias when there is one. Not every
        owner has an alias, so an empty (or None) alias falls back to the
        owner's NSID, which Flickr also accepts in photo page URLs. Without
        the fallback the link would read ``/photos//<id>``.
        """
        owner = self.path_alias or self.owner_nsid
        return f"https://flickr.com/photos/{owner}/{self.id}"

    @property
    def license_name(self) -> str:
        """Human-readable license name.

        Codes missing from FLICKR_LICENSES are rendered as ``License <code>``.
        """
        return FLICKR_LICENSES.get(self.license, f"License {self.license}")
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_flickr_image_url(url: str) -> bool:
    """Check if URL is a valid Flickr static image URL.

    Accepts only ``https`` URLs whose host is exactly one of Flickr's static
    image hosts: ``live``, ``c1``, ``c2`` or ``farm<N>`` under
    ``staticflickr.com``.

    The host is matched as a whole, anchored at the start of the string and
    terminated by the first ``/``. A looser prefix/substring test would accept
    attacker-controlled hosts such as
    ``https://farm.evil.example/x.staticflickr.com/``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    return (
        re.match(r"https://(?:live|c[12]|farm\d+)\.staticflickr\.com/", url)
        is not None
    )
|
||||||
|
|
||||||
|
|
||||||
|
def search_flickr(search_term: str, *, timeout: float = 10.0) -> list[FlickrPhoto]:
    """Search Flickr for photos matching the search term.

    The term is wrapped in double quotes before being URL-encoded, and the
    resulting search page is fetched with BROWSER_HEADERS and handed to
    parse_flickr_search_results.

    Args:
        search_term: Text to search for.
        timeout: Seconds to wait for Flickr before giving up. Without a
            timeout a stalled connection would block the calling web request
            indefinitely.

    Returns:
        The photos parsed from the search page (possibly empty).

    Raises:
        requests.HTTPError: Flickr answered with an error status.
        requests.Timeout: Flickr did not answer within ``timeout`` seconds.
    """
    encoded_term = quote(f'"{search_term}"')
    url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"

    response = requests.get(url, headers=BROWSER_HEADERS, timeout=timeout)
    response.raise_for_status()

    return parse_flickr_search_results(response.text)
|
||||||
|
|
||||||
|
|
||||||
|
def _dig(node: typing.Any, *keys: str) -> typing.Any:
    """Follow keys through nested dicts; None as soon as a level is not a dict."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _size_url(sizes: typing.Any, *labels: str) -> str:
    """Return the URL of the first listed size label that has one, else ""."""
    for label in labels:
        url = _dig(sizes, label, "data", "url")
        if isinstance(url, str) and url:
            # Protocol-relative URLs ("//host/path") are made absolute.
            return "https:" + url if url.startswith("//") else url
    return ""


def _text(value: typing.Any) -> str:
    """Turn a JSON scalar into a string, mapping null to ""."""
    return "" if value is None else str(value)


def _license_code(value: typing.Any) -> int:
    """Coerce a license value to int, using 0 when missing or malformed."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return 0


def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
    """Parse Flickr search results HTML and extract photo data.

    The page embeds its data as a JSON value following the text
    ``modelExport:``. The photos are read from
    ``main -> search-photos-lite-models[0] -> data -> photos -> data -> _data``.

    The parser is deliberately forgiving. A missing marker, undecodable JSON,
    or a missing/null/wrongly typed level in that path gives an empty list
    rather than an exception. Entries in ``_data`` that are not objects with
    an object-valued ``data`` member are skipped.

    Args:
        html: Full text of a Flickr search results page.

    Returns:
        One FlickrPhoto per usable entry, in page order.
    """
    marker = "modelExport:"
    marker_pos = html.find(marker)
    if marker_pos == -1:
        return []

    # Skip whitespace between the marker and the value. The bounds check
    # matters when the page ends right after the marker.
    start = marker_pos + len(marker)
    while start < len(html) and html[start].isspace():
        start += 1

    # raw_decode parses one JSON value starting at `start` and ignores
    # whatever follows it, so no manual brace matching is needed.
    try:
        data, _ = json.JSONDecoder().raw_decode(html, start)
    except json.JSONDecodeError:
        return []

    models = _dig(data, "main", "search-photos-lite-models")
    if not isinstance(models, list) or not models:
        return []

    entries = _dig(models[0], "data", "photos", "data", "_data")
    if not isinstance(entries, list):
        return []

    photos: list[FlickrPhoto] = []
    for entry in entries:
        info = _dig(entry, "data")
        if not isinstance(info, dict):
            # Null or malformed entry: nothing to build a photo from.
            continue

        sizes = _dig(info, "sizes", "data")
        photos.append(
            FlickrPhoto(
                id=_text(info.get("id")),
                title=_text(info.get("title")),
                path_alias=_text(info.get("pathAlias")),
                owner_nsid=_text(info.get("ownerNsid")),
                username=_text(info.get("username")),
                realname=_text(info.get("realname")),
                license=_license_code(info.get("license")),
                # Thumbnail prefers size "q" over "sq"; medium prefers "n"
                # over "m".
                thumb_url=_size_url(sizes, "q", "sq"),
                medium_url=_size_url(sizes, "n", "m"),
            )
        )

    return photos
|
||||||
|
|
||||||
|
|
||||||
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
||||||
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
|
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
|
||||||
|
|
@ -80,10 +265,13 @@ def start() -> str:
|
||||||
|
|
||||||
flickr_url = flask.request.args.get("flickr")
|
flickr_url = flask.request.args.get("flickr")
|
||||||
if not flickr_url:
|
if not flickr_url:
|
||||||
|
# Search Flickr for photos
|
||||||
|
photos = search_flickr(name)
|
||||||
return flask.render_template(
|
return flask.render_template(
|
||||||
"combined.html",
|
"combined.html",
|
||||||
name=name,
|
name=name,
|
||||||
enwp=enwp,
|
enwp=enwp,
|
||||||
|
photos=photos,
|
||||||
)
|
)
|
||||||
|
|
||||||
if "/in/" in flickr_url:
|
if "/in/" in flickr_url:
|
||||||
|
|
@ -100,6 +288,11 @@ def start() -> str:
|
||||||
assert nsid
|
assert nsid
|
||||||
print(nsid)
|
print(nsid)
|
||||||
|
|
||||||
|
# Get optional image URL for display, validate it's from Flickr
|
||||||
|
img_url = flask.request.args.get("img")
|
||||||
|
if img_url and not is_valid_flickr_image_url(img_url):
|
||||||
|
img_url = None
|
||||||
|
|
||||||
msg = flask.render_template(
|
msg = flask.render_template(
|
||||||
"message.jinja",
|
"message.jinja",
|
||||||
flickr_url=flickr_url,
|
flickr_url=flickr_url,
|
||||||
|
|
@ -122,6 +315,7 @@ def start() -> str:
|
||||||
subject=subject,
|
subject=subject,
|
||||||
lines=lines,
|
lines=lines,
|
||||||
nsid=nsid,
|
nsid=nsid,
|
||||||
|
img_url=img_url,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -134,8 +328,9 @@ def get_params(line_iter: collections.abc.Iterable[str]) -> str:
|
||||||
def flickr_usrename_to_nsid(username: str) -> str:
    """Get NSID from flickr username.

    Fetches the user's ``/people/<username>/`` page, extracts the parameter
    text with get_params and returns its ``nsid`` member.

    Raises:
        requests.Timeout: Flickr did not answer within 10 seconds.
        json.JSONDecodeError: The extracted text does not start with JSON.
        KeyError: The decoded parameters have no ``nsid`` member.
    """
    url = f"https://www.flickr.com/people/{username}/"
    # A timeout keeps a stalled Flickr connection from blocking the calling
    # web request indefinitely.
    r = requests.get(url, headers=BROWSER_HEADERS, timeout=10)
    params_str = get_params(r.text.splitlines())
    # raw_decode reads one JSON value and ignores any text that follows it.
    params, _ = json.JSONDecoder().raw_decode(params_str)
    return typing.cast(str, params["nsid"])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,36 +15,62 @@
|
||||||
<input type="submit" value="Submit">
|
<input type="submit" value="Submit">
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
{% if name %}
|
{% if name and photos is defined and photos %}
|
||||||
|
|
||||||
<p>Wikipedia article: {{ name }}</p>
|
<p>Wikipedia article: {{ name }}</p>
|
||||||
<p><a href="https://flickr.com/search/?view_all=1&safe_search=3&text={{ '"' + name + '"' | urlencode }}" target="_blank">Search flickr</a></p>
|
<p>Select a photo to compose a message:</p>
|
||||||
|
|
||||||
<form action="{{ url_for(request.endpoint) }}">
|
<div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
|
||||||
<input type="hidden" name="enwp" value="{{ enwp }}"></input>
|
{% for photo in photos %}
|
||||||
<div class="mb-3">
|
<div class="col">
|
||||||
<label for="flickr" class="form-label">Flickr URL:</label>
|
<div class="card h-100">
|
||||||
<input type="text" class="form-control" id="flickr" name="flickr" value="{{ flickr_url }}" required>
|
<a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
|
||||||
|
<img src="{{ photo.thumb_url }}" alt="{{ photo.title }}" class="card-img-top" style="aspect-ratio: 1; object-fit: cover;">
|
||||||
|
</a>
|
||||||
|
<div class="card-body p-2">
|
||||||
|
<p class="card-text small mb-1 text-truncate" title="{{ photo.realname or photo.username }}">{{ photo.realname or photo.username }}</p>
|
||||||
|
<span class="badge {{ 'bg-success' if photo.license in [4, 5, 7, 8, 9, 10] else 'bg-secondary' }}">{{ photo.license_name }}</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<input type="submit" value="Submit">
|
<p class="text-muted small">
|
||||||
</form>
|
<a href="https://flickr.com/search/?view_all=1&text={{ ('"' + name + '"') | urlencode }}" target="_blank">View full search on Flickr</a>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{% elif name and not flickr_url %}
|
||||||
|
|
||||||
|
<p>Wikipedia article: {{ name }}</p>
|
||||||
|
<p class="text-warning">No photos found. Try a different search term.</p>
|
||||||
|
<p><a href="https://flickr.com/search/?view_all=1&text={{ ('"' + name + '"') | urlencode }}" target="_blank">Search on Flickr directly</a></p>
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if flickr_url %}
|
{% if flickr_url %}
|
||||||
<p><a href="https://www.flickr.com/mail/write/?to={{nsid}}">send message</a>
|
<div class="row">
|
||||||
<div><strong>Subject:</strong> {{ subject }} <button class="btn btn-primary" id="copy-subject">copy</button>
|
{% if img_url %}
|
||||||
|
<div class="col-md-4 mb-3">
|
||||||
|
<a href="{{ flickr_url }}" target="_blank">
|
||||||
|
<img src="{{ img_url }}" alt="Selected photo" class="img-fluid rounded">
|
||||||
|
</a>
|
||||||
|
<p class="mt-2 small"><a href="{{ flickr_url }}" target="_blank">View on Flickr</a></p>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-8">
|
||||||
|
{% else %}
|
||||||
|
<div class="col-12">
|
||||||
|
{% endif %}
|
||||||
|
<p><a href="https://www.flickr.com/mail/write/?to={{nsid}}" class="btn btn-primary">Send message on Flickr</a></p>
|
||||||
|
<div class="mb-2"><strong>Subject:</strong> {{ subject }} <button class="btn btn-sm btn-outline-secondary" id="copy-subject">copy</button></div>
|
||||||
<div>
|
<div>
|
||||||
<h3>message
|
<h5>Message <button class="btn btn-sm btn-outline-secondary" id="copy-message">copy</button></h5>
|
||||||
<button class="btn btn-primary" id="copy-message">copy</button>
|
|
||||||
</h3>
|
|
||||||
{% for p in lines %}
|
{% for p in lines %}
|
||||||
<p>{{ p }}</p>
|
<p>{{ p }}</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
<div class="mt-3">Written by <a href="/">Edward Betts</a>. Source code and bug reports: <a href="https://git.4angle.com/edward/flickr-mail">https://git.4angle.com/edward/flickr-mail</div>
|
<div class="mt-3">Written by <a href="/">Edward Betts</a>. Source code and bug reports: <a href="https://git.4angle.com/edward/flickr-mail">https://git.4angle.com/edward/flickr-mail</div>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue