Add integrated Flickr search with photo selection

Instead of showing a link to search Flickr, the app now performs the
search directly and displays results as a grid of thumbnails. Each photo
shows the photographer's name and license (with Wikipedia-compatible
licenses highlighted in green). Clicking a photo takes the user to the
message composition page with the selected image displayed alongside.

Includes validation to ensure image URLs are from Flickr's static servers.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-04 13:41:37 +00:00
parent 6f43cee91b
commit 08d1f9b6c4
2 changed files with 245 additions and 24 deletions

201
main.py
View file

@ -2,23 +2,208 @@
"""Find photos on flickr for Wikipedia articles and contact the photographer.""" """Find photos on flickr for Wikipedia articles and contact the photographer."""
import collections import collections
import dataclasses
import inspect import inspect
import json import json
import sys import sys
import traceback import traceback
import typing import typing
from urllib.parse import unquote from urllib.parse import quote, unquote
import flask import flask
import requests import requests
import werkzeug import werkzeug
from werkzeug.debug.tbtools import DebugTraceback from werkzeug.debug.tbtools import DebugTraceback
app = flask.Flask(__name__) app = flask.Flask(__name__)
app.debug = False app.debug = False
enwiki = "en.wikipedia.org/wiki/" enwiki = "en.wikipedia.org/wiki/"
# Browser-like headers sent with every request to Flickr.
#
# "br" is deliberately NOT advertised in Accept-Encoding: requests/urllib3 can
# only decode Brotli when an optional Brotli package is installed. Without it,
# a Brotli-compressed reply would reach the HTML parsing code still compressed.
BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Cache-Control": "max-age=0",
}
# Human-readable names for Flickr's numeric license codes.
# The tuple is ordered by code, so each name's position is its license code.
FLICKR_LICENSES = dict(
    enumerate(
        (
            "All Rights Reserved",  # 0
            "CC BY-NC-SA",  # 1
            "CC BY-NC",  # 2
            "CC BY-NC-ND",  # 3
            "CC BY",  # 4
            "CC BY-SA",  # 5
            "CC BY-ND",  # 6
            "No known copyright",  # 7
            "US Government",  # 8
            "CC0",  # 9
            "Public Domain",  # 10
        )
    )
)
@dataclasses.dataclass
class FlickrPhoto:
    """Represents a Flickr photo from search results."""

    id: str  # photo ID, used as the last segment of the photo page URL
    title: str
    path_alias: str  # owner segment of the photo page URL; may be empty
    owner_nsid: str  # used in the photo page URL when path_alias is empty
    username: str
    realname: str
    license: int  # numeric license code, looked up in FLICKR_LICENSES
    thumb_url: str
    medium_url: str

    @property
    def flickr_url(self) -> str:
        """URL to the photo page on Flickr.

        Uses the owner's path alias when there is one and falls back to the
        owner's NSID otherwise, so a missing alias never produces a broken
        ``/photos//<id>`` or ``/photos/None/<id>`` link.
        """
        owner = self.path_alias or self.owner_nsid
        return f"https://flickr.com/photos/{owner}/{self.id}"

    @property
    def license_name(self) -> str:
        """Human-readable license name.

        Codes missing from FLICKR_LICENSES are rendered as ``License <code>``.
        """
        return FLICKR_LICENSES.get(self.license, f"License {self.license}")
def is_valid_flickr_image_url(url: str) -> bool:
    """Check if URL is a valid Flickr static image URL.

    A URL is accepted only when it uses ``https`` and its host (everything
    between the scheme and the first ``/``) is exactly one of:

    * ``live.staticflickr.com``
    * ``c1.staticflickr.com`` or ``c2.staticflickr.com``
    * ``farm<digits>.staticflickr.com``

    The host is checked as a whole rather than by searching the URL for
    ``.staticflickr.com/``, because that substring can also appear in the
    path or query of an unrelated host such as
    ``https://farm.evil.example/.staticflickr.com/x.jpg``.
    """
    scheme = "https://"
    domain_suffix = ".staticflickr.com"

    if not url.startswith(scheme):
        return False

    # A "/" must terminate the host, as in every prefix previously accepted.
    host, slash, _rest = url[len(scheme):].partition("/")
    if not slash or not host.endswith(domain_suffix):
        return False

    label = host[: -len(domain_suffix)]
    if label in ("live", "c1", "c2"):
        return True

    # farm1, farm2, ...: "farm" followed by ASCII digits and nothing else.
    # This also rejects "user@host" and "host?query" tricks.
    farm_number = label[len("farm"):]
    return (
        label.startswith("farm")
        and farm_number.isascii()
        and farm_number.isdigit()
    )
def search_flickr(search_term: str) -> list[FlickrPhoto]:
    """Search Flickr for photos matching the search term.

    The term is wrapped in double quotes and percent-encoded, Flickr's search
    page is fetched with BROWSER_HEADERS, and the returned HTML is handed to
    parse_flickr_search_results().

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status (via ``raise_for_status``).
    """
    encoded_term = quote(f'"{search_term}"')
    url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"
    # Without a timeout requests waits indefinitely, so a stalled Flickr
    # connection would hang the caller.
    response = requests.get(url, headers=BROWSER_HEADERS, timeout=10)
    response.raise_for_status()
    return parse_flickr_search_results(response.text)
def _find_model_export(html: str) -> typing.Any:
    """Return the decoded JSON value that follows ``modelExport:``, or None."""
    marker = "modelExport:"
    pos = html.find(marker)
    if pos == -1:
        return None
    pos += len(marker)
    # raw_decode() does not skip leading whitespace. The bounds check covers
    # a page that ends right after the marker.
    while pos < len(html) and html[pos].isspace():
        pos += 1
    try:
        # Decodes exactly one JSON value and ignores the script text after it.
        value, _end = json.JSONDecoder().raw_decode(html, pos)
    except json.JSONDecodeError:
        return None
    return value


def _dig(node: typing.Any, *keys: str) -> typing.Any:
    """Follow *keys* through nested dicts; None if any level is not a dict."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _size_url(sizes: typing.Any, *labels: str) -> str:
    """Return the URL of the first size in *labels* that has one, else ""."""
    for label in labels:
        url = _dig(sizes, label, "data", "url")
        if isinstance(url, str) and url:
            # Protocol-relative URLs ("//host/...") get an explicit scheme.
            return "https:" + url if url.startswith("//") else url
    return ""


def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
    """Parse Flickr search results HTML and extract photo data.

    The page embeds its data as a JSON object after the text
    ``modelExport:``. Photos are read from
    ``main -> search-photos-lite-models[0] -> data -> photos -> data -> _data``.

    Returns an empty list when the marker is missing, the JSON cannot be
    decoded, or the expected structure is absent. Entries that are null, are
    not objects, or have no photo ID are skipped. Missing or null text fields
    become "" and a missing or non-numeric license becomes 0.
    """
    models = _dig(_find_model_export(html), "main", "search-photos-lite-models")
    if not isinstance(models, list) or not models:
        return []

    entries = _dig(models[0], "data", "photos", "data", "_data")
    if not isinstance(entries, list):
        return []

    photos: list[FlickrPhoto] = []
    for entry in entries:
        info = _dig(entry, "data")
        # A photo without an ID cannot be linked to, so drop it.
        if not isinstance(info, dict) or not info.get("id"):
            continue

        sizes = _dig(info, "sizes", "data")
        try:
            license_code = int(info.get("license") or 0)
        except (TypeError, ValueError):
            license_code = 0

        photos.append(
            FlickrPhoto(
                id=str(info["id"]),
                title=info.get("title") or "",
                path_alias=info.get("pathAlias") or "",
                owner_nsid=info.get("ownerNsid") or "",
                username=info.get("username") or "",
                realname=info.get("realname") or "",
                license=license_code,
                # "q" is preferred for the thumbnail and "n" for the medium
                # image; "sq" and "m" are the respective fallbacks.
                thumb_url=_size_url(sizes, "q", "sq"),
                medium_url=_size_url(sizes, "n", "m"),
            )
        )

    return photos
@app.errorhandler(werkzeug.exceptions.InternalServerError) @app.errorhandler(werkzeug.exceptions.InternalServerError)
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]: def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
@ -80,10 +265,13 @@ def start() -> str:
flickr_url = flask.request.args.get("flickr") flickr_url = flask.request.args.get("flickr")
if not flickr_url: if not flickr_url:
# Search Flickr for photos
photos = search_flickr(name)
return flask.render_template( return flask.render_template(
"combined.html", "combined.html",
name=name, name=name,
enwp=enwp, enwp=enwp,
photos=photos,
) )
if "/in/" in flickr_url: if "/in/" in flickr_url:
@ -100,6 +288,11 @@ def start() -> str:
assert nsid assert nsid
print(nsid) print(nsid)
# Get optional image URL for display, validate it's from Flickr
img_url = flask.request.args.get("img")
if img_url and not is_valid_flickr_image_url(img_url):
img_url = None
msg = flask.render_template( msg = flask.render_template(
"message.jinja", "message.jinja",
flickr_url=flickr_url, flickr_url=flickr_url,
@ -122,6 +315,7 @@ def start() -> str:
subject=subject, subject=subject,
lines=lines, lines=lines,
nsid=nsid, nsid=nsid,
img_url=img_url,
) )
@ -134,8 +328,9 @@ def get_params(line_iter: collections.abc.Iterable[str]) -> str:
def flickr_usrename_to_nsid(username: str) -> str: def flickr_usrename_to_nsid(username: str) -> str:
"""Get NSID from flickr username.""" """Get NSID from flickr username."""
url = f"https://www.flickr.com/people/{username}/" url = f"https://www.flickr.com/people/{username}/"
r = requests.get(url) r = requests.get(url, headers=BROWSER_HEADERS)
params = json.loads(get_params(r.text.splitlines())) params_str = get_params(r.text.splitlines())
params, _ = json.JSONDecoder().raw_decode(params_str)
return typing.cast(str, params["nsid"]) return typing.cast(str, params["nsid"])

View file

@ -15,36 +15,62 @@
<input type="submit" value="Submit"> <input type="submit" value="Submit">
</form> </form>
{% if name %} {% if name and photos is defined and photos %}
<p>Wikipedia article: {{ name }}</p> <p>Wikipedia article: {{ name }}</p>
<p><a href="https://flickr.com/search/?view_all=1&safe_search=3&text={{ '"' + name + '"' | urlencode }}" target="_blank">Search flickr</a></p> <p>Select a photo to compose a message:</p>
<form action="{{ url_for(request.endpoint) }}"> <div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
<input type="hidden" name="enwp" value="{{ enwp }}"></input> {% for photo in photos %}
<div class="mb-3"> <div class="col">
<label for="flickr" class="form-label">Flickr URL:</label> <div class="card h-100">
<input type="text" class="form-control" id="flickr" name="flickr" value="{{ flickr_url }}" required> <a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
<img src="{{ photo.thumb_url }}" alt="{{ photo.title }}" class="card-img-top" style="aspect-ratio: 1; object-fit: cover;">
</a>
<div class="card-body p-2">
<p class="card-text small mb-1 text-truncate" title="{{ photo.realname or photo.username }}">{{ photo.realname or photo.username }}</p>
<span class="badge {{ 'bg-success' if photo.license in [4, 5, 7, 8, 9, 10] else 'bg-secondary' }}">{{ photo.license_name }}</span>
</div>
</div>
</div>
{% endfor %}
</div> </div>
<input type="submit" value="Submit"> <p class="text-muted small">
</form> <a href="https://flickr.com/search/?view_all=1&text={{ '"' + name + '"' | urlencode }}" target="_blank">View full search on Flickr</a>
</p>
{% elif name and not flickr_url %}
<p>Wikipedia article: {{ name }}</p>
<p class="text-warning">No photos found. Try a different search term.</p>
{# Parentheses are required: a filter binds tighter than "+", so without them only the closing quote would be URL-encoded and the article name would not. #}
<p><a href="https://flickr.com/search/?view_all=1&text={{ ('"' + name + '"') | urlencode }}" target="_blank">Search on Flickr directly</a></p>
{% endif %} {% endif %}
{% if flickr_url %} {% if flickr_url %}
<p><a href="https://www.flickr.com/mail/write/?to={{nsid}}">send message</a> <div class="row">
<div><strong>Subject:</strong> {{ subject }} <button class="btn btn-primary" id="copy-subject">copy</button> {% if img_url %}
<div class="col-md-4 mb-3">
<a href="{{ flickr_url }}" target="_blank">
<img src="{{ img_url }}" alt="Selected photo" class="img-fluid rounded">
</a>
<p class="mt-2 small"><a href="{{ flickr_url }}" target="_blank">View on Flickr</a></p>
</div>
<div class="col-md-8">
{% else %}
<div class="col-12">
{% endif %}
<p><a href="https://www.flickr.com/mail/write/?to={{nsid}}" class="btn btn-primary">Send message on Flickr</a></p>
<div class="mb-2"><strong>Subject:</strong> {{ subject }} <button class="btn btn-sm btn-outline-secondary" id="copy-subject">copy</button></div>
<div> <div>
<h3>message <h5>Message <button class="btn btn-sm btn-outline-secondary" id="copy-message">copy</button></h5>
<button class="btn btn-primary" id="copy-message">copy</button>
</h3>
{% for p in lines %} {% for p in lines %}
<p>{{ p }}</p> <p>{{ p }}</p>
{% endfor %} {% endfor %}
</div> </div>
</div>
</div>
{% endif %} {% endif %}
<div class="mt-3">Written by <a href="/">Edward Betts</a>. Source code and bug reports: <a href="https://git.4angle.com/edward/flickr-mail">https://git.4angle.com/edward/flickr-mail</div> <div class="mt-3">Written by <a href="/">Edward Betts</a>. Source code and bug reports: <a href="https://git.4angle.com/edward/flickr-mail">https://git.4angle.com/edward/flickr-mail</div>