Add integrated Flickr search with photo selection
Instead of showing a link to search Flickr, the app now performs the search directly and displays results as a grid of thumbnails. Each photo shows the photographer's name and license (with Wikipedia-compatible licenses highlighted in green). Clicking a photo takes the user to the message composition page with the selected image displayed alongside. Includes validation to ensure image URLs are from Flickr's static servers. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
6f43cee91b
commit
08d1f9b6c4
2 changed files with 245 additions and 24 deletions
201
main.py
201
main.py
|
|
@ -2,23 +2,208 @@
|
|||
"""Find photos on flickr for Wikipedia articles and contact the photographer."""
|
||||
|
||||
import collections
|
||||
import dataclasses
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
import traceback
|
||||
import typing
|
||||
from urllib.parse import unquote
|
||||
from urllib.parse import quote, unquote
|
||||
|
||||
import flask
|
||||
import requests
|
||||
import werkzeug
|
||||
from werkzeug.debug.tbtools import DebugTraceback
|
||||
|
||||
|
||||
# Flask application object for this module.
app = flask.Flask(__name__)
# Flask debug mode is switched off explicitly.
app.debug = False

# English Wikipedia article URL prefix, written without a scheme.
enwiki = "en.wikipedia.org/wiki/"
|
||||
|
||||
# Browser-like headers for Flickr requests.
#
# "br" (Brotli) is deliberately not advertised in Accept-Encoding: requests /
# urllib3 only decode Brotli bodies when an optional brotli package is
# installed, and an undecoded body would make the HTML parsing of the search
# page silently find nothing.
BROWSER_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Cache-Control": "max-age=0",
}
|
||||
|
||||
|
||||
# Human-readable names for Flickr's numeric license codes.
# The position of each name in the tuple is its license code.
FLICKR_LICENSES = dict(
    enumerate(
        (
            "All Rights Reserved",  # 0
            "CC BY-NC-SA",  # 1
            "CC BY-NC",  # 2
            "CC BY-NC-ND",  # 3
            "CC BY",  # 4
            "CC BY-SA",  # 5
            "CC BY-ND",  # 6
            "No known copyright",  # 7
            "US Government",  # 8
            "CC0",  # 9
            "Public Domain",  # 10
        )
    )
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FlickrPhoto:
    """Represents a Flickr photo from search results.

    Attributes:
        id: Photo identifier, stored as a string.
        title: Photo title.
        path_alias: Owner's path alias; may be empty.
        owner_nsid: Owner's NSID.
        username: Owner's Flickr username.
        realname: Owner's real name, if given.
        license: Numeric Flickr license code (see FLICKR_LICENSES).
        thumb_url: URL of the thumbnail-sized image.
        medium_url: URL of the medium-sized image.
    """

    id: str
    title: str
    path_alias: str
    owner_nsid: str
    username: str
    realname: str
    license: int
    thumb_url: str
    medium_url: str

    @property
    def flickr_url(self) -> str:
        """URL to the photo page on Flickr.

        The owner segment is the path alias when one is set; otherwise the
        owner's NSID is used so the link never has an empty owner segment.
        """
        owner = self.path_alias or self.owner_nsid
        return f"https://flickr.com/photos/{owner}/{self.id}"

    @property
    def license_name(self) -> str:
        """Human-readable license name.

        Unknown codes are rendered as "License <code>".
        """
        return FLICKR_LICENSES.get(self.license, f"License {self.license}")
|
||||
|
||||
|
||||
def is_valid_flickr_image_url(url: str) -> bool:
    """Check if URL is a valid Flickr static image URL.

    The URL must use https and its host must be exactly one of
    live.staticflickr.com, c1.staticflickr.com, c2.staticflickr.com or
    farm<digits>.staticflickr.com. The host is taken as everything between
    "https://" and the first "/", so a Flickr-looking string that only
    appears in the path, or a host with userinfo or a port, is rejected.

    Args:
        url: Candidate image URL, typically taken from a request parameter.

    Returns:
        True if the URL points at an accepted Flickr static host.
    """
    scheme = "https://"
    if not url.startswith(scheme):
        return False

    host, slash, _path = url[len(scheme):].partition("/")
    if not slash:
        # No "/" after the host: there is no image path at all.
        return False

    fixed_hosts = (
        "live.staticflickr.com",
        "c1.staticflickr.com",
        "c2.staticflickr.com",
    )
    if host in fixed_hosts:
        return True

    # farm1.staticflickr.com, farm2.staticflickr.com, etc.
    suffix = ".staticflickr.com"
    if not (host.startswith("farm") and host.endswith(suffix)):
        return False
    farm_number = host[len("farm"):-len(suffix)]
    # isascii() keeps non-ASCII digit characters out of the host name.
    return farm_number.isascii() and farm_number.isdigit()
|
||||
|
||||
|
||||
def search_flickr(search_term: str, *, timeout: float = 10.0) -> list[FlickrPhoto]:
    """Search Flickr for photos matching the search term.

    The term is wrapped in double quotes before being sent, the search page
    is fetched with browser-like headers, and the photo data embedded in the
    returned HTML is parsed.

    Args:
        search_term: Text to search for.
        timeout: Seconds to wait for Flickr before giving up. Without a
            timeout requests would wait indefinitely on a stalled connection.

    Returns:
        The photos found on the results page; empty if none could be parsed.

    Raises:
        requests.HTTPError: If Flickr answers with an error status.
        requests.Timeout: If Flickr does not answer within ``timeout``.
    """
    encoded_term = quote(f'"{search_term}"')
    url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"

    response = requests.get(url, headers=BROWSER_HEADERS, timeout=timeout)
    response.raise_for_status()

    return parse_flickr_search_results(response.text)
|
||||
|
||||
|
||||
def _dig(node: typing.Any, *keys: str) -> typing.Any:
    """Follow keys through nested dicts; return None once the path breaks."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _size_url(sizes: typing.Any, *labels: str) -> str:
    """Return the first usable image URL among the given size labels."""
    for label in labels:
        url = _dig(sizes, label, "data", "url")
        if isinstance(url, str) and url:
            # Protocol-relative URLs ("//host/...") are pinned to https.
            return "https:" + url if url.startswith("//") else url
    return ""


def _text(value: typing.Any) -> str:
    """Return value when it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def _license_code(value: typing.Any) -> int:
    """Coerce a license value to int, defaulting to 0 when not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
    """Parse Flickr search results HTML and extract photo data.

    The page embeds its data as a JSON object following the text
    "modelExport:". That object is decoded, and the photo entries under
    main -> search-photos-lite-models[0] -> data -> photos -> data -> _data
    are turned into FlickrPhoto objects.

    Parsing is best-effort: a missing marker, undecodable JSON, or an
    unexpected structure yields an empty list, and entries that are not
    JSON objects are skipped.

    Args:
        html: Full HTML text of a Flickr search results page.

    Returns:
        One FlickrPhoto per usable entry, in page order.
    """
    marker = "modelExport:"
    pos = html.find(marker)
    if pos == -1:
        return []

    # raw_decode does not skip leading whitespace, so step over it here;
    # the bounds check covers a marker that is the last thing in the page.
    pos += len(marker)
    while pos < len(html) and html[pos].isspace():
        pos += 1

    # raw_decode parses one JSON value starting at pos and ignores whatever
    # follows it, so the end of the object does not have to be located first.
    try:
        data, _ = json.JSONDecoder().raw_decode(html, pos)
    except json.JSONDecodeError:
        return []

    models = _dig(data, "main", "search-photos-lite-models")
    if not isinstance(models, list) or not models:
        return []

    entries = _dig(models[0], "data", "photos", "data", "_data")
    if not isinstance(entries, list):
        return []

    photos: list[FlickrPhoto] = []
    for entry in entries:
        if not isinstance(entry, dict):
            # e.g. JSON null in the list; nothing to build a photo from.
            continue

        fields = entry.get("data")
        if not isinstance(fields, dict):
            fields = {}

        sizes = _dig(fields, "sizes", "data")
        raw_id = fields.get("id")

        photos.append(
            FlickrPhoto(
                id="" if raw_id is None else str(raw_id),
                title=_text(fields.get("title")),
                path_alias=_text(fields.get("pathAlias")),
                owner_nsid=_text(fields.get("ownerNsid")),
                username=_text(fields.get("username")),
                realname=_text(fields.get("realname")),
                license=_license_code(fields.get("license", 0)),
                # Prefer size "q" for the thumbnail and "n" for the medium
                # image, falling back to "sq" and "m" respectively.
                thumb_url=_size_url(sizes, "q", "sq"),
                medium_url=_size_url(sizes, "n", "m"),
            )
        )

    return photos
|
||||
|
||||
|
||||
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
||||
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
|
||||
|
|
@ -80,10 +265,13 @@ def start() -> str:
|
|||
|
||||
flickr_url = flask.request.args.get("flickr")
|
||||
if not flickr_url:
|
||||
# Search Flickr for photos
|
||||
photos = search_flickr(name)
|
||||
return flask.render_template(
|
||||
"combined.html",
|
||||
name=name,
|
||||
enwp=enwp,
|
||||
photos=photos,
|
||||
)
|
||||
|
||||
if "/in/" in flickr_url:
|
||||
|
|
@ -100,6 +288,11 @@ def start() -> str:
|
|||
assert nsid
|
||||
print(nsid)
|
||||
|
||||
# Get optional image URL for display, validate it's from Flickr
|
||||
img_url = flask.request.args.get("img")
|
||||
if img_url and not is_valid_flickr_image_url(img_url):
|
||||
img_url = None
|
||||
|
||||
msg = flask.render_template(
|
||||
"message.jinja",
|
||||
flickr_url=flickr_url,
|
||||
|
|
@ -122,6 +315,7 @@ def start() -> str:
|
|||
subject=subject,
|
||||
lines=lines,
|
||||
nsid=nsid,
|
||||
img_url=img_url,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -134,8 +328,9 @@ def get_params(line_iter: collections.abc.Iterable[str]) -> str:
|
|||
def flickr_usrename_to_nsid(username: str, *, timeout: float = 10.0) -> str:
    """Get NSID from flickr username.

    Fetches the user's Flickr "people" page with browser-like headers,
    extracts the parameter text from it with get_params(), decodes the
    leading JSON value of that text and returns its "nsid" entry.

    Args:
        username: Flickr username as used in the /people/ URL.
        timeout: Seconds to wait for Flickr before giving up. Without a
            timeout requests would wait indefinitely on a stalled connection.

    Returns:
        The NSID found in the page parameters.

    Raises:
        requests.Timeout: If Flickr does not answer within ``timeout``.
        json.JSONDecodeError: If the extracted text does not start with JSON.
        KeyError: If the decoded parameters have no "nsid" entry.
    """
    url = f"https://www.flickr.com/people/{username}/"
    r = requests.get(url, headers=BROWSER_HEADERS, timeout=timeout)
    params_str = get_params(r.text.splitlines())
    # raw_decode parses the leading JSON value and ignores anything after it.
    params, _ = json.JSONDecoder().raw_decode(params_str)
    return typing.cast(str, params["nsid"])
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue