From 08d1f9b6c4bc9eabc5fc676a28c6ad8505dbb899 Mon Sep 17 00:00:00 2001 From: Edward Betts Date: Wed, 4 Feb 2026 13:41:37 +0000 Subject: [PATCH] Add integrated Flickr search with photo selection Instead of showing a link to search Flickr, the app now performs the search directly and displays results as a grid of thumbnails. Each photo shows the photographer's name and license (with Wikipedia-compatible licenses highlighted in green). Clicking a photo takes the user to the message composition page with the selected image displayed alongside. Includes validation to ensure image URLs are from Flickr's static servers. Co-Authored-By: Claude Opus 4.5 --- main.py | 201 +++++++++++++++++++++++++++++++++++++++- templates/combined.html | 68 +++++++++----- 2 files changed, 245 insertions(+), 24 deletions(-) diff --git a/main.py b/main.py index a7dfe35..cd781aa 100755 --- a/main.py +++ b/main.py @@ -2,23 +2,208 @@ """Find photos on flickr for Wikipedia articles and contact the photographer.""" import collections +import dataclasses import inspect import json import sys import traceback import typing -from urllib.parse import unquote +from urllib.parse import quote, unquote import flask import requests import werkzeug from werkzeug.debug.tbtools import DebugTraceback + app = flask.Flask(__name__) app.debug = False enwiki = "en.wikipedia.org/wiki/" +# Browser-like headers for Flickr requests +BROWSER_HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": "none", + "Sec-Fetch-User": "?1", + "Cache-Control": "max-age=0", +} + + +# Flickr license codes to human-readable names +FLICKR_LICENSES = { + 0: "All Rights Reserved", + 1: "CC BY-NC-SA", + 2: "CC BY-NC", + 3: "CC BY-NC-ND", + 4: "CC BY", + 5: "CC BY-SA", + 6: "CC BY-ND", + 7: "No known copyright", + 8: "US Government", + 9: "CC0", + 10: "Public Domain", +} + + +@dataclasses.dataclass +class FlickrPhoto: + """Represents a Flickr photo from search results.""" + + id: str + title: str + path_alias: str + owner_nsid: str + username: str + realname: str + license: int + thumb_url: str + medium_url: str + + @property + def flickr_url(self) -> str: + """URL to the photo page on Flickr.""" + return f"https://flickr.com/photos/{self.path_alias}/{self.id}" + + @property + def license_name(self) -> str: + """Human-readable license name.""" + return FLICKR_LICENSES.get(self.license, f"License {self.license}") + + +def is_valid_flickr_image_url(url: str) -> bool: + """Check if URL is a valid Flickr static image URL.""" + valid_prefixes = ( + "https://live.staticflickr.com/", + "https://farm", # farm1.staticflickr.com, farm2.staticflickr.com, etc. + "https://c1.staticflickr.com/", + "https://c2.staticflickr.com/", + ) + if not url.startswith(valid_prefixes): + return False + # For farm URLs, verify the domain pattern + if url.startswith("https://farm"): + if ".staticflickr.com/" not in url: + return False + return True + + +def search_flickr(search_term: str) -> list[FlickrPhoto]: + """Search Flickr for photos matching the search term.""" + encoded_term = quote(f'"{search_term}"') + url = f"https://flickr.com/search/?view_all=1&text={encoded_term}" + + response = requests.get(url, headers=BROWSER_HEADERS) + response.raise_for_status() + + return parse_flickr_search_results(response.text) + + +def parse_flickr_search_results(html: str) -> list[FlickrPhoto]: + """Parse Flickr search results HTML and extract photo data.""" + # Find the modelExport JSON embedded in the page + start = html.find("modelExport:") + if start == -1: + return [] + + start += len("modelExport:") + while html[start].isspace(): + start += 1 + + # Parse the JSON by counting braces + brace_count = 0 + i = start + in_string = False + escape_next = False + + while i < len(html): + char = html[i] + + if escape_next: + escape_next = False + i += 1 + continue + + if char == "\\" and in_string: + escape_next = True + i += 1 + continue + + if char == '"' and not escape_next: + in_string = not in_string + elif not in_string: + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + json_str = html[start : i + 1] + break + i += 1 + else: + return [] + + try: + data = json.loads(json_str) + except json.JSONDecodeError: + return [] + + # Extract photos from the parsed data + photos: list[FlickrPhoto] = [] + + main = data.get("main", {}) + photos_models = main.get("search-photos-lite-models", []) + + if not photos_models: + return [] + + photos_data = ( + photos_models[0] + .get("data", {}) + .get("photos", {}) + .get("data", {}) + .get("_data", []) + ) + + for photo_entry in photos_data: + pd = photo_entry.get("data", {}) + + sizes = pd.get("sizes", {}).get("data", {}) + thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {}) + medium_data = sizes.get("n", sizes.get("m", {})).get("data", {}) + + thumb_url = thumb_data.get("url", "") + medium_url = medium_data.get("url", "") + + # Ensure URLs have protocol + if thumb_url.startswith("//"): + thumb_url = "https:" + thumb_url + if medium_url.startswith("//"): + medium_url = "https:" + medium_url + + photos.append( + FlickrPhoto( + id=str(pd.get("id", "")), + title=pd.get("title", ""), + path_alias=pd.get("pathAlias", ""), + owner_nsid=pd.get("ownerNsid", ""), + username=pd.get("username", ""), + realname=pd.get("realname", ""), + license=pd.get("license", 0), + thumb_url=thumb_url, + medium_url=medium_url, + ) + ) + + return photos + @app.errorhandler(werkzeug.exceptions.InternalServerError) def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]: @@ -80,10 +265,13 @@ def start() -> str: flickr_url = flask.request.args.get("flickr") if not flickr_url: + # Search Flickr for photos + photos = search_flickr(name) return flask.render_template( "combined.html", name=name, enwp=enwp, + photos=photos, ) if "/in/" in flickr_url: @@ -100,6 +288,11 @@ def start() -> str: assert nsid print(nsid) + # Get optional image URL for display, validate it's from Flickr + img_url = flask.request.args.get("img") + if img_url and not is_valid_flickr_image_url(img_url): + img_url = None + msg = flask.render_template( "message.jinja", flickr_url=flickr_url, @@ -122,6 +315,7 @@ def start() -> str: subject=subject, lines=lines, nsid=nsid, + img_url=img_url, ) @@ -134,8 +328,9 @@ def get_params(line_iter: collections.abc.Iterable[str]) -> str: def flickr_usrename_to_nsid(username: str) -> str: """Get NSID from flickr username.""" url = f"https://www.flickr.com/people/{username}/" - r = requests.get(url) - params = json.loads(get_params(r.text.splitlines())) + r = requests.get(url, headers=BROWSER_HEADERS) + params_str = get_params(r.text.splitlines()) + params, _ = json.JSONDecoder().raw_decode(params_str) return typing.cast(str, params["nsid"]) diff --git a/templates/combined.html b/templates/combined.html index 7609539..d4c6613 100644 --- a/templates/combined.html +++ b/templates/combined.html @@ -15,36 +15,62 @@ - {% if name %} + {% if name and photos is defined and photos %}

Wikipedia article: {{ name }}

-

Search flickr

+

Select a photo to compose a message:

-
- -
- - +
+ {% for photo in photos %} +
+
+ + {{ photo.title }} + +
+

{{ photo.realname or photo.username }}

+ {{ photo.license_name }} +
+
+ {% endfor %} +
- - +

+ View full search on Flickr +

+ + {% elif name and not flickr_url %} + +

Wikipedia article: {{ name }}

+

No photos found. Try a different search term.

+

Search on Flickr directly

{% endif %} {% if flickr_url %} -

send message -

Subject: {{ subject }} -
-

message - -

- {% for p in lines %} -

{{ p }}

- {% endfor %} -
- - +
+ {% if img_url %} + +
+ {% else %} +
+ {% endif %} +

Send message on Flickr

+
Subject: {{ subject }}
+
+
Message
+ {% for p in lines %} +

{{ p }}

+ {% endfor %} +
+
+
{% endif %}
Written by Edward Betts. Source code and bug reports: https://git.4angle.com/edward/flickr-mail