Add integrated Flickr search with photo selection

Instead of showing a link to search Flickr, the app now performs the search directly and displays results as a grid of thumbnails. Each photo shows the photographer's name and license (with Wikipedia-compatible licenses highlighted in green). Clicking a photo takes the user to the message composition page with the selected image displayed alongside. Includes validation to ensure image URLs are from Flickr's static servers.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-04 13:41:37 +00:00 · 2026-02-04 13:41:37 +00:00 · 08d1f9b6c4
commit 08d1f9b6c4
parent 6f43cee91b
2 changed files with 245 additions and 24 deletions
--- a/main.py
+++ b/main.py
@ -2,23 +2,208 @@
 """Find photos on flickr for Wikipedia articles and contact the photographer."""

 import collections
+import dataclasses
 import inspect
 import json
 import sys
 import traceback
 import typing
-from urllib.parse import unquote
+from urllib.parse import quote, unquote

 import flask
 import requests
 import werkzeug
 from werkzeug.debug.tbtools import DebugTraceback

+
+# Flask application object; debug mode is switched off explicitly.
 app = flask.Flask(__name__)
 app.debug = False

+# Host and path prefix of English Wikipedia article URLs (no scheme).
 enwiki = "en.wikipedia.org/wiki/"

+# Browser-like headers sent with requests to Flickr.
+#
+# "Accept-Encoding" deliberately does not advertise "br": requests/urllib3
+# can only decode Brotli when an optional brotli package is installed, and
+# an undecoded body would make the HTML/JSON parsing below silently find
+# nothing. gzip and deflate are always decoded.
+BROWSER_HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Accept-Encoding": "gzip, deflate",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Sec-Fetch-User": "?1",
+    "Cache-Control": "max-age=0",
+}
+
+
+# Human-readable names for Flickr's numeric license codes.
+# The position of each name in the tuple below is its license code.
+FLICKR_LICENSES = dict(
+    enumerate(
+        (
+            "All Rights Reserved",  # 0
+            "CC BY-NC-SA",  # 1
+            "CC BY-NC",  # 2
+            "CC BY-NC-ND",  # 3
+            "CC BY",  # 4
+            "CC BY-SA",  # 5
+            "CC BY-ND",  # 6
+            "No known copyright",  # 7
+            "US Government",  # 8
+            "CC0",  # 9
+            "Public Domain",  # 10
+        )
+    )
+)
+
+
+@dataclasses.dataclass
+class FlickrPhoto:
+    """A single photo taken from Flickr search results.
+
+    The derived properties build the photo-page URL and a display name
+    for the license code.
+    """
+
+    # Photo identifier, used as the last path segment of the photo page URL.
+    id: str
+    title: str
+    # Owner's URL alias; may be empty when the owner has not set one.
+    path_alias: str
+    # Owner's NSID; used in the photo page URL when there is no alias.
+    owner_nsid: str
+    username: str
+    realname: str
+    # Numeric license code, looked up in FLICKR_LICENSES.
+    license: int
+    thumb_url: str
+    medium_url: str
+
+    @property
+    def flickr_url(self) -> str:
+        """URL to the photo page on Flickr.
+
+        Falls back to the owner's NSID when no path alias is available,
+        so the URL never contains an empty (or "None") user segment.
+        """
+        user_segment = self.path_alias or self.owner_nsid
+        return f"https://flickr.com/photos/{user_segment}/{self.id}"
+
+    @property
+    def license_name(self) -> str:
+        """Human-readable license name, or "License <code>" if unknown."""
+        return FLICKR_LICENSES.get(self.license, f"License {self.license}")
+
+
+def is_valid_flickr_image_url(url: str) -> bool:
+    """Check if URL is a valid Flickr static image URL.
+
+    The URL must use https and its host must be exactly one of
+    live.staticflickr.com, c1.staticflickr.com, c2.staticflickr.com or
+    farm<digits>.staticflickr.com, with no userinfo or port, followed by
+    a path. The host is taken from the parsed URL rather than matched as
+    a substring, so look-alikes such as
+    "https://farm.evil.com/.staticflickr.com/x" are rejected.
+    """
+    from urllib.parse import urlsplit
+
+    if not url.startswith("https://"):
+        return False
+    try:
+        parts = urlsplit(url)
+    except ValueError:
+        # urlsplit rejects some malformed authorities (e.g. bad IPv6).
+        return False
+
+    host = parts.hostname or ""
+    # netloc differs from the bare hostname when userinfo or a port is present.
+    if parts.netloc.lower() != host:
+        return False
+    if not parts.path.startswith("/"):
+        return False
+
+    if host in (
+        "live.staticflickr.com",
+        "c1.staticflickr.com",
+        "c2.staticflickr.com",
+    ):
+        return True
+
+    # farm1.staticflickr.com, farm2.staticflickr.com, etc.
+    label, _, domain = host.partition(".")
+    farm_number = label[len("farm"):]
+    return (
+        domain == "staticflickr.com"
+        and label.startswith("farm")
+        and farm_number.isascii()
+        and farm_number.isdigit()
+    )
+
+
+def search_flickr(search_term: str) -> list[FlickrPhoto]:
+    """Search Flickr for photos matching the search term.
+
+    The term is wrapped in double quotes and URL-encoded before being
+    placed in the search URL; the returned page is handed to
+    parse_flickr_search_results.
+
+    Raises:
+        requests.exceptions.RequestException: if the request fails or
+            times out, or (as HTTPError) if Flickr answers with an
+            error status.
+    """
+    encoded_term = quote(f'"{search_term}"')
+    url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"
+
+    # Without a timeout requests waits indefinitely, which would tie up the
+    # web worker for as long as Flickr stalls.
+    response = requests.get(url, headers=BROWSER_HEADERS, timeout=10)
+    response.raise_for_status()
+
+    return parse_flickr_search_results(response.text)
+
+
+def _as_dict(value: typing.Any) -> dict[str, typing.Any]:
+    """Return value unchanged if it is a dict, otherwise an empty dict."""
+    return value if isinstance(value, dict) else {}
+
+
+def _size_url(sizes: dict[str, typing.Any], preferred: str, fallback: str) -> str:
+    """Return the image URL for the preferred size label, else the fallback."""
+    size = sizes.get(preferred, sizes.get(fallback, {}))
+    url = _as_dict(_as_dict(size).get("data")).get("url")
+    if not isinstance(url, str):
+        return ""
+    # Protocol-relative URLs ("//host/path") are given an explicit scheme.
+    return "https:" + url if url.startswith("//") else url
+
+
+def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
+    """Parse Flickr search results HTML and extract photo data.
+
+    Locates the JSON object that follows the "modelExport:" marker in the
+    page and reads the photo list at
+    main -> search-photos-lite-models[0] -> data -> photos -> data -> _data.
+
+    Returns an empty list when the marker is missing, the JSON cannot be
+    decoded, or the expected structure is absent. Entries without a photo
+    data object are skipped. Missing or null text fields become "", and a
+    missing or non-numeric license code becomes 0.
+    """
+    marker = "modelExport:"
+    marker_pos = html.find(marker)
+    if marker_pos == -1:
+        return []
+
+    # Skip whitespace after the marker; the bounds check covers a page that
+    # ends right after it.
+    start = marker_pos + len(marker)
+    while start < len(html) and html[start].isspace():
+        start += 1
+
+    # raw_decode parses exactly one JSON value starting at `start` and
+    # ignores whatever page content follows it.
+    try:
+        data, _end = json.JSONDecoder().raw_decode(html, start)
+    except json.JSONDecodeError:
+        return []
+    if not isinstance(data, dict):
+        return []
+
+    models = _as_dict(data.get("main")).get("search-photos-lite-models")
+    if not isinstance(models, list) or not models:
+        return []
+
+    # Walk down to the photo list, tolerating null or missing levels.
+    node = _as_dict(models[0])
+    for key in ("data", "photos", "data"):
+        node = _as_dict(node.get(key))
+    entries = node.get("_data")
+    if not isinstance(entries, list):
+        return []
+
+    photos: list[FlickrPhoto] = []
+    for entry in entries:
+        info = _as_dict(_as_dict(entry).get("data"))
+        if not info:
+            # Null placeholder or entry without photo data: nothing to show.
+            continue
+
+        sizes = _as_dict(_as_dict(info.get("sizes")).get("data"))
+
+        # Coerce to int so the FLICKR_LICENSES lookup works even if the
+        # payload carries the code as a string.
+        try:
+            license_code = int(info.get("license") or 0)
+        except (TypeError, ValueError):
+            license_code = 0
+
+        photos.append(
+            FlickrPhoto(
+                id=str(info.get("id") or ""),
+                title=info.get("title") or "",
+                path_alias=info.get("pathAlias") or "",
+                owner_nsid=info.get("ownerNsid") or "",
+                username=info.get("username") or "",
+                realname=info.get("realname") or "",
+                license=license_code,
+                thumb_url=_size_url(sizes, "q", "sq"),
+                medium_url=_size_url(sizes, "n", "m"),
+            )
+        )
+
+    return photos
+

@app.errorhandler(werkzeug.exceptions.InternalServerError)
 def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
@ -80,10 +265,13 @@ def start() -> str:

    flickr_url = flask.request.args.get("flickr")
    if not flickr_url:
+        # Search Flickr for photos
+        photos = search_flickr(name)
        return flask.render_template(
            "combined.html",
            name=name,
            enwp=enwp,
+            photos=photos,
        )

    if "/in/" in flickr_url:
@ -100,6 +288,11 @@ def start() -> str:
    assert nsid
    print(nsid)

+    # Get optional image URL for display, validate it's from Flickr
+    img_url = flask.request.args.get("img")
+    if img_url and not is_valid_flickr_image_url(img_url):
+        img_url = None
+
    msg = flask.render_template(
        "message.jinja",
        flickr_url=flickr_url,
@ -122,6 +315,7 @@ def start() -> str:
        subject=subject,
        lines=lines,
        nsid=nsid,
+        img_url=img_url,
    )


@ -134,8 +328,9 @@ def get_params(line_iter: collections.abc.Iterable[str]) -> str:
 def flickr_usrename_to_nsid(username: str) -> str:
    """Get NSID from flickr username."""
    url = f"https://www.flickr.com/people/{username}/"
+    # Fetch the user's people page using the shared browser-like headers.
-    r = requests.get(url)
-    params = json.loads(get_params(r.text.splitlines()))
+    r = requests.get(url, headers=BROWSER_HEADERS)
+    params_str = get_params(r.text.splitlines())
+    # raw_decode parses the first JSON value in the string and ignores any
+    # text after it; presumably get_params can return trailing content that
+    # json.loads would reject (confirm against get_params).
+    params, _ = json.JSONDecoder().raw_decode(params_str)
    return typing.cast(str, params["nsid"])