diff --git a/AGENTS.md b/AGENTS.md index 86993d4..ccd930b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,7 +26,16 @@ in a `modelExport` JavaScript variable which contains photo metadata. - Uses browser-like headers (`BROWSER_HEADERS`) to avoid blocks - Parses embedded JSON by counting braces (not regex) to handle nested structures -- Returns `FlickrPhoto` dataclass instances with id, title, username, license, URLs +- Accepts optional `page` parameter for pagination (25 photos per page) +- Returns `SearchResult` dataclass containing photos and pagination metadata + +### SearchResult Dataclass + +Contains search results with pagination info: +- `photos`: List of `FlickrPhoto` instances +- `total_photos`: Total number of matching photos +- `current_page`: Current page number (1-indexed) +- `total_pages`: Total number of pages (capped at 160 due to Flickr's 4000 result limit) ### FlickrPhoto Dataclass @@ -78,13 +87,13 @@ Then visit http://localhost:5000/ Test search functionality: ```python from main import search_flickr -photos = search_flickr("Big Ben") -print(len(photos), photos[0].title, photos[0].license_name) +result = search_flickr("Big Ben", page=1) +print(f"{len(result.photos)} photos, {result.total_pages} pages") +print(result.photos[0].title, result.photos[0].license_name) ``` ## Potential Improvements -- Add pagination for search results (currently shows ~25 photos) - Cache search results to reduce Flickr requests - Add filtering by license type - Handle Flickr rate limiting/blocks more gracefully diff --git a/README.md b/README.md index b1e0633..85f1cde 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ photographers on Flickr whose photos can be used to enhance Wikipedia articles. CC BY-SA, CC0, Public Domain) are highlighted with a green badge. - **One-click message composition**: Click any photo to compose a permission request message with the photo displayed alongside. +- **Pagination**: Browse through thousands of search results with page navigation. - Generate messages to request permission to use photos on Wikipedia. - Handle exceptions gracefully and provide detailed error information. diff --git a/main.py b/main.py index cd781aa..d8a8496 100755 --- a/main.py +++ b/main.py @@ -53,6 +53,9 @@ FLICKR_LICENSES = { } +PHOTOS_PER_PAGE = 25 + + @dataclasses.dataclass class FlickrPhoto: """Represents a Flickr photo from search results.""" @@ -78,6 +81,16 @@ class FlickrPhoto: return FLICKR_LICENSES.get(self.license, f"License {self.license}") +@dataclasses.dataclass +class SearchResult: + """Flickr search results with pagination metadata.""" + + photos: list[FlickrPhoto] + total_photos: int + current_page: int + total_pages: int + + def is_valid_flickr_image_url(url: str) -> bool: """Check if URL is a valid Flickr static image URL.""" valid_prefixes = ( @@ -95,23 +108,25 @@ def is_valid_flickr_image_url(url: str) -> bool: return True -def search_flickr(search_term: str) -> list[FlickrPhoto]: +def search_flickr(search_term: str, page: int = 1) -> SearchResult: """Search Flickr for photos matching the search term.""" encoded_term = quote(f'"{search_term}"') - url = f"https://flickr.com/search/?view_all=1&text={encoded_term}" + url = f"https://flickr.com/search/?view_all=1&text={encoded_term}&page={page}" response = requests.get(url, headers=BROWSER_HEADERS) response.raise_for_status() - return parse_flickr_search_results(response.text) + return parse_flickr_search_results(response.text, page) -def parse_flickr_search_results(html: str) -> list[FlickrPhoto]: +def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult: """Parse Flickr search results HTML and extract photo data.""" + empty_result = SearchResult(photos=[], total_photos=0, current_page=page, total_pages=0) + # Find the modelExport JSON embedded in the page start = html.find("modelExport:") if start == -1: - return [] + return empty_result start += len("modelExport:") while html[start].isspace(): @@ -148,12 +163,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]: break i += 1 else: - return [] + return empty_result try: data = json.loads(json_str) except json.JSONDecodeError: - return [] + return empty_result # Extract photos from the parsed data photos: list[FlickrPhoto] = [] @@ -162,18 +177,26 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]: photos_models = main.get("search-photos-lite-models", []) if not photos_models: - return [] + return empty_result - photos_data = ( - photos_models[0] - .get("data", {}) - .get("photos", {}) - .get("data", {}) - .get("_data", []) - ) + model_data = photos_models[0].get("data", {}) + photos_container = model_data.get("photos", {}).get("data", {}) + photos_data = photos_container.get("_data", []) + total_photos = photos_container.get("totalItems", 0) + + # Calculate total pages (Flickr caps at 4000 results) + total_pages = min(total_photos, 4000) // PHOTOS_PER_PAGE + if min(total_photos, 4000) % PHOTOS_PER_PAGE: + total_pages += 1 for photo_entry in photos_data: + # Skip None entries (placeholders from pagination) + if photo_entry is None: + continue + pd = photo_entry.get("data", {}) + if not pd: + continue sizes = pd.get("sizes", {}).get("data", {}) thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {}) @@ -202,7 +225,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]: ) ) - return photos + return SearchResult( + photos=photos, + total_photos=total_photos, + current_page=page, + total_pages=total_pages, + ) @app.errorhandler(werkzeug.exceptions.InternalServerError) @@ -266,12 +294,14 @@ def start() -> str: flickr_url = flask.request.args.get("flickr") if not flickr_url: # Search Flickr for photos - photos = search_flickr(name) + page = flask.request.args.get("page", 1, type=int) + page = max(1, page) # Ensure page is at least 1 + search_result = search_flickr(name, page) return flask.render_template( "combined.html", name=name, enwp=enwp, - photos=photos, + search_result=search_result, ) if "/in/" in flickr_url: diff --git a/templates/combined.html b/templates/combined.html index d4c6613..5461c8d 100644 --- a/templates/combined.html +++ b/templates/combined.html @@ -15,13 +15,13 @@ - {% if name and photos is defined and photos %} + {% if name and search_result is defined and search_result.photos %}

Wikipedia article: {{ name }}

-

Select a photo to compose a message:

+

Select a photo to compose a message ({{ search_result.total_photos | default(0) }} results):

- {% for photo in photos %} + {% for photo in search_result.photos %}
+ {% if search_result.total_pages > 1 %} + + {% endif %} +

View full search on Flickr