Add pagination for search results

- Add SearchResult dataclass with pagination metadata
- Update search_flickr() to accept page parameter
- Parse total results count from Flickr response
- Add Bootstrap pagination controls to template
- Display total result count in UI
- Update documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-04 17:03:30 +00:00
parent c3bc6895c4
commit 0062de8ede
4 changed files with 118 additions and 25 deletions

66
main.py
View file

@ -53,6 +53,9 @@ FLICKR_LICENSES = {
}
PHOTOS_PER_PAGE = 25
@dataclasses.dataclass
class FlickrPhoto:
"""Represents a Flickr photo from search results."""
@ -78,6 +81,16 @@ class FlickrPhoto:
return FLICKR_LICENSES.get(self.license, f"License {self.license}")
@dataclasses.dataclass
class SearchResult:
    """One page of Flickr search results plus its paging details."""

    # Photos parsed from the fetched results page.
    photos: list[FlickrPhoto]
    # Total number of matches reported for the search.
    total_photos: int
    # Page number these results were requested for.
    current_page: int
    # Number of result pages available for the search.
    total_pages: int
def is_valid_flickr_image_url(url: str) -> bool:
"""Check if URL is a valid Flickr static image URL."""
valid_prefixes = (
@ -95,23 +108,25 @@ def is_valid_flickr_image_url(url: str) -> bool:
return True
def search_flickr(search_term: str, page: int = 1) -> SearchResult:
    """Search Flickr for photos matching the search term.

    The term is wrapped in double quotes and URL-encoded before being
    placed in the search URL's query string.

    Args:
        search_term: Text to search for.
        page: Results page to request; sent to Flickr as the ``page``
            query parameter. Defaults to the first page.

    Returns:
        The value produced by ``parse_flickr_search_results`` for the
        response body and the requested page.

    Raises:
        requests.HTTPError: If Flickr answers with an error status.
        requests.Timeout: If Flickr does not answer within the timeout.
    """
    encoded_term = quote(f'"{search_term}"')
    url = f"https://flickr.com/search/?view_all=1&text={encoded_term}&page={page}"

    # requests has no default timeout; without one a stalled connection
    # would block this call (and the web request behind it) indefinitely.
    response = requests.get(url, headers=BROWSER_HEADERS, timeout=30)
    response.raise_for_status()

    return parse_flickr_search_results(response.text, page)
def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult:
"""Parse Flickr search results HTML and extract photo data."""
empty_result = SearchResult(photos=[], total_photos=0, current_page=page, total_pages=0)
# Find the modelExport JSON embedded in the page
start = html.find("modelExport:")
if start == -1:
return []
return empty_result
start += len("modelExport:")
while html[start].isspace():
@ -148,12 +163,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
break
i += 1
else:
return []
return empty_result
try:
data = json.loads(json_str)
except json.JSONDecodeError:
return []
return empty_result
# Extract photos from the parsed data
photos: list[FlickrPhoto] = []
@ -162,18 +177,26 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
photos_models = main.get("search-photos-lite-models", [])
if not photos_models:
return []
return empty_result
photos_data = (
photos_models[0]
.get("data", {})
.get("photos", {})
.get("data", {})
.get("_data", [])
)
model_data = photos_models[0].get("data", {})
photos_container = model_data.get("photos", {}).get("data", {})
photos_data = photos_container.get("_data", [])
total_photos = photos_container.get("totalItems", 0)
# Calculate total pages (Flickr caps at 4000 results)
total_pages = min(total_photos, 4000) // PHOTOS_PER_PAGE
if min(total_photos, 4000) % PHOTOS_PER_PAGE:
total_pages += 1
for photo_entry in photos_data:
# Skip None entries (placeholders from pagination)
if photo_entry is None:
continue
pd = photo_entry.get("data", {})
if not pd:
continue
sizes = pd.get("sizes", {}).get("data", {})
thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {})
@ -202,7 +225,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
)
)
return photos
return SearchResult(
photos=photos,
total_photos=total_photos,
current_page=page,
total_pages=total_pages,
)
@app.errorhandler(werkzeug.exceptions.InternalServerError)
@ -266,12 +294,14 @@ def start() -> str:
flickr_url = flask.request.args.get("flickr")
if not flickr_url:
# Search Flickr for photos
photos = search_flickr(name)
page = flask.request.args.get("page", 1, type=int)
page = max(1, page) # Ensure page is at least 1
search_result = search_flickr(name, page)
return flask.render_template(
"combined.html",
name=name,
enwp=enwp,
photos=photos,
search_result=search_result,
)
if "/in/" in flickr_url: