Add pagination for search results

- Add SearchResult dataclass with pagination metadata
- Update search_flickr() to accept page parameter
- Parse total results count from Flickr response
- Add Bootstrap pagination controls to template
- Display total result count in UI
- Update documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-04 17:03:30 +00:00
parent c3bc6895c4
commit 0062de8ede
4 changed files with 118 additions and 25 deletions

View file

@ -26,7 +26,16 @@ in a `modelExport` JavaScript variable which contains photo metadata.
- Uses browser-like headers (`BROWSER_HEADERS`) to avoid blocks
- Parses embedded JSON by counting braces (not regex) to handle nested structures
- Builds a `FlickrPhoto` dataclass instance for each photo found, with id, title, username, license, URLs
- Accepts optional `page` parameter for pagination (25 photos per page)
- Returns `SearchResult` dataclass containing photos and pagination metadata
### SearchResult Dataclass
Contains search results with pagination info:
- `photos`: List of `FlickrPhoto` instances
- `total_photos`: Total number of matching photos
- `current_page`: Current page number (1-indexed)
- `total_pages`: Total number of pages (capped at 160 due to Flickr's 4000 result limit)
### FlickrPhoto Dataclass
@ -78,13 +87,13 @@ Then visit http://localhost:5000/
Test search functionality:
```python
from main import search_flickr
photos = search_flickr("Big Ben")
print(len(photos), photos[0].title, photos[0].license_name)
result = search_flickr("Big Ben", page=1)
print(f"{len(result.photos)} photos, {result.total_pages} pages")
print(result.photos[0].title, result.photos[0].license_name)
```
## Potential Improvements
- Add pagination for search results (currently shows ~25 photos)
- Cache search results to reduce Flickr requests
- Add filtering by license type
- Handle Flickr rate limiting/blocks more gracefully

View file

@ -27,6 +27,7 @@ photographers on Flickr whose photos can be used to enhance Wikipedia articles.
CC BY-SA, CC0, Public Domain) are highlighted with a green badge.
- **One-click message composition**: Click any photo to compose a permission
request message with the photo displayed alongside.
- **Pagination**: Browse through thousands of search results with page navigation.
- Generate messages to request permission to use photos on Wikipedia.
- Handle exceptions gracefully and provide detailed error information.

66
main.py
View file

@ -53,6 +53,9 @@ FLICKR_LICENSES = {
}
PHOTOS_PER_PAGE = 25
@dataclasses.dataclass
class FlickrPhoto:
"""Represents a Flickr photo from search results."""
@ -78,6 +81,16 @@ class FlickrPhoto:
return FLICKR_LICENSES.get(self.license, f"License {self.license}")
@dataclasses.dataclass
class SearchResult:
"""Flickr search results with pagination metadata."""
photos: list[FlickrPhoto]
total_photos: int
current_page: int
total_pages: int
def is_valid_flickr_image_url(url: str) -> bool:
"""Check if URL is a valid Flickr static image URL."""
valid_prefixes = (
@ -95,23 +108,25 @@ def is_valid_flickr_image_url(url: str) -> bool:
return True
def search_flickr(search_term: str) -> list[FlickrPhoto]:
def search_flickr(search_term: str, page: int = 1) -> SearchResult:
"""Search Flickr for photos matching the search term."""
encoded_term = quote(f'"{search_term}"')
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}&page={page}"
response = requests.get(url, headers=BROWSER_HEADERS)
response.raise_for_status()
return parse_flickr_search_results(response.text)
return parse_flickr_search_results(response.text, page)
def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult:
"""Parse Flickr search results HTML and extract photo data."""
empty_result = SearchResult(photos=[], total_photos=0, current_page=page, total_pages=0)
# Find the modelExport JSON embedded in the page
start = html.find("modelExport:")
if start == -1:
return []
return empty_result
start += len("modelExport:")
while html[start].isspace():
@ -148,12 +163,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
break
i += 1
else:
return []
return empty_result
try:
data = json.loads(json_str)
except json.JSONDecodeError:
return []
return empty_result
# Extract photos from the parsed data
photos: list[FlickrPhoto] = []
@ -162,18 +177,26 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
photos_models = main.get("search-photos-lite-models", [])
if not photos_models:
return []
return empty_result
photos_data = (
photos_models[0]
.get("data", {})
.get("photos", {})
.get("data", {})
.get("_data", [])
)
model_data = photos_models[0].get("data", {})
photos_container = model_data.get("photos", {}).get("data", {})
photos_data = photos_container.get("_data", [])
total_photos = photos_container.get("totalItems", 0)
# Calculate total pages (Flickr caps at 4000 results)
total_pages = min(total_photos, 4000) // PHOTOS_PER_PAGE
if min(total_photos, 4000) % PHOTOS_PER_PAGE:
total_pages += 1
for photo_entry in photos_data:
# Skip None entries (placeholders from pagination)
if photo_entry is None:
continue
pd = photo_entry.get("data", {})
if not pd:
continue
sizes = pd.get("sizes", {}).get("data", {})
thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {})
@ -202,7 +225,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
)
)
return photos
return SearchResult(
photos=photos,
total_photos=total_photos,
current_page=page,
total_pages=total_pages,
)
@app.errorhandler(werkzeug.exceptions.InternalServerError)
@ -266,12 +294,14 @@ def start() -> str:
flickr_url = flask.request.args.get("flickr")
if not flickr_url:
# Search Flickr for photos
photos = search_flickr(name)
page = flask.request.args.get("page", 1, type=int)
page = max(1, page) # Ensure page is at least 1
search_result = search_flickr(name, page)
return flask.render_template(
"combined.html",
name=name,
enwp=enwp,
photos=photos,
search_result=search_result,
)
if "/in/" in flickr_url:

View file

@ -15,13 +15,13 @@
<input type="submit" value="Submit">
</form>
{% if name and photos is defined and photos %}
{% if name and search_result is defined and search_result.photos %}
<p>Wikipedia article: {{ name }}</p>
<p>Select a photo to compose a message:</p>
<p>Select a photo to compose a message ({{ search_result.total_photos | default(0) }} results):</p>
<div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
{% for photo in photos %}
{% for photo in search_result.photos %}
<div class="col">
<div class="card h-100">
<a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
@ -36,6 +36,59 @@
{% endfor %}
</div>
{% if search_result.total_pages > 1 %}
<nav aria-label="Search results pagination">
<ul class="pagination justify-content-center">
{% if search_result.current_page > 1 %}
<li class="page-item">
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page - 1) }}">Previous</a>
</li>
{% else %}
<li class="page-item disabled">
<span class="page-link">Previous</span>
</li>
{% endif %}
{% set start_page = [1, search_result.current_page - 2] | max %}
{% set end_page = [search_result.total_pages, search_result.current_page + 2] | min %}
{% if start_page > 1 %}
<li class="page-item">
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=1) }}">1</a>
</li>
{% if start_page > 2 %}
<li class="page-item disabled"><span class="page-link">...</span></li>
{% endif %}
{% endif %}
{% for p in range(start_page, end_page + 1) %}
<li class="page-item {{ 'active' if p == search_result.current_page else '' }}">
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=p) }}">{{ p }}</a>
</li>
{% endfor %}
{% if end_page < search_result.total_pages %}
{% if end_page < search_result.total_pages - 1 %}
<li class="page-item disabled"><span class="page-link">...</span></li>
{% endif %}
<li class="page-item">
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.total_pages) }}">{{ search_result.total_pages }}</a>
</li>
{% endif %}
{% if search_result.current_page < search_result.total_pages %}
<li class="page-item">
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page + 1) }}">Next</a>
</li>
{% else %}
<li class="page-item disabled">
<span class="page-link">Next</span>
</li>
{% endif %}
</ul>
</nav>
{% endif %}
<p class="text-muted small">
<a href="https://flickr.com/search/?view_all=1&text={{ ('"' + name + '"') | urlencode }}" target="_blank">View full search on Flickr</a>
</p>