Add pagination for search results
- Add SearchResult dataclass with pagination metadata
- Update search_flickr() to accept page parameter
- Parse total results count from Flickr response
- Add Bootstrap pagination controls to template
- Display total result count in UI
- Update documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
c3bc6895c4
commit
0062de8ede
4 changed files with 118 additions and 25 deletions
17
AGENTS.md
17
AGENTS.md
|
|
@ -26,7 +26,16 @@ in a `modelExport` JavaScript variable which contains photo metadata.
|
|||
|
||||
- Uses browser-like headers (`BROWSER_HEADERS`) to avoid blocks
|
||||
- Parses embedded JSON by counting braces (not regex) to handle nested structures
|
||||
- Returns `FlickrPhoto` dataclass instances with id, title, username, license, URLs
|
||||
- Accepts optional `page` parameter for pagination (25 photos per page)
|
||||
- Returns `SearchResult` dataclass containing photos and pagination metadata
|
||||
|
||||
### SearchResult Dataclass
|
||||
|
||||
Contains search results with pagination info:
|
||||
- `photos`: List of `FlickrPhoto` instances
|
||||
- `total_photos`: Total number of matching photos
|
||||
- `current_page`: Current page number (1-indexed)
|
||||
- `total_pages`: Total number of pages (capped at 160 due to Flickr's 4000 result limit)
|
||||
|
||||
### FlickrPhoto Dataclass
|
||||
|
||||
|
|
@ -78,13 +87,13 @@ Then visit http://localhost:5000/
|
|||
Test search functionality:
|
||||
```python
|
||||
from main import search_flickr
|
||||
photos = search_flickr("Big Ben")
|
||||
print(len(photos), photos[0].title, photos[0].license_name)
|
||||
result = search_flickr("Big Ben", page=1)
|
||||
print(f"{len(result.photos)} photos, {result.total_pages} pages")
|
||||
print(result.photos[0].title, result.photos[0].license_name)
|
||||
```
|
||||
|
||||
## Potential Improvements
|
||||
|
||||
- Add pagination for search results (currently shows ~25 photos)
|
||||
- Cache search results to reduce Flickr requests
|
||||
- Add filtering by license type
|
||||
- Handle Flickr rate limiting/blocks more gracefully
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ photographers on Flickr whose photos can be used to enhance Wikipedia articles.
|
|||
CC BY-SA, CC0, Public Domain) are highlighted with a green badge.
|
||||
- **One-click message composition**: Click any photo to compose a permission
|
||||
request message with the photo displayed alongside.
|
||||
- **Pagination**: Browse through thousands of search results with page navigation.
|
||||
- Generate messages to request permission to use photos on Wikipedia.
|
||||
- Handle exceptions gracefully and provide detailed error information.
|
||||
|
||||
|
|
|
|||
66
main.py
66
main.py
|
|
@ -53,6 +53,9 @@ FLICKR_LICENSES = {
|
|||
}
|
||||
|
||||
|
||||
PHOTOS_PER_PAGE = 25
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class FlickrPhoto:
|
||||
"""Represents a Flickr photo from search results."""
|
||||
|
|
@ -78,6 +81,16 @@ class FlickrPhoto:
|
|||
return FLICKR_LICENSES.get(self.license, f"License {self.license}")
|
||||
|
||||
|
||||
@dataclasses.dataclass
class SearchResult:
    """One page of Flickr search results together with pagination metadata."""

    # Photos parsed from the requested results page.
    photos: list[FlickrPhoto]
    # Total number of photos matching the search.
    total_photos: int
    # Page number these photos belong to (1-indexed).
    current_page: int
    # Number of result pages available for navigation.
    total_pages: int
|
||||
|
||||
|
||||
def is_valid_flickr_image_url(url: str) -> bool:
|
||||
"""Check if URL is a valid Flickr static image URL."""
|
||||
valid_prefixes = (
|
||||
|
|
@ -95,23 +108,25 @@ def is_valid_flickr_image_url(url: str) -> bool:
|
|||
return True
|
||||
|
||||
|
||||
def search_flickr(search_term: str) -> list[FlickrPhoto]:
|
||||
def search_flickr(search_term: str, page: int = 1) -> SearchResult:
|
||||
"""Search Flickr for photos matching the search term."""
|
||||
encoded_term = quote(f'"{search_term}"')
|
||||
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"
|
||||
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}&page={page}"
|
||||
|
||||
response = requests.get(url, headers=BROWSER_HEADERS)
|
||||
response.raise_for_status()
|
||||
|
||||
return parse_flickr_search_results(response.text)
|
||||
return parse_flickr_search_results(response.text, page)
|
||||
|
||||
|
||||
def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
||||
def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult:
|
||||
"""Parse Flickr search results HTML and extract photo data."""
|
||||
empty_result = SearchResult(photos=[], total_photos=0, current_page=page, total_pages=0)
|
||||
|
||||
# Find the modelExport JSON embedded in the page
|
||||
start = html.find("modelExport:")
|
||||
if start == -1:
|
||||
return []
|
||||
return empty_result
|
||||
|
||||
start += len("modelExport:")
|
||||
while html[start].isspace():
|
||||
|
|
@ -148,12 +163,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
|||
break
|
||||
i += 1
|
||||
else:
|
||||
return []
|
||||
return empty_result
|
||||
|
||||
try:
|
||||
data = json.loads(json_str)
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
return empty_result
|
||||
|
||||
# Extract photos from the parsed data
|
||||
photos: list[FlickrPhoto] = []
|
||||
|
|
@ -162,18 +177,26 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
|||
photos_models = main.get("search-photos-lite-models", [])
|
||||
|
||||
if not photos_models:
|
||||
return []
|
||||
return empty_result
|
||||
|
||||
photos_data = (
|
||||
photos_models[0]
|
||||
.get("data", {})
|
||||
.get("photos", {})
|
||||
.get("data", {})
|
||||
.get("_data", [])
|
||||
)
|
||||
model_data = photos_models[0].get("data", {})
|
||||
photos_container = model_data.get("photos", {}).get("data", {})
|
||||
photos_data = photos_container.get("_data", [])
|
||||
total_photos = photos_container.get("totalItems", 0)
|
||||
|
||||
# Calculate total pages (Flickr caps at 4000 results)
|
||||
total_pages = min(total_photos, 4000) // PHOTOS_PER_PAGE
|
||||
if min(total_photos, 4000) % PHOTOS_PER_PAGE:
|
||||
total_pages += 1
|
||||
|
||||
for photo_entry in photos_data:
|
||||
# Skip None entries (placeholders from pagination)
|
||||
if photo_entry is None:
|
||||
continue
|
||||
|
||||
pd = photo_entry.get("data", {})
|
||||
if not pd:
|
||||
continue
|
||||
|
||||
sizes = pd.get("sizes", {}).get("data", {})
|
||||
thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {})
|
||||
|
|
@ -202,7 +225,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
|||
)
|
||||
)
|
||||
|
||||
return photos
|
||||
return SearchResult(
|
||||
photos=photos,
|
||||
total_photos=total_photos,
|
||||
current_page=page,
|
||||
total_pages=total_pages,
|
||||
)
|
||||
|
||||
|
||||
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
||||
|
|
@ -266,12 +294,14 @@ def start() -> str:
|
|||
flickr_url = flask.request.args.get("flickr")
|
||||
if not flickr_url:
|
||||
# Search Flickr for photos
|
||||
photos = search_flickr(name)
|
||||
page = flask.request.args.get("page", 1, type=int)
|
||||
page = max(1, page) # Ensure page is at least 1
|
||||
search_result = search_flickr(name, page)
|
||||
return flask.render_template(
|
||||
"combined.html",
|
||||
name=name,
|
||||
enwp=enwp,
|
||||
photos=photos,
|
||||
search_result=search_result,
|
||||
)
|
||||
|
||||
if "/in/" in flickr_url:
|
||||
|
|
|
|||
|
|
@ -15,13 +15,13 @@
|
|||
<input type="submit" value="Submit">
|
||||
</form>
|
||||
|
||||
{% if name and photos is defined and photos %}
|
||||
{% if name and search_result is defined and search_result.photos %}
|
||||
|
||||
<p>Wikipedia article: {{ name }}</p>
|
||||
<p>Select a photo to compose a message:</p>
|
||||
<p>Select a photo to compose a message ({{ search_result.total_photos | default(0) }} results):</p>
|
||||
|
||||
<div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
|
||||
{% for photo in photos %}
|
||||
{% for photo in search_result.photos %}
|
||||
<div class="col">
|
||||
<div class="card h-100">
|
||||
<a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
|
||||
|
|
@ -36,6 +36,59 @@
|
|||
{% endfor %}
|
||||
</div>
|
||||
|
||||
{% if search_result.total_pages > 1 %}
|
||||
<nav aria-label="Search results pagination">
|
||||
<ul class="pagination justify-content-center">
|
||||
{% if search_result.current_page > 1 %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page - 1) }}">Previous</a>
|
||||
</li>
|
||||
{% else %}
|
||||
<li class="page-item disabled">
|
||||
<span class="page-link">Previous</span>
|
||||
</li>
|
||||
{% endif %}
|
||||
|
||||
{% set start_page = [1, search_result.current_page - 2] | max %}
|
||||
{% set end_page = [search_result.total_pages, search_result.current_page + 2] | min %}
|
||||
|
||||
{% if start_page > 1 %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=1) }}">1</a>
|
||||
</li>
|
||||
{% if start_page > 2 %}
|
||||
<li class="page-item disabled"><span class="page-link">...</span></li>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
|
||||
{% for p in range(start_page, end_page + 1) %}
|
||||
<li class="page-item {{ 'active' if p == search_result.current_page else '' }}">
|
||||
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=p) }}">{{ p }}</a>
|
||||
</li>
|
||||
{% endfor %}
|
||||
|
||||
{% if end_page < search_result.total_pages %}
|
||||
{% if end_page < search_result.total_pages - 1 %}
|
||||
<li class="page-item disabled"><span class="page-link">...</span></li>
|
||||
{% endif %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.total_pages) }}">{{ search_result.total_pages }}</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
|
||||
{% if search_result.current_page < search_result.total_pages %}
|
||||
<li class="page-item">
|
||||
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page + 1) }}">Next</a>
|
||||
</li>
|
||||
{% else %}
|
||||
<li class="page-item disabled">
|
||||
<span class="page-link">Next</span>
|
||||
</li>
|
||||
{% endif %}
|
||||
</ul>
|
||||
</nav>
|
||||
{% endif %}
|
||||
|
||||
<p class="text-muted small">
|
||||
{# Parenthesize the concatenation: Jinja filters bind tighter than "+", so without the parentheses only the closing quote would be URL-encoded and `name` would be inserted unencoded. #}
<a href="https://flickr.com/search/?view_all=1&text={{ ('"' + name + '"') | urlencode }}" target="_blank">View full search on Flickr</a>
|
||||
</p>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue