Add pagination for search results
- Add SearchResult dataclass with pagination metadata
- Update search_flickr() to accept page parameter
- Parse total results count from Flickr response
- Add Bootstrap pagination controls to template
- Display total result count in UI
- Update documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
c3bc6895c4
commit
0062de8ede
4 changed files with 118 additions and 25 deletions
17
AGENTS.md
17
AGENTS.md
|
|
@ -26,7 +26,16 @@ in a `modelExport` JavaScript variable which contains photo metadata.
|
||||||
|
|
||||||
- Uses browser-like headers (`BROWSER_HEADERS`) to avoid blocks
|
- Uses browser-like headers (`BROWSER_HEADERS`) to avoid blocks
|
||||||
- Parses embedded JSON by counting braces (not regex) to handle nested structures
|
- Parses embedded JSON by counting braces (not regex) to handle nested structures
|
||||||
- Returns `FlickrPhoto` dataclass instances with id, title, username, license, URLs
|
- Accepts optional `page` parameter for pagination (25 photos per page)
|
||||||
|
- Returns `SearchResult` dataclass containing photos and pagination metadata
|
||||||
|
|
||||||
|
### SearchResult Dataclass
|
||||||
|
|
||||||
|
Contains search results with pagination info:
|
||||||
|
- `photos`: List of `FlickrPhoto` instances
|
||||||
|
- `total_photos`: Total number of matching photos
|
||||||
|
- `current_page`: Current page number (1-indexed)
|
||||||
|
- `total_pages`: Total number of pages (capped at 160 due to Flickr's 4000 result limit)
|
||||||
|
|
||||||
### FlickrPhoto Dataclass
|
### FlickrPhoto Dataclass
|
||||||
|
|
||||||
|
|
@ -78,13 +87,13 @@ Then visit http://localhost:5000/
|
||||||
Test search functionality:
|
Test search functionality:
|
||||||
```python
|
```python
|
||||||
from main import search_flickr
|
from main import search_flickr
|
||||||
photos = search_flickr("Big Ben")
|
result = search_flickr("Big Ben", page=1)
|
||||||
print(len(photos), photos[0].title, photos[0].license_name)
|
print(f"{len(result.photos)} photos, {result.total_pages} pages")
|
||||||
|
print(result.photos[0].title, result.photos[0].license_name)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Potential Improvements
|
## Potential Improvements
|
||||||
|
|
||||||
- Add pagination for search results (currently shows ~25 photos)
|
|
||||||
- Cache search results to reduce Flickr requests
|
- Cache search results to reduce Flickr requests
|
||||||
- Add filtering by license type
|
- Add filtering by license type
|
||||||
- Handle Flickr rate limiting/blocks more gracefully
|
- Handle Flickr rate limiting/blocks more gracefully
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ photographers on Flickr whose photos can be used to enhance Wikipedia articles.
|
||||||
CC BY-SA, CC0, Public Domain) are highlighted with a green badge.
|
CC BY-SA, CC0, Public Domain) are highlighted with a green badge.
|
||||||
- **One-click message composition**: Click any photo to compose a permission
|
- **One-click message composition**: Click any photo to compose a permission
|
||||||
request message with the photo displayed alongside.
|
request message with the photo displayed alongside.
|
||||||
|
- **Pagination**: Browse through thousands of search results with page navigation.
|
||||||
- Generate messages to request permission to use photos on Wikipedia.
|
- Generate messages to request permission to use photos on Wikipedia.
|
||||||
- Handle exceptions gracefully and provide detailed error information.
|
- Handle exceptions gracefully and provide detailed error information.
|
||||||
|
|
||||||
|
|
|
||||||
66
main.py
66
main.py
|
|
@ -53,6 +53,9 @@ FLICKR_LICENSES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PHOTOS_PER_PAGE = 25
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class FlickrPhoto:
|
class FlickrPhoto:
|
||||||
"""Represents a Flickr photo from search results."""
|
"""Represents a Flickr photo from search results."""
|
||||||
|
|
@ -78,6 +81,16 @@ class FlickrPhoto:
|
||||||
return FLICKR_LICENSES.get(self.license, f"License {self.license}")
|
return FLICKR_LICENSES.get(self.license, f"License {self.license}")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass
|
||||||
|
class SearchResult:
|
||||||
|
"""Flickr search results with pagination metadata."""
|
||||||
|
|
||||||
|
photos: list[FlickrPhoto]
|
||||||
|
total_photos: int
|
||||||
|
current_page: int
|
||||||
|
total_pages: int
|
||||||
|
|
||||||
|
|
||||||
def is_valid_flickr_image_url(url: str) -> bool:
|
def is_valid_flickr_image_url(url: str) -> bool:
|
||||||
"""Check if URL is a valid Flickr static image URL."""
|
"""Check if URL is a valid Flickr static image URL."""
|
||||||
valid_prefixes = (
|
valid_prefixes = (
|
||||||
|
|
@ -95,23 +108,25 @@ def is_valid_flickr_image_url(url: str) -> bool:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def search_flickr(search_term: str) -> list[FlickrPhoto]:
|
def search_flickr(search_term: str, page: int = 1) -> SearchResult:
|
||||||
"""Search Flickr for photos matching the search term."""
|
"""Search Flickr for photos matching the search term."""
|
||||||
encoded_term = quote(f'"{search_term}"')
|
encoded_term = quote(f'"{search_term}"')
|
||||||
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}"
|
url = f"https://flickr.com/search/?view_all=1&text={encoded_term}&page={page}"
|
||||||
|
|
||||||
response = requests.get(url, headers=BROWSER_HEADERS)
|
response = requests.get(url, headers=BROWSER_HEADERS)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
return parse_flickr_search_results(response.text)
|
return parse_flickr_search_results(response.text, page)
|
||||||
|
|
||||||
|
|
||||||
def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult:
|
||||||
"""Parse Flickr search results HTML and extract photo data."""
|
"""Parse Flickr search results HTML and extract photo data."""
|
||||||
|
empty_result = SearchResult(photos=[], total_photos=0, current_page=page, total_pages=0)
|
||||||
|
|
||||||
# Find the modelExport JSON embedded in the page
|
# Find the modelExport JSON embedded in the page
|
||||||
start = html.find("modelExport:")
|
start = html.find("modelExport:")
|
||||||
if start == -1:
|
if start == -1:
|
||||||
return []
|
return empty_result
|
||||||
|
|
||||||
start += len("modelExport:")
|
start += len("modelExport:")
|
||||||
while html[start].isspace():
|
while html[start].isspace():
|
||||||
|
|
@ -148,12 +163,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
||||||
break
|
break
|
||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
return []
|
return empty_result
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(json_str)
|
data = json.loads(json_str)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
return []
|
return empty_result
|
||||||
|
|
||||||
# Extract photos from the parsed data
|
# Extract photos from the parsed data
|
||||||
photos: list[FlickrPhoto] = []
|
photos: list[FlickrPhoto] = []
|
||||||
|
|
@ -162,18 +177,26 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
||||||
photos_models = main.get("search-photos-lite-models", [])
|
photos_models = main.get("search-photos-lite-models", [])
|
||||||
|
|
||||||
if not photos_models:
|
if not photos_models:
|
||||||
return []
|
return empty_result
|
||||||
|
|
||||||
photos_data = (
|
model_data = photos_models[0].get("data", {})
|
||||||
photos_models[0]
|
photos_container = model_data.get("photos", {}).get("data", {})
|
||||||
.get("data", {})
|
photos_data = photos_container.get("_data", [])
|
||||||
.get("photos", {})
|
total_photos = photos_container.get("totalItems", 0)
|
||||||
.get("data", {})
|
|
||||||
.get("_data", [])
|
# Calculate total pages (Flickr caps at 4000 results)
|
||||||
)
|
total_pages = min(total_photos, 4000) // PHOTOS_PER_PAGE
|
||||||
|
if min(total_photos, 4000) % PHOTOS_PER_PAGE:
|
||||||
|
total_pages += 1
|
||||||
|
|
||||||
for photo_entry in photos_data:
|
for photo_entry in photos_data:
|
||||||
|
# Skip None entries (placeholders from pagination)
|
||||||
|
if photo_entry is None:
|
||||||
|
continue
|
||||||
|
|
||||||
pd = photo_entry.get("data", {})
|
pd = photo_entry.get("data", {})
|
||||||
|
if not pd:
|
||||||
|
continue
|
||||||
|
|
||||||
sizes = pd.get("sizes", {}).get("data", {})
|
sizes = pd.get("sizes", {}).get("data", {})
|
||||||
thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {})
|
thumb_data = sizes.get("q", sizes.get("sq", {})).get("data", {})
|
||||||
|
|
@ -202,7 +225,12 @@ def parse_flickr_search_results(html: str) -> list[FlickrPhoto]:
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return photos
|
return SearchResult(
|
||||||
|
photos=photos,
|
||||||
|
total_photos=total_photos,
|
||||||
|
current_page=page,
|
||||||
|
total_pages=total_pages,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
@app.errorhandler(werkzeug.exceptions.InternalServerError)
|
||||||
|
|
@ -266,12 +294,14 @@ def start() -> str:
|
||||||
flickr_url = flask.request.args.get("flickr")
|
flickr_url = flask.request.args.get("flickr")
|
||||||
if not flickr_url:
|
if not flickr_url:
|
||||||
# Search Flickr for photos
|
# Search Flickr for photos
|
||||||
photos = search_flickr(name)
|
page = flask.request.args.get("page", 1, type=int)
|
||||||
|
page = max(1, page) # Ensure page is at least 1
|
||||||
|
search_result = search_flickr(name, page)
|
||||||
return flask.render_template(
|
return flask.render_template(
|
||||||
"combined.html",
|
"combined.html",
|
||||||
name=name,
|
name=name,
|
||||||
enwp=enwp,
|
enwp=enwp,
|
||||||
photos=photos,
|
search_result=search_result,
|
||||||
)
|
)
|
||||||
|
|
||||||
if "/in/" in flickr_url:
|
if "/in/" in flickr_url:
|
||||||
|
|
|
||||||
|
|
@ -15,13 +15,13 @@
|
||||||
<input type="submit" value="Submit">
|
<input type="submit" value="Submit">
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
{% if name and photos is defined and photos %}
|
{% if name and search_result is defined and search_result.photos %}
|
||||||
|
|
||||||
<p>Wikipedia article: {{ name }}</p>
|
<p>Wikipedia article: {{ name }}</p>
|
||||||
<p>Select a photo to compose a message:</p>
|
<p>Select a photo to compose a message ({{ search_result.total_photos | default(0) }} results):</p>
|
||||||
|
|
||||||
<div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
|
<div class="row row-cols-2 row-cols-md-3 row-cols-lg-4 g-3 mb-3">
|
||||||
{% for photo in photos %}
|
{% for photo in search_result.photos %}
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<div class="card h-100">
|
<div class="card h-100">
|
||||||
<a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
|
<a href="{{ url_for(request.endpoint, enwp=enwp, flickr=photo.flickr_url, img=photo.medium_url) }}">
|
||||||
|
|
@ -36,6 +36,59 @@
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{% if search_result.total_pages > 1 %}
|
||||||
|
<nav aria-label="Search results pagination">
|
||||||
|
<ul class="pagination justify-content-center">
|
||||||
|
{% if search_result.current_page > 1 %}
|
||||||
|
<li class="page-item">
|
||||||
|
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page - 1) }}">Previous</a>
|
||||||
|
</li>
|
||||||
|
{% else %}
|
||||||
|
<li class="page-item disabled">
|
||||||
|
<span class="page-link">Previous</span>
|
||||||
|
</li>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% set start_page = [1, search_result.current_page - 2] | max %}
|
||||||
|
{% set end_page = [search_result.total_pages, search_result.current_page + 2] | min %}
|
||||||
|
|
||||||
|
{% if start_page > 1 %}
|
||||||
|
<li class="page-item">
|
||||||
|
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=1) }}">1</a>
|
||||||
|
</li>
|
||||||
|
{% if start_page > 2 %}
|
||||||
|
<li class="page-item disabled"><span class="page-link">...</span></li>
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% for p in range(start_page, end_page + 1) %}
|
||||||
|
<li class="page-item {{ 'active' if p == search_result.current_page else '' }}">
|
||||||
|
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=p) }}">{{ p }}</a>
|
||||||
|
</li>
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
{% if end_page < search_result.total_pages %}
|
||||||
|
{% if end_page < search_result.total_pages - 1 %}
|
||||||
|
<li class="page-item disabled"><span class="page-link">...</span></li>
|
||||||
|
{% endif %}
|
||||||
|
<li class="page-item">
|
||||||
|
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.total_pages) }}">{{ search_result.total_pages }}</a>
|
||||||
|
</li>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if search_result.current_page < search_result.total_pages %}
|
||||||
|
<li class="page-item">
|
||||||
|
<a class="page-link" href="{{ url_for(request.endpoint, enwp=enwp, page=search_result.current_page + 1) }}">Next</a>
|
||||||
|
</li>
|
||||||
|
{% else %}
|
||||||
|
<li class="page-item disabled">
|
||||||
|
<span class="page-link">Next</span>
|
||||||
|
</li>
|
||||||
|
{% endif %}
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<p class="text-muted small">
|
<p class="text-muted small">
|
||||||
<a href="https://flickr.com/search/?view_all=1&text={{ '"' + name + '"' | urlencode }}" target="_blank">View full search on Flickr</a>
|
<a href="https://flickr.com/search/?view_all=1&text={{ '"' + name + '"' | urlencode }}" target="_blank">View full search on Flickr</a>
|
||||||
</p>
|
</p>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue