Add interaction logging and tighten model NOT NULL constraints

Log searches (article/category) and message-generation events to a new
interaction_log table, capturing IP address and User-Agent.

Also apply NOT NULL constraints to Contribution, SentMessage, FlickrUpload,
and ThumbnailCache fields that are always populated, and remove stale
continue_token references from category.html.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-08 12:34:04 +00:00
parent 252a854e76
commit 08f5128e8d
5 changed files with 160 additions and 76 deletions

116
main.py
View file

@ -18,7 +18,7 @@ from sqlalchemy import func
from werkzeug.debug.tbtools import DebugTraceback
from flickr_mail.database import get_session
from flickr_mail.models import FlickrUpload, SentMessage, ThumbnailCache
from flickr_mail.models import FlickrUpload, InteractionLog, SentMessage, ThumbnailCache
from flickr_mail.url_utils import extract_urls_from_message, normalize_flickr_url
import re
@ -378,15 +378,12 @@ def has_content_image(images: list[dict]) -> bool:
return False
def get_articles_without_images(
category: str, limit: int = 100
) -> tuple[list[ArticleWithoutImage], str | None]:
def get_articles_without_images(category: str) -> list[ArticleWithoutImage]:
"""Get articles in a category that don't have images.
Uses generator=categorymembers with prop=images to efficiently check
multiple articles in a single API request.
Returns a tuple of (articles_list, continue_token).
multiple articles in a single API request, following continuation until
all category members have been processed.
"""
params = {
"action": "query",
@ -394,49 +391,54 @@ def get_articles_without_images(
"gcmtitle": category,
"gcmtype": "page", # Only articles, not subcategories or files
"gcmnamespace": "0", # Main namespace only
"gcmlimit": str(limit),
"gcmlimit": "max",
"prop": "images",
"imlimit": "max", # Need enough to check all pages in batch
"format": "json",
}
headers = {"User-Agent": WIKIMEDIA_USER_AGENT}
try:
response = requests.get(
WIKIPEDIA_API, params=params, headers=headers, timeout=30
)
response.raise_for_status()
data = response.json()
except (requests.RequestException, json.JSONDecodeError) as e:
print(f"Wikipedia API error: {e}")
return [], None
articles_without_images: list[ArticleWithoutImage] = []
continue_token: str | None = None
pages = data.get("query", {}).get("pages", {})
for page in pages.values():
images = page.get("images", [])
while True:
request_params = params.copy()
if continue_token:
request_params["gcmcontinue"] = continue_token
# Skip if page has content images (not just UI icons)
if has_content_image(images):
continue
title = page.get("title", "")
pageid = page.get("pageid", 0)
if title and pageid:
articles_without_images.append(
ArticleWithoutImage(title=title, pageid=pageid)
try:
response = requests.get(
WIKIPEDIA_API, params=request_params, headers=headers, timeout=30
)
response.raise_for_status()
data = response.json()
except (requests.RequestException, json.JSONDecodeError) as e:
print(f"Wikipedia API error: {e}")
break
pages = data.get("query", {}).get("pages", {})
for page in pages.values():
images = page.get("images", [])
# Skip if page has content images (not just UI icons)
if has_content_image(images):
continue
title = page.get("title", "")
pageid = page.get("pageid", 0)
if title and pageid:
articles_without_images.append(
ArticleWithoutImage(title=title, pageid=pageid)
)
continue_token = data.get("continue", {}).get("gcmcontinue")
if not continue_token:
break
# Sort by title for consistent display
articles_without_images.sort(key=lambda a: a.title)
# Get continue token if there are more results
continue_token = data.get("continue", {}).get("gcmcontinue")
return articles_without_images, continue_token
return articles_without_images
def is_valid_flickr_image_url(url: str) -> bool:
@ -583,6 +585,33 @@ def parse_flickr_search_results(html: str, page: int = 1) -> SearchResult:
)
def log_interaction(
    interaction_type: str,
    query: str | None = None,
    flickr_url: str | None = None,
    wikipedia_url: str | None = None,
) -> None:
    """Log a user interaction to the database.

    Builds an ``InteractionLog`` row from the current Flask request and the
    supplied details, then commits it using a session from ``get_session()``.
    The session is always closed, whether or not the commit succeeds; any
    exception raised while adding or committing propagates to the caller.

    Args:
        interaction_type: Label for the kind of event being recorded
            (callers pass values such as "search_article").
        query: Search term or article/category name, if any.
        flickr_url: Flickr URL associated with the event, if any.
        wikipedia_url: Wikipedia URL associated with the event, if any.
    """
    request = flask.request

    # Use the first comma-separated entry of X-Forwarded-For when present.
    # NOTE(review): this header is client-supplied unless a trusted reverse
    # proxy overwrites it - confirm the deployment sits behind one.
    forwarded_for = request.headers.get("X-Forwarded-For", "")
    first_hop = forwarded_for.split(",")[0].strip()
    # Fall back to the socket peer address when the header is missing or its
    # first entry is blank (e.g. " " or ", 10.0.0.1"), so an empty string is
    # never stored as the IP address.
    ip_address = first_hop or request.remote_addr
    user_agent = request.headers.get("User-Agent")

    session = get_session()
    try:
        entry = InteractionLog(
            timestamp=int(time.time()),  # whole seconds since the Unix epoch
            interaction_type=interaction_type,
            ip_address=ip_address,
            user_agent=user_agent,
            query=query,
            flickr_url=flickr_url,
            wikipedia_url=wikipedia_url,
        )
        session.add(entry)
        session.commit()
    finally:
        session.close()
@app.errorhandler(werkzeug.exceptions.InternalServerError)
def exception_handler(e: werkzeug.exceptions.InternalServerError) -> tuple[str, int]:
"""Handle exception."""
@ -656,6 +685,8 @@ def start() -> str:
# Search Flickr for photos
page = flask.request.args.get("page", 1, type=int)
page = max(1, page) # Ensure page is at least 1
if page == 1:
log_interaction("search_article", query=name, wikipedia_url=wikipedia_url)
search_result = search_flickr(name, page)
return flask.render_template(
"combined.html",
@ -718,6 +749,13 @@ def start() -> str:
previous_messages=previous_messages,
)
log_interaction(
"generate_message",
query=name,
flickr_url=flickr_url,
wikipedia_url=wikipedia_url,
)
msg = flask.render_template(
"message.jinja",
flickr_url=flickr_url,
@ -768,7 +806,8 @@ def category_search() -> str:
cat=cat,
)
articles, continue_token = get_articles_without_images(category)
log_interaction("search_category", query=category)
articles = get_articles_without_images(category)
# Get the display name (without Category: prefix)
category_name = category.replace("Category:", "")
@ -779,7 +818,6 @@ def category_search() -> str:
category=category,
category_name=category_name,
articles=articles,
continue_token=continue_token,
)