Extract flickr_mail package with Mapped models and shared utilities

Move from JSON file storage to SQLite database using SQLAlchemy with
Mapped type hints. Deduplicate URL utility functions into shared
flickr_mail package.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-07 13:07:23 +00:00
parent ac1b01ea68
commit 9f0fb01878
11 changed files with 1129 additions and 300 deletions

52
flickr_mail/url_utils.py Normal file
View file

@ -0,0 +1,52 @@
"""Shared URL utility functions for flickr-mail."""
import re
def normalize_flickr_url(url: str) -> str:
    """Normalize a Flickr photo URL for comparison.

    Strips a leading ``https://`` or ``http://`` scheme, a leading ``www.``
    and any trailing slashes, so that equivalent spellings of the same URL
    compare equal.

    Args:
        url: The URL to normalize.

    Returns:
        The normalized URL (for example ``flickr.com/photos/user/123``), or
        an empty string when the result does not start with ``flickr.com``.
    """
    # Strip the scheme and "www." only as prefixes: str.replace would also
    # delete them from inside the path or query string and corrupt the URL.
    url = url.removeprefix("https://").removeprefix("http://")
    url = url.removeprefix("www.")
    # Trailing slashes carry no meaning for comparison purposes.
    url = url.rstrip("/")
    if not url.startswith("flickr.com"):
        return ""
    return url
def extract_urls_from_message(body: str) -> tuple[str, str]:
"""Extract flickr URL and Wikipedia URL from message body."""
flickr_url = ""
wikipedia_url = ""
# Find flickr photo URLs
flickr_pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+"
flickr_matches = re.findall(flickr_pattern, body)
if flickr_matches:
flickr_url = flickr_matches[0]
if not flickr_url.startswith("http"):
flickr_url = "https://" + flickr_url
# Find Wikipedia URLs
wiki_pattern = r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+"
wiki_matches = re.findall(wiki_pattern, body)
if wiki_matches:
wikipedia_url = wiki_matches[0]
if not wikipedia_url.startswith("http"):
wikipedia_url = "https://" + wikipedia_url
return flickr_url, wikipedia_url
def creator_profile_from_flickr_url(flickr_url: str) -> str:
    """Extract the creator profile URL from a Flickr photo URL.

    The path segment that follows the first usable ``photos`` segment is
    taken as the creator's username.

    Args:
        flickr_url: A Flickr URL such as
            ``https://www.flickr.com/photos/<username>/<photo id>``.

    Returns:
        ``https://www.flickr.com/photos/<username>``, or an empty string when
        no non-empty segment follows a ``photos`` segment.
    """
    segments = flickr_url.split("/")
    # Walk adjacent (segment, next segment) pairs.
    for marker, username in zip(segments, segments[1:]):
        # An empty username (e.g. a URL ending in "/photos/") would produce
        # a bogus profile link, so such pairs are skipped.
        if marker == "photos" and username:
            return f"https://www.flickr.com/photos/{username}"
    return ""