Extract flickr_mail package with Mapped models and shared utilities

Move from JSON file storage to a SQLite database using SQLAlchemy with Mapped type hints. Deduplicate the URL utility functions into the shared flickr_mail package.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 13:07:23 +00:00 · 2026-02-07 13:07:23 +00:00 · 9f0fb01878
commit 9f0fb01878
parent ac1b01ea68
11 changed files with 1129 additions and 300 deletions
--- a/flickr_mail/url_utils.py
+++ b/flickr_mail/url_utils.py
@ -0,0 +1,52 @@
+"""Shared URL utility functions for flickr-mail."""
+
+import re
+
+
+def normalize_flickr_url(url: str) -> str:
+    """Normalize a Flickr photo URL for comparison.
+
+    Strips a leading ``http://``/``https://`` scheme, a leading ``www.``
+    and any trailing slashes, so that different spellings of the same URL
+    produce the same string.
+
+    Args:
+        url: The URL to normalize.
+
+    Returns:
+        The normalized URL, beginning with ``flickr.com``, or ``""`` when
+        the host is not ``flickr.com``.
+    """
+    # Strip prefixes only: str.replace would also rewrite matches that occur
+    # later in the URL, e.g. a "www." inside the path.
+    url = url.removeprefix("https://").removeprefix("http://")
+    url = url.removeprefix("www.")
+    url = url.rstrip("/")
+
+    host = "flickr.com"
+    if not url.startswith(host):
+        return ""
+    # Require a host boundary after "flickr.com" so that look-alike hosts
+    # such as "flickr.com.evil.org" or "flickr.community" are rejected.
+    if len(url) > len(host) and url[len(host)] not in "/?#:":
+        return ""
+    return url
+
+
+def _cut_at_unbalanced_paren(url: str) -> str:
+    """Return ``url`` truncated just before its first unmatched ``)``."""
+    depth = 0
+    for index, char in enumerate(url):
+        if char == "(":
+            depth += 1
+        elif char == ")":
+            if depth == 0:
+                return url[:index]
+            depth -= 1
+    return url
+
+
+def extract_urls_from_message(body: str) -> tuple[str, str]:
+    """Extract the first Flickr photo URL and Wikipedia URL from a message.
+
+    Args:
+        body: The message text to search.
+
+    Returns:
+        A ``(flickr_url, wikipedia_url)`` tuple. Each element is the first
+        matching URL found in ``body``, prefixed with ``https://`` when the
+        match has no scheme, or ``""`` when there is no match.
+    """
+    flickr_url = ""
+    wikipedia_url = ""
+
+    # Find flickr photo URLs
+    flickr_pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+"
+    flickr_match = re.search(flickr_pattern, body)
+    if flickr_match:
+        flickr_url = flickr_match.group(0)
+        if not flickr_url.startswith("http"):
+            flickr_url = "https://" + flickr_url
+
+    # Find Wikipedia URLs. Parentheses are allowed in the match because
+    # article titles such as "Mercury_(planet)" contain them; the first
+    # character after "/wiki/" must still not be ")".
+    wiki_pattern = (
+        r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/"
+        r"[^\s<\])][^\s<\]]*"
+    )
+    wiki_match = re.search(wiki_pattern, body)
+    if wiki_match:
+        # A ")" with no matching "(" belongs to the surrounding text, e.g.
+        # "(see URL)" or a markdown link "[text](URL)", so cut the URL there.
+        wikipedia_url = _cut_at_unbalanced_paren(wiki_match.group(0))
+        if not wikipedia_url.startswith("http"):
+            wikipedia_url = "https://" + wikipedia_url
+
+    return flickr_url, wikipedia_url
+
+
+def creator_profile_from_flickr_url(flickr_url: str) -> str:
+    """Extract the creator profile URL from a Flickr photo URL.
+
+    Args:
+        flickr_url: A URL containing a ``/photos/<username>`` path.
+
+    Returns:
+        ``https://www.flickr.com/photos/<username>``, where ``<username>`` is
+        the first non-empty path segment that follows a ``photos`` segment,
+        or ``""`` when there is no such segment.
+    """
+    parts = flickr_url.split("/")
+    for part, following in zip(parts, parts[1:]):
+        # Skip an empty segment (e.g. a URL ending in "/photos/") so that a
+        # profile URL with no username is never returned.
+        if part == "photos" and following:
+            return f"https://www.flickr.com/photos/{following}"
+    return ""