Move from JSON file storage to SQLite database using SQLAlchemy with Mapped type hints. Deduplicate URL utility functions into shared flickr_mail package. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
52 lines
1.6 KiB
Python
52 lines
1.6 KiB
Python
"""Shared URL utility functions for flickr-mail."""
|
|
|
|
import re
|
|
|
|
|
|
def normalize_flickr_url(url: str) -> str:
    """Normalize a Flickr photo URL for comparison.

    Strips a leading ``http://`` / ``https://`` scheme, a leading ``www.``
    and any trailing slashes, so that different spellings of the same
    URL compare equal.

    Args:
        url: The URL to normalize.

    Returns:
        The normalized URL (starting with ``flickr.com``), or ``""`` when
        the stripped URL does not start with ``flickr.com``.
    """
    # Strip only *leading* scheme and "www." -- a blanket str.replace would
    # also remove those substrings from inside the path (for example a
    # user alias such as "www.foo"), making distinct URLs compare equal.
    url = url.removeprefix("https://").removeprefix("http://")
    url = url.removeprefix("www.")

    # Remove trailing slash(es)
    url = url.rstrip("/")

    # Anything that is not on flickr.com is reported as "no URL"
    if not url.startswith("flickr.com"):
        return ""
    return url
|
|
|
|
|
|
def extract_urls_from_message(body: str) -> tuple[str, str]:
    """Return the first Flickr photo URL and first Wikipedia URL in *body*.

    Each URL is returned with a scheme: matches that do not already start
    with ``http`` get ``https://`` prepended. A URL that is not present
    in the message is returned as ``""``.

    Returns:
        A ``(flickr_url, wikipedia_url)`` tuple.
    """
    patterns = (
        # Flickr photo page: /photos/<user>/<numeric photo id>
        r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+",
        # English Wikipedia article; stops at whitespace, "<", "]" or ")"
        r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+",
    )

    found = []
    for pattern in patterns:
        hit = re.search(pattern, body)
        if hit is None:
            found.append("")
            continue
        link = hit.group(0)
        found.append(link if link.startswith("http") else "https://" + link)

    flickr_url, wikipedia_url = found
    return flickr_url, wikipedia_url
|
|
|
|
|
|
def creator_profile_from_flickr_url(flickr_url: str) -> str:
    """Extract the creator profile URL from a Flickr photo URL.

    The profile is derived from the path segment that directly follows
    the first ``photos`` segment that has a non-empty successor.

    Args:
        flickr_url: A Flickr photo URL such as
            ``https://www.flickr.com/photos/<username>/<photo id>``.

    Returns:
        ``https://www.flickr.com/photos/<username>``, or ``""`` when no
        non-empty username segment follows ``photos``.
    """
    parts = flickr_url.split("/")
    # Pair every segment with its successor; zip stops before the last
    # segment, so a trailing "photos" with nothing after it never matches.
    for segment, username in zip(parts, parts[1:]):
        # Require a non-empty username: ".../photos/" splits into
        # ["photos", ""], which must not yield a profile URL without a user.
        if segment == "photos" and username:
            return f"https://www.flickr.com/photos/{username}"
    return ""
|