"""Shared URL utility functions for flickr-mail.""" import re def normalize_flickr_url(url: str) -> str: """Normalize a Flickr photo URL for comparison.""" # Remove protocol url = url.replace("https://", "").replace("http://", "") # Remove www. url = url.replace("www.", "") # Remove trailing slash url = url.rstrip("/") # Ensure it starts with flickr.com if not url.startswith("flickr.com"): return "" return url def extract_urls_from_message(body: str) -> tuple[str, str]: """Extract flickr URL and Wikipedia URL from message body.""" flickr_url = "" wikipedia_url = "" # Find flickr photo URLs flickr_pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+" flickr_matches = re.findall(flickr_pattern, body) if flickr_matches: flickr_url = flickr_matches[0] if not flickr_url.startswith("http"): flickr_url = "https://" + flickr_url # Find Wikipedia URLs wiki_pattern = r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+" wiki_matches = re.findall(wiki_pattern, body) if wiki_matches: wikipedia_url = wiki_matches[0] if not wikipedia_url.startswith("http"): wikipedia_url = "https://" + wikipedia_url return flickr_url, wikipedia_url def creator_profile_from_flickr_url(flickr_url: str) -> str: """Extract creator profile URL from a flickr photo URL.""" parts = flickr_url.split("/") for i, part in enumerate(parts): if part == "photos" and i + 1 < len(parts): username = parts[i + 1] return f"https://www.flickr.com/photos/{username}" return ""