Extract flickr_mail package with Mapped models and shared utilities
Move from JSON file storage to SQLite database using SQLAlchemy with Mapped type hints. Deduplicate URL utility functions into shared flickr_mail package. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ac1b01ea68
commit
9f0fb01878
11 changed files with 1129 additions and 300 deletions
52
flickr_mail/url_utils.py
Normal file
52
flickr_mail/url_utils.py
Normal file
|
|
@@ -0,0 +1,52 @@
|
|||
"""Shared URL utility functions for flickr-mail."""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def normalize_flickr_url(url: str) -> str:
    """Normalize a Flickr photo URL for comparison.

    Strips a leading ``http://``/``https://`` scheme, a leading ``www.``
    and any trailing slashes, so that different spellings of the same URL
    compare equal.

    Args:
        url: The URL to normalize.

    Returns:
        The normalized URL, or an empty string when the result does not
        start with ``flickr.com``.
    """
    # Strip only *leading* occurrences. A global str.replace would also
    # rewrite the rest of the URL, e.g. a username containing "www.".
    for scheme in ("https://", "http://"):
        url = url.removeprefix(scheme)
    url = url.removeprefix("www.").rstrip("/")

    # Anything that is not on flickr.com is reported as "no URL".
    return url if url.startswith("flickr.com") else ""
|
||||
|
||||
|
||||
def _first_url(pattern: str, text: str) -> str:
    """Return the first match of *pattern* in *text* as a URL, or ""."""
    match = re.search(pattern, text)
    if match is None:
        return ""
    url = match.group(0)
    # Bare matches ("flickr.com/...", "www....") get a scheme prepended.
    return url if url.startswith("http") else "https://" + url


def extract_urls_from_message(body: str) -> tuple[str, str]:
    """Extract the first Flickr photo URL and Wikipedia URL from a message.

    Args:
        body: The message text to search.

    Returns:
        A ``(flickr_url, wikipedia_url)`` tuple. Each element is the first
        matching URL found in ``body`` (with ``https://`` prepended when the
        match carried no scheme), or an empty string when nothing matched.
    """
    # Flickr photo page: /photos/<user>/<numeric photo id>
    flickr_pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+"

    # English Wikipedia article. The title runs until whitespace, "<", "]"
    # or an unmatched ")", so a URL wrapped in parentheses or brackets is
    # not over-captured. A balanced "(...)" group is accepted as part of the
    # title so that names such as "Mercury_(planet)" are kept intact.
    wiki_pattern = (
        r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/"
        r"(?:\([^\s<\])(]*\)|[^\s<\])])+"
    )

    return _first_url(flickr_pattern, body), _first_url(wiki_pattern, body)
|
||||
|
||||
|
||||
def creator_profile_from_flickr_url(flickr_url: str) -> str:
    """Extract the creator's profile URL from a Flickr photo URL.

    Args:
        flickr_url: A URL containing a ``/photos/<username>/...`` path.

    Returns:
        ``https://www.flickr.com/photos/<username>``, or an empty string
        when the URL has no ``photos`` segment followed by a non-empty
        username.
    """
    segments = flickr_url.split("/")
    # Walk adjacent (segment, next segment) pairs; the username is whatever
    # directly follows "photos".
    for segment, username in zip(segments, segments[1:]):
        # Skip an empty username (e.g. ".../photos/") rather than returning
        # a profile URL that points at nobody.
        if segment == "photos" and username:
            return f"https://www.flickr.com/photos/{username}"
    return ""
|
||||
Loading…
Add table
Add a link
Reference in a new issue