Extract flickr_mail package with Mapped models and shared utilities

Move from JSON file storage to a SQLite database using SQLAlchemy with Mapped type hints. Deduplicate the URL utility functions into the shared flickr_mail package.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-07 13:07:23 +00:00 · 2026-02-07 13:07:23 +00:00 · 9f0fb01878
commit 9f0fb01878
parent ac1b01ea68
11 changed files with 1129 additions and 300 deletions
--- a/flickr_mail/__init__.py
+++ b/flickr_mail/__init__.py
--- a/flickr_mail/database.py
+++ b/flickr_mail/database.py
@@ -0,0 +1,31 @@
+"""Database engine and session factory for flickr-mail."""
+
+from pathlib import Path
+
+from sqlalchemy import create_engine, event
+from sqlalchemy.orm import Session, sessionmaker
+
+from flickr_mail.models import Base
+
+# SQLite database file, located one directory above this module's directory.
+DB_PATH = Path(__file__).parent.parent / "flickr_mail.db"
+
+# Module-level engine for that file, and the session factory bound to it.
+engine = create_engine(f"sqlite:///{DB_PATH}")
+SessionLocal = sessionmaker(bind=engine)
+
+
+@event.listens_for(engine, "connect")
+def set_sqlite_pragma(dbapi_connection, connection_record):
+    """Enable WAL mode for concurrent read/write access."""
+    cursor = dbapi_connection.cursor()
+    cursor.execute("PRAGMA journal_mode=WAL")
+    cursor.close()
+
+
+def init_db() -> None:
+    """Create all tables."""
+    Base.metadata.create_all(engine)
+
+
+def get_session() -> Session:
+    """Create a new database session."""
+    return SessionLocal()
--- a/flickr_mail/models.py
+++ b/flickr_mail/models.py
@@ -0,0 +1,93 @@
+"""SQLAlchemy models for flickr-mail."""
+
+from sqlalchemy import ForeignKey, Index, Text
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+
+class Base(DeclarativeBase):
+    """Declarative base class that the models in this module inherit from."""
+    pass
+
+
+class Contribution(Base):
+    """ORM model for one row of the ``contributions`` table.
+
+    Every column except the ``id`` primary key is nullable. ``revid`` has a
+    unique constraint; ``timestamp`` and ``pageid`` are indexed.
+
+    NOTE(review): the column names look like the fields of a MediaWiki
+    user-contributions record -- confirm against the code that fills the table.
+    """
+
+    __tablename__ = "contributions"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    userid: Mapped[int | None]
+    user: Mapped[str | None]
+    pageid: Mapped[int | None]
+    revid: Mapped[int | None] = mapped_column(unique=True)
+    parentid: Mapped[int | None]
+    ns: Mapped[int | None]
+    title: Mapped[str | None]
+    timestamp: Mapped[str | None]  # kept as a string, not a datetime type
+    # NOTE(review): minor/top are stored as strings; presumably flag markers -- confirm
+    minor: Mapped[str | None]
+    top: Mapped[str | None]
+    comment: Mapped[str | None] = mapped_column(Text)
+    size: Mapped[int | None]
+    sizediff: Mapped[int | None]
+    tags: Mapped[str | None] = mapped_column(Text)  # JSON array stored as text
+
+    __table_args__ = (
+        Index("ix_contributions_timestamp", "timestamp"),
+        Index("ix_contributions_pageid", "pageid"),
+    )
+
+
+class SentMessage(Base):
+    """ORM model for one row of the ``sent_messages`` table.
+
+    Rows are keyed by the string ``message_id``; all other columns are
+    nullable. ``recipient`` and ``normalized_flickr_url`` are indexed.
+    """
+
+    __tablename__ = "sent_messages"
+
+    message_id: Mapped[str] = mapped_column(primary_key=True)
+    subject: Mapped[str | None]
+    url: Mapped[str | None]
+    recipient: Mapped[str | None]
+    date: Mapped[str | None]  # kept as a string, not a datetime type
+    body: Mapped[str | None] = mapped_column(Text)
+    body_html: Mapped[str | None] = mapped_column(Text)
+    flickr_url: Mapped[str | None]
+    normalized_flickr_url: Mapped[str | None]
+    wikipedia_url: Mapped[str | None]
+    creator_profile_url: Mapped[str | None]
+
+    # Uploads linked to this message; paired with FlickrUpload.sent_message.
+    flickr_uploads: Mapped[list["FlickrUpload"]] = relationship(
+        back_populates="sent_message"
+    )
+
+    __table_args__ = (
+        Index("ix_sent_messages_recipient", "recipient"),
+        Index("ix_sent_messages_normalized_flickr_url", "normalized_flickr_url"),
+    )
+
+
+class FlickrUpload(Base):
+    """ORM model for one row of the ``flickr_uploads`` table.
+
+    All columns except the ``id`` primary key are nullable. A row may
+    reference a ``SentMessage`` through ``sent_message_id``.
+    ``normalized_flickr_url`` and ``timestamp`` are indexed.
+    """
+
+    __tablename__ = "flickr_uploads"
+
+    id: Mapped[int] = mapped_column(primary_key=True)
+    pageid: Mapped[int | None]
+    revid: Mapped[int | None]
+    title: Mapped[str | None]
+    timestamp: Mapped[str | None]  # kept as a string, not a datetime type
+    flickr_url: Mapped[str | None]
+    normalized_flickr_url: Mapped[str | None]
+    creator: Mapped[str | None]
+    wikipedia_url: Mapped[str | None]
+    creator_profile_url: Mapped[str | None]
+    # Optional foreign key to sent_messages.message_id.
+    sent_message_id: Mapped[str | None] = mapped_column(
+        ForeignKey("sent_messages.message_id")
+    )
+
+    # The linked message, if any; paired with SentMessage.flickr_uploads.
+    sent_message: Mapped[SentMessage | None] = relationship(
+        back_populates="flickr_uploads"
+    )
+
+    __table_args__ = (
+        Index("ix_flickr_uploads_normalized_flickr_url", "normalized_flickr_url"),
+        Index("ix_flickr_uploads_timestamp", "timestamp"),
+    )
+
+
+class ThumbnailCache(Base):
+    """ORM model for one row of the ``thumbnail_cache`` table, keyed by ``title``."""
+
+    __tablename__ = "thumbnail_cache"
+
+    title: Mapped[str] = mapped_column(primary_key=True)
+    thumb_url: Mapped[str | None]
+    fetched_at: Mapped[int | None]  # Unix timestamp
--- a/flickr_mail/url_utils.py
+++ b/flickr_mail/url_utils.py
@@ -0,0 +1,52 @@
+"""Shared URL utility functions for flickr-mail."""
+
+import re
+
+
+def normalize_flickr_url(url: str) -> str:
+    """Normalize a Flickr photo URL for comparison."""
+    # Remove protocol
+    url = url.replace("https://", "").replace("http://", "")
+    # Remove www.
+    url = url.replace("www.", "")
+    # Remove trailing slash
+    url = url.rstrip("/")
+    # Ensure it starts with flickr.com
+    if not url.startswith("flickr.com"):
+        return ""
+    return url
+
+
+def extract_urls_from_message(body: str) -> tuple[str, str]:
+    """Extract flickr URL and Wikipedia URL from message body."""
+
+    flickr_url = ""
+    wikipedia_url = ""
+
+    # Find flickr photo URLs
+    flickr_pattern = r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+"
+    flickr_matches = re.findall(flickr_pattern, body)
+    if flickr_matches:
+        flickr_url = flickr_matches[0]
+        if not flickr_url.startswith("http"):
+            flickr_url = "https://" + flickr_url
+
+    # Find Wikipedia URLs
+    wiki_pattern = r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+"
+    wiki_matches = re.findall(wiki_pattern, body)
+    if wiki_matches:
+        wikipedia_url = wiki_matches[0]
+        if not wikipedia_url.startswith("http"):
+            wikipedia_url = "https://" + wikipedia_url
+
+    return flickr_url, wikipedia_url
+
+
+def creator_profile_from_flickr_url(flickr_url: str) -> str:
+    """Extract creator profile URL from a flickr photo URL."""
+    parts = flickr_url.split("/")
+    for i, part in enumerate(parts):
+        if part == "photos" and i + 1 < len(parts):
+            username = parts[i + 1]
+            return f"https://www.flickr.com/photos/{username}"
+    return ""