Extract flickr_mail package with Mapped models and shared utilities

Move from JSON file storage to a SQLite database using SQLAlchemy with
Mapped type hints. Deduplicate the URL utility functions into the shared
flickr_mail package.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Edward Betts 2026-02-07 13:07:23 +00:00
parent ac1b01ea68
commit 9f0fb01878
11 changed files with 1129 additions and 300 deletions

0
flickr_mail/__init__.py Normal file
View file

31
flickr_mail/database.py Normal file
View file

@ -0,0 +1,31 @@
"""Database engine and session factory for flickr-mail."""
from pathlib import Path
from sqlalchemy import create_engine, event
from sqlalchemy.orm import Session, sessionmaker
from flickr_mail.models import Base
# SQLite database file, located two directory levels above this module
# (i.e. next to the directory that holds this file).
DB_PATH = Path(__file__).parent.parent / "flickr_mail.db"
# Engine pointing at the SQLite file named by DB_PATH.
engine = create_engine(f"sqlite:///{DB_PATH}")
# Session factory bound to the engine above; calling it yields a new Session.
SessionLocal = sessionmaker(bind=engine)
@event.listens_for(engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Enable WAL mode for concurrent read/write access.

    Registered as a listener for the engine's "connect" event, so the
    PRAGMA is issued on every newly opened DBAPI connection.

    Args:
        dbapi_connection: The raw DBAPI connection that was just opened.
        connection_record: Second argument supplied by the event; unused here.
    """
    cursor = dbapi_connection.cursor()
    try:
        cursor.execute("PRAGMA journal_mode=WAL")
    finally:
        # Release the cursor even when the PRAGMA statement raises.
        cursor.close()
def init_db() -> None:
    """Emit CREATE statements for every table registered on ``Base.metadata``.

    Uses the module-level ``engine`` as the bind.
    """
    Base.metadata.create_all(bind=engine)
def get_session() -> Session:
    """Return a fresh session produced by the ``SessionLocal`` factory."""
    new_session = SessionLocal()
    return new_session

93
flickr_mail/models.py Normal file
View file

@ -0,0 +1,93 @@
"""SQLAlchemy models for flickr-mail."""
from sqlalchemy import ForeignKey, Index, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
class Base(DeclarativeBase):
    """Declarative base class that the models in this module inherit from."""
class Contribution(Base):
    """ORM model for one row of the ``contributions`` table.

    Every column other than the ``id`` primary key is declared optional.

    NOTE(review): the field names look like MediaWiki user-contribution
    records (presumably one row per edit) -- confirm against the importer.
    """

    __tablename__ = "contributions"
    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    userid: Mapped[int | None]
    user: Mapped[str | None]
    pageid: Mapped[int | None]
    # Declared unique: at most one row per non-null revid.
    revid: Mapped[int | None] = mapped_column(unique=True)
    parentid: Mapped[int | None]
    ns: Mapped[int | None]
    title: Mapped[str | None]
    # Stored as a string, not a datetime type (format not visible here).
    timestamp: Mapped[str | None]
    # minor/top are string-typed columns.
    minor: Mapped[str | None]
    top: Mapped[str | None]
    comment: Mapped[str | None] = mapped_column(Text)
    size: Mapped[int | None]
    sizediff: Mapped[int | None]
    tags: Mapped[str | None] = mapped_column(Text)  # JSON array stored as text
    # Secondary indexes on timestamp and pageid.
    __table_args__ = (
        Index("ix_contributions_timestamp", "timestamp"),
        Index("ix_contributions_pageid", "pageid"),
    )
class SentMessage(Base):
    """ORM model for one row of the ``sent_messages`` table.

    Keyed by the string ``message_id``; all other columns are optional.
    ``flickr_uploads`` is the collection side of the relationship whose
    other end is ``FlickrUpload.sent_message``.
    """

    __tablename__ = "sent_messages"
    # String primary key.
    message_id: Mapped[str] = mapped_column(primary_key=True)
    subject: Mapped[str | None]
    url: Mapped[str | None]
    recipient: Mapped[str | None]
    # Stored as a string, not a date type (format not visible here).
    date: Mapped[str | None]
    # Message body in two Text columns: `body` and `body_html`.
    body: Mapped[str | None] = mapped_column(Text)
    body_html: Mapped[str | None] = mapped_column(Text)
    flickr_url: Mapped[str | None]
    # presumably the output of url_utils.normalize_flickr_url -- verify against caller
    normalized_flickr_url: Mapped[str | None]
    wikipedia_url: Mapped[str | None]
    creator_profile_url: Mapped[str | None]
    # FlickrUpload rows whose sent_message_id references this message.
    flickr_uploads: Mapped[list["FlickrUpload"]] = relationship(
        back_populates="sent_message"
    )
    # Secondary indexes on recipient and normalized_flickr_url.
    __table_args__ = (
        Index("ix_sent_messages_recipient", "recipient"),
        Index("ix_sent_messages_normalized_flickr_url", "normalized_flickr_url"),
    )
class FlickrUpload(Base):
    """ORM model for one row of the ``flickr_uploads`` table.

    All columns other than the ``id`` primary key are optional. A row may
    reference a ``SentMessage`` through ``sent_message_id``.
    """

    __tablename__ = "flickr_uploads"
    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    pageid: Mapped[int | None]
    # Unlike Contribution.revid, this column is not declared unique.
    revid: Mapped[int | None]
    title: Mapped[str | None]
    # Stored as a string, not a datetime type (format not visible here).
    timestamp: Mapped[str | None]
    flickr_url: Mapped[str | None]
    # presumably the output of url_utils.normalize_flickr_url -- verify against caller
    normalized_flickr_url: Mapped[str | None]
    creator: Mapped[str | None]
    wikipedia_url: Mapped[str | None]
    creator_profile_url: Mapped[str | None]
    # Optional foreign key to sent_messages.message_id.
    sent_message_id: Mapped[str | None] = mapped_column(
        ForeignKey("sent_messages.message_id")
    )
    # The referenced SentMessage, or None when sent_message_id is unset.
    sent_message: Mapped[SentMessage | None] = relationship(
        back_populates="flickr_uploads"
    )
    # Secondary indexes on normalized_flickr_url and timestamp.
    __table_args__ = (
        Index("ix_flickr_uploads_normalized_flickr_url", "normalized_flickr_url"),
        Index("ix_flickr_uploads_timestamp", "timestamp"),
    )
class ThumbnailCache(Base):
    """ORM model for one row of the ``thumbnail_cache`` table, keyed by title."""

    __tablename__ = "thumbnail_cache"
    # String primary key: one cache entry per title.
    title: Mapped[str] = mapped_column(primary_key=True)
    # Thumbnail URL for the title; optional.
    thumb_url: Mapped[str | None]
    fetched_at: Mapped[int | None]  # Unix timestamp

52
flickr_mail/url_utils.py Normal file
View file

@ -0,0 +1,52 @@
"""Shared URL utility functions for flickr-mail."""
import re
def normalize_flickr_url(url: str) -> str:
    """Normalize a Flickr photo URL for comparison.

    Strips a leading ``https://`` or ``http://`` scheme, a leading ``www.``
    and any trailing slashes. Only *leading* occurrences are removed, so a
    path segment that happens to contain ``www.`` or a scheme is preserved.

    Args:
        url: The URL to normalize.

    Returns:
        The normalized URL (e.g. ``flickr.com/photos/user/123``), or ``""``
        when the stripped URL does not start with ``flickr.com``.
    """
    # Remove protocol (prefix only, so the rest of the URL is left intact)
    url = url.removeprefix("https://").removeprefix("http://")
    # Remove www. (prefix only; usernames like "www.example" must survive)
    url = url.removeprefix("www.")
    # Remove trailing slash
    url = url.rstrip("/")
    # Ensure it starts with flickr.com
    if not url.startswith("flickr.com"):
        return ""
    return url
def extract_urls_from_message(body: str) -> tuple[str, str]:
    """Pull the first Flickr photo URL and the first English Wikipedia URL from *body*.

    A match found without a scheme is returned with ``https://`` prepended.

    Returns:
        A ``(flickr_url, wikipedia_url)`` tuple; an element is ``""`` when
        the corresponding pattern does not occur in *body*.
    """
    patterns = (
        # Flickr photo page: /photos/<user>/<numeric id>
        r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+",
        # English Wikipedia article; stops at whitespace, "<", "]" or ")"
        r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+",
    )
    found = []
    for pattern in patterns:
        hit = re.search(pattern, body)
        link = hit.group(0) if hit else ""
        if link and not link.startswith("http"):
            link = "https://" + link
        found.append(link)
    flickr_url, wikipedia_url = found
    return flickr_url, wikipedia_url
def creator_profile_from_flickr_url(flickr_url: str) -> str:
    """Extract creator profile URL from a flickr photo URL.

    The username is the path segment that directly follows the first
    ``photos`` segment which has a non-empty successor.

    Args:
        flickr_url: A Flickr URL such as
            ``https://www.flickr.com/photos/<username>/<photo id>``.

    Returns:
        ``https://www.flickr.com/photos/<username>``, or ``""`` when the URL
        has no ``photos`` segment followed by a non-empty username.
    """
    segments = flickr_url.split("/")
    # Pair every segment with its successor; a trailing "photos" has none.
    for segment, following in zip(segments, segments[1:]):
        # An empty successor (e.g. ".../photos/") is not a username, so it
        # must not produce a profile URL.
        if segment == "photos" and following:
            return f"https://www.flickr.com/photos/{following}"
    return ""