Extract flickr_mail package with Mapped models and shared utilities
Move from JSON file storage to SQLite database using SQLAlchemy with Mapped type hints. Deduplicate URL utility functions into shared flickr_mail package. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ac1b01ea68
commit
9f0fb01878
11 changed files with 1129 additions and 300 deletions
0
flickr_mail/__init__.py
Normal file
0
flickr_mail/__init__.py
Normal file
31
flickr_mail/database.py
Normal file
31
flickr_mail/database.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
"""Database engine and session factory for flickr-mail."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from flickr_mail.models import Base
|
||||
|
||||
# The SQLite file sits one directory above the package directory that
# contains this module.
DB_PATH = Path(__file__).parent.parent.joinpath("flickr_mail.db")

# Module-wide engine bound to that file; the session factory below and
# init_db() both use it.
engine = create_engine(f"sqlite:///{DB_PATH}")

# Factory producing Session objects bound to ``engine``.
SessionLocal = sessionmaker(bind=engine)
|
||||
|
||||
|
||||
@event.listens_for(engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Enable WAL mode for concurrent read/write access.

    Registered as a "connect" listener on the module engine, so it runs
    for each new DBAPI connection the engine opens.

    Args:
        dbapi_connection: The raw DBAPI connection that was just opened.
        connection_record: Pool bookkeeping record supplied by the event
            hook; unused here.
    """
    cursor = dbapi_connection.cursor()
    try:
        cursor.execute("PRAGMA journal_mode=WAL")
    finally:
        # Release the cursor even when the PRAGMA statement fails.
        cursor.close()
|
||||
|
||||
|
||||
def init_db() -> None:
    """Create the tables of every model registered on ``Base``.

    Emits the DDL through the module-level ``engine``.
    """
    metadata = Base.metadata
    metadata.create_all(engine)
|
||||
|
||||
|
||||
def get_session() -> Session:
    """Return a fresh session produced by the ``SessionLocal`` factory.

    Nothing in this function closes the session; that is left to the
    caller.
    """
    session = SessionLocal()
    return session
|
||||
93
flickr_mail/models.py
Normal file
93
flickr_mail/models.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
"""SQLAlchemy models for flickr-mail."""
|
||||
|
||||
from sqlalchemy import ForeignKey, Index, Text
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class shared by every flickr-mail model."""
|
||||
|
||||
|
||||
class Contribution(Base):
    """Row of the ``contributions`` table.

    Every column except ``id`` is annotated as optional. ``revid`` carries
    a unique constraint; ``timestamp`` and ``pageid`` are indexed.
    NOTE(review): the field names look like a wiki user-contribution
    record -- confirm against the importer.
    """

    __tablename__ = "contributions"
    __table_args__ = (
        Index("ix_contributions_timestamp", "timestamp"),
        Index("ix_contributions_pageid", "pageid"),
    )

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)

    # Who made the edit.
    userid: Mapped[int | None]
    user: Mapped[str | None]

    # Page / revision identifiers; a given revid may appear only once.
    pageid: Mapped[int | None]
    revid: Mapped[int | None] = mapped_column(unique=True)
    parentid: Mapped[int | None]
    ns: Mapped[int | None]
    title: Mapped[str | None]

    # Kept as a string rather than a datetime column.
    timestamp: Mapped[str | None]

    # Stored as strings; presumably flag-style values -- TODO confirm.
    minor: Mapped[str | None]
    top: Mapped[str | None]

    comment: Mapped[str | None] = mapped_column(Text)
    size: Mapped[int | None]
    sizediff: Mapped[int | None]

    # JSON array stored as text.
    tags: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
|
||||
class SentMessage(Base):
    """Row of the ``sent_messages`` table, keyed by ``message_id``.

    ``recipient`` and ``normalized_flickr_url`` are indexed. The
    ``flickr_uploads`` relationship lists the ``FlickrUpload`` rows whose
    ``sent_message_id`` refers to this message.
    """

    __tablename__ = "sent_messages"
    __table_args__ = (
        Index("ix_sent_messages_recipient", "recipient"),
        Index("ix_sent_messages_normalized_flickr_url", "normalized_flickr_url"),
    )

    # String primary key.
    message_id: Mapped[str] = mapped_column(primary_key=True)

    # Message fields.
    subject: Mapped[str | None]
    url: Mapped[str | None]
    recipient: Mapped[str | None]
    # Kept as a string rather than a datetime column.
    date: Mapped[str | None]
    body: Mapped[str | None] = mapped_column(Text)
    body_html: Mapped[str | None] = mapped_column(Text)

    # URL columns stored alongside the message.
    flickr_url: Mapped[str | None]
    normalized_flickr_url: Mapped[str | None]
    wikipedia_url: Mapped[str | None]
    creator_profile_url: Mapped[str | None]

    # Reverse side of FlickrUpload.sent_message.
    flickr_uploads: Mapped[list["FlickrUpload"]] = relationship(
        back_populates="sent_message"
    )
|
||||
|
||||
|
||||
class FlickrUpload(Base):
    """Row of the ``flickr_uploads`` table.

    ``normalized_flickr_url`` and ``timestamp`` are indexed. A row may
    optionally point at a ``SentMessage`` through ``sent_message_id``.
    """

    __tablename__ = "flickr_uploads"
    __table_args__ = (
        Index("ix_flickr_uploads_normalized_flickr_url", "normalized_flickr_url"),
        Index("ix_flickr_uploads_timestamp", "timestamp"),
    )

    # Integer primary key.
    id: Mapped[int] = mapped_column(primary_key=True)

    # Page / revision fields; timestamp is kept as a string.
    pageid: Mapped[int | None]
    revid: Mapped[int | None]
    title: Mapped[str | None]
    timestamp: Mapped[str | None]

    # Flickr photo and creator details.
    flickr_url: Mapped[str | None]
    normalized_flickr_url: Mapped[str | None]
    creator: Mapped[str | None]
    wikipedia_url: Mapped[str | None]
    creator_profile_url: Mapped[str | None]

    # Nullable foreign key to sent_messages.message_id.
    sent_message_id: Mapped[str | None] = mapped_column(
        ForeignKey("sent_messages.message_id")
    )

    # Reverse side of SentMessage.flickr_uploads.
    sent_message: Mapped[SentMessage | None] = relationship(
        back_populates="flickr_uploads"
    )
|
||||
|
||||
|
||||
class ThumbnailCache(Base):
    """Row of the ``thumbnail_cache`` table: one thumbnail URL per title."""

    __tablename__ = "thumbnail_cache"

    # The title is the primary key, so each title has at most one row.
    title: Mapped[str] = mapped_column(primary_key=True)

    thumb_url: Mapped[str | None]

    # Unix timestamp.
    fetched_at: Mapped[int | None]
|
||||
52
flickr_mail/url_utils.py
Normal file
52
flickr_mail/url_utils.py
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
"""Shared URL utility functions for flickr-mail."""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
def normalize_flickr_url(url: str) -> str:
    """Normalize a Flickr photo URL for comparison.

    Drops a leading ``https://`` or ``http://`` scheme, a leading ``www.``
    and any trailing slashes. Only prefixes are removed, so the same
    substrings occurring later in the path or query are left intact.

    Args:
        url: The URL to normalize.

    Returns:
        The normalized URL (starting with ``flickr.com``), or ``""`` when
        the result does not start with ``flickr.com``.
    """
    # Strip the scheme and "www." only where they lead the URL.
    url = url.removeprefix("https://").removeprefix("http://")
    url = url.removeprefix("www.")
    url = url.rstrip("/")
    return url if url.startswith("flickr.com") else ""
|
||||
|
||||
|
||||
def extract_urls_from_message(body: str) -> tuple[str, str]:
    """Extract flickr URL and Wikipedia URL from message body.

    For each kind of link the first match in ``body`` is used, and
    ``https://`` is prepended when the matched text does not already start
    with ``http``.

    Args:
        body: Message text to scan.

    Returns:
        A ``(flickr_url, wikipedia_url)`` tuple; an element is ``""`` when
        no link of that kind is found.
    """
    patterns = (
        # Flickr photo page: /photos/<user>/<numeric id>
        r"(?:https?://)?(?:www\.)?flickr\.com/photos/[^/\s]+/\d+",
        # English Wikipedia article; stops at whitespace, "<", "]" or ")".
        r"(?:https?://)?(?:www\.)?en\.wikipedia\.org/wiki/[^\s<\])]+",
    )

    found = []
    for pattern in patterns:
        match = re.search(pattern, body)
        if match is None:
            found.append("")
            continue
        link = match.group(0)
        found.append(link if link.startswith("http") else "https://" + link)

    return found[0], found[1]
|
||||
|
||||
|
||||
def creator_profile_from_flickr_url(flickr_url: str) -> str:
    """Extract creator profile URL from a flickr photo URL.

    The username is the path segment that follows the first ``photos``
    segment of the URL.

    Args:
        flickr_url: A Flickr photo URL such as
            ``https://www.flickr.com/photos/<username>/<photo id>``.

    Returns:
        ``https://www.flickr.com/photos/<username>``, or ``""`` when the
        URL has no ``photos`` segment or that segment is not followed by a
        non-empty username (for example a URL ending in ``/photos/``).
    """
    parts = flickr_url.split("/")
    # Pair every segment with its successor so a trailing "photos" segment
    # (which has no successor) is never considered.
    for segment, username in zip(parts, parts[1:]):
        if segment == "photos":
            # An empty username would build a bogus ".../photos/" profile.
            return f"https://www.flickr.com/photos/{username}" if username else ""
    return ""
|
||||
Loading…
Add table
Add a link
Reference in a new issue