Handle modern UploadWizard comments when indexing Flickr uploads

2026-02-07 13:41:27 +00:00 · 2026-02-07 13:41:27 +00:00 · 2819652afd
commit 2819652afd
parent 4f67960fe1
1 changed file with 21 additions and 5 deletions
--- a/update_flickr_uploads.py
+++ b/update_flickr_uploads.py
@@ -2,8 +2,12 @@
 """
 Find UploadWizard contributions that are from Flickr and add them to the database.

-For contributions with comment 'User created page with UploadWizard', queries the
-Commons API to check if the image source is Flickr (by checking the Credit field).
+Supports both UploadWizard comment styles:
+- "User created page with UploadWizard" (older)
+- "Uploaded a work by ... with UploadWizard" (newer, often includes Flickr URL)
+
+If a Flickr URL is not present in the contribution comment, queries Commons API
+to check if the image source is Flickr (by checking the Credit field).
 """

 import json
@@ -27,6 +31,13 @@ def extract_flickr_url_from_credit(credit: str) -> str | None:
    return match.group(0) if match else None


+def extract_flickr_url_from_comment(comment: str) -> str | None:
+    """Extract Flickr URL directly from a contribution comment."""
+    pattern = r'https?://(?:www\.)?flickr\.com/photos/[^/\s]+/\d+'
+    match = re.search(pattern, comment or "")
+    return match.group(0) if match else None
+
+
 def get_image_metadata(titles: list[str]) -> dict[str, dict]:
    """Fetch image metadata from Commons API for multiple titles."""
    if not titles:
@@ -97,10 +108,12 @@ def main():
        )
        url_to_message = {msg.normalized_flickr_url: msg for msg in sent_messages}

-        # Find UploadWizard contributions (page creations only)
+        # Find UploadWizard file uploads.
+        # Old format: "User created page with UploadWizard"
+        # New format: "Uploaded a work by ... with UploadWizard"
        upload_wizard = (
            session.query(Contribution)
-            .filter(Contribution.comment == "User created page with UploadWizard")
+            .filter(Contribution.comment.contains("UploadWizard"))
            .filter(Contribution.title.startswith("File:"))
            .all()
        )
@@ -127,6 +140,9 @@ def main():
                credit = meta.get("credit", "")
                artist = meta.get("artist", "")

+                # Prefer URL directly in comment; fall back to extmetadata Credit.
+                flickr_url = extract_flickr_url_from_comment(c.comment or "")
+                if not flickr_url:
-                flickr_url = extract_flickr_url_from_credit(credit)
+                    flickr_url = extract_flickr_url_from_credit(credit)
                if not flickr_url:
                    continue