diff --git a/update_flickr_uploads.py b/update_flickr_uploads.py
index 06b85e0..72c70e8 100644
--- a/update_flickr_uploads.py
+++ b/update_flickr_uploads.py
@@ -2,8 +2,12 @@
 """
 Find UploadWizard contributions that are from Flickr and add them to the database.
 
-For contributions with comment 'User created page with UploadWizard', queries the
-Commons API to check if the image source is Flickr (by checking the Credit field).
+Supports both UploadWizard comment styles:
+- "User created page with UploadWizard" (older)
+- "Uploaded a work by ... with UploadWizard" (newer, often includes Flickr URL)
+
+If a Flickr URL is not present in the contribution comment, queries the Commons
+API to check if the image source is Flickr (by checking the Credit field).
 """
 
 import json
@@ -27,6 +31,17 @@ def extract_flickr_url_from_credit(credit: str) -> str | None:
     return match.group(0) if match else None
 
 
+def extract_flickr_url_from_comment(comment: str | None) -> str | None:
+    """Extract a Flickr photo URL directly from a contribution comment.
+
+    Returns the first ``flickr.com/photos/<user>/<photo id>`` URL found, or
+    None when the comment is empty, None, or contains no such URL.
+    """
+    pattern = r'https?://(?:www\.)?flickr\.com/photos/[^/\s]+/\d+'
+    match = re.search(pattern, comment or "")
+    return match.group(0) if match else None
+
+
 def get_image_metadata(titles: list[str]) -> dict[str, dict]:
     """Fetch image metadata from Commons API for multiple titles."""
     if not titles:
@@ -97,10 +112,12 @@ def main():
     )
     url_to_message = {msg.normalized_flickr_url: msg for msg in sent_messages}
 
-    # Find UploadWizard contributions (page creations only)
+    # Find UploadWizard file uploads.
+    # Old format: "User created page with UploadWizard"
+    # New format: "Uploaded a work by ... with UploadWizard"
     upload_wizard = (
         session.query(Contribution)
-        .filter(Contribution.comment == "User created page with UploadWizard")
+        .filter(Contribution.comment.contains("UploadWizard"))
         .filter(Contribution.title.startswith("File:"))
         .all()
     )
@@ -127,7 +144,10 @@ def main():
         credit = meta.get("credit", "")
         artist = meta.get("artist", "")
 
-        flickr_url = extract_flickr_url_from_credit(credit)
+        # Prefer URL directly in comment; fall back to extmetadata Credit.
+        flickr_url = extract_flickr_url_from_comment(c.comment)
+        if not flickr_url:
+            flickr_url = extract_flickr_url_from_credit(credit)
         if not flickr_url:
             continue
 