Handle modern UploadWizard comments when indexing Flickr uploads
This commit is contained in:
parent
4f67960fe1
commit
2819652afd
1 changed file with 21 additions and 5 deletions
|
|
@@ -2,8 +2,12 @@
|
|||
"""
|
||||
Find UploadWizard contributions that are from Flickr and add them to the database.
|
||||
|
||||
For contributions with comment 'User created page with UploadWizard', queries the
|
||||
Commons API to check if the image source is Flickr (by checking the Credit field).
|
||||
Supports both UploadWizard comment styles:
|
||||
- "User created page with UploadWizard" (older)
|
||||
- "Uploaded a work by ... with UploadWizard" (newer, often includes Flickr URL)
|
||||
|
||||
If a Flickr URL is not present in the contribution comment, queries Commons API
|
||||
to check if the image source is Flickr (by checking the Credit field).
|
||||
"""
|
||||
|
||||
import json
|
||||
|
|
@@ -27,6 +31,13 @@ def extract_flickr_url_from_credit(credit: str) -> str | None:
|
|||
return match.group(0) if match else None
|
||||
|
||||
|
||||
def extract_flickr_url_from_comment(comment: str) -> str | None:
|
||||
"""Extract Flickr URL directly from a contribution comment."""
|
||||
pattern = r'https?://(?:www\.)?flickr\.com/photos/[^/\s]+/\d+'
|
||||
match = re.search(pattern, comment or "")
|
||||
return match.group(0) if match else None
|
||||
|
||||
|
||||
def get_image_metadata(titles: list[str]) -> dict[str, dict]:
|
||||
"""Fetch image metadata from Commons API for multiple titles."""
|
||||
if not titles:
|
||||
|
|
@@ -97,10 +108,12 @@ def main():
|
|||
)
|
||||
url_to_message = {msg.normalized_flickr_url: msg for msg in sent_messages}
|
||||
|
||||
# Find UploadWizard contributions (page creations only)
|
||||
# Find UploadWizard file uploads.
|
||||
# Old format: "User created page with UploadWizard"
|
||||
# New format: "Uploaded a work by ... with UploadWizard"
|
||||
upload_wizard = (
|
||||
session.query(Contribution)
|
||||
.filter(Contribution.comment == "User created page with UploadWizard")
|
||||
.filter(Contribution.comment.contains("UploadWizard"))
|
||||
.filter(Contribution.title.startswith("File:"))
|
||||
.all()
|
||||
)
|
||||
|
|
@@ -127,7 +140,10 @@ def main():
|
|||
credit = meta.get("credit", "")
|
||||
artist = meta.get("artist", "")
|
||||
|
||||
flickr_url = extract_flickr_url_from_credit(credit)
|
||||
# Prefer URL directly in comment; fall back to extmetadata Credit.
|
||||
flickr_url = extract_flickr_url_from_comment(c.comment or "")
|
||||
if not flickr_url:
|
||||
flickr_url = extract_flickr_url_from_credit(credit)
|
||||
if not flickr_url:
|
||||
continue
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue