Stop fetching all pages when downloading sent mail

This commit is contained in:
Edward Betts 2026-02-07 13:17:34 +00:00
parent 9f0fb01878
commit e072279566
2 changed files with 44 additions and 17 deletions

View file

@ -48,12 +48,8 @@ def fetch_contributions(
return contributions, new_continue
def upsert_contribution(session, c: dict) -> None:
"""Insert or update a contribution by revid."""
existing = session.query(Contribution).filter_by(revid=c["revid"]).first()
if existing:
return # Already have this revision
def insert_contribution(session, c: dict) -> None:
"""Insert a contribution row (caller must ensure revid is new)."""
session.add(Contribution(
userid=c.get("userid"),
user=c.get("user"),
@ -108,13 +104,24 @@ def main() -> None:
print("no results")
break
# One DB query per batch to identify already-known revisions.
revids = [c["revid"] for c in contributions if "revid" in c]
existing_revids = {
row[0]
for row in (
session.query(Contribution.revid)
.filter(Contribution.revid.in_(revids))
.all()
)
}
batch_new = 0
for c in contributions:
# Skip contributions we already have; keep scanning the rest of the batch
existing = session.query(Contribution).filter_by(revid=c["revid"]).first()
if existing:
revid = c.get("revid")
if revid in existing_revids:
continue
upsert_contribution(session, c)
insert_contribution(session, c)
batch_new += 1
new_count += batch_new