Stop fetching all pages when downloading sent mail
This commit is contained in:
parent
9f0fb01878
commit
e072279566
2 changed files with 44 additions and 17 deletions
|
|
@@ -48,12 +48,8 @@ def fetch_contributions(
|
|||
return contributions, new_continue
|
||||
|
||||
|
||||
def upsert_contribution(session, c: dict) -> None:
|
||||
"""Insert or update a contribution by revid."""
|
||||
existing = session.query(Contribution).filter_by(revid=c["revid"]).first()
|
||||
if existing:
|
||||
return # Already have this revision
|
||||
|
||||
def insert_contribution(session, c: dict) -> None:
|
||||
"""Insert a contribution row (caller must ensure revid is new)."""
|
||||
session.add(Contribution(
|
||||
userid=c.get("userid"),
|
||||
user=c.get("user"),
|
||||
|
|
@@ -108,13 +104,24 @@ def main() -> None:
|
|||
print("no results")
|
||||
break
|
||||
|
||||
# One DB query per batch to identify already-known revisions.
|
||||
revids = [c["revid"] for c in contributions if "revid" in c]
|
||||
existing_revids = {
|
||||
row[0]
|
||||
for row in (
|
||||
session.query(Contribution.revid)
|
||||
.filter(Contribution.revid.in_(revids))
|
||||
.all()
|
||||
)
|
||||
}
|
||||
|
||||
batch_new = 0
|
||||
for c in contributions:
|
||||
# Stop if we've reached contributions we already have
|
||||
existing = session.query(Contribution).filter_by(revid=c["revid"]).first()
|
||||
if existing:
|
||||
revid = c.get("revid")
|
||||
if revid in existing_revids:
|
||||
continue
|
||||
upsert_contribution(session, c)
|
||||
|
||||
insert_contribution(session, c)
|
||||
batch_new += 1
|
||||
|
||||
new_count += batch_new
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue