Fix size count typo: "unique" means not a dupe, so only non-dupes add to the unique size count

This commit is contained in:
Ilya Kreymer
2025-12-11 10:37:53 -08:00
parent 1eba37aea7
commit f00d791e1b
2 changed files with 5 additions and 4 deletions

View File

@@ -349,7 +349,8 @@ export class RedisDedupeIndex {
commitToAllKey = false,
) {
crawlId = crawlId || this.crawlId;
if (isDupe) {
// if not a dupe, add to unique size count
if (!isDupe) {
await this.dedupeRedis.hincrby(`h:${crawlId}:counts`, "uniqueSize", size);
if (commitToAllKey) {
await this.dedupeRedis.hincrby(DUPE_ALL_COUNTS, "uniqueSize", size);

View File

@@ -143,7 +143,7 @@ test("check revisit records written on duplicate crawl, same collection, no wacz
numResponses = response;
await checkSizeStats(numResponses, "allcounts", 0, 180000);
await checkSizeStats(numResponses, "allcounts", 0, 10000);
});
@@ -190,7 +190,7 @@ test("check revisit records written on duplicate crawl, different collections, w
numResponses = response;
await checkSizeStats(numResponses, "allcounts", 1, 48400000);
await checkSizeStats(numResponses, "allcounts", 1, 27000);
});
@@ -226,7 +226,7 @@ test("verify crawl with imported dupe index has same dupes as dedupe against ori
// matches same number of revisits as original
expect(revisit).toBe(numResponses);
await checkSizeStats(numResponses, "allcounts", 2, 48400000);
await checkSizeStats(numResponses, "allcounts", 2, 27000);
});
test("test requires in datapackage.json of wacz deduped against previous crawl", () => {