mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-24 19:10:15 +00:00
Fix size count typo: uniqueSize must be incremented only when the record is not a dupe (unique == not dupe).
This commit is contained in:
@@ -349,7 +349,8 @@ export class RedisDedupeIndex {
|
||||
commitToAllKey = false,
|
||||
) {
|
||||
crawlId = crawlId || this.crawlId;
|
||||
if (isDupe) {
|
||||
// if not a dupe, add to unique size count
|
||||
if (!isDupe) {
|
||||
await this.dedupeRedis.hincrby(`h:${crawlId}:counts`, "uniqueSize", size);
|
||||
if (commitToAllKey) {
|
||||
await this.dedupeRedis.hincrby(DUPE_ALL_COUNTS, "uniqueSize", size);
|
||||
|
||||
@@ -143,7 +143,7 @@ test("check revisit records written on duplicate crawl, same collection, no wacz
|
||||
|
||||
numResponses = response;
|
||||
|
||||
await checkSizeStats(numResponses, "allcounts", 0, 180000);
|
||||
await checkSizeStats(numResponses, "allcounts", 0, 10000);
|
||||
});
|
||||
|
||||
|
||||
@@ -190,7 +190,7 @@ test("check revisit records written on duplicate crawl, different collections, w
|
||||
|
||||
numResponses = response;
|
||||
|
||||
await checkSizeStats(numResponses, "allcounts", 1, 48400000);
|
||||
await checkSizeStats(numResponses, "allcounts", 1, 27000);
|
||||
});
|
||||
|
||||
|
||||
@@ -226,7 +226,7 @@ test("verify crawl with imported dupe index has same dupes as dedupe against ori
|
||||
// matches same number of revisits as original
|
||||
expect(revisit).toBe(numResponses);
|
||||
|
||||
await checkSizeStats(numResponses, "allcounts", 2, 48400000);
|
||||
await checkSizeStats(numResponses, "allcounts", 2, 27000);
|
||||
});
|
||||
|
||||
test("test requires in datapackage.json of wacz deduped against previous crawl", () => {
|
||||
|
||||
Reference in New Issue
Block a user