mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-25 11:20:18 +00:00
fix size count typo, unique == not dupe!
This commit is contained in:
@@ -349,7 +349,8 @@ export class RedisDedupeIndex {
|
|||||||
commitToAllKey = false,
|
commitToAllKey = false,
|
||||||
) {
|
) {
|
||||||
crawlId = crawlId || this.crawlId;
|
crawlId = crawlId || this.crawlId;
|
||||||
if (isDupe) {
|
// if not a dupe, add to unique size count
|
||||||
|
if (!isDupe) {
|
||||||
await this.dedupeRedis.hincrby(`h:${crawlId}:counts`, "uniqueSize", size);
|
await this.dedupeRedis.hincrby(`h:${crawlId}:counts`, "uniqueSize", size);
|
||||||
if (commitToAllKey) {
|
if (commitToAllKey) {
|
||||||
await this.dedupeRedis.hincrby(DUPE_ALL_COUNTS, "uniqueSize", size);
|
await this.dedupeRedis.hincrby(DUPE_ALL_COUNTS, "uniqueSize", size);
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ test("check revisit records written on duplicate crawl, same collection, no wacz
|
|||||||
|
|
||||||
numResponses = response;
|
numResponses = response;
|
||||||
|
|
||||||
await checkSizeStats(numResponses, "allcounts", 0, 180000);
|
await checkSizeStats(numResponses, "allcounts", 0, 10000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
@@ -190,7 +190,7 @@ test("check revisit records written on duplicate crawl, different collections, w
|
|||||||
|
|
||||||
numResponses = response;
|
numResponses = response;
|
||||||
|
|
||||||
await checkSizeStats(numResponses, "allcounts", 1, 48400000);
|
await checkSizeStats(numResponses, "allcounts", 1, 27000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
@@ -226,7 +226,7 @@ test("verify crawl with imported dupe index has same dupes as dedupe against ori
|
|||||||
// matches same number of revisits as original
|
// matches same number of revisits as original
|
||||||
expect(revisit).toBe(numResponses);
|
expect(revisit).toBe(numResponses);
|
||||||
|
|
||||||
await checkSizeStats(numResponses, "allcounts", 2, 48400000);
|
await checkSizeStats(numResponses, "allcounts", 2, 27000);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("test requires in datapackage.json of wacz deduped against previous crawl", () => {
|
test("test requires in datapackage.json of wacz deduped against previous crawl", () => {
|
||||||
|
|||||||
Reference in New Issue
Block a user