From b30a35604c536f2cbd3fdc82cd555b6f5804c2b5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 19 Dec 2025 21:13:26 -0800 Subject: [PATCH] indexer: ensure indexer size is number --- src/indexer.ts | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/indexer.ts b/src/indexer.ts index cfd1baf4..5e369682 100644 --- a/src/indexer.ts +++ b/src/indexer.ts @@ -184,6 +184,7 @@ export class CrawlIndexer { const date = line.split(" ", 2)[1]; const url = cdx.url; const hash = cdx.digest; + const size = Number(cdx.length); if (url.startsWith("urn:")) { continue; @@ -194,13 +195,9 @@ export class CrawlIndexer { // check if original is already in index const res = await dedupeIndex.getHashDupe(hash, crawlId); if (res && res.size) { - await dedupeIndex.addStats( - res.size - cdx.length, - crawlId, - commitToAllkey, - ); + await dedupeIndex.addStats(res.size - size, crawlId, commitToAllkey); } else { - await dedupeIndex.addRevisitSize(hash, cdx.length, crawlId); + await dedupeIndex.addRevisitSize(hash, size, crawlId); } continue; } @@ -210,16 +207,11 @@ export class CrawlIndexer { hash, url, date, - cdx.length, - crawlId, - commitToAllkey, - ); - await dedupeIndex.matchRevisitSize( - hash, - cdx.length, + size, crawlId, commitToAllkey, ); + await dedupeIndex.matchRevisitSize(hash, size, crawlId, commitToAllkey); } else { logger.warn("Skipping invalid CDXJ, data missing", { url,