add logging

This commit is contained in:
Ilya Kreymer
2025-12-19 21:40:15 -08:00
parent b090df7f74
commit ea866db738
2 changed files with 5 additions and 7 deletions

View File

@@ -165,6 +165,7 @@ export class CrawlIndexer {
 let count = 0;
 for await (const line of reader.iterLines()) {
+count += 1;
 const inx = line.indexOf(" {");
 if (inx < 0) {
 logger.error("Skipping invalid CDXJ, no JSON", { line });
@@ -197,12 +198,10 @@ export class CrawlIndexer {
 if (res && res.size) {
 await dedupeIndex.addStats(res.size - size, crawlId, commitToAllkey);
 } else {
+console.log("NO DUPE", hash, res);
 await dedupeIndex.addRevisitSize(hash, size, crawlId);
 }
-continue;
-}
-if (url && date && hash) {
+} else if (url && date && hash) {
 await dedupeIndex.addHashDupe(
 hash,
 url,
@@ -211,6 +210,7 @@ export class CrawlIndexer {
 crawlId,
 commitToAllkey,
 );
+console.log("MATCH DUPE", hash, size);
 await dedupeIndex.matchRevisitSize(hash, size, crawlId, commitToAllkey);
 } else {
 logger.warn("Skipping invalid CDXJ, data missing", {
@@ -218,10 +218,7 @@ export class CrawlIndexer {
 date,
 digest: hash,
 });
-continue;
 }
-count += 1;
 }
 logger.debug("Processed", { count });

View File

@@ -401,6 +401,7 @@ export class RedisDedupeIndex {
 const { size, crawlId } = JSON.parse(res);
 await this.addStats(origSize - size, crawlId, commitToAllKey);
 } catch (e) {
+console.log(e);
 // ignore
 }
 }