Files
browsertrix-crawler/main.js
Tessa Walsh 0192d05f4c Implement improved json-l logging
- Add Logger class with methods for info, error, warn, debug, fatal
- Add context, timestamp, and details fields to log entries
- Log messages as JSON Lines
- Replace puppeteer-cluster stats with custom stats implementation
- Log behaviors by default
- Amend argParser to reflect logging changes
- Capture and log stdout/stderr from awaited child_processes
- Modify tests to use webrecorder.net to avoid timeouts
2023-01-19 14:17:27 -05:00

66 lines
1.4 KiB
JavaScript
Executable File

#!/usr/bin/env -S node --experimental-global-webcrypto
import { Crawler } from "./crawler.js";
import { Logger } from "./util/logger.js";
// Logger shared by every handler in this entry point.
const logger = new Logger();

// The active crawler. Stays null until it is constructed at the bottom of this
// file, which is why the signal handlers check it before use.
let crawler = null;

// Date.now() value recorded after the last termination signal that was handled
// without throwing; 0 means none has been handled yet. Used to debounce
// repeated SIGINT/SIGTERM deliveries.
let lastSigInt = 0;

// Flipped to true by the SIGABRT handler; once set, a SIGINT/SIGTERM that would
// otherwise be debounced stops the crawl immediately.
let forceTerm = false;
/**
 * Reacts to a termination signal (registered below for SIGINT and SIGTERM).
 *
 * Decision order:
 *  1. No crawler, or a crawler without `crawlState`: log an error and exit 1.
 *  2. `crawler.done` is truthy: log success and exit 0.
 *  3. `crawlState.drainMax` is falsy: ask the crawler to finish gracefully.
 *  4. Otherwise, if `forceTerm` is set or more than 200 ms have passed since
 *     `lastSigInt`: serialize and exit now. Signals inside that 200 ms window
 *     are ignored.
 *
 * `lastSigInt` is refreshed whenever steps 3/4 complete without throwing,
 * including when the signal was ignored by the debounce. Any exception raised
 * in steps 3/4 is logged and swallowed.
 *
 * @param {string} signame - Name of the received signal; used only for logging.
 * @returns {Promise<void>}
 */
async function handleTerminate(signame) {
  logger.info(`${signame} received...`);

  const hasActiveCrawler = Boolean(crawler && crawler.crawlState);
  if (!hasActiveCrawler) {
    logger.error("error: no crawler running, exiting");
    process.exit(1);
  }

  if (crawler.done) {
    logger.info("success: crawler done, exiting");
    process.exit(0);
  }

  try {
    // NOTE(review): drainMax presumably becomes truthy once a graceful finish
    // has been requested; confirm against the crawl state implementation.
    const alreadyDraining = Boolean(crawler.crawlState.drainMax);

    if (alreadyDraining) {
      // forceTerm is checked first so the clock is only consulted when needed.
      if (forceTerm || Date.now() - lastSigInt > 200) {
        logger.info("SIGNAL: stopping crawl now...");
        await crawler.serializeAndExit();
      }
    } else {
      logger.info("SIGNAL: gracefully finishing current pages...");
      crawler.gracefulFinish();
    }

    lastSigInt = Date.now();
  } catch (err) {
    logger.error("Error stopping crawl after receiving termination signal", err);
  }
}
// SIGINT and SIGTERM share one shutdown routine; the signal name is passed
// along so it can be logged.
process.on("SIGINT", () => handleTerminate("SIGINT"));
process.on("SIGTERM", () => handleTerminate("SIGTERM"));

// SIGABRT does not stop anything by itself: it arms forceTerm so that a later
// SIGINT/SIGTERM bypasses the debounce in handleTerminate. The listener is
// synchronous (it awaits nothing), so it is a plain function, not an async one.
process.on("SIGABRT", () => {
  logger.info("SIGABRT received, will force immediate exit on SIGTERM/SIGINT");
  forceTerm = true;
});

// SIGUSR1 / SIGUSR2 forward to crawler.prepareForExit with true / false
// respectively; both are no-ops until a crawler exists.
// NOTE(review): the meaning of the boolean is defined by Crawler.prepareForExit,
// which is not visible from this file.
process.on("SIGUSR1", () => {
  if (crawler) {
    crawler.prepareForExit(true);
  }
});

process.on("SIGUSR2", () => {
  if (crawler) {
    crawler.prepareForExit(false);
  }
});
// Build the crawler only now, after all signal handlers above are registered;
// until this assignment the handlers see `crawler` as null.
crawler = new Crawler();

// Start the crawl. Whatever run() returns is deliberately discarded here.
// NOTE(review): run() is presumably async and handles its own failures/exit —
// confirm in crawler.js.
void crawler.run();