Files
browsertrix-crawler/util/screenshots.js
Tessa Walsh 0192d05f4c Implement improved json-l logging
- Add Logger class with methods for info, error, warn, debug, fatal
- Add context, timestamp, and details fields to log entries
- Log messages as JSON Lines
- Replace puppeteer-cluster stats with custom stats implementation
- Log behaviors by default
- Amend argParser to reflect logging changes
- Capture and log stdout/stderr from awaited child_processes
- Modify tests to use webrecorder.net to avoid timeouts
2023-01-19 14:17:27 -05:00

79 lines
2.1 KiB
JavaScript

import fs from "fs";
import path from "path";
import * as warcio from "warcio";
import { Logger } from "./logger.js";
// Module-level logger instance; used below to report screenshot success and failure.
const logger = new Logger();
// ============================================================================
/**
 * Screenshot option presets, keyed by screenshot type name.
 *
 * Each value is handed unchanged to `page.screenshot()` and its `type`
 * field is also used to build the `image/<type>` Content-Type of the
 * resulting WARC record.
 *
 * - "view":      PNG of the current viewport only.
 * - "thumbnail": JPEG (quality 75) of the current viewport only.
 * - "fullPage":  PNG of the full page (`fullPage: true`), which is the
 *                only thing that distinguishes it from "view".
 */
export const screenshotTypes = {
  "view": {
    type: "png",
    omitBackground: true,
    fullPage: false
  },
  "thumbnail": {
    type: "jpeg",
    omitBackground: true,
    fullPage: false,
    quality: 75
  },
  "fullPage": {
    type: "png",
    omitBackground: true,
    // Must be true: with false this preset was just a duplicate of "view".
    fullPage: true
  }
};
/**
 * Captures screenshots of a page and appends them, as gzipped WARC
 * "resource" records, to a `screenshots.warc.gz` file inside the given
 * directory.
 */
export class Screenshots {

  /**
   * @param {object} opts
   * @param {object} opts.page - page object; must provide `setViewport()` and `screenshot()`.
   * @param {string} opts.url - URL of the page, used in the record URN and in log messages.
   * @param {Date} [opts.date] - timestamp for the WARC record; falls back to the current time.
   * @param {string} opts.directory - directory that holds `screenshots.warc.gz`.
   */
  constructor({page, url, date, directory}) {
    Object.assign(this, {page, url, directory});
    this.warcName = path.join(directory, "screenshots.warc.gz");
    this.date = date || new Date();
  }

  /**
   * Take one screenshot of the given type and append it to the WARC file.
   *
   * Any error raised along the way is caught and logged rather than
   * rethrown.
   *
   * @param {string} [screenshotType="view"] - key into `screenshotTypes`.
   */
  async take(screenshotType="view") {
    try {
      const viewport = {width: 1920, height: 1080};
      await this.page.setViewport(viewport);

      const shotOptions = screenshotTypes[screenshotType];
      const imageData = await this.page.screenshot(shotOptions);

      // Wrap the raw image in a WARC record, then gzip-serialize it.
      const record = await this.wrap(imageData, screenshotType, shotOptions.type);
      const serialized = await warcio.WARCSerializer.serialize(record, {gzip: true});

      fs.appendFileSync(this.warcName, serialized);
      logger.info(`Screenshot (type: ${screenshotType}) for ${this.url} written to ${this.warcName}`);
    } catch (err) {
      logger.error(`Taking screenshot (type: ${screenshotType}) failed for ${this.url}`, err.message);
    }
  }

  /** Take a screenshot using the "fullPage" entry of `screenshotTypes`. */
  async takeFullPage() {
    await this.take("fullPage");
  }

  /** Take a screenshot using the "thumbnail" entry of `screenshotTypes`. */
  async takeThumbnail() {
    await this.take("thumbnail");
  }

  /**
   * Build a WARC "resource" record (WARC/1.1) around an image buffer.
   *
   * The record URL has the form `urn:<screenshotType>:<page url>` and the
   * record date is this instance's `date` in ISO format.
   *
   * @param {*} buffer - image data, emitted as the single chunk of the record payload.
   * @param {string} [screenshotType="screenshot"] - used in the record URN.
   * @param {string} [imageType="png"] - used as `image/<imageType>` Content-Type.
   * @returns the result of `warcio.WARCRecord.create()`.
   */
  async wrap(buffer, screenshotType="screenshot", imageType="png") {
    const headers = {"Content-Type": `image/${imageType}`};

    // Payload is an async iterator that yields the whole buffer once.
    async function* singleChunk() {
      yield buffer;
    }

    const recordOptions = {
      url: `urn:${screenshotType}:` + this.url,
      date: this.date.toISOString(),
      type: "resource",
      warcVersion: "WARC/1.1",
      warcHeaders: headers
    };

    return warcio.WARCRecord.create(recordOptions, singleChunk());
  }
}