track size of page resources:

- add 'size' entry to each resource in urn:pageinfo records
- add 'size' entry to pages in pages.jsonl, set to the sum of the sizes of all resources listed in the urn:pageinfo record
This commit is contained in:
Ilya Kreymer
2025-12-20 11:10:08 -08:00
parent 0ecaa38e68
commit 2c8d22f76a
4 changed files with 20 additions and 4 deletions

View File

@@ -98,6 +98,7 @@ type PageEntry = {
title?: string;
loadState?: number;
mime?: string;
size?: number;
seed?: boolean;
text?: string;
favIconUrl?: string;
@@ -2650,6 +2651,7 @@ self.__bx_behaviors.selectMainBehavior();
text,
loadState,
mime,
size,
favicon,
status,
} = state;
@@ -2673,6 +2675,9 @@ self.__bx_behaviors.selectMainBehavior();
if (mime) {
row.mime = mime;
}
if (size) {
row.size = size;
}
if (status) {
row.status = status;

View File

@@ -66,6 +66,7 @@ export type PageInfoValue = {
mime?: string;
type?: string;
error?: string;
size?: number;
fromBrowserCache?: boolean;
};
@@ -116,6 +117,7 @@ export class Recorder extends EventEmitter {
pendingRequests!: Map<string, RequestResponseInfo>;
skipIds!: Set<string>;
pageInfo!: PageInfoRecord;
pageSize = 0;
mainFrameId: string | null = null;
skipRangeUrls!: Map<string, number>;
skipPageInfo = false;
@@ -950,6 +952,7 @@ export class Recorder extends EventEmitter {
this.skipRangeUrls = new Map<string, number>();
this.skipPageInfo = false;
this.pageFinished = false;
this.pageSize = 0;
this.pageInfo = {
pageid,
urls: {},
@@ -963,8 +966,9 @@ export class Recorder extends EventEmitter {
addPageRecord(reqresp: RequestResponseInfo) {
if (this.isValidUrl(reqresp.url)) {
const { status, resourceType: type } = reqresp;
const size = reqresp.readSize || reqresp.payload?.length || 0;
const mime = reqresp.getMimeType();
const info: PageInfoValue = { status, mime, type };
const info: PageInfoValue = { status, mime, type, size };
if (reqresp.errorText) {
info.error = reqresp.errorText;
}
@@ -973,6 +977,7 @@ export class Recorder extends EventEmitter {
// info.fromBrowserCache = true;
// }
this.pageInfo.urls[reqresp.getCanonURL()] = info;
this.pageSize += size;
}
}
@@ -983,7 +988,7 @@ export class Recorder extends EventEmitter {
{ url: "urn:pageinfo:" + this.pageUrl },
"recorder",
);
return;
return null;
}
const text = JSON.stringify(this.pageInfo, null, 2);
@@ -1000,7 +1005,7 @@ export class Recorder extends EventEmitter {
"recorder",
);
return this.pageInfo.ts;
return { ts: this.pageInfo.ts, size: this.pageSize };
}
async awaitPageResources() {

View File

@@ -94,6 +94,7 @@ export class PageState {
title?: string;
mime?: string;
ts?: Date;
size?: number;
callbacks: PageCallbacks = {};

View File

@@ -301,7 +301,12 @@ export class PageWorker {
} finally {
try {
if (this.recorder) {
opts.data.ts = this.recorder.writePageInfoRecord();
const res = this.recorder.writePageInfoRecord();
if (res) {
const { size, ts } = res;
opts.data.ts = ts;
opts.data.size = size;
}
}
} catch (e) {
logger.error(