mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-08 17:30:09 +02:00
[api-minor] Replace PDFDocumentProxy.getStats
with a synchronous PDFDocumentProxy.stats
getter
*Please note:* These changes will primarily benefit longer documents, somewhat at the expense of e.g. one-page documents.
The existing `PDFDocumentProxy.getStats` function, which in the default viewer is called for each rendered page, requires a round-trip to the worker-thread in order to obtain the current document stats. In the default viewer, we currently make one such API-call for *every rendered* page.
This patch proposes replacing that method with a *synchronous* `PDFDocumentProxy.stats` getter instead, combined with re-factoring the worker-thread code by adding a `DocStats`-class to track Stream/Font-types and *only send* them to the main-thread *the first time* that a type is encountered.
Note that in practice most PDF documents only use a fairly limited number of Stream/Font-types, which means that in longer documents most of the `PDFDocumentProxy.getStats`-calls will return the same data.[1]
This re-factoring will obviously benefit longer documents the most[2], and could actually be seen as a regression for one-page documents, since in practice there'll usually be a couple of "DocStats" messages sent during the parsing of the first page. However, if the user zooms/rotates the document (which causes re-rendering), note that even a one-page document would start to benefit from these changes.
Another benefit of having the data available/cached in the API is that unless the document stats change during parsing, repeated `PDFDocumentProxy.stats`-calls will return *the same identical* object.
This is something that we can easily take advantage of in the default viewer, by now *only* reporting "documentStats" telemetry[3] when the data has actually changed rather than once per rendered page (again beneficial in longer documents).
---
[1] Furthermore, the maximum number of `StreamType`/`FontType` values is `10` and `12` respectively, which means that regardless of the complexity and page count of a PDF document there'll never be more than twenty-two "DocStats" messages sent; see 41ac3f0c07/src/shared/util.js (L206-L232)
[2] One example is the `pdf.pdf` document in the test-suite, where rendering all of its 1310 pages only results in a total of seven "DocStats" messages being sent from the worker-thread.
[3] Reporting telemetry, in Firefox, includes using `JSON.stringify` on the data and then sending an event to the `PdfStreamConverter.jsm`-code.
In that code the event is handled and `JSON.parse` is used to retrieve the data, and in the "documentStats"-case we'll then iterate through the data to avoid double-reporting telemetry; see https://searchfox.org/mozilla-central/rev/8f4c180b87e52f3345ef8a3432d6e54bd1eb18dc/toolkit/components/pdfjs/content/PdfStreamConverter.jsm#515-549
This commit is contained in:
parent
41ac3f0c07
commit
6da0944fc7
10 changed files with 158 additions and 67 deletions
|
@ -701,6 +701,16 @@ class PDFDocumentProxy {
|
|||
return this.fingerprints[0];
|
||||
},
|
||||
});
|
||||
|
||||
Object.defineProperty(this, "getStats", {
|
||||
value: async () => {
|
||||
deprecated(
|
||||
"`PDFDocumentProxy.getStats`, " +
|
||||
"please use the `PDFDocumentProxy.stats`-getter instead."
|
||||
);
|
||||
return this.stats || { streamTypes: {}, fontTypes: {} };
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -728,6 +738,24 @@ class PDFDocumentProxy {
|
|||
return this._pdfInfo.fingerprints;
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {Object} PDFDocumentStats
|
||||
* @property {Object<string, boolean>} streamTypes - Used stream types in the
|
||||
* document (an item is set to true if specific stream ID was used in the
|
||||
* document).
|
||||
* @property {Object<string, boolean>} fontTypes - Used font types in the
|
||||
* document (an item is set to true if specific font ID was used in the
|
||||
* document).
|
||||
*/
|
||||
|
||||
/**
|
||||
* @type {PDFDocumentStats | null} The current statistics about document
|
||||
* structures, or `null` when no statistics exist.
|
||||
*/
|
||||
get stats() {
|
||||
return this._transport.stats;
|
||||
}
|
||||
|
||||
/**
|
||||
* @type {boolean} True if only XFA form.
|
||||
*/
|
||||
|
@ -940,25 +968,6 @@ class PDFDocumentProxy {
|
|||
return this._transport.downloadInfoCapability.promise;
|
||||
}
|
||||
|
||||
/**
|
||||
* @typedef {Object} PDFDocumentStats
|
||||
* @property {Object<string, boolean>} streamTypes - Used stream types in the
|
||||
* document (an item is set to true if specific stream ID was used in the
|
||||
* document).
|
||||
* @property {Object<string, boolean>} fontTypes - Used font types in the
|
||||
* document (an item is set to true if specific font ID was used in the
|
||||
* document).
|
||||
*/
|
||||
|
||||
/**
|
||||
* @returns {Promise<PDFDocumentStats>} A promise this is resolved with
|
||||
* current statistics about document structures (see
|
||||
* {@link PDFDocumentStats}).
|
||||
*/
|
||||
getStats() {
|
||||
return this._transport.getStats();
|
||||
}
|
||||
|
||||
/**
|
||||
* Cleans up resources allocated by the document on both the main and worker
|
||||
* threads.
|
||||
|
@ -2392,6 +2401,8 @@ if (typeof PDFJSDev !== "undefined" && PDFJSDev.test("GENERIC")) {
|
|||
* @ignore
|
||||
*/
|
||||
class WorkerTransport {
|
||||
#docStats = null;
|
||||
|
||||
constructor(messageHandler, loadingTask, networkStream, params) {
|
||||
this.messageHandler = messageHandler;
|
||||
this.loadingTask = loadingTask;
|
||||
|
@ -2433,6 +2444,10 @@ class WorkerTransport {
|
|||
return shadow(this, "annotationStorage", new AnnotationStorage());
|
||||
}
|
||||
|
||||
get stats() {
|
||||
return this.#docStats;
|
||||
}
|
||||
|
||||
getRenderingIntent(
|
||||
intent,
|
||||
annotationMode = AnnotationMode.ENABLE,
|
||||
|
@ -2843,6 +2858,18 @@ class WorkerTransport {
|
|||
});
|
||||
});
|
||||
|
||||
messageHandler.on("DocStats", data => {
|
||||
if (this.destroyed) {
|
||||
return; // Ignore any pending requests if the worker was terminated.
|
||||
}
|
||||
// Ensure that a `PDFDocumentProxy.stats` call-site cannot accidentally
|
||||
// modify this internal data.
|
||||
this.#docStats = Object.freeze({
|
||||
streamTypes: Object.freeze(data.streamTypes),
|
||||
fontTypes: Object.freeze(data.fontTypes),
|
||||
});
|
||||
});
|
||||
|
||||
messageHandler.on(
|
||||
"UnsupportedFeature",
|
||||
this._onUnsupportedFeature.bind(this)
|
||||
|
@ -3055,10 +3082,6 @@ class WorkerTransport {
|
|||
return this.messageHandler.sendWithPromise("GetMarkInfo", null);
|
||||
}
|
||||
|
||||
getStats() {
|
||||
return this.messageHandler.sendWithPromise("GetStats", null);
|
||||
}
|
||||
|
||||
async startCleanup(keepLoadedFonts = false) {
|
||||
await this.messageHandler.sendWithPromise("Cleanup", null);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue