mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-08 09:20:06 +02:00
Merge pull request #18052 from Snuffleupagus/textLayer-only-ReadableStream
Restore broken functionality and simplify the implementation in `src/display/text_layer.js`
This commit is contained in:
commit
c0b5d93ef4
3 changed files with 109 additions and 82 deletions
|
@ -16,7 +16,7 @@
|
||||||
/** @typedef {import("./display_utils").PageViewport} PageViewport */
|
/** @typedef {import("./display_utils").PageViewport} PageViewport */
|
||||||
/** @typedef {import("./api").TextContent} TextContent */
|
/** @typedef {import("./api").TextContent} TextContent */
|
||||||
|
|
||||||
import { AbortException, Util } from "../shared/util.js";
|
import { AbortException, Util, warn } from "../shared/util.js";
|
||||||
import { setLayerDimensions } from "./display_utils.js";
|
import { setLayerDimensions } from "./display_utils.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -162,7 +162,7 @@ function getAscent(fontFamily) {
|
||||||
return DEFAULT_FONT_ASCENT;
|
return DEFAULT_FONT_ASCENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
function appendText(task, geom, styles) {
|
function appendText(task, geom) {
|
||||||
// Initialize all used properties to keep the caches monomorphic.
|
// Initialize all used properties to keep the caches monomorphic.
|
||||||
const textDiv = document.createElement("span");
|
const textDiv = document.createElement("span");
|
||||||
const textDivProperties = {
|
const textDivProperties = {
|
||||||
|
@ -176,7 +176,7 @@ function appendText(task, geom, styles) {
|
||||||
|
|
||||||
const tx = Util.transform(task._transform, geom.transform);
|
const tx = Util.transform(task._transform, geom.transform);
|
||||||
let angle = Math.atan2(tx[1], tx[0]);
|
let angle = Math.atan2(tx[1], tx[0]);
|
||||||
const style = styles[geom.fontName];
|
const style = task._styleCache[geom.fontName];
|
||||||
if (style.vertical) {
|
if (style.vertical) {
|
||||||
angle += Math.PI / 2;
|
angle += Math.PI / 2;
|
||||||
}
|
}
|
||||||
|
@ -250,9 +250,7 @@ function appendText(task, geom, styles) {
|
||||||
textDivProperties.canvasWidth = style.vertical ? geom.height : geom.width;
|
textDivProperties.canvasWidth = style.vertical ? geom.height : geom.width;
|
||||||
}
|
}
|
||||||
task._textDivProperties.set(textDiv, textDivProperties);
|
task._textDivProperties.set(textDiv, textDivProperties);
|
||||||
if (task._isReadableStream) {
|
task._layoutText(textDiv);
|
||||||
task._layoutText(textDiv);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function layout(params) {
|
function layout(params) {
|
||||||
|
@ -284,30 +282,11 @@ function layout(params) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function render(task) {
|
|
||||||
if (task._canceled) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const textDivs = task._textDivs;
|
|
||||||
const capability = task._capability;
|
|
||||||
const textDivsLength = textDivs.length;
|
|
||||||
|
|
||||||
// No point in rendering many divs as it would make the browser
|
|
||||||
// unusable even after the divs are rendered.
|
|
||||||
if (textDivsLength > MAX_TEXT_DIVS_TO_RENDER) {
|
|
||||||
capability.resolve();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!task._isReadableStream) {
|
|
||||||
for (const textDiv of textDivs) {
|
|
||||||
task._layoutText(textDiv);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
capability.resolve();
|
|
||||||
}
|
|
||||||
|
|
||||||
class TextLayerRenderTask {
|
class TextLayerRenderTask {
|
||||||
|
#reader = null;
|
||||||
|
|
||||||
|
#textContentSource = null;
|
||||||
|
|
||||||
constructor({
|
constructor({
|
||||||
textContentSource,
|
textContentSource,
|
||||||
container,
|
container,
|
||||||
|
@ -316,14 +295,26 @@ class TextLayerRenderTask {
|
||||||
textDivProperties,
|
textDivProperties,
|
||||||
textContentItemsStr,
|
textContentItemsStr,
|
||||||
}) {
|
}) {
|
||||||
this._textContentSource = textContentSource;
|
if (textContentSource instanceof ReadableStream) {
|
||||||
this._isReadableStream = textContentSource instanceof ReadableStream;
|
this.#textContentSource = textContentSource;
|
||||||
|
} else if (
|
||||||
|
(typeof PDFJSDev === "undefined" || PDFJSDev.test("GENERIC")) &&
|
||||||
|
typeof textContentSource === "object"
|
||||||
|
) {
|
||||||
|
this.#textContentSource = new ReadableStream({
|
||||||
|
start(controller) {
|
||||||
|
controller.enqueue(textContentSource);
|
||||||
|
controller.close();
|
||||||
|
},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
throw new Error('No "textContentSource" parameter specified.');
|
||||||
|
}
|
||||||
this._container = this._rootContainer = container;
|
this._container = this._rootContainer = container;
|
||||||
this._textDivs = textDivs || [];
|
this._textDivs = textDivs || [];
|
||||||
this._textContentItemsStr = textContentItemsStr || [];
|
this._textContentItemsStr = textContentItemsStr || [];
|
||||||
this._fontInspectorEnabled = !!globalThis.FontInspector?.enabled;
|
this._fontInspectorEnabled = !!globalThis.FontInspector?.enabled;
|
||||||
|
|
||||||
this._reader = null;
|
|
||||||
this._textDivProperties = textDivProperties || new WeakMap();
|
this._textDivProperties = textDivProperties || new WeakMap();
|
||||||
this._canceled = false;
|
this._canceled = false;
|
||||||
this._capability = Promise.withResolvers();
|
this._capability = Promise.withResolvers();
|
||||||
|
@ -335,6 +326,7 @@ class TextLayerRenderTask {
|
||||||
properties: null,
|
properties: null,
|
||||||
ctx: getCtx(),
|
ctx: getCtx(),
|
||||||
};
|
};
|
||||||
|
this._styleCache = Object.create(null);
|
||||||
const { pageWidth, pageHeight, pageX, pageY } = viewport.rawDims;
|
const { pageWidth, pageHeight, pageX, pageY } = viewport.rawDims;
|
||||||
this._transform = [1, 0, 0, -1, -pageX, pageY + pageHeight];
|
this._transform = [1, 0, 0, -1, -pageX, pageY + pageHeight];
|
||||||
this._pageWidth = pageWidth;
|
this._pageWidth = pageWidth;
|
||||||
|
@ -346,6 +338,7 @@ class TextLayerRenderTask {
|
||||||
this._capability.promise
|
this._capability.promise
|
||||||
.finally(() => {
|
.finally(() => {
|
||||||
this._layoutTextParams = null;
|
this._layoutTextParams = null;
|
||||||
|
this._styleCache = null;
|
||||||
})
|
})
|
||||||
.catch(() => {
|
.catch(() => {
|
||||||
// Avoid "Uncaught promise" messages in the console.
|
// Avoid "Uncaught promise" messages in the console.
|
||||||
|
@ -365,22 +358,33 @@ class TextLayerRenderTask {
|
||||||
*/
|
*/
|
||||||
cancel() {
|
cancel() {
|
||||||
this._canceled = true;
|
this._canceled = true;
|
||||||
if (this._reader) {
|
const abortEx = new AbortException("TextLayer task cancelled.");
|
||||||
this._reader
|
|
||||||
.cancel(new AbortException("TextLayer task cancelled."))
|
this.#reader?.cancel(abortEx).catch(() => {
|
||||||
.catch(() => {
|
// Avoid "Uncaught promise" messages in the console.
|
||||||
// Avoid "Uncaught promise" messages in the console.
|
});
|
||||||
});
|
this.#reader = null;
|
||||||
this._reader = null;
|
|
||||||
}
|
this._capability.reject(abortEx);
|
||||||
this._capability.reject(new AbortException("TextLayer task cancelled."));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
_processItems(items, styleCache) {
|
_processItems(items) {
|
||||||
|
const textDivs = this._textDivs,
|
||||||
|
textContentItemsStr = this._textContentItemsStr;
|
||||||
|
|
||||||
for (const item of items) {
|
for (const item of items) {
|
||||||
|
// No point in rendering many divs as it would make the browser
|
||||||
|
// unusable even after the divs are rendered.
|
||||||
|
if (textDivs.length > MAX_TEXT_DIVS_TO_RENDER) {
|
||||||
|
warn("Ignoring additional textDivs for performance reasons.");
|
||||||
|
|
||||||
|
this._processItems = () => {}; // Avoid multiple warnings for one page.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (item.str === undefined) {
|
if (item.str === undefined) {
|
||||||
if (
|
if (
|
||||||
item.type === "beginMarkedContentProps" ||
|
item.type === "beginMarkedContentProps" ||
|
||||||
|
@ -398,8 +402,8 @@ class TextLayerRenderTask {
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
this._textContentItemsStr.push(item.str);
|
textContentItemsStr.push(item.str);
|
||||||
appendText(this, item, styleCache);
|
appendText(this, item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -426,37 +430,22 @@ class TextLayerRenderTask {
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
_render() {
|
_render() {
|
||||||
const { promise, resolve, reject } = Promise.withResolvers();
|
const styleCache = this._styleCache;
|
||||||
let styleCache = Object.create(null);
|
|
||||||
|
|
||||||
if (this._isReadableStream) {
|
const pump = () => {
|
||||||
const pump = () => {
|
this.#reader.read().then(({ value, done }) => {
|
||||||
this._reader.read().then(({ value, done }) => {
|
if (done) {
|
||||||
if (done) {
|
this._capability.resolve();
|
||||||
resolve();
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Object.assign(styleCache, value.styles);
|
Object.assign(styleCache, value.styles);
|
||||||
this._processItems(value.items, styleCache);
|
this._processItems(value.items);
|
||||||
pump();
|
pump();
|
||||||
}, reject);
|
}, this._capability.reject);
|
||||||
};
|
};
|
||||||
|
this.#reader = this.#textContentSource.getReader();
|
||||||
this._reader = this._textContentSource.getReader();
|
pump();
|
||||||
pump();
|
|
||||||
} else if (this._textContentSource) {
|
|
||||||
const { items, styles } = this._textContentSource;
|
|
||||||
this._processItems(items, styles);
|
|
||||||
resolve();
|
|
||||||
} else {
|
|
||||||
throw new Error('No "textContentSource" parameter specified.');
|
|
||||||
}
|
|
||||||
|
|
||||||
promise.then(() => {
|
|
||||||
styleCache = null;
|
|
||||||
render(this);
|
|
||||||
}, this._capability.reject);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -335,15 +335,11 @@ class Rasterize {
|
||||||
|
|
||||||
await task.promise;
|
await task.promise;
|
||||||
|
|
||||||
const { _pageWidth, _pageHeight, _textContentSource, _textDivs } = task;
|
const { _pageWidth, _pageHeight, _textDivs } = task;
|
||||||
const boxes = [];
|
const boxes = [];
|
||||||
let posRegex;
|
let j = 0,
|
||||||
for (
|
posRegex;
|
||||||
let i = 0, j = 0, ii = _textContentSource.items.length;
|
for (const { width, height, type } of textContent.items) {
|
||||||
i < ii;
|
|
||||||
i++
|
|
||||||
) {
|
|
||||||
const { width, height, type } = _textContentSource.items[i];
|
|
||||||
if (type) {
|
if (type) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -396,7 +392,7 @@ class Rasterize {
|
||||||
|
|
||||||
drawLayer.destroy();
|
drawLayer.destroy();
|
||||||
} catch (reason) {
|
} catch (reason) {
|
||||||
throw new Error(`Rasterize.textLayer: "${reason?.message}".`);
|
throw new Error(`Rasterize.highlightLayer: "${reason?.message}".`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -58,5 +58,47 @@ describe("textLayer", function () {
|
||||||
"",
|
"",
|
||||||
"page 1 / 3",
|
"page 1 / 3",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("creates textLayer from TextContent", async function () {
|
||||||
|
if (isNodeJS) {
|
||||||
|
pending("document.createElement is not supported in Node.js.");
|
||||||
|
}
|
||||||
|
const loadingTask = getDocument(buildGetDocumentParams("basicapi.pdf"));
|
||||||
|
const pdfDocument = await loadingTask.promise;
|
||||||
|
const page = await pdfDocument.getPage(1);
|
||||||
|
|
||||||
|
const textContentItemsStr = [];
|
||||||
|
|
||||||
|
const textLayerRenderTask = renderTextLayer({
|
||||||
|
textContentSource: await page.getTextContent(),
|
||||||
|
container: document.createElement("div"),
|
||||||
|
viewport: page.getViewport({ scale: 1 }),
|
||||||
|
textContentItemsStr,
|
||||||
|
});
|
||||||
|
expect(textLayerRenderTask instanceof TextLayerRenderTask).toEqual(true);
|
||||||
|
|
||||||
|
await textLayerRenderTask.promise;
|
||||||
|
expect(textContentItemsStr).toEqual([
|
||||||
|
"Table Of Content",
|
||||||
|
"",
|
||||||
|
"Chapter 1",
|
||||||
|
" ",
|
||||||
|
"..........................................................",
|
||||||
|
" ",
|
||||||
|
"2",
|
||||||
|
"",
|
||||||
|
"Paragraph 1.1",
|
||||||
|
" ",
|
||||||
|
"......................................................",
|
||||||
|
" ",
|
||||||
|
"3",
|
||||||
|
"",
|
||||||
|
"page 1 / 3",
|
||||||
|
]);
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue