mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-09 09:45:42 +02:00
Merge pull request #8488 from mukulmishra18/streams-getTextContent
Streams get text content
This commit is contained in:
commit
e2ca894fec
8 changed files with 275 additions and 114 deletions
|
@ -954,6 +954,24 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
|
|||
return intentState.opListReadCapability.promise;
|
||||
},
|
||||
|
||||
/**
|
||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||
* @return {ReadableStream} ReadableStream to read textContent chunks.
|
||||
*/
|
||||
streamTextContent(params = {}) {
|
||||
const TEXT_CONTENT_CHUNK_SIZE = 100;
|
||||
return this.transport.messageHandler.sendWithStream('GetTextContent', {
|
||||
pageIndex: this.pageNumber - 1,
|
||||
normalizeWhitespace: (params.normalizeWhitespace === true),
|
||||
combineTextItems: (params.disableCombineTextItems !== true),
|
||||
}, {
|
||||
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
|
||||
size(textContent) {
|
||||
return textContent.items.length;
|
||||
},
|
||||
});
|
||||
},
|
||||
|
||||
/**
|
||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||
* @return {Promise} That is resolved with a {@link TextContent}
|
||||
|
@ -961,10 +979,28 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
|
|||
*/
|
||||
getTextContent: function PDFPageProxy_getTextContent(params) {
|
||||
params = params || {};
|
||||
return this.transport.messageHandler.sendWithPromise('GetTextContent', {
|
||||
pageIndex: this.pageNumber - 1,
|
||||
normalizeWhitespace: (params.normalizeWhitespace === true),
|
||||
combineTextItems: (params.disableCombineTextItems !== true),
|
||||
let readableStream = this.streamTextContent(params);
|
||||
|
||||
return new Promise(function(resolve, reject) {
|
||||
function pump() {
|
||||
reader.read().then(function({ value, done, }) {
|
||||
if (done) {
|
||||
resolve(textContent);
|
||||
return;
|
||||
}
|
||||
Util.extendObj(textContent.styles, value.styles);
|
||||
Util.appendToArray(textContent.items, value.items);
|
||||
pump();
|
||||
}, reject);
|
||||
}
|
||||
|
||||
let reader = readableStream.getReader();
|
||||
let textContent = {
|
||||
items: [],
|
||||
styles: Object.create(null),
|
||||
};
|
||||
|
||||
pump();
|
||||
});
|
||||
},
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue