mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-09 09:45:42 +02:00
[api-minor] Add a parameter to PDFPageProxy_getTextContent
that enables replacing of all whitespace with standard spaces in the textLayer (issue 6612)
This patch goes a bit further than issue 6612 requires, and replaces all kinds of whitespace with standard spaces. When testing this locally, it actually seemed to slightly improve two existing test-cases (`tracemonkey-text` and `taro-text`). Fixes 6612.
This commit is contained in:
parent
c2dfe9e9a9
commit
6dfe53b976
12 changed files with 75 additions and 24 deletions
|
@ -708,6 +708,14 @@ var PDFDocumentProxy = (function PDFDocumentProxyClosure() {
|
|||
return PDFDocumentProxy;
|
||||
})();
|
||||
|
||||
/**
|
||||
* Page getTextContent parameters.
|
||||
*
|
||||
* @typedef {Object} getTextContentParameters
|
||||
* @property {boolean} normalizeWhitespace - replaces all occurrences of
|
||||
* whitespace with standard spaces (0x20). The default value is `false`.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Page text content.
|
||||
*
|
||||
|
@ -986,12 +994,16 @@ var PDFPageProxy = (function PDFPageProxyClosure() {
|
|||
},
|
||||
|
||||
/**
|
||||
* @param {getTextContentParameters} params - getTextContent parameters.
|
||||
* @return {Promise} That is resolved with a {@link TextContent}
|
||||
* object that represents the page text content.
|
||||
*/
|
||||
getTextContent: function PDFPageProxy_getTextContent() {
|
||||
getTextContent: function PDFPageProxy_getTextContent(params) {
|
||||
var normalizeWhitespace = (params && params.normalizeWhitespace) || false;
|
||||
|
||||
return this.transport.messageHandler.sendWithPromise('GetTextContent', {
|
||||
pageIndex: this.pageNumber - 1
|
||||
pageIndex: this.pageNumber - 1,
|
||||
normalizeWhitespace: normalizeWhitespace,
|
||||
});
|
||||
},
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue