[api-minor] Fix the way to chunk the strings (#13257)

- Improve chunking in order to fix some bugs where the spaces aren't here:
    * track the last position where a glyph has been drawn;
    * when a new glyph (first glyph in a chunk) is added then compare its position with the last saved one and add a space or break:
      - there are multiple ways to move the glyphs and to avoid to have to deal with all the different possibilities it's a way easier to just compare positions;
      - and so there is now one function (i.e. "compareWithLastPosition") where all the job is done.
  - Add some breaks in order to get lines;
  - Remove the multiple whites spaces:
    * some spaces were filled with several whites spaces and so it makes harder to find some sequences of words using the search tool;
    * other pdf readers replace spaces by one white space.

Update src/core/evaluator.js

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>

Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>
This commit is contained in:
calixteman 2021-04-30 14:41:13 +02:00 committed by GitHub
parent e6fcb1e70b
commit af4dc55019
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 428 additions and 272 deletions

View file

@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const DEFAULT_FONT_SIZE = 30;
const DEFAULT_FONT_ASCENT = 0.8;
const ascentCache = new Map();
const NonWhitespaceRegexp = /\S/;
function isAllWhitespace(str) {
return !NonWhitespaceRegexp.test(str);
}
const AllWhitespaceRegexp = /^\s+$/g;
function getAscent(fontFamily, ctx) {
const cachedAscent = ascentCache.get(fontFamily);
@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
const textDivProperties = {
angle: 0,
canvasWidth: 0,
isWhitespace: false,
hasText: geom.str !== "",
hasEOL: geom.hasEOL,
originalTransform: null,
paddingBottom: 0,
paddingLeft: 0,
@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
scale: 1,
};
textDiv.textContent = geom.str;
task._textDivs.push(textDiv);
if (isAllWhitespace(geom.str)) {
textDivProperties.isWhitespace = true;
task._textDivProperties.set(textDiv, textDivProperties);
return;
}
const tx = Util.transform(task._viewport.transform, geom.transform);
let angle = Math.atan2(tx[1], tx[0]);
@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() {
// Keeps screen readers from pausing on every new text span.
textDiv.setAttribute("role", "presentation");
textDiv.textContent = geom.str;
// geom.dir may be 'ttb' for vertical texts.
textDiv.dir = geom.dir;
@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() {
// little effect on text highlighting. This makes scrolling on docs with
// lots of such divs a lot faster.
let shouldScaleText = false;
if (geom.str.length > 1) {
if (
geom.str.length > 1 ||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
) {
shouldScaleText = true;
} else if (geom.transform[0] !== geom.transform[3]) {
const absScaleX = Math.abs(geom.transform[0]),
@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
task._layoutText(textDiv);
}
if (task._enhanceTextSelection) {
if (task._enhanceTextSelection && textDivProperties.hasText) {
let angleCos = 1,
angleSin = 0;
if (angle !== 0) {
@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() {
_layoutText(textDiv) {
const textDivProperties = this._textDivProperties.get(textDiv);
if (textDivProperties.isWhitespace) {
return;
}
let transform = "";
if (textDivProperties.canvasWidth !== 0) {
if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) {
const { fontSize, fontFamily } = textDiv.style;
// Only build font string and set to context if different from last.
@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() {
}
textDiv.style.transform = transform;
}
this._textDivProperties.set(textDiv, textDivProperties);
this._container.appendChild(textDiv);
if (textDivProperties.hasText) {
this._container.appendChild(textDiv);
}
if (textDivProperties.hasEOL) {
const br = document.createElement("br");
br.setAttribute("role", "presentation");
this._container.appendChild(br);
}
},
_render: function TextLayer_render(timeout) {
@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
const div = this._textDivs[i];
const divProps = this._textDivProperties.get(div);
if (divProps.isWhitespace) {
if (!divProps.hasText) {
continue;
}
if (expandDivs) {