mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-09 01:35:43 +02:00
[api-minor] Fix the way to chunk the strings (#13257)
- Improve chunking in order to fix some bugs where spaces are missing: * track the last position where a glyph has been drawn; * when a new glyph (the first glyph in a chunk) is added, compare its position with the last saved one and add a space or a break: - there are multiple ways to move the glyphs, and to avoid having to deal with all the different possibilities it's much easier to just compare positions; - and so there is now one function (i.e. "compareWithLastPosition") where all the work is done. - Add some breaks in order to get lines; - Remove the multiple whitespaces: * some spaces were filled with several whitespace characters, which makes it harder to find some sequences of words using the search tool; * other PDF readers replace spaces with a single whitespace. Update src/core/evaluator.js Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com> Co-authored-by: Jonas Jenwald <jonas.jenwald@gmail.com>
This commit is contained in:
parent
e6fcb1e70b
commit
af4dc55019
3 changed files with 428 additions and 272 deletions
|
@ -57,12 +57,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const DEFAULT_FONT_SIZE = 30;
|
||||
const DEFAULT_FONT_ASCENT = 0.8;
|
||||
const ascentCache = new Map();
|
||||
|
||||
const NonWhitespaceRegexp = /\S/;
|
||||
|
||||
function isAllWhitespace(str) {
|
||||
return !NonWhitespaceRegexp.test(str);
|
||||
}
|
||||
// Matches strings that consist solely of whitespace characters.
// Deliberately NOT global: with the `g` flag, `test()` is stateful through
// `lastIndex`, so after one successful match the next call starts past
// position 0, fails against the `^` anchor and returns `false` for an
// equally whitespace-only string (the results alternate true/false).
const AllWhitespaceRegexp = /^\s+$/;
|
||||
|
||||
function getAscent(fontFamily, ctx) {
|
||||
const cachedAscent = ascentCache.get(fontFamily);
|
||||
|
@ -133,7 +128,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const textDivProperties = {
|
||||
angle: 0,
|
||||
canvasWidth: 0,
|
||||
isWhitespace: false,
|
||||
hasText: geom.str !== "",
|
||||
hasEOL: geom.hasEOL,
|
||||
originalTransform: null,
|
||||
paddingBottom: 0,
|
||||
paddingLeft: 0,
|
||||
|
@ -142,12 +138,8 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
scale: 1,
|
||||
};
|
||||
|
||||
textDiv.textContent = geom.str;
|
||||
task._textDivs.push(textDiv);
|
||||
if (isAllWhitespace(geom.str)) {
|
||||
textDivProperties.isWhitespace = true;
|
||||
task._textDivProperties.set(textDiv, textDivProperties);
|
||||
return;
|
||||
}
|
||||
|
||||
const tx = Util.transform(task._viewport.transform, geom.transform);
|
||||
let angle = Math.atan2(tx[1], tx[0]);
|
||||
|
@ -176,7 +168,6 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
// Keeps screen readers from pausing on every new text span.
|
||||
textDiv.setAttribute("role", "presentation");
|
||||
|
||||
textDiv.textContent = geom.str;
|
||||
// geom.dir may be 'ttb' for vertical texts.
|
||||
textDiv.dir = geom.dir;
|
||||
|
||||
|
@ -192,7 +183,10 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
// little effect on text highlighting. This makes scrolling on docs with
|
||||
// lots of such divs a lot faster.
|
||||
let shouldScaleText = false;
|
||||
if (geom.str.length > 1) {
|
||||
if (
|
||||
geom.str.length > 1 ||
|
||||
(task._enhanceTextSelection && AllWhitespaceRegexp.test(geom.str))
|
||||
) {
|
||||
shouldScaleText = true;
|
||||
} else if (geom.transform[0] !== geom.transform[3]) {
|
||||
const absScaleX = Math.abs(geom.transform[0]),
|
||||
|
@ -218,7 +212,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
task._layoutText(textDiv);
|
||||
}
|
||||
|
||||
if (task._enhanceTextSelection) {
|
||||
if (task._enhanceTextSelection && textDivProperties.hasText) {
|
||||
let angleCos = 1,
|
||||
angleSin = 0;
|
||||
if (angle !== 0) {
|
||||
|
@ -666,12 +660,9 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
|
||||
_layoutText(textDiv) {
|
||||
const textDivProperties = this._textDivProperties.get(textDiv);
|
||||
if (textDivProperties.isWhitespace) {
|
||||
return;
|
||||
}
|
||||
|
||||
let transform = "";
|
||||
if (textDivProperties.canvasWidth !== 0) {
|
||||
if (textDivProperties.canvasWidth !== 0 && textDivProperties.hasText) {
|
||||
const { fontSize, fontFamily } = textDiv.style;
|
||||
|
||||
// Only build font string and set to context if different from last.
|
||||
|
@ -700,8 +691,15 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
}
|
||||
textDiv.style.transform = transform;
|
||||
}
|
||||
this._textDivProperties.set(textDiv, textDivProperties);
|
||||
this._container.appendChild(textDiv);
|
||||
|
||||
if (textDivProperties.hasText) {
|
||||
this._container.appendChild(textDiv);
|
||||
}
|
||||
if (textDivProperties.hasEOL) {
|
||||
const br = document.createElement("br");
|
||||
br.setAttribute("role", "presentation");
|
||||
this._container.appendChild(br);
|
||||
}
|
||||
},
|
||||
|
||||
_render: function TextLayer_render(timeout) {
|
||||
|
@ -778,7 +776,7 @@ const renderTextLayer = (function renderTextLayerClosure() {
|
|||
const div = this._textDivs[i];
|
||||
const divProps = this._textDivProperties.get(div);
|
||||
|
||||
if (divProps.isWhitespace) {
|
||||
if (!divProps.hasText) {
|
||||
continue;
|
||||
}
|
||||
if (expandDivs) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue