mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-08 17:30:09 +02:00
Don't replace a line break with a white space when the last character on the line is an ideographic character
This commit is contained in:
parent
50d72fc111
commit
6c6f6fb2b8
4 changed files with 35 additions and 4 deletions
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -538,3 +538,4 @@
|
||||||
!bug1782186.pdf
|
!bug1782186.pdf
|
||||||
!tracemonkey_a11y.pdf
|
!tracemonkey_a11y.pdf
|
||||||
!bug1782564.pdf
|
!bug1782564.pdf
|
||||||
|
!issue15340.pdf
|
||||||
|
|
BIN
test/pdfs/issue15340.pdf
Normal file
BIN
test/pdfs/issue15340.pdf
Normal file
Binary file not shown.
|
@ -626,4 +626,25 @@ describe("pdf_find_controller", function () {
|
||||||
pageMatchesLength: [[8]],
|
pageMatchesLength: [[8]],
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("performs a search in a text containing an ideographic at the end of a line", async function () {
|
||||||
|
const { eventBus, pdfFindController } = await initPdfFindController(
|
||||||
|
"issue15340.pdf"
|
||||||
|
);
|
||||||
|
|
||||||
|
await testSearch({
|
||||||
|
eventBus,
|
||||||
|
pdfFindController,
|
||||||
|
state: {
|
||||||
|
query: "検知機構",
|
||||||
|
},
|
||||||
|
matchesPerPage: [1],
|
||||||
|
selectedMatch: {
|
||||||
|
pageIndex: 0,
|
||||||
|
matchIndex: 0,
|
||||||
|
},
|
||||||
|
pageMatches: [[29]],
|
||||||
|
pageMatchesLength: [[4]],
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -126,7 +126,7 @@ function normalize(text) {
|
||||||
} else {
|
} else {
|
||||||
// Compile the regular expression for text normalization once.
|
// Compile the regular expression for text normalization once.
|
||||||
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
|
const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join("");
|
||||||
const regexp = `([${replace}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\n)`;
|
const regexp = `([${replace}])|(\\p{M}+(?:-\\n)?)|(\\S-\\n)|(\\p{Ideographic}\\n)|(\\n)`;
|
||||||
|
|
||||||
if (syllablePositions.length === 0) {
|
if (syllablePositions.length === 0) {
|
||||||
// Most of the syllables belong to Hangul so there is no need
|
// Most of the syllables belong to Hangul so there is no need
|
||||||
|
@ -188,7 +188,7 @@ function normalize(text) {
|
||||||
|
|
||||||
normalized = normalized.replace(
|
normalized = normalized.replace(
|
||||||
normalizationRegex,
|
normalizationRegex,
|
||||||
(match, p1, p2, p3, p4, p5, i) => {
|
(match, p1, p2, p3, p4, p5, p6, i) => {
|
||||||
i -= shiftOrigin;
|
i -= shiftOrigin;
|
||||||
if (p1) {
|
if (p1) {
|
||||||
// Maybe fractions or quotations mark...
|
// Maybe fractions or quotations mark...
|
||||||
|
@ -248,6 +248,15 @@ function normalize(text) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p4) {
|
if (p4) {
|
||||||
|
// An ideographic character at the end of a line doesn't imply adding an
|
||||||
|
// extra white space.
|
||||||
|
positions.push([i - shift + 1, shift]);
|
||||||
|
shiftOrigin += 1;
|
||||||
|
eol += 1;
|
||||||
|
return p4.charAt(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (p5) {
|
||||||
// eol is replaced by space: "foo\nbar" is likely equivalent to
|
// eol is replaced by space: "foo\nbar" is likely equivalent to
|
||||||
// "foo bar".
|
// "foo bar".
|
||||||
positions.push([i - shift + 1, shift - 1]);
|
positions.push([i - shift + 1, shift - 1]);
|
||||||
|
@ -257,7 +266,7 @@ function normalize(text) {
|
||||||
return " ";
|
return " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
// p5
|
// p6
|
||||||
if (i + eol === syllablePositions[syllableIndex]?.[1]) {
|
if (i + eol === syllablePositions[syllableIndex]?.[1]) {
|
||||||
// A syllable (1 char) is replaced with several chars (n) so
|
// A syllable (1 char) is replaced with several chars (n) so
|
||||||
// newCharsLen = n - 1.
|
// newCharsLen = n - 1.
|
||||||
|
@ -269,7 +278,7 @@ function normalize(text) {
|
||||||
shift -= newCharLen;
|
shift -= newCharLen;
|
||||||
shiftOrigin += newCharLen;
|
shiftOrigin += newCharLen;
|
||||||
}
|
}
|
||||||
return p5;
|
return p6;
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue