mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-08 09:20:06 +02:00
Remove the invisible format marks from the text chunks
- It aims to fix issue #9186.
This commit is contained in:
parent
88236e1163
commit
e1d3a3b414
7 changed files with 99 additions and 14 deletions
|
@ -47,19 +47,67 @@ describe("unicode", function () {
|
|||
it("should correctly determine the character category", function () {
|
||||
const tests = {
|
||||
// Whitespace
|
||||
" ": { isDiacritic: false, isWhitespace: true },
|
||||
"\t": { isDiacritic: false, isWhitespace: true },
|
||||
"\u2001": { isDiacritic: false, isWhitespace: true },
|
||||
"\uFEFF": { isDiacritic: false, isWhitespace: true },
|
||||
" ": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: true,
|
||||
},
|
||||
"\t": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: true,
|
||||
},
|
||||
"\u2001": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: true,
|
||||
},
|
||||
"\uFEFF": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: true,
|
||||
},
|
||||
|
||||
// Diacritic
|
||||
"\u0302": { isDiacritic: true, isWhitespace: false },
|
||||
"\u0344": { isDiacritic: true, isWhitespace: false },
|
||||
"\u0361": { isDiacritic: true, isWhitespace: false },
|
||||
"\u0302": {
|
||||
isZeroWidthDiacritic: true,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: false,
|
||||
},
|
||||
"\u0344": {
|
||||
isZeroWidthDiacritic: true,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: false,
|
||||
},
|
||||
"\u0361": {
|
||||
isZeroWidthDiacritic: true,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: false,
|
||||
},
|
||||
|
||||
// No whitespace or diacritic
|
||||
a: { isDiacritic: false, isWhitespace: false },
|
||||
1: { isDiacritic: false, isWhitespace: false },
|
||||
// Invisible format mark
|
||||
"\u200B": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: true,
|
||||
isWhitespace: false,
|
||||
},
|
||||
"\u200D": {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: true,
|
||||
isWhitespace: false,
|
||||
},
|
||||
|
||||
// No whitespace or diacritic or invisible format mark
|
||||
a: {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: false,
|
||||
},
|
||||
1: {
|
||||
isZeroWidthDiacritic: false,
|
||||
isInvisibleFormatMark: false,
|
||||
isWhitespace: false,
|
||||
},
|
||||
};
|
||||
for (const [character, expectation] of Object.entries(tests)) {
|
||||
expect(getCharUnicodeCategory(character)).toEqual(expectation);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue