mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-10 02:05:37 +02:00
Merge pull request #18390 from alexcat3/fix-issue-18099
Handle toUnicode cMaps that omit leading zeros in hex encoded UTF-16 (issue 18099)
This commit is contained in:
commit
5ee61690f3
4 changed files with 21 additions and 0 deletions
|
@ -3852,6 +3852,11 @@ class PartialEvaluator {
|
||||||
map[charCode] = String.fromCodePoint(token);
|
map[charCode] = String.fromCodePoint(token);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// Add back omitted leading zeros on odd length tokens
|
||||||
|
// (fixes issue #18099)
|
||||||
|
if (token.length % 2 !== 0) {
|
||||||
|
token = "\u0000" + token;
|
||||||
|
}
|
||||||
const str = [];
|
const str = [];
|
||||||
for (let k = 0; k < token.length; k += 2) {
|
for (let k = 0; k < token.length; k += 2) {
|
||||||
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
const w1 = (token.charCodeAt(k) << 8) | token.charCodeAt(k + 1);
|
||||||
|
|
1
test/pdfs/.gitignore
vendored
1
test/pdfs/.gitignore
vendored
|
@ -653,3 +653,4 @@
|
||||||
!bug1539074.1.pdf
|
!bug1539074.1.pdf
|
||||||
!issue18305.pdf
|
!issue18305.pdf
|
||||||
!issue18360.pdf
|
!issue18360.pdf
|
||||||
|
!issue18099_reduced.pdf
|
||||||
|
|
BIN
test/pdfs/issue18099_reduced.pdf
Normal file
BIN
test/pdfs/issue18099_reduced.pdf
Normal file
Binary file not shown.
|
@ -3419,6 +3419,21 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
|
||||||
await loadingTask.destroy();
|
await loadingTask.destroy();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("gets text content, correctly handling documents with toUnicode cmaps that omit leading zeros on hex-encoded UTF-16", async function () {
|
||||||
|
const loadingTask = getDocument(
|
||||||
|
buildGetDocumentParams("issue18099_reduced.pdf")
|
||||||
|
);
|
||||||
|
const pdfDoc = await loadingTask.promise;
|
||||||
|
const pdfPage = await pdfDoc.getPage(1);
|
||||||
|
const { items } = await pdfPage.getTextContent({
|
||||||
|
disableNormalization: true,
|
||||||
|
});
|
||||||
|
const text = mergeText(items);
|
||||||
|
expect(text).toEqual("Hello world!");
|
||||||
|
|
||||||
|
await loadingTask.destroy();
|
||||||
|
});
|
||||||
|
|
||||||
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
|
it("gets text content, and check that out-of-page text is not present (bug 1755201)", async function () {
|
||||||
if (isNodeJS) {
|
if (isNodeJS) {
|
||||||
pending("Linked test-cases are not supported in Node.js.");
|
pending("Linked test-cases are not supported in Node.js.");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue