Support multi-byte ToUnicode entries, when using predefined CMaps (issue 16176)

Hopefully this makes sense, since we already "create" multi-byte ToUnicode entries in other cases (see e.g. the `getNormalizedUnicodes` table).
This commit is contained in:
Jonas Jenwald 2023-03-21 12:24:21 +01:00
parent b1e0253f29
commit d4bcfe8c16
4 changed files with 27 additions and 4 deletions

View file

@ -28,6 +28,7 @@ import {
} from "../../src/shared/util.js";
import {
buildGetDocumentParams,
CMAP_URL,
DefaultFileReaderFactory,
TEST_PDFS_PATH,
} from "./test_utils.js";
@ -2593,6 +2594,23 @@ Caron Broadcasting, Inc., an Ohio corporation (“Lessee”).`)
await loadingTask.destroy();
});
it("gets text content with multi-byte entries, using predefined CMaps (issue 16176)", async function () {
  // Regression test for issue 16176: open the sample document with the
  // predefined CMap location supplied and `useWorkerFetch` switched off.
  const documentParams = buildGetDocumentParams("issue16176.pdf", {
    cMapUrl: CMAP_URL,
    useWorkerFetch: false,
  });
  const loadingTask = getDocument(documentParams);

  // Pull the text items from the first page and merge them into one string.
  const doc = await loadingTask.promise;
  const firstPage = await doc.getPage(1);
  const textContent = await firstPage.getTextContent();
  const mergedText = mergeText(textContent.items);

  // The expected character lies outside the Basic Multilingual Plane, so it
  // only round-trips when the multi-byte entry is kept intact.
  expect(mergedText).toEqual("𠮷");

  // Tear down the loading task once the assertion has run.
  await loadingTask.destroy();
});
it("gets empty structure tree", async function () {
const tree = await page.getStructTree();