Merge pull request #13171 from brendandahl/struct-tree

[api-minor] Add support for basic structure tree for accessibility.
This commit is contained in:
Tim van der Meij 2021-04-09 21:32:44 +02:00 committed by GitHub
commit 03c8c89002
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 911 additions and 14 deletions

View file

@ -1026,13 +1026,17 @@ class PDFDocumentProxy {
* whitespace with standard spaces (0x20). The default value is `false`.
* @property {boolean} disableCombineTextItems - Do not attempt to combine
* same line {@link TextItem}'s. The default value is `false`.
* @property {boolean} [includeMarkedContent] - When true, include marked
* content items in the items array of TextContent. The default is `false`.
*/
/**
* Page text content.
*
* @typedef {Object} TextContent
* @property {Array<TextItem>} items - Array of {@link TextItem} objects.
* @property {Array<TextItem | TextMarkedContent>} items - Array of
* {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent
* items are included when includeMarkedContent is true.
* @property {Object<string, TextStyle>} styles - {@link TextStyle} objects,
* indexed by font name.
*/
@ -1047,6 +1051,17 @@ class PDFDocumentProxy {
* @property {number} width - Width in device space.
* @property {number} height - Height in device space.
* @property {string} fontName - Font name used by PDF.js for converted font.
*
*/
/**
* Page text marked content part.
*
* @typedef {Object} TextMarkedContent
* @property {string} type - Either 'beginMarkedContent',
* 'beginMarkedContentProps', or 'endMarkedContent'.
* @property {string} id - The marked content identifier. Only used for type
* 'beginMarkedContentProps'.
*/
/**
@ -1103,6 +1118,25 @@ class PDFDocumentProxy {
* states set.
*/
/**
* Structure tree node. The root node will have a role "Root".
*
* @typedef {Object} StructTreeNode
* @property {Array<StructTreeNode | StructTreeContent>} children - Array of
* {@link StructTreeNode} and {@link StructTreeContent} objects.
* @property {string} role - The element's role, already mapped if a role map
* exists in the PDF.
*/
/**
* Structure tree content.
*
* @typedef {Object} StructTreeContent
* @property {string} type - Either "content" for page and stream structure
* elements or "object" for object references.
* @property {string} id - Unique id that will map to the text layer.
*/
/**
* PDF page operator list.
*
@ -1435,6 +1469,7 @@ class PDFPageProxy {
streamTextContent({
normalizeWhitespace = false,
disableCombineTextItems = false,
includeMarkedContent = false,
} = {}) {
const TEXT_CONTENT_CHUNK_SIZE = 100;
@ -1444,6 +1479,7 @@ class PDFPageProxy {
pageIndex: this._pageIndex,
normalizeWhitespace: normalizeWhitespace === true,
combineTextItems: disableCombineTextItems !== true,
includeMarkedContent: includeMarkedContent === true,
},
{
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
@ -1484,6 +1520,16 @@ class PDFPageProxy {
});
}
/**
* @returns {Promise<StructTreeNode>} A promise that is resolved with a
* {@link StructTreeNode} object that represents the page's structure tree.
*/
getStructTree() {
return (this._structTreePromise ||= this._transport.getStructTree(
this._pageIndex
));
}
/**
* Destroys the page object.
* @private
@ -1513,6 +1559,7 @@ class PDFPageProxy {
this._annotationsPromise = null;
this._jsActionsPromise = null;
this._xfaPromise = null;
this._structTreePromise = null;
this.pendingCleanup = false;
return Promise.all(waitOn);
}
@ -1548,6 +1595,7 @@ class PDFPageProxy {
this._annotationsPromise = null;
this._jsActionsPromise = null;
this._xfaPromise = null;
this._structTreePromise = null;
if (resetStats && this._stats) {
this._stats = new StatTimer();
}
@ -2773,6 +2821,12 @@ class WorkerTransport {
});
}
getStructTree(pageIndex) {
return this.messageHandler.sendWithPromise("GetStructTree", {
pageIndex,
});
}
getOutline() {
return this.messageHandler.sendWithPromise("GetOutline", null);
}