Add support for basic structure tree for accessibility.

When a PDF is "marked" we now generate a separate DOM that represents
the structure tree from the PDF.  This DOM is inserted into the <canvas>
element and allows screen readers to walk the tree and have more
information about headings, images, links, etc. To link the structure
tree DOM (which is empty) to the text layer aria-owns is used. This
required modifying the text layer creation so that marked items are
now tracked.
This commit is contained in:
Brendan Dahl 2021-03-31 15:07:02 -07:00
parent 6429ccc002
commit fc9501a637
22 changed files with 911 additions and 14 deletions

View file

@ -1013,13 +1013,17 @@ class PDFDocumentProxy {
* whitespace with standard spaces (0x20). The default value is `false`.
* @property {boolean} disableCombineTextItems - Do not attempt to combine
* same line {@link TextItem}'s. The default value is `false`.
* @property {boolean} [includeMarkedContent] - When true include marked
* content items in the items array of TextContent. The default is `false`.
*/
/**
* Page text content.
*
* @typedef {Object} TextContent
* @property {Array<TextItem>} items - Array of {@link TextItem} objects.
* @property {Array<TextItem | TextMarkedContent>} items - Array of
* {@link TextItem} and {@link TextMarkedContent} objects. TextMarkedContent
* items are included when includeMarkedContent is true.
* @property {Object<string, TextStyle>} styles - {@link TextStyle} objects,
* indexed by font name.
*/
@ -1034,6 +1038,17 @@ class PDFDocumentProxy {
* @property {number} width - Width in device space.
* @property {number} height - Height in device space.
* @property {string} fontName - Font name used by PDF.js for converted font.
*
*/
/**
* Page text marked content part.
*
* @typedef {Object} TextMarkedContent
* @property {string} type - Either 'beginMarkedContent',
* 'beginMarkedContentProps', or 'endMarkedContent'.
* @property {string} id - The marked content identifier. Only used for type
* 'beginMarkedContentProps'.
*/
/**
@ -1089,6 +1104,25 @@ class PDFDocumentProxy {
* states set.
*/
/**
* Structure tree node. The root node will have a role "Root".
*
* @typedef {Object} StructTreeNode
* @property {Array<StructTreeNode | StructTreeContent>} children - Array of
* {@link StructTreeNode} and {@link StructTreeContent} objects.
* @property {string} role - element's role, already mapped if a role map exists
* in the PDF.
*/
/**
* Structure tree content.
*
* @typedef {Object} StructTreeContent
* @property {string} type - either "content" for page and stream structure
* elements or "object" for object references.
* @property {string} id - unique id that will map to the text layer.
*/
/**
* PDF page operator list.
*
@ -1408,6 +1442,7 @@ class PDFPageProxy {
streamTextContent({
normalizeWhitespace = false,
disableCombineTextItems = false,
includeMarkedContent = false,
} = {}) {
const TEXT_CONTENT_CHUNK_SIZE = 100;
@ -1417,6 +1452,7 @@ class PDFPageProxy {
pageIndex: this._pageIndex,
normalizeWhitespace: normalizeWhitespace === true,
combineTextItems: disableCombineTextItems !== true,
includeMarkedContent: includeMarkedContent === true,
},
{
highWaterMark: TEXT_CONTENT_CHUNK_SIZE,
@ -1457,6 +1493,16 @@ class PDFPageProxy {
});
}
/**
* @returns {Promise<StructTreeNode>} A promise that is resolved with a
* {@link StructTreeNode} object that represents the page's structure tree.
*/
getStructTree() {
return (this._structTreePromise ||= this._transport.getStructTree(
this._pageIndex
));
}
/**
* Destroys the page object.
* @private
@ -1486,6 +1532,7 @@ class PDFPageProxy {
this._annotationsPromise = null;
this._jsActionsPromise = null;
this._xfaPromise = null;
this._structTreePromise = null;
this.pendingCleanup = false;
return Promise.all(waitOn);
}
@ -1521,6 +1568,7 @@ class PDFPageProxy {
this._annotationsPromise = null;
this._jsActionsPromise = null;
this._xfaPromise = null;
this._structTreePromise = null;
if (resetStats && this._stats) {
this._stats = new StatTimer();
}
@ -2755,6 +2803,12 @@ class WorkerTransport {
});
}
getStructTree(pageIndex) {
return this.messageHandler.sendWithPromise("GetStructTree", {
pageIndex,
});
}
getOutline() {
return this.messageHandler.sendWithPromise("GetOutline", null);
}