Decompress images using DecompressionStream when it's possible

Getting images is already asynchronous, so we can take this opportunity
to use DecompressionStream (which is async too) to decompress images.
Calixte Denizet 2024-05-24 23:26:02 +02:00
parent 53dfb5a6ba
commit 9654ad570a
7 changed files with 149 additions and 30 deletions
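
For context: DecompressionStream is the standard Web API this commit builds on. Below is a minimal, self-contained sketch (not part of the patch; the helper and variable names are illustrative) of the pattern the new FlateStream code uses: write the compressed bytes into the writable side, then concatenate the chunks read from the readable side. It assumes a runtime where ReadableStream is async-iterable, which the patch itself relies on (recent browsers, Node 18+).

// Collect a ReadableStream of Uint8Array chunks into one buffer.
async function streamToBytes(readable) {
  const chunks = [];
  let total = 0;
  for await (const chunk of readable) {
    chunks.push(chunk);
    total += chunk.byteLength;
  }
  const out = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    out.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return out;
}

async function roundTrip() {
  const input = new TextEncoder().encode("image-like payload ".repeat(50));

  // Produce zlib-wrapped data, analogous to a PDF FlateDecode stream body.
  const cs = new CompressionStream("deflate");
  const cw = cs.writable.getWriter();
  cw.write(input);
  cw.close();
  const deflated = await streamToBytes(cs.readable);

  // Inflate it again; this is what FlateStream.asyncGetBytes() does below.
  const ds = new DecompressionStream("deflate");
  const dw = ds.writable.getWriter();
  dw.write(deflated);
  dw.close();
  const inflated = await streamToBytes(ds.readable);

  console.log(inflated.length === input.length); // true
}

roundTrip();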


@@ -44,6 +44,27 @@ class BaseStream {
     unreachable("Abstract method `getBytes` called");
   }
 
+  /**
+   * NOTE: This method can only be used to get image-data that is guaranteed
+   * to be fully loaded, since otherwise intermittent errors may occur;
+   * note the `ObjectLoader` class.
+   */
+  async getImageData(length, ignoreColorSpace) {
+    return this.getBytes(length, ignoreColorSpace);
+  }
+
+  async asyncGetBytes() {
+    unreachable("Abstract method `asyncGetBytes` called");
+  }
+
+  get isAsync() {
+    return false;
+  }
+
+  get canAsyncDecodeImageFromBuffer() {
+    return false;
+  }
+
   peekByte() {
     const peekedByte = this.getByte();
     if (peekedByte !== -1) {


@@ -99,6 +99,14 @@ class DecodeStream extends BaseStream {
     return this.buffer.subarray(pos, end);
   }
 
+  async getImageData(length, ignoreColorSpace = false) {
+    if (!this.canAsyncDecodeImageFromBuffer) {
+      return this.getBytes(length, ignoreColorSpace);
+    }
+    const data = await this.stream.asyncGetBytes();
+    return this.decodeImage(data, ignoreColorSpace);
+  }
+
   reset() {
     this.pos = 0;
   }
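
This getter-based dispatch is the heart of the change: when an image decoder's underlying stream can hand over its decoded bytes asynchronously (see canAsyncDecodeImageFromBuffer in the JBIG2/JPEG/JPX streams and isAsync in FlateStream further down), getImageData() skips the synchronous readBlock() machinery and feeds the already-inflated buffer straight into decodeImage(). A simplified sketch of that control flow with stand-in objects, not the real pdf.js classes:

// Stand-in for a FlateStream that inflates via DecompressionStream.
const toyFlate = {
  isAsync: true,
  async asyncGetBytes() {
    return new Uint8Array([0xff, 0xd8, 0xff, 0xe0]); // pretend JPEG bytes
  },
};

// Stand-in for an image DecodeStream (e.g. a JpegStream) on top of it.
const toyDecoder = {
  stream: toyFlate,
  get canAsyncDecodeImageFromBuffer() {
    return this.stream.isAsync;
  },
  getBytes(length) {
    return new Uint8Array(length); // slow path: readBlock()-driven decoding
  },
  decodeImage(bytes) {
    return bytes; // real decoders run Jbig2Image / JpegImage / JpxImage here
  },
  async getImageData(length) {
    return this.canAsyncDecodeImageFromBuffer
      ? this.decodeImage(await this.stream.asyncGetBytes())
      : this.getBytes(length);
  },
};

// await toyDecoder.getImageData(4) resolves with the asynchronously inflated bytes.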


@@ -21,6 +21,7 @@
 import { FormatError, info } from "../shared/util.js";
 import { DecodeStream } from "./decode_stream.js";
+import { Stream } from "./stream.js";
 
 const codeLenCodeMap = new Int32Array([
   16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
@@ -148,6 +149,57 @@ class FlateStream extends DecodeStream {
     this.codeBuf = 0;
   }
 
+  async getImageData(length, _ignoreColorSpace) {
+    const data = await this.asyncGetBytes();
+    return data?.subarray(0, length) || this.getBytes(length);
+  }
+
+  async asyncGetBytes() {
+    this.str.reset();
+    const bytes = this.str.getBytes();
+
+    try {
+      const { readable, writable } = new DecompressionStream("deflate");
+      const writer = writable.getWriter();
+      writer.write(bytes);
+      writer.close();
+
+      const chunks = [];
+      let totalLength = 0;
+      for await (const chunk of readable) {
+        chunks.push(chunk);
+        totalLength += chunk.byteLength;
+      }
+
+      const data = new Uint8Array(totalLength);
+      let offset = 0;
+      for (const chunk of chunks) {
+        data.set(chunk, offset);
+        offset += chunk.byteLength;
+      }
+      return data;
+    } catch {
+      // DecompressionStream failed (for example because there are some extra
+      // bytes after the end of the compressed data), so we fall back to our
+      // decoder.
+      // We already got the bytes from the underlying stream, so we just reuse
+      // them to avoid getting them again.
+      this.str = new Stream(
+        bytes,
+        2 /* = header size (see ctor) */,
+        bytes.length,
+        this.str.dict
+      );
+      this.reset();
+      return null;
+    }
+  }
+
+  get isAsync() {
+    return true;
+  }
+
   getBits(bits) {
     const str = this.str;
     let codeSize = this.codeSize;
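
The try/catch above matters because, per the code comment, DecompressionStream can reject input that pdf.js's own inflater tolerates, for example deflate data followed by extra trailing bytes. In that case asyncGetBytes() rebuilds a plain Stream over the bytes it already fetched (skipping the 2-byte zlib header) and returns null, so getImageData() falls back to the synchronous getBytes() path. A standalone, hedged sketch of the same guard; the helper name is illustrative, and Response is used here only as a compact way to drain the readable:

// Try the native inflater first; resolve with null when it rejects so the
// caller can fall back to a slower but more tolerant JavaScript decoder.
async function tryNativeInflate(bytes) {
  try {
    const ds = new DecompressionStream("deflate");
    const writer = ds.writable.getWriter();
    writer.write(bytes).catch(() => {}); // failures re-surface from the readable
    writer.close().catch(() => {});
    // Draining the readable rejects if the deflate data is malformed.
    return new Uint8Array(await new Response(ds.readable).arrayBuffer());
  } catch {
    return null; // e.g. junk after the compressed data: use the JS inflater
  }
}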


@@ -565,7 +565,7 @@ class PDFImage {
     return output;
   }
 
-  fillOpacity(rgbaBuf, width, height, actualHeight, image) {
+  async fillOpacity(rgbaBuf, width, height, actualHeight, image) {
     if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
       assert(
         rgbaBuf instanceof Uint8ClampedArray,
@@ -580,7 +580,7 @@
       sw = smask.width;
       sh = smask.height;
       alphaBuf = new Uint8ClampedArray(sw * sh);
-      smask.fillGrayBuffer(alphaBuf);
+      await smask.fillGrayBuffer(alphaBuf);
       if (sw !== width || sh !== height) {
         alphaBuf = resizeImageMask(alphaBuf, smask.bpc, sw, sh, width, height);
       }
@@ -590,7 +590,7 @@
       sh = mask.height;
       alphaBuf = new Uint8ClampedArray(sw * sh);
       mask.numComps = 1;
-      mask.fillGrayBuffer(alphaBuf);
+      await mask.fillGrayBuffer(alphaBuf);
 
       // Need to invert values in rgbaBuf
       for (i = 0, ii = sw * sh; i < ii; ++i) {
@@ -716,7 +716,7 @@
       drawWidth === originalWidth &&
       drawHeight === originalHeight
     ) {
-      const data = this.getImageBytes(originalHeight * rowBytes, {});
+      const data = await this.getImageBytes(originalHeight * rowBytes, {});
       if (isOffscreenCanvasSupported) {
         if (mustBeResized) {
           return ImageResizer.createImage(
@@ -774,7 +774,7 @@
       }
 
       if (isHandled) {
-        const rgba = this.getImageBytes(imageLength, {
+        const rgba = await this.getImageBytes(imageLength, {
           drawWidth,
           drawHeight,
           forceRGBA: true,
@@ -794,7 +794,7 @@
         case "DeviceRGB":
         case "DeviceCMYK":
           imgData.kind = ImageKind.RGB_24BPP;
-          imgData.data = this.getImageBytes(imageLength, {
+          imgData.data = await this.getImageBytes(imageLength, {
             drawWidth,
             drawHeight,
             forceRGB: true,
@@ -809,7 +809,7 @@
       }
     }
 
-    const imgArray = this.getImageBytes(originalHeight * rowBytes, {
+    const imgArray = await this.getImageBytes(originalHeight * rowBytes, {
       internal: true,
     });
     // imgArray can be incomplete (e.g. after CCITT fax encoding).
@@ -852,7 +852,7 @@
       maybeUndoPreblend = true;
 
       // Color key masking (opacity) must be performed before decoding.
-      this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
+      await this.fillOpacity(data, drawWidth, drawHeight, actualHeight, comps);
     }
 
     if (this.needsDecode) {
@@ -893,7 +893,7 @@
     return imgData;
   }
 
-  fillGrayBuffer(buffer) {
+  async fillGrayBuffer(buffer) {
     if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
       assert(
         buffer instanceof Uint8ClampedArray,
@@ -913,7 +913,9 @@
 
     // rows start at byte boundary
     const rowBytes = (width * numComps * bpc + 7) >> 3;
-    const imgArray = this.getImageBytes(height * rowBytes, { internal: true });
+    const imgArray = await this.getImageBytes(height * rowBytes, {
+      internal: true,
+    });
 
     const comps = this.getComponents(imgArray);
     let i, length;
@@ -975,7 +977,7 @@
     };
   }
 
-  getImageBytes(
+  async getImageBytes(
     length,
     {
       drawWidth,
@@ -990,7 +992,10 @@
     this.image.drawHeight = drawHeight || this.height;
     this.image.forceRGBA = !!forceRGBA;
     this.image.forceRGB = !!forceRGB;
-    const imageBytes = this.image.getBytes(length, this.ignoreColorSpace);
+    const imageBytes = await this.image.getImageData(
+      length,
+      this.ignoreColorSpace
+    );
 
     // If imageBytes came from a DecodeStream, we're safe to transfer it
     // (and thus detach its underlying buffer) because it will constitute


@@ -44,9 +44,14 @@ class Jbig2Stream extends DecodeStream {
   }
 
   readBlock() {
+    this.decodeImage();
+  }
+
+  decodeImage(bytes) {
     if (this.eof) {
-      return;
+      return this.buffer;
     }
+    bytes ||= this.bytes;
     const jbig2Image = new Jbig2Image();
 
     const chunks = [];
@@ -57,7 +62,7 @@
         chunks.push({ data: globals, start: 0, end: globals.length });
       }
     }
-    chunks.push({ data: this.bytes, start: 0, end: this.bytes.length });
+    chunks.push({ data: bytes, start: 0, end: bytes.length });
 
     const data = jbig2Image.parseChunks(chunks);
     const dataLength = data.length;
@@ -68,6 +73,12 @@
     this.buffer = data;
     this.bufferLength = dataLength;
     this.eof = true;
+    return this.buffer;
+  }
+
+  get canAsyncDecodeImageFromBuffer() {
+    return this.stream.isAsync;
   }
 }
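
The same readBlock()/decodeImage() split is applied to JpegStream and JpxStream below: the decoding body moves into decodeImage(bytes), which handles either the stream's own lazily-read bytes (the old synchronous path) or a buffer that was already inflated asynchronously and passed in by DecodeStream.getImageData(). A schematic of that shared shape, with a placeholder in place of the real JBIG2/JPEG/JPX decoders and names that are purely illustrative:

// Not pdf.js code: a toy decoder showing the pattern shared by the three
// image streams touched in this commit.
class ToyImageDecoder {
  constructor(stream, rawBytes) {
    this.stream = stream;  // underlying, possibly async-capable stream
    this.bytes = rawBytes; // raw bytes read synchronously, as in DecodeStream
    this.eof = false;
    this.buffer = null;
  }

  readBlock() {
    this.decodeImage(); // old entry point, kept for synchronous callers
  }

  decodeImage(bytes) {
    if (this.eof) {
      return this.buffer;
    }
    bytes ||= this.bytes; // the async path passes the inflated buffer in
    this.buffer = Uint8Array.from(bytes).reverse(); // placeholder "decoding"
    this.eof = true;
    return this.buffer;
  }

  get canAsyncDecodeImageFromBuffer() {
    return this.stream.isAsync;
  }
}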


@@ -24,16 +24,6 @@ import { shadow } from "../shared/util.js";
  */
 class JpegStream extends DecodeStream {
   constructor(stream, maybeLength, params) {
-    // Some images may contain 'junk' before the SOI (start-of-image) marker.
-    // Note: this seems to mainly affect inline images.
-    let ch;
-    while ((ch = stream.getByte()) !== -1) {
-      // Find the first byte of the SOI marker (0xFFD8).
-      if (ch === 0xff) {
-        stream.skip(-1); // Reset the stream position to the SOI.
-        break;
-      }
-    }
     super(maybeLength);
 
     this.stream = stream;
@@ -53,8 +43,24 @@
   }
 
   readBlock() {
+    this.decodeImage();
+  }
+
+  decodeImage(bytes) {
     if (this.eof) {
-      return;
+      return this.buffer;
    }
+    bytes ||= this.bytes;
+    // Some images may contain 'junk' before the SOI (start-of-image) marker.
+    // Note: this seems to mainly affect inline images.
+    for (let i = 0, ii = bytes.length - 1; i < ii; i++) {
+      if (bytes[i] === 0xff && bytes[i + 1] === 0xd8) {
+        if (i > 0) {
+          bytes = bytes.subarray(i);
+        }
+        break;
+      }
+    }
     const jpegOptions = {
       decodeTransform: undefined,
@@ -89,7 +95,7 @@
     }
 
     const jpegImage = new JpegImage(jpegOptions);
-    jpegImage.parse(this.bytes);
+    jpegImage.parse(bytes);
     const data = jpegImage.getData({
       width: this.drawWidth,
       height: this.drawHeight,
@@ -100,6 +106,12 @@
     this.buffer = data;
     this.bufferLength = data.length;
     this.eof = true;
+    return this.buffer;
+  }
+
+  get canAsyncDecodeImageFromBuffer() {
+    return this.stream.isAsync;
   }
 }


@@ -42,13 +42,23 @@ class JpxStream extends DecodeStream {
   }
 
   readBlock(ignoreColorSpace) {
+    this.decodeImage(null, ignoreColorSpace);
+  }
+
+  decodeImage(bytes, ignoreColorSpace) {
     if (this.eof) {
-      return;
+      return this.buffer;
     }
-    this.buffer = JpxImage.decode(this.bytes, ignoreColorSpace);
+    bytes ||= this.bytes;
+    this.buffer = JpxImage.decode(bytes, ignoreColorSpace);
     this.bufferLength = this.buffer.length;
     this.eof = true;
+    return this.buffer;
+  }
+
+  get canAsyncDecodeImageFromBuffer() {
+    return this.stream.isAsync;
   }
 }