pdf.js/PDFFont.js

370 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

var Type1Parser = function(aAsciiStream, aBinaryStream) {
var lexer = new Lexer(aAsciiStream);
// Turn on this flag for additional debugging logs
var debug = false;
var dump = function(aData) {
if (debug)
log(aData);
};
/*
* Decrypt a Sequence of Ciphertext Bytes to Produce the Original Sequence
* of Plaintext Bytes. The function took a key as a parameter which can be
* for decrypting the eexec block of for decoding charStrings.
*/
var kEexecEncryptionKey = 55665;
var kCharStringsEncryptionKey = 4330;
function decrypt(aStream, aKey, aDiscardNumber) {
var r = aKey, c1 = 52845, c2 = 22719;
var decryptedString = [];
var value = "";
var count = aStream.length;
for (var i = 0; i < count; i++) {
value = aStream.getByte();
decryptedString[i] = String.fromCharCode(value ^ (r >> 8));
r = ((value + r) * c1 + c2) & ((1 << 16) - 1);
}
return decryptedString.slice(aDiscardNumber);
}
/*
* CharStrings are encoded following the the CharString Encoding sequence
* describe in Chapter 6 of the "Adobe Type1 Font Format" specification.
* The value in a byte indicates a command, a number, or subsequent bytes
* that are to be interpreted in a special way.
*
* CharString Number Encoding:
* A CharString byte containing the values from 32 through 255 inclusive
* indicate an integer. These values are decoded in four ranges.
*
* 1. A CharString byte containing a value, v, between 32 and 246 inclusive,
* indicate the integer v - 139. Thus, the integer values from -107 through
* 107 inclusive may be encoded in single byte.
*
* 2. A CharString byte containing a value, v, between 247 and 250 inclusive,
* indicates an integer involving the next byte, w, according to the formula:
* [(v - 247) x 256] + w + 108
*
* 3. A CharString byte containing a value, v, between 251 and 254 inclusive,
* indicates an integer involving the next byte, w, according to the formula:
* -[(v - 251) * 256] - w - 108
*
* 4. A CharString containing the value 255 indicates that the next 4 bytes
* are a two complement signed integer. The first of these bytes contains the
* highest order bits, the second byte contains the next higher order bits
* and the fourth byte contain the lowest order bits.
*
*
* CharString Command Encoding:
* CharStrings commands are encoded in 1 or 2 bytes.
*
* Single byte commands are encoded in 1 byte that contains a value between
* 0 and 31 inclusive.
* If a command byte contains the value 12, then the value in the next byte
* indicates a command. This "escape" mechanism allows many extra commands
* to be encoded and this encoding technique helps to minimize the length of
* the charStrings.
*/
function decodeCharString(aStream) {
var charString = [];
var cmd = {
"1": "hstem",
"3": "vstem",
"4": "vmoveto",
"5": "rlineto",
"6": "hlineto",
"7": "vlineto",
"8": "rrcurveto",
"9": "closepath",
"10": "callsubr",
"11": "return",
"12": {
"0": "dotsection",
"1": "vstem3",
"3": "hstem3",
"6": "seac",
"7": "sbw",
"12": "div",
"16": "callothersubr",
"17": "pop",
"33": "setcurrentpoint"
},
"13": "hsbw",
"14": "endchar",
"21": "rmoveto",
"22": "hmoveto",
"30": "vhcurveto",
"31": "hcurveto"
}
var value = "";
var count = aStream.length;
for (var i = 0; i < count; i++) {
value = aStream.getByte();
if (value < 0) {
continue;
} else if (value < 32) {
if (value == 12) {
value = cmd["12"][aStream.getByte()];
count++;
} else {
value = cmd[value];
}
} else if (value <= 246) {
value = parseInt(value) - 139;
} else if (value <= 250) {
value = ((value - 247) * 256) + parseInt(aStream.getByte()) + 108;
count++;
} else if (value <= 254) {
value = -((value - 251) * 256) - parseInt(aStream.getByte()) - 108;
count++;
} else {
error("Two complement signed integers are ignored for the moment");
}
charString.push(value);
}
return charString;
}
/*
* The operand stack holds arbitrary PostScript objects that are the operands
* and results of PostScript operators being executed. The interpreter pushes
* objects on the operand stack when it encounters them as literal data in a
* program being executed. When an operator requires one or more operands, it
* obtains them by popping them off the top of the operand stack. When an
* operator returns one or more results, it does so by pushing them on the
* operand stack.
*/
var operandStack = {
__innerStack__: [],
push: function(aOperand) {
this.__innerStack__.push(aOperand);
},
pop: function() {
return this.__innerStack__.pop();
},
peek: function() {
return this.__innerStack__[this.__innerStack__.length - 1];
},
get length() {
return this.__innerStack__.length;
}
};
// Flag indicating if the topmost operand of the operandStack is an array
var operandIsArray = 0;
/*
* The dictionary stack holds only dictionary objects. The current set of
* dictionaries on the dictionary stack defines the environment for all
* implicit name searches, such as those that occur when the interpreter
* encounters an executable name. The role of the dictionary stack is
* introduced in Section 3.3, “Data Types and Objects,” and is further
* explained in Section 3.5, “Execution.” of the PostScript Language
* Reference.
*/
var systemDict = new Dict(),
globalDict = new Dict(),
userDict = new Dict();
var dictionaryStack = {
__innerStack__: [systemDict, globalDict],
push: function(aDictionary) {
this.__innerStack__.push(aDictionary);
},
pop: function() {
if (this.__innerStack__.length == 2)
return null;
return this.__innerStack__.pop();
},
peek: function() {
return this.__innerStack__[this.__innerStack__.length - 1];
},
get length() {
return this.__innerStack__.length;
}
}
/*
* The execution stack holds executable objects (mainly procedures and files)
* that are in intermediate stages of execution. At any point in the
* execution of a PostScript program, this stack represents the programs
* call stack. Whenever the interpreter suspends execution of an object to
* execute some other object, it pushes the new object on the execution
* stack. When the interpreter finishes executing an object, it pops that
* object off the execution stack and resumes executing the suspended object
* beneath it.
*/
var executionStack = [];
/*
* Parse a font file from the first segment to the last assuming the eexec
* block is binary data.
*
* The method thrown an error if it encounters an unknown token.
*/
this.getObj = function() {
var obj = lexer.getObj();
if (operandIsArray && !IsCmd(obj, "{") && !IsCmd(obj, "[") &&
!IsCmd(obj, "}") && !IsCmd(obj, "]")) {
operandStack.peek().push(obj);
this.getObj();
} else if (IsCmd(obj, "{") || IsCmd(obj, "[")) {
dump("Start Array: " + obj);
operandStack.push([]);
operandIsArray++;
this.getObj();
} else if (IsCmd(obj, "}") || IsCmd(obj, "]")) {
dump("End Array: " + obj);
operandIsArray--;
this.getObj();
} else if (IsBool(obj) || IsInt(obj) || IsNum(obj) || IsString(obj)) {
//dump("Value: " + obj);
operandStack.push(obj);
this.getObj();
} else if (IsCmd(obj, "dup")) {
dump("Duplicate");
operandStack.push(operandStack.peek());
this.getObj();
} else if (IsCmd(obj, "currentdict")) {
dump("currentdict");
operandStack.push(dictionaryStack.peek());
this.getObj();
} else if (IsCmd(obj, "systemdict")) {
dump("systemdict");
operandStack.push(systemDict);
this.getObj();
} else if (IsCmd(obj, "readonly") || IsCmd(obj, "executeonly") ||
IsCmd(obj, "currentfile") || IsCmd(obj, "NP")) {
// Do nothing for the moment
this.getObj();
} else if (IsName(obj)) {
//dump("Name: " + obj.name);
operandStack.push(obj.name);
this.getObj();
} else if (IsCmd(obj, "dict")) {
dump("Dict: " + obj);
var size = operandStack.pop();
var dict = new Dict(size);
operandStack.push(dict);
this.getObj();
} else if (IsCmd(obj, "begin")) {
dump("begin a dictionary");
dictionaryStack.push(operandStack.pop());
this.getObj();
} else if (IsCmd(obj, "end")) {
dump("Ending a dictionary");
dictionaryStack.pop();
this.getObj();
} else if (IsCmd(obj, "def")) {
var value = operandStack.pop();
var key = operandStack.pop();
dump("def: " + key + " = " + value);
dictionaryStack.peek().set(key, value);
this.getObj();
} else if (IsCmd(obj, "eexec")) {
// All the first segment data has been read, decrypt the second segment
// and start interpreting it in order to decode it
var eexecString = decrypt(aBinaryStream, kEexecEncryptionKey, 4).join("");
lexer = new Lexer(new StringStream(eexecString));
this.getObj();
} else if (IsCmd(obj, "known")) {
dump("known");
var name = operandStack.pop();
var dict = operandStack.pop();
// returns dict.hasKey(name);
this.getObj();
} else if (IsCmd(obj, "RD")) {
dump("RD");
var size = operandStack.pop();
var key = operandStack.pop();
// Add '1' because of the space separator, this is dirty
var stream = lexer.stream.makeSubStream(lexer.stream.pos + 1, size);
lexer.stream.skip(size + 1);
var charString = decrypt(stream, kCharStringsEncryptionKey, 4).join("");
var charStream = new StringStream(charString);
// XXX do we want to store that on the top dictionary or somewhere else
dictionaryStack.peek().set(key, charStream);
var decodedCharString = decodeCharString(charStream);
log(decodedCharString);
this.getObj();
} else if (IsCmd(obj, "LenIV")) {
error("LenIV: argh! we need to modify the length of discard characters for charStrings");
} else if (IsCmd(obj, "closefile")) {
// End of binary data;
} else if (IsCmd(obj, "StandardEncoding")) {
// For some reason the value is considered as a command, maybe it is
// because of the uppercae 'S'
operandStack.push(obj.cmd);
this.getObj();
} else {
dump(obj);
error("Unknow token while parsing font");
}
return operandStack.peek();
}
};
var hack = false;
var Type1Font = function(aFontName, aFontFile) {
// All Type1 font program should begin with the comment %!
if (aFontFile.getByte() != 0x25 || aFontFile.getByte() != 0x21)
error("Invalid file header");
if (!hack) {
log(aFontName);
var ASCIIStream = aFontFile.makeSubStream(0, aFontFile.dict.get("Length1"), aFontFile.dict);
var binaryStream = aFontFile.makeSubStream(aFontFile.dict.get("Length1"), aFontFile.dict.get("Length2"), aFontFile.dict);
this.parser = new Type1Parser(ASCIIStream, binaryStream);
var fontDictionary = this.parser.getObj();
log(fontDictionary + "\t" + "fontInfo: " + fontDictionary.get("FontInfo"));
hack = true;
}
this.info = {};
this.name = aFontName;
this.encoding = [];
this.paintType = 0;
this.fontType = 0;
this.fontMatrix = [];
this.fontBBox = [];
this.uniqueID = 0;
this.metrics = {};
this.strokeWidth = 0.0;
this.private = {};
this.charStrings = {}
this.FID = 0;
};