mirror of
https://github.com/zen-browser/pdf.js.git
synced 2025-07-08 09:20:06 +02:00
Cache the normalized unicode-value on the Glyph instance
Currently, during text-extraction, we're repeatedly normalizing and (when necessary) reversing the unicode-values every time. This seems a little unnecessary, since the result won't change, hence this patch moves that into the `Glyph`-instance and makes it *lazily* initialized. Taking the `tracemonkey.pdf` document as an example: When extracting the text-content there's a total of 69236 characters but only 595 unique `Glyph`-instances, which means a 99.1 percent cache hit-rate. Generally speaking, the longer a PDF document is the more beneficial this should be. *Please note:* The old code is fast enough that it unfortunately seems difficult to measure a (clear) performance improvement with this patch, so I completely understand if it's deemed an unnecessary change.
This commit is contained in:
parent
eda51d1dcc
commit
c33b8d7692
3 changed files with 24 additions and 11 deletions
|
@@ -498,7 +498,7 @@ function createValidAbsoluteUrl(url, baseUrl = null, options = null) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function shadow(obj, prop, value) {
|
||||
function shadow(obj, prop, value, nonSerializable = false) {
|
||||
if (
|
||||
typeof PDFJSDev === "undefined" ||
|
||||
PDFJSDev.test("!PRODUCTION || TESTING")
|
||||
|
@@ -510,7 +510,7 @@ function shadow(obj, prop, value) {
|
|||
}
|
||||
Object.defineProperty(obj, prop, {
|
||||
value,
|
||||
enumerable: true,
|
||||
enumerable: !nonSerializable,
|
||||
configurable: true,
|
||||
writable: false,
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue