Patch comparing poppler/ with the copy under \kjk\src\sumatrapdf\poppler.
Everything before the first "diff" header is commentary that patch(1) skips.
Line breaks and indentation were reconstructed from a whitespace-collapsed
copy of this patch; the hunk line counts match the "@@" headers.

Lexer.cc / Lexer.h
    lookChar() now stores the character it peeked in lookCharLastValueCached
    and the next getChar() hands that value back instead of reading the
    stream again. LOOK_VALUE_NOT_CACHED (-3) marks the cache as empty.

PageLabelInfo.cc
    Adds one #include at the top of the file. The header names were lost
    when this patch was extracted and are left exactly as found.

Parser.cc
    Parser::getObj() keeps the dictionary key in a UGooString filled with
    Set() instead of a copyString()/gfree() pair. All other changed lines
    are re-indentation only (removed and added text differ in whitespace).

UGooString.cc / UGooString.h
    Strings shorter than STR_STATIC_SIZE live in the in-object sStatic
    buffer; longer ones use new[]/delete[] sized by roundedSize(). Adds a
    default constructor, Set(), clear(), resize() and a cmp() overload
    taking a reference; removes the UGooString(Unicode *, int) constructor.
    resize() scales its memcpy() lengths by sizeof(Unicode): memcpy() counts
    bytes while the buffer holds Unicode elements.

NOTE(review): Lexer::getChar(), Lexer::lookChar(), UGooString::resize() and
UGooString::roundedSize() are defined "inline" in .cc files while their
in-class declarations are not inline. Confirm no other translation unit
calls them, or drop the keyword.
NOTE(review): UGooString.h declares initChar(const char *, int) but this
patch contains no definition for it. Confirm whether it is still needed.

diff -u poppler/Lexer.cc \kjk\src\sumatrapdf\poppler\poppler/Lexer.cc
--- poppler/Lexer.cc	2006-01-17 12:35:32.000000000 -0800
+++ \kjk\src\sumatrapdf\poppler\poppler/Lexer.cc	2006-08-12 23:30:28.614500000 -0700
@@ -50,6 +50,7 @@
 Lexer::Lexer(XRef *xrefA, Stream *str) {
   Object obj;
 
+  lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
   xref = xrefA;
 
   curStr.initStream(str);
@@ -63,6 +64,7 @@
 Lexer::Lexer(XRef *xrefA, Object *obj) {
   Object obj2;
 
+  lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
   xref = xrefA;
 
   if (obj->isStream()) {
@@ -90,9 +92,15 @@
   }
 }
 
-int Lexer::getChar() {
+int inline Lexer::getChar() {
   int c;
 
+  if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+    c = lookCharLastValueCached;
+    lookCharLastValueCached = LOOK_VALUE_NOT_CACHED;
+    return c;
+  }
+
   c = EOF;
   while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) {
     curStr.streamClose();
@@ -106,11 +114,12 @@
   return c;
 }
 
-int Lexer::lookChar() {
-  if (curStr.isNone()) {
-    return EOF;
+int inline Lexer::lookChar() {
+  if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) {
+    return lookCharLastValueCached;
   }
-  return curStr.streamLookChar();
+  lookCharLastValueCached = getChar();
+  return lookCharLastValueCached;
 }
 
 Object *Lexer::getObj(Object *obj, int objNum) {
diff -u poppler/Lexer.h \kjk\src\sumatrapdf\poppler\poppler/Lexer.h
--- poppler/Lexer.h	2006-01-17 12:35:32.000000000 -0800
+++ \kjk\src\sumatrapdf\poppler\poppler/Lexer.h	2006-08-12 23:15:44.505125000 -0700
@@ -68,6 +68,15 @@
   int getChar();
   int lookChar();
 
+  // often (e.g. ~30% on PDF Reference 1.6 pdf file from Adobe site) getChar
+  // is called right after lookChar. In order to avoid expensive re-doing
+  // getChar() of underlying stream, we cache the last value found by
+  // lookChar() in lookCharLastValueCached. A special value
+  // LOOK_VALUE_NOT_CACHED that should never be part of stream indicates
+  // that no value was cached
+  static const int LOOK_VALUE_NOT_CACHED = -3;
+  int lookCharLastValueCached;
+
   Array *streams; // array of input streams
   int strPtr; // index of current stream
   Object curStr; // current stream
diff -u poppler/PageLabelInfo.cc \kjk\src\sumatrapdf\poppler\poppler/PageLabelInfo.cc
--- poppler/PageLabelInfo.cc	2006-05-30 22:36:10.000000000 -0700
+++ \kjk\src\sumatrapdf\poppler\poppler/PageLabelInfo.cc	2006-07-12 14:20:26.000000000 -0700
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
diff -u poppler/Parser.cc \kjk\src\sumatrapdf\poppler\poppler/Parser.cc
--- poppler/Parser.cc	2006-01-18 13:32:14.000000000 -0800
+++ \kjk\src\sumatrapdf\poppler\poppler/Parser.cc	2006-08-12 23:34:57.395750000 -0700
@@ -37,9 +37,9 @@
 }
 
 Object *Parser::getObj(Object *obj,
-		       Guchar *fileKey, int keyLength,
-		       int objNum, int objGen) {
-  char *key;
+                       Guchar *fileKey, int keyLength,
+                       int objNum, int objGen) {
+  UGooString key;
   Stream *str;
   Object obj2;
   int num;
@@ -73,31 +73,30 @@
     obj->initDict(xref);
     while (!buf1.isCmd(">>") && !buf1.isEOF()) {
       if (!buf1.isName()) {
-	error(getPos(), "Dictionary key must be a name object");
-	shift();
+        error(getPos(), "Dictionary key must be a name object");
+        shift();
       } else {
-	key = copyString(buf1.getName());
-	shift();
-	if (buf1.isEOF() || buf1.isError()) {
-	  gfree(key);
-	  break;
-	}
-	obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
-	gfree(key);
+        // buf1 might go away in shift(), so remember the name
+        key.Set(buf1.getName());
+        shift();
+        if (buf1.isEOF() || buf1.isError()) {
+          break;
+        }
+        obj->dictAdd(key, getObj(&obj2, fileKey, keyLength, objNum, objGen));
       }
     }
     if (buf1.isEOF())
       error(getPos(), "End of file inside dictionary");
     if (buf2.isCmd("stream")) {
       if ((str = makeStream(obj))) {
-	obj->initStream(str);
-	if (fileKey) {
-	  str->getBaseStream()->doDecryption(fileKey, keyLength,
-					     objNum, objGen);
-	}
+        obj->initStream(str);
+        if (fileKey) {
+          str->getBaseStream()->doDecryption(fileKey, keyLength,
+                                             objNum, objGen);
+        }
       } else {
-	obj->free();
-	obj->initError();
+        obj->free();
+        obj->initError();
       }
     } else {
       shift();
@@ -121,8 +120,8 @@
     s = obj->getString();
     decrypt = new Decrypt(fileKey, keyLength, objNum, objGen);
     for (i = 0, p = obj->getString()->getCString();
-	 i < s->getLength();
-	 ++i, ++p) {
+         i < s->getLength();
+         ++i, ++p) {
       *p = decrypt->decryptByte(*p);
     }
     delete decrypt;
diff -u poppler/UGooString.cc \kjk\src\sumatrapdf\poppler\poppler/UGooString.cc
--- poppler/UGooString.cc	2006-06-30 14:13:12.000000000 -0700
+++ \kjk\src\sumatrapdf\poppler\poppler/UGooString.cc	2006-08-13 01:20:22.255125000 -0700
@@ -15,61 +15,133 @@
 #include "PDFDocEncoding.h"
 #include "UGooString.h"
 
-UGooString::UGooString(Unicode *u, int l)
+int inline UGooString::roundedSize(int len) {
+  int delta;
+  if (len <= STR_STATIC_SIZE-1)
+    return STR_STATIC_SIZE;
+  delta = len < 256 ? 7 : 255;
+  return ((len + 1) + delta) & ~delta;
+}
+
+// Make sure that the buffer is big enough to contain <newLength> characters
+// plus terminating 0.
+// We assume that if this is being called from the constructor, <s> was set
+// to NULL and <length> was set to 0 to indicate unused string before calling us.
+void inline UGooString::resize(int newLength) {
+  Unicode *s1 = s;
+
+  if (!s || (roundedSize(length) != roundedSize(newLength))) {
+    // requires re-allocating data for string
+    if (newLength < STR_STATIC_SIZE)
+      s1 = sStatic;
+    else
+      s1 = new Unicode[roundedSize(newLength)];
+
+    // we had to re-allocate the memory, so copy the surviving characters of
+    // the old buffer; memcpy() counts bytes, hence the sizeof(Unicode) factor
+    if (s) {
+      if (newLength < length) {
+        memcpy(s1, s, newLength * sizeof(Unicode));
+      } else {
+        memcpy(s1, s, length * sizeof(Unicode));
+      }
+    }
+    if (s != sStatic)
+      delete[] s;
+  }
+
+  s = s1;
+  length = newLength;
+  s[length] = '\0';
+}
+
+UGooString::UGooString(void)
 {
-  s = u;
-  length = l;
+  s = NULL;
+  length = 0;
+  resize(0);
 }
 
 UGooString::UGooString(GooString &str)
 {
-  if ((str.getChar(0) & 0xff) == 0xfe && (str.getChar(1) & 0xff) == 0xff)
+  s = NULL;
+  length = 0;
+  if (str.hasUnicodeMarker())
   {
-    length = (str.getLength() - 2) / 2;
-    s = (Unicode *)gmallocn(length, sizeof(Unicode));
+    resize((str.getLength() - 2) / 2);
     for (int j = 0; j < length; ++j) {
       s[j] = ((str.getChar(2 + 2*j) & 0xff) << 8) | (str.getChar(3 + 2*j) & 0xff);
     }
   } else
-    initChar(str);
+    Set(str.getCString(), str.getLength());
 }
 
 UGooString::UGooString(const UGooString &str)
 {
-  length = str.length;
-  s = (Unicode *)gmallocn(length, sizeof(Unicode));
-  memcpy(s, str.s, length * sizeof(Unicode));
+  s = NULL;
+  length = 0;
+  Set(str);
 }
 
-UGooString::UGooString(const char *str)
+UGooString::UGooString(const char *str, int strLen)
 {
-  GooString aux(str);
-  initChar(aux);
+  s = NULL;
+  length = 0;
+  if (CALC_STRING_LEN == strLen)
+    strLen = strlen(str);
+  Set(str, strLen);
 }
 
-void UGooString::initChar(GooString &str)
+UGooString *UGooString::Set(const UGooString &str)
 {
-  length = str.getLength();
-  s = (Unicode *)gmallocn(length, sizeof(Unicode));
-  bool anyNonEncoded = false;
-  for (int j = 0; j < length && !anyNonEncoded; ++j) {
-    s[j] = pdfDocEncoding[str.getChar(j) & 0xff];
-    if (!s[j]) anyNonEncoded = true;
+  resize(str.length);
+  memcpy(s, str.s, length * sizeof(Unicode));
+  return this;
+}
+
+UGooString* UGooString::Set(const char *str, int strLen)
+{
+  int j;
+  bool foundUnencoded = false;
+
+  if (CALC_STRING_LEN == strLen)
+    strLen = strlen(str);
+
+  resize(strLen);
+  for (j = 0; j < length; ++j) {
+    s[j] = pdfDocEncoding[str[j] & 0xff];
+    if (!s[j]) {
+      foundUnencoded = true;
+      break;
+    }
   }
-  if ( anyNonEncoded )
+  if ( foundUnencoded )
   {
-    for (int j = 0; j < length; ++j) {
-      s[j] = str.getChar(j);
+    for (j = 0; j < length; ++j) {
+      s[j] = str[j];
     }
   }
+  return this;
+}
+
+UGooString *UGooString::clear(void)
+{
+  resize(0);
+  return this;
 }
 
 UGooString::~UGooString()
 {
-  gfree(s);
+  if (s != sStatic)
+    delete[] s;
 }
 
-int UGooString::cmp(UGooString *str) const
+int UGooString::cmp(const UGooString &str) const
+{
+  return cmp(&str);
+}
+
+int UGooString::cmp(const UGooString *str) const
 {
   int n1, n2, i, x;
   Unicode *p1, *p2;
@@ -85,6 +157,14 @@
   return n1 - n2;
 }
 
+// FIXME:
+// a) this is confusing because GooString::getCString() returns a pointer
+//    but UGooString returns a newly allocated copy. Should give this
+//    a different name, like copyAsAscii() or copyAsGooString()
+// b) this interface requires copying. It should be changed to take a
+//    GooString& as a param and put the data inside it so that it uses
+//    caching optimization of GooString. Callers should be changed to use
+//    this new interface
 char *UGooString::getCString() const
 {
   char *res = new char[length + 1];
@@ -92,3 +172,4 @@
   res[length] = '\0';
   return res;
 }
+
diff -u poppler/UGooString.h \kjk\src\sumatrapdf\poppler\poppler/UGooString.h
--- poppler/UGooString.h	2006-01-18 13:36:02.000000000 -0800
+++ \kjk\src\sumatrapdf\poppler\poppler/UGooString.h	2006-08-13 01:20:45.145750000 -0700
@@ -18,8 +18,9 @@
 class UGooString
 {
 public:
-  // Create an unicode string
-  UGooString(Unicode *u, int l);
+
+  // Create empty unicode string
+  UGooString(void);
 
   // Create a unicode string from <str>.
   UGooString(GooString &str);
@@ -28,26 +29,46 @@
   UGooString(const UGooString &str);
 
   // Create a unicode string from <str>.
-  UGooString(const char *str);
+  UGooString(const char *str, int strLen = CALC_STRING_LEN);
+
+  UGooString *Set(const char *str, int strLen = CALC_STRING_LEN);
+  UGooString *Set(const UGooString &str);
+
+  // Set the string to empty string, freeing all dynamically allocated memory
+  // as a side effect
+  UGooString *clear(void);
 
-  // Destructor.
   ~UGooString();
 
-  // Get length.
+  void resize(int newLength);
+
   int getLength() const { return length; }
 
   // Compare two strings:  -1:<  0:=  +1:>
-  int cmp(UGooString *str) const;
+  int cmp(const UGooString *str) const;
+  int cmp(const UGooString &str) const;
 
   // get the unicode
   Unicode *unicode() const { return s; }
 
-  // get the const char*
+  // Return a newly allocated copy of the string converted to
+  // ascii (non-Unicode) format. Caller has to delete [] the result
   char *getCString() const;
 
 private:
-  void initChar(GooString &str);
+  // you can tweak this number for a different speed/memory usage tradeoffs.
+  // In libc malloc() rounding is 16 so it's best to choose a value that
+  // results in sizeof(UGooString) be a multiple of 16.
+  // 20 makes sizeof(UGooString) to be 48.
+  static const int STR_STATIC_SIZE = 20;
+  // a special value telling that the length of the string is not given
+  // so it must be calculated from the strings
+  static const int CALC_STRING_LEN = -1;
+
+  int roundedSize(int len);
+  void initChar(const char *str, int strLen);
 
+  Unicode sStatic[STR_STATIC_SIZE];
   int length;
   Unicode *s;
 };