Index: poppler/Object.cc =================================================================== --- poppler/Object.cc (revision 57) +++ poppler/Object.cc (working copy) @@ -118,17 +118,17 @@ g_objectStringCache.free(name); break; case objArray: - if (!array->decRef()) { + if (0 == array->decRef()) { delete array; } break; case objDict: - if (!dict->decRef()) { + if (0 == dict->decRef()) { delete dict; } break; case objStream: - if (!stream->decRef()) { + if (0 == stream->decRef()) { delete stream; } break; Index: poppler/Object.h =================================================================== --- poppler/Object.h (revision 57) +++ poppler/Object.h (working copy) @@ -330,8 +330,9 @@ inline void Object::streamClose() { stream->close(); } -inline int Object::streamGetChar() - { return stream->getChar(); } +inline int Object::streamGetChar(){ + return stream->getChar(); +} inline int Object::streamLookChar() { return stream->lookChar(); } Index: poppler/Stream.cc =================================================================== --- poppler/Stream.cc (revision 57) +++ poppler/Stream.cc (working copy) @@ -3,6 +3,7 @@ // Stream.cc // // Copyright 1996-2003 Glyph & Cog, LLC +// Copyright 2006: Krzysztof Kowalczyk (http://blog.kowalczyk.info) // //======================================================================== @@ -21,6 +22,7 @@ #endif #include #include +#include #include "goo/gmem.h" #include "goo/gfile.h" #include "poppler-config.h" @@ -35,6 +37,14 @@ #include "Stream-CCITT.h" #include "UGooString.h" +/* You can disable getBuf() on File/Flate/Embed streams by commenting those out. +This might be helpful when tracking down problems (i.e. 
if you suspect getBuf() +handling has a bug, you can easily disable it to verify that it's gone when +getBuf() is not used anywhere */ +#define ENABLE_FILE_GET_BUF 1 +#define ENABLE_FLATE_GET_BUF 1 +#define ENABLE_EMBED_GET_BUF 1 + #ifdef ENABLE_LIBJPEG #include "DCTStream.h" #endif @@ -55,6 +65,17 @@ #endif #endif +// FIXME: add more names when you need +const char *getStreamKindName(StreamKind kind) +{ + if (strFile == kind) + return "FileStream"; + else if (strFlate == kind) + return "FlateStream"; + else + return "Unknown"; +} + //------------------------------------------------------------------------ // Stream (base class) //------------------------------------------------------------------------ @@ -666,6 +687,40 @@ bufPos = start; } +GBool FileStream::hasGetBuf() { +#ifdef ENABLE_FILE_GET_BUF + return gTrue; +#else + return gFalse; +#endif +} + +GBool FileStream::getBuf(char **bufOut, int *bufSizeOut, int maxSize) +{ + int size; + if (bufPtr >= bufEnd) { + if (!fillBuf()) { + return gFalse; + } + } + assert(bufEnd > bufPtr); + size = bufEnd - bufPtr; + if (Stream::NO_SIZE_LIMIT != maxSize) { + assert(maxSize > 0); + if (size > maxSize) + size = maxSize; + } + *bufOut = bufPtr; + *bufSizeOut = size; + bufPtr += size; + return gTrue; +} + +void FileStream::ungetBuf(int sizeToGoBack) { + bufPtr -= sizeToGoBack; + assert(bufPtr >= buf); +} + //------------------------------------------------------------------------ // MemStream //------------------------------------------------------------------------ @@ -772,11 +827,53 @@ return NULL; } +GBool EmbedStream::hasGetBuf() { +#ifdef ENABLE_EMBED_GET_BUF + return str->hasGetBuf(); +#else + return gFalse; +#endif +} + +void EmbedStream::ungetBuf(int sizeToGoBack) { + str->ungetBuf(sizeToGoBack); + if (limited) { + length += sizeToGoBack; + } +} + +GBool EmbedStream::getBuf(char **bufOut, int *bufSizeOut, int maxSize) { + if (limited && (0 == length)) { + return gFalse; + } + + if (limited) { + if 
(Stream::NO_SIZE_LIMIT == maxSize) { + maxSize = length; + } else { + if (maxSize > (int)length) + maxSize = (int)length; + } + } + GBool hasData = str->getBuf(bufOut, bufSizeOut, maxSize); + if (!hasData) + return gFalse; + + if (limited) { + assert((int)length >= *bufSizeOut); + length -= *bufSizeOut; + } + assert(*bufSizeOut > 0); + return gTrue; +} + int EmbedStream::getChar() { - if (limited && !length) { - return EOF; + if (limited) { + if (0 == length) { + return EOF; + } + --length; } - --length; return str->getChar(); } @@ -3883,6 +3980,49 @@ eof = gFalse; } +GBool FlateStream::hasGetBuf() { +#ifdef ENABLE_FLATE_GET_BUF + if (pred) + return gFalse; + return gTrue; +#else + return gFalse; +#endif +} + +void FlateStream::ungetBuf(int sizeToGoBack) { + remain += sizeToGoBack; + index -= sizeToGoBack; + if (index < 0) + index = flateWindow + index; +} + +GBool FlateStream::getBuf(char **bufOut, int *bufSizeOut, int maxSize) { + int sizeToReturn; + assert(hasGetBuf()); + while (remain == 0) { + if (endOfBlock && eof) + return gFalse; + readSome(); + } + // buf wraps, for simplicity we return only the part that doesn't wrap + sizeToReturn = remain; + if (index + remain > flateWindow) { + sizeToReturn = flateWindow - index; + } + if (Stream::NO_SIZE_LIMIT != maxSize) { + assert(maxSize > 0); + if (sizeToReturn > maxSize) + sizeToReturn = maxSize; + } + assert(sizeToReturn > 0); + remain -= sizeToReturn; + *bufOut = (char*)&(buf[index]); + *bufSizeOut = sizeToReturn; + index = (index + sizeToReturn) & flateMask; + return gTrue; +} + int FlateStream::getChar() { int c; Index: poppler/Parser.cc =================================================================== --- poppler/Parser.cc (revision 57) +++ poppler/Parser.cc (working copy) @@ -79,6 +79,7 @@ UGooString *key = new UGooString(buf1.getNameC()); shift(); if (buf1.isEOF() || buf1.isError()) { + delete key; break; } obj->dictAddOwnKeyVal(key, getObj(&obj2, fileKey, keyLength, objNum, objGen)); @@ -171,15 +172,7 
@@ return NULL; } baseStr = lexer->getStream()->getBaseStream(); - - // skip over stream data - if (Lexer::LOOK_VALUE_NOT_CACHED != lexer->lookCharLastValueCached) { - // take into account the fact that we've cached one value - pos = pos - 1; - lexer->lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED; - } lexer->setPos(pos + length); - // refill token buffers and check for 'endstream' shift(); // kill '>>' shift(); // kill 'stream' Index: poppler/Stream.h =================================================================== --- poppler/Stream.h (revision 57) +++ poppler/Stream.h (working copy) @@ -3,6 +3,7 @@ // Stream.h // // Copyright 1996-2003 Glyph & Cog, LLC +// Copyright 2006: Krzysztof Kowalczyk (http://blog.kowalczyk.info) // //======================================================================== @@ -14,6 +15,7 @@ #endif #include +#include #include "goo/gtypes.h" #include "Object.h" @@ -50,10 +52,8 @@ class Stream { public: - // Constructor. Stream(); - // Destructor. virtual ~Stream(); // Reference counting. @@ -113,6 +113,12 @@ // Returns the new stream. 
Stream *addFilters(Object *dict); + static const int NO_SIZE_LIMIT = -1; + + virtual GBool hasGetBuf() = 0; + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize = NO_SIZE_LIMIT) = 0; + virtual void ungetBuf(int sizeToGoBack) = 0; + private: Stream *makeFilter(char *name, Stream *str, Object *params); @@ -172,6 +178,16 @@ virtual BaseStream *getBaseStream() { return str->getBaseStream(); } virtual Dict *getDict() { return str->getDict(); } + // override those in specific filters when you add support for getBuf() + virtual GBool hasGetBuf() { return gFalse; } + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize) { + assert(0); + return gFalse; + } + virtual void ungetBuf(int sizeToGoBack) { + assert(0); + } + protected: Stream *str; @@ -278,6 +294,10 @@ virtual Guint getStart() { return start; } virtual void moveStart(int delta); + virtual GBool hasGetBuf(); + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize); + virtual void ungetBuf(int sizeToGoBack); + private: GBool fillBuf(); @@ -319,6 +339,13 @@ virtual void doDecryption(Guchar *fileKey, int keyLength, int objNum, int objGen); + virtual GBool hasGetBuf() { return gFalse; } + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize) { + assert(0); + return gFalse; + } + virtual void ungetBuf(int sizeToGoBack) { assert(0); } + private: char *buf; @@ -355,6 +382,10 @@ virtual Guint getStart(); virtual void moveStart(int delta); + virtual GBool hasGetBuf(); + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize); + virtual void ungetBuf(int sizeToGoBack); + private: Stream *str; @@ -677,6 +708,10 @@ virtual GooString *getPSFilter(int psLevel, char *indent); virtual GBool isBinary(GBool last = gTrue); + virtual GBool hasGetBuf(); + virtual GBool getBuf(char **bufOut, int *bufSizeOut, int maxSize); + virtual void ungetBuf(int sizeToGoBack); + private: StreamPredictor *pred; // predictor Index: poppler/Lexer.cc 
=================================================================== --- poppler/Lexer.cc (revision 57) +++ poppler/Lexer.cc (working copy) @@ -1,510 +1,566 @@ -//======================================================================== -// -// Lexer.cc -// -// Copyright 1996-2003 Glyph & Cog, LLC -// -//======================================================================== - -#include - -#ifdef USE_GCC_PRAGMAS -#pragma implementation -#endif - -#include -#include -#include -#include -#include "Lexer.h" -#include "Error.h" -#include "XRef.h" - -//------------------------------------------------------------------------ - -// A '1' in this array means the character is white space. A '1' or -// '2' means the character ends a name or command. -static char specialChars[256] = { - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x - 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx -}; - -//------------------------------------------------------------------------ -// Lexer -//------------------------------------------------------------------------ - -Lexer::Lexer(XRef *xrefA, Stream *str) { - Object obj; - - lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; - xref = xrefA; - - 
curStr.initStream(str); - streams = new Array(xref); - streams->add(curStr.copy(&obj)); - strPtr = 0; - freeArray = gTrue; - curStr.streamReset(); -} - -Lexer::Lexer(XRef *xrefA, Object *obj) { - Object obj2; - - lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; - xref = xrefA; - - if (obj->isStream()) { - streams = new Array(xref); - freeArray = gTrue; - streams->add(obj->copy(&obj2)); - } else { - streams = obj->getArray(); - freeArray = gFalse; - } - strPtr = 0; - if (streams->getLength() > 0) { - streams->get(strPtr, &curStr); - curStr.streamReset(); - } -} - -Lexer::~Lexer() { - if (!curStr.isNone()) { - curStr.streamClose(); - curStr.free(); - } - if (freeArray) { - delete streams; - } -} - -int inline Lexer::getChar() { - int c; - - if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { - c = lookCharLastValueCached; - lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; - return c; - } - - c = EOF; - while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) { - curStr.streamClose(); - curStr.free(); - ++strPtr; - if (strPtr < streams->getLength()) { - streams->get(strPtr, &curStr); - curStr.streamReset(); - } - } - return c; -} - -int inline Lexer::lookChar() { - if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { - return lookCharLastValueCached; - } - lookCharLastValueCached = getChar(); - return lookCharLastValueCached; -} - -Object *Lexer::getObj(Object *obj, int objNum) { - char *p; - int c, c2; - GBool comment, neg, done; - int numParen; - int xi; - double xf, scale; - GooString *s; - int n, m; - - // skip whitespace and comments - comment = gFalse; - while (1) { - if ((c = getChar()) == EOF) { - return obj->initEOF(); - } - if (comment) { - if (c == '\r' || c == '\n') - comment = gFalse; - } else if (c == '%') { - comment = gTrue; - } else if (specialChars[c] != 1) { - break; - } - } - - // start reading token - switch (c) { - - // number - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - 
case '-': case '.': - neg = gFalse; - xi = 0; - if (c == '-') { - neg = gTrue; - } else if (c == '.') { - goto doReal; - } else { - xi = c - '0'; - } - while (1) { - c = lookChar(); - if (isdigit(c)) { - getChar(); - xi = xi * 10 + (c - '0'); - } else if (c == '.') { - getChar(); - goto doReal; - } else { - break; - } - } - if (neg) - xi = -xi; - obj->initInt(xi); - break; - doReal: - xf = xi; - scale = 0.1; - while (1) { - c = lookChar(); - if (c == '-') { - // ignore minus signs in the middle of numbers to match - // Adobe's behavior - error(getPos(), "Badly formatted number"); - getChar(); - continue; - } - if (!isdigit(c)) { - break; - } - getChar(); - xf = xf + scale * (c - '0'); - scale *= 0.1; - } - if (neg) - xf = -xf; - obj->initReal(xf); - break; - - // string - case '(': - p = tokBuf; - n = 0; - numParen = 1; - done = gFalse; - s = NULL; - do { - c2 = EOF; - switch (c = getChar()) { - - case EOF: -#if 0 - // This breaks some PDF files, e.g., ones from Photoshop. - case '\r': - case '\n': -#endif - error(getPos(), "Unterminated string"); - done = gTrue; - break; - - case '(': - ++numParen; - c2 = c; - break; - - case ')': - if (--numParen == 0) { - done = gTrue; - } else { - c2 = c; - } - break; - - case '\\': - switch (c = getChar()) { - case 'n': - c2 = '\n'; - break; - case 'r': - c2 = '\r'; - break; - case 't': - c2 = '\t'; - break; - case 'b': - c2 = '\b'; - break; - case 'f': - c2 = '\f'; - break; - case '\\': - case '(': - case ')': - c2 = c; - break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - c2 = c - '0'; - c = lookChar(); - if (c >= '0' && c <= '7') { - getChar(); - c2 = (c2 << 3) + (c - '0'); - c = lookChar(); - if (c >= '0' && c <= '7') { - getChar(); - c2 = (c2 << 3) + (c - '0'); - } - } - break; - case '\r': - c = lookChar(); - if (c == '\n') { - getChar(); - } - break; - case '\n': - break; - case EOF: - error(getPos(), "Unterminated string"); - done = gTrue; - break; - default: - c2 = c; - break; 
- } - break; - - default: - c2 = c; - break; - } - - if (c2 != EOF) { - if (n == tokBufSize) { - if (!s) - s = new GooString(tokBuf, tokBufSize); - else - s->append(tokBuf, tokBufSize); - p = tokBuf; - n = 0; - - // we are growing see if the document is not malformed and we are growing too much - if (objNum != -1) - { - int newObjNum = xref->getNumEntry(getPos()); - if (newObjNum != objNum) - { - error(getPos(), "Unterminated string"); - done = gTrue; - } - } - } - *p++ = (char)c2; - ++n; - } - } while (!done); - if (!s) - s = new GooString(tokBuf, n); - else - s->append(tokBuf, n); - obj->initString(s); - break; - - // name - case '/': - p = tokBuf; - n = 0; - while ((c = lookChar()) != EOF && !specialChars[c]) { - getChar(); - if (c == '#') { - c2 = lookChar(); - if (c2 >= '0' && c2 <= '9') { - c = c2 - '0'; - } else if (c2 >= 'A' && c2 <= 'F') { - c = c2 - 'A' + 10; - } else if (c2 >= 'a' && c2 <= 'f') { - c = c2 - 'a' + 10; - } else { - goto notEscChar; - } - getChar(); - c <<= 4; - c2 = getChar(); - if (c2 >= '0' && c2 <= '9') { - c += c2 - '0'; - } else if (c2 >= 'A' && c2 <= 'F') { - c += c2 - 'A' + 10; - } else if (c2 >= 'a' && c2 <= 'f') { - c += c2 - 'a' + 10; - } else { - error(getPos(), "Illegal digit in hex char in name"); - } - } - notEscChar: - if (++n == tokBufSize) { - error(getPos(), "Name token too long"); - break; - } - *p++ = c; - } - *p = '\0'; - obj->initName(tokBuf); - break; - - // array punctuation - case '[': - case ']': - tokBuf[0] = c; - tokBuf[1] = '\0'; - obj->initCmd(tokBuf); - break; - - // hex string or dict punctuation - case '<': - c = lookChar(); - - // dict punctuation - if (c == '<') { - getChar(); - tokBuf[0] = tokBuf[1] = '<'; - tokBuf[2] = '\0'; - obj->initCmd(tokBuf); - - // hex string - } else { - p = tokBuf; - m = n = 0; - c2 = 0; - s = NULL; - while (1) { - c = getChar(); - if (c == '>') { - break; - } else if (c == EOF) { - error(getPos(), "Unterminated hex string"); - break; - } else if (specialChars[c] != 1) { - c2 = 
c2 << 4; - if (c >= '0' && c <= '9') - c2 += c - '0'; - else if (c >= 'A' && c <= 'F') - c2 += c - 'A' + 10; - else if (c >= 'a' && c <= 'f') - c2 += c - 'a' + 10; - else - error(getPos(), "Illegal character <%02x> in hex string", c); - if (++m == 2) { - if (n == tokBufSize) { - if (!s) - s = new GooString(tokBuf, tokBufSize); - else - s->append(tokBuf, tokBufSize); - p = tokBuf; - n = 0; - } - *p++ = (char)c2; - ++n; - c2 = 0; - m = 0; - } - } - } - if (!s) - s = new GooString(tokBuf, n); - else - s->append(tokBuf, n); - if (m == 1) - s->append((char)(c2 << 4)); - obj->initString(s); - } - break; - - // dict punctuation - case '>': - c = lookChar(); - if (c == '>') { - getChar(); - tokBuf[0] = tokBuf[1] = '>'; - tokBuf[2] = '\0'; - obj->initCmd(tokBuf); - } else { - error(getPos(), "Illegal character '>'"); - obj->initError(); - } - break; - - // error - case ')': - case '{': - case '}': - error(getPos(), "Illegal character '%c'", c); - obj->initError(); - break; - - // command - default: - p = tokBuf; - *p++ = c; - n = 1; - while ((c = lookChar()) != EOF && !specialChars[c]) { - getChar(); - if (++n == tokBufSize) { - error(getPos(), "Command token too long"); - break; - } - *p++ = c; - } - *p = '\0'; - if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) { - obj->initBool(gTrue); - } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) { - obj->initBool(gFalse); - } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) { - obj->initNull(); - } else { - obj->initCmd(tokBuf); - } - break; - } - - return obj; -} - -void Lexer::skipToNextLine() { - int c; - - while (1) { - c = getChar(); - if (c == EOF || c == '\n') { - return; - } - if (c == '\r') { - if ((c = lookChar()) == '\n') { - getChar(); - } - return; - } - } -} - -GBool Lexer::isSpace(int c) { - return c >= 0 && c <= 0xff && specialChars[c] == 1; -} +//======================================================================== +// +// Lexer.cc +// +// Copyright 1996-2003 Glyph & Cog, LLC +// Copyright 2006 
Krzysztof Kowalczyk (http://blog.kowalczyk.info) +// +//======================================================================== + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include +#include +#include +#include +#include +#include "Lexer.h" +#include "Error.h" +#include "XRef.h" + +//------------------------------------------------------------------------ + +// A '1' in this array means the character is white space. A '1' or +// '2' means the character ends a name or command. +static char specialChars[256] = { + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, // 0x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x + 1, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, // 2x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, // 3x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 5x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, // 7x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9x + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ax + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // bx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // cx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // dx + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ex + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // fx +}; + +#define IS_DIGIT(c) (((c >= '0') && (c <= '9')) ? 
1 : 0) + +//------------------------------------------------------------------------ +// Lexer +//------------------------------------------------------------------------ + +Lexer::Lexer(XRef *xrefA, Stream *str) { + Object obj; + + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; + xref = xrefA; + + buf = NULL; + bufCurPos = NULL; + bufSize = 0; + bufLeft = 0; + curStr.initStream(str); + streams = new Array(xref); + streams->add(curStr.copy(&obj)); + strPtr = 0; + freeArray = gTrue; + curStr.streamReset(); + curStrHasGetBuf = curStr.getStream()->hasGetBuf(); +} + +Lexer::Lexer(XRef *xrefA, Object *obj) { + Object obj2; + + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; + xref = xrefA; + curStrHasGetBuf = gFalse; + buf = NULL; + bufCurPos = NULL; + bufSize = 0; + bufLeft = 0; + + if (obj->isStream()) { + streams = new Array(xref); + freeArray = gTrue; + streams->add(obj->copy(&obj2)); + } else { + assert(obj->isArray()); + streams = obj->getArray(); + freeArray = gFalse; + } + strPtr = 0; + if (streams->getLength() > 0) { + streams->get(strPtr, &curStr); + curStr.streamReset(); + curStrHasGetBuf = curStr.getStream()->hasGetBuf(); + } +} + +Lexer::~Lexer() { + if (!curStr.isNone()) { + curStr.streamClose(); + curStr.free(); + } + if (freeArray) { + delete streams; + } +} + +GBool Lexer::fillBuf() { + assert(curStrHasGetBuf); + assert(curStr.getStream()->hasGetBuf()); + assert(0 == bufLeft); + + GBool hasData = curStr.getStream()->getBuf(&buf, &bufSize); + if (!hasData) + return gFalse; + assert(bufSize > 0); + bufCurPos = buf; + bufLeft = bufSize; + return gTrue; +} + +void Lexer::nextStream() { + curStr.streamClose(); + curStr.free(); + ++strPtr; + curStrHasGetBuf = gFalse; // important for getChar() while (curStrHasGetBuf) correct break + if (strPtr < streams->getLength()) { + streams->get(strPtr, &curStr); + curStr.streamReset(); + curStrHasGetBuf = curStr.getStream()->hasGetBuf(); + } +} + +Stream *Lexer::getStream() +{ + if (curStr.isNone()) + return (Stream 
*)NULL; + + if (curStrHasGetBuf) { + // the caller expects a sequential stream, so give back everything we took + // from it but have not consumed: the buffer rest plus a char cached by lookChar() + int unread = bufLeft + ((LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) ? 1 : 0); + if (unread > 0) curStr.getStream()->ungetBuf(unread); + bufLeft = 0; + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; + } + return curStr.getStream(); +} + +int Lexer::getPos() { + Guint pos; + + if (curStr.isNone()) + return -1; + + pos = curStr.streamGetPos(); + // chars still sitting in our buffer have not been consumed by the lexer yet + if (curStrHasGetBuf) + pos -= bufLeft; + // neither has a char cached by lookChar(), in buffered mode or not + if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) + --pos; + return (int)pos; +} + +void Lexer::setPos(Guint pos, int dir) { + if (curStr.isNone()) + return; + curStr.streamSetPos(pos, dir); + lookCharLastValueCached = Lexer::LOOK_VALUE_NOT_CACHED; + bufLeft = 0; +} + +void Lexer::skipChar() { + getChar(); +} + +Object *Lexer::getObj(Object *obj, int objNum) { + char *p; + int c, c2; + GBool comment, neg, done; + int numParen; + int xi; + double xf, scale; + GooString *s; + int n, m; + + // skip whitespace and comments + comment = gFalse; + while (1) { + if ((c = getChar()) == EOF) { + return obj->initEOF(); + } + if (comment) { + if (c == '\r' || c == '\n') + comment = gFalse; + } else if (c == '%') { + comment = gTrue; + } else if (specialChars[c] != 1) { + break; + } + } + + // start reading token + switch (c) { + + // number + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '-': case '.': + neg = gFalse; + xi = 0; + if (c == '-') { + neg = gTrue; + } else if (c == '.') { + goto doReal; + } else { + xi = c - '0'; + } + while (1) { + c = lookChar(); + if (IS_DIGIT(c)) { + skipChar(); + xi = xi * 10 + (c - '0'); + } else if (c == '.') { + skipChar(); + goto doReal; + } else { + break; + } + } + if (neg) + xi = -xi; + obj->initInt(xi); + break; + doReal: + xf = xi; + scale = 0.1; + while (1) { + c = lookChar(); + if (c == '-') { + // ignore minus signs in the middle of numbers to match + // Adobe's behavior +
error(getPos(), "Badly formatted number"); + skipChar(); + continue; + } + if (!IS_DIGIT(c)) { + break; + } + skipChar(); + xf = xf + scale * (c - '0'); + scale *= 0.1; + } + if (neg) + xf = -xf; + obj->initReal(xf); + break; + + // string + case '(': + p = tokBuf; + n = 0; + numParen = 1; + done = gFalse; + s = NULL; + do { + c2 = EOF; + switch (c = getChar()) { + + case EOF: +#if 0 + // This breaks some PDF files, e.g., ones from Photoshop. + case '\r': + case '\n': +#endif + error(getPos(), "Unterminated string"); + done = gTrue; + break; + + case '(': + ++numParen; + c2 = c; + break; + + case ')': + if (--numParen == 0) { + done = gTrue; + } else { + c2 = c; + } + break; + + case '\\': + switch (c = getChar()) { + case 'n': + c2 = '\n'; + break; + case 'r': + c2 = '\r'; + break; + case 't': + c2 = '\t'; + break; + case 'b': + c2 = '\b'; + break; + case 'f': + c2 = '\f'; + break; + case '\\': + case '(': + case ')': + c2 = c; + break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c2 = c - '0'; + c = lookChar(); + if (c >= '0' && c <= '7') { + skipChar(); + c2 = (c2 << 3) + (c - '0'); + c = lookChar(); + if (c >= '0' && c <= '7') { + skipChar(); + c2 = (c2 << 3) + (c - '0'); + } + } + break; + case '\r': + c = lookChar(); + if (c == '\n') { + skipChar(); + } + break; + case '\n': + break; + case EOF: + error(getPos(), "Unterminated string"); + done = gTrue; + break; + default: + c2 = c; + break; + } + break; + + default: + c2 = c; + break; + } + + if (c2 != EOF) { + if (n == tokBufSize) { + if (!s) + s = new GooString(tokBuf, tokBufSize); + else + s->append(tokBuf, tokBufSize); + p = tokBuf; + n = 0; + + // we are growing see if the document is not malformed and we are growing too much + if (objNum != -1) + { + int newObjNum = xref->getNumEntry(getPos()); + if (newObjNum != objNum) + { + error(getPos(), "Unterminated string"); + done = gTrue; + } + } + } + *p++ = (char)c2; + ++n; + } + } while (!done); + if (!s) + s = new 
GooString(tokBuf, n); + else + s->append(tokBuf, n); + obj->initString(s); + break; + + // name + case '/': + p = tokBuf; + n = 0; + while ((c = lookChar()) != EOF && !specialChars[c]) { + skipChar(); + if (c == '#') { + c2 = lookChar(); + if (c2 >= '0' && c2 <= '9') { + c = c2 - '0'; + } else if (c2 >= 'A' && c2 <= 'F') { + c = c2 - 'A' + 10; + } else if (c2 >= 'a' && c2 <= 'f') { + c = c2 - 'a' + 10; + } else { + goto notEscChar; + } + skipChar(); + c <<= 4; + c2 = getChar(); + if (c2 >= '0' && c2 <= '9') { + c += c2 - '0'; + } else if (c2 >= 'A' && c2 <= 'F') { + c += c2 - 'A' + 10; + } else if (c2 >= 'a' && c2 <= 'f') { + c += c2 - 'a' + 10; + } else { + error(getPos(), "Illegal digit in hex char in name"); + } + } + notEscChar: + if (++n == tokBufSize) { + error(getPos(), "Name token too long"); + break; + } + *p++ = c; + } + *p = '\0'; + obj->initName(tokBuf); + break; + + // array punctuation + case '[': + case ']': + tokBuf[0] = c; + tokBuf[1] = '\0'; + obj->initCmd(tokBuf); + break; + + // hex string or dict punctuation + case '<': + c = lookChar(); + + // dict punctuation + if (c == '<') { + skipChar(); + tokBuf[0] = tokBuf[1] = '<'; + tokBuf[2] = '\0'; + obj->initCmd(tokBuf); + + // hex string + } else { + p = tokBuf; + m = n = 0; + c2 = 0; + s = NULL; + while (1) { + c = getChar(); + if (c == '>') { + break; + } else if (c == EOF) { + error(getPos(), "Unterminated hex string"); + break; + } else if (specialChars[c] != 1) { + c2 = c2 << 4; + if (c >= '0' && c <= '9') + c2 += c - '0'; + else if (c >= 'A' && c <= 'F') + c2 += c - 'A' + 10; + else if (c >= 'a' && c <= 'f') + c2 += c - 'a' + 10; + else + error(getPos(), "Illegal character <%02x> in hex string", c); + if (++m == 2) { + if (n == tokBufSize) { + if (!s) + s = new GooString(tokBuf, tokBufSize); + else + s->append(tokBuf, tokBufSize); + p = tokBuf; + n = 0; + } + *p++ = (char)c2; + ++n; + c2 = 0; + m = 0; + } + } + } + if (!s) + s = new GooString(tokBuf, n); + else + s->append(tokBuf, n); + if (m 
== 1) + s->append((char)(c2 << 4)); + obj->initString(s); + } + break; + + // dict punctuation + case '>': + c = lookChar(); + if (c == '>') { + skipChar(); + tokBuf[0] = tokBuf[1] = '>'; + tokBuf[2] = '\0'; + obj->initCmd(tokBuf); + } else { + error(getPos(), "Illegal character '>'"); + obj->initError(); + } + break; + + // error + case ')': + case '{': + case '}': + error(getPos(), "Illegal character '%c'", c); + obj->initError(); + break; + + // command + default: + p = tokBuf; + *p++ = c; + n = 1; + while ((c = lookChar()) != EOF && !specialChars[c]) { + skipChar(); + if (++n == tokBufSize) { + error(getPos(), "Command token too long"); + break; + } + *p++ = c; + } + *p = '\0'; + if (tokBuf[0] == 't' && !strcmp(tokBuf, "true")) { + obj->initBool(gTrue); + } else if (tokBuf[0] == 'f' && !strcmp(tokBuf, "false")) { + obj->initBool(gFalse); + } else if (tokBuf[0] == 'n' && !strcmp(tokBuf, "null")) { + obj->initNull(); + } else { + obj->initCmd(tokBuf); + } + break; + } + + return obj; +} + +void Lexer::skipToNextLine() { + int c; + + while (1) { + c = getChar(); + if (c == EOF || c == '\n') { + return; + } + if (c == '\r') { + if ((c = lookChar()) == '\n') { + skipChar(); + } + return; + } + } +} + +GBool Lexer::isSpace(int c) { + return c >= 0 && c <= 0xff && specialChars[c] == 1; +} Index: poppler/Lexer.h =================================================================== --- poppler/Lexer.h (revision 57) +++ poppler/Lexer.h (working copy) @@ -3,6 +3,7 @@ // Lexer.h // // Copyright 1996-2003 Glyph & Cog, LLC +// Copyright 2006 Krzysztof Kowalczyk (http://blog.kowalczyk.info) // //======================================================================== @@ -13,6 +14,7 @@ #pragma interface #endif +#include #include "Object.h" #include "Stream.h" @@ -45,25 +47,70 @@ void skipToNextLine(); // Skip over one character. - void skipChar() { getChar(); } + void skipChar(); - // Get stream. - Stream *getStream() - { return curStr.isNone() ? 
(Stream *)NULL : curStr.getStream(); } + Stream *getStream(); // Get current position in file. This is only used for error // messages, so it returns an int instead of a Guint. - int getPos() - { return curStr.isNone() ? -1 : (int)curStr.streamGetPos(); } + int getPos(); // Set position in file. - void setPos(Guint pos, int dir = 0) - { if (!curStr.isNone()) curStr.streamSetPos(pos, dir); } + void setPos(Guint pos, int dir = 0); // Returns true if is a whitespace character. static GBool isSpace(int c); +private: + // we really want lookChar() and getChar() to be inlined + int lookChar() { + if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { + return lookCharLastValueCached; + } + lookCharLastValueCached = getChar(); + return lookCharLastValueCached; + } + + int getChar() { + int c; + GBool hasMoreData; + + if (LOOK_VALUE_NOT_CACHED != lookCharLastValueCached) { + c = lookCharLastValueCached; + lookCharLastValueCached = LOOK_VALUE_NOT_CACHED; + assert( (c >= LOOK_VALUE_NOT_CACHED) && (c < 256)); + return c; + } + + while (curStrHasGetBuf) { + if (bufLeft > 0) { + c = *bufCurPos++ & 0xff; + bufLeft--; + return c; + } + hasMoreData = fillBuf(); + if (!hasMoreData) + nextStream(); + } + + c = EOF; + while (!curStr.isNone() && (c = curStr.streamGetChar()) == EOF) { + nextStream(); + } + return c; + } + + void nextStream(); + GBool fillBuf(); + + Array * streams; // array of input streams + int strPtr; // index of current stream + Object curStr; // current stream + GBool freeArray; // should lexer free the streams array? + char tokBuf[tokBufSize]; // temporary token buffer + XRef * xref; + // often (e.g. ~30% on PDF Refernce 1.6 pdf file from Adobe site) getChar // is called right after lookChar. 
In order to avoid expensive re-doing // getChar() of underlying stream, we cache the last value found by @@ -71,20 +118,13 @@ // LOOK_VALUE_NOT_CACHED that should never be part of stream indicates // that no value was cached static const int LOOK_VALUE_NOT_CACHED = -3; - int lookCharLastValueCached; + int lookCharLastValueCached; -private: - - int getChar(); - int lookChar(); - - Array *streams; // array of input streams - int strPtr; // index of current stream - Object curStr; // current stream - GBool freeArray; // should lexer free the streams array? - char tokBuf[tokBufSize]; // temporary token buffer - - XRef *xref; + GBool curStrHasGetBuf; // does current stream support GetBuf() ? + char * buf; + char * bufCurPos; + int bufSize; + int bufLeft; }; #endif Index: goo/FastAlloc.cc =================================================================== --- goo/FastAlloc.cc (revision 57) +++ goo/FastAlloc.cc (working copy) @@ -152,6 +152,7 @@ #define RecordRealloc(oldP, newP, newSize) #endif +#ifdef USE_FAST_ALLOC void malloc_hook(void *p, size_t size) { RecordMalloc(p, size); @@ -297,4 +298,4 @@ realloc_hook(oldP, newP, size); return newP; } - +#endif Index: goo/FastAlloc.h =================================================================== --- goo/FastAlloc.h (revision 57) +++ goo/FastAlloc.h (working copy) @@ -12,7 +12,9 @@ #include // size_t #endif +#ifndef DEBUG #define USE_FAST_ALLOC 1 +#endif #ifdef __cplusplus extern "C" Index: goo/gmem.c =================================================================== --- goo/gmem.c (revision 57) +++ goo/gmem.c (working copy) @@ -236,7 +236,8 @@ char *copyString(char *s) { char *s1; - s1 = (char *)gmalloc(strlen(s) + 1); + size_t len = strlen(s) + 1; /* +1 for the terminating NUL; size_t avoids narrowing strlen()'s result */ + s1 = (char *)gmalloc(len); strcpy(s1, s); return s1; }