commit b0f9f6642b93a4034bd697a026d109ba193ca54e Author: William Bader Date: Sun Feb 21 03:30:08 2016 +0100 Merge xpdf-3.04 support for LZW encoding in PSOutputDev and Stream. Level 2 and Level 3 PostScript now use LZW encoding instead of Run Length encoding, which can make some images one tenth the size. PSOutputDev provides setEnableLZW() and getEnableLZW() to control support for LZW encoding. --- poppler/PSOutputDev.cc | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------- poppler/PSOutputDev.h | 3 +++ poppler/Stream.cc | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ poppler/Stream.h | 38 ++++++++++++++++++++++++++++++++++ 4 files changed, 301 insertions(+), 49 deletions(-) diff --git a/poppler/PSOutputDev.cc b/poppler/PSOutputDev.cc index 2d01edb..128233f 100644 --- a/poppler/PSOutputDev.cc +++ b/poppler/PSOutputDev.cc @@ -1243,6 +1243,7 @@ void PSOutputDev::init(PSOutputFunc outputFuncA, void *outputStreamA, generateOPI = gFalse; useASCIIHex = gFalse; useBinary = gFalse; + enableLZW = gTrue; rasterMono = gFalse; rasterResolution = 300; uncompressPreloadedImages = gFalse; @@ -2953,7 +2954,7 @@ void PSOutputDev::setupImages(Dict *resDict) { } void PSOutputDev::setupImage(Ref id, Stream *str, GBool mask) { - GBool useRLE, useCompressed, doUseASCIIHex; + GBool useLZW, useRLE, useCompressed, doUseASCIIHex; GooString *s; int c; int size, line, col, i; @@ -2963,21 +2964,27 @@ void PSOutputDev::setupImage(Ref id, Stream *str, GBool mask) { //~ this does not correctly handle the DeviceN color space //~ -- need to use DeviceNRecoder if (level < psLevel2) { - useRLE = gFalse; + useLZW = useRLE = gFalse; useCompressed = gFalse; doUseASCIIHex = gTrue; } else { if (uncompressPreloadedImages) { - useRLE = gFalse; + useLZW = useRLE = gFalse; useCompressed = gFalse; } else { s = str->getPSFilter(level < psLevel3 ? 2 : 3, ""); if (s) { - useRLE = gFalse; + useLZW = useRLE = gFalse; useCompressed = gTrue; delete s; } else { - useRLE = gTrue; + if (getEnableLZW()) { + useLZW = gTrue; + useRLE = gFalse; + } else { + useRLE = gTrue; + useLZW = gFalse; + } useCompressed = gFalse; } } @@ -2986,7 +2993,9 @@ void PSOutputDev::setupImage(Ref id, Stream *str, GBool mask) { if (useCompressed) { str = str->getUndecodedStream(); } - if (useRLE) { + if (useLZW) { + str = new LZWEncoder(str); + } else if (useRLE) { str = new RunLengthEncoder(str); } if (doUseASCIIHex) { @@ -3028,9 +3037,9 @@ void PSOutputDev::setupImage(Ref id, Stream *str, GBool mask) { } } while (c != (doUseASCIIHex ? '>' : '~') && c != EOF); // add one entry for the final line of data; add another entry - // because the RunLengthDecode filter may read past the end + // because the LZWDecode/RunLengthDecode filter may read past the end ++size; - if (useRLE) { + if (useLZW || useRLE) { ++size; } outerSize = size/65535 + 1; @@ -3090,7 +3099,7 @@ void PSOutputDev::setupImage(Ref id, Stream *str, GBool mask) { } if (c == (doUseASCIIHex ? '>' : '~') || c == EOF) { writePS((char *)(doUseASCIIHex ? "> put\n" : "~> put\n")); - if (useRLE) { + if (useLZW || useRLE) { ++line; writePSFmt("{0:d} <> put\n", line); } else { @@ -3231,6 +3240,7 @@ GBool PSOutputDev::checkPageSlice(Page *page, double /*hDPI*/, double /*vDPI*/, PreScanOutputDev *scan; GBool rasterize; #if HAVE_SPLASH + GBool useLZW; SplashOutputDev *splashOut; SplashColor paperColor; PDFRectangle box; @@ -3269,6 +3279,8 @@ GBool PSOutputDev::checkPageSlice(Page *page, double /*hDPI*/, double /*vDPI*/, } #if HAVE_SPLASH + // get the rasterization parameters + useLZW = getEnableLZW(); // start the PS page page->makeBox(rasterResolution, rasterResolution, rotateA, useMediaBox, gFalse, sliceX, sliceY, sliceW, sliceH, &box, &crop); @@ -3579,14 +3591,26 @@ GBool PSOutputDev::checkPageSlice(Page *page, double /*hDPI*/, double /*vDPI*/, isGray = gFalse; } str0->reset(); - if (isGray && numComps == 4) { - str = new RunLengthEncoder(new CMYKGrayEncoder(str0)); - numComps = 1; - } else if (isGray && numComps == 3) { - str = new RunLengthEncoder(new RGBGrayEncoder(str0)); - numComps = 1; + if (useLZW) { + if (isGray && numComps == 4) { + str = new LZWEncoder(new CMYKGrayEncoder(str0)); + numComps = 1; + } else if (isGray && numComps == 3) { + str = new LZWEncoder(new RGBGrayEncoder(str0)); + numComps = 1; + } else { + str = new LZWEncoder(str0); + } } else { - str = new RunLengthEncoder(str0); + if (isGray && numComps == 4) { + str = new RunLengthEncoder(new CMYKGrayEncoder(str0)); + numComps = 1; + } else if (isGray && numComps == 3) { + str = new RunLengthEncoder(new RGBGrayEncoder(str0)); + numComps = 1; + } else { + str = new RunLengthEncoder(str0); + } } if (numComps == 1) { writePS("/DeviceGray setcolorspace\n"); @@ -3615,7 +3639,11 @@ GBool PSOutputDev::checkPageSlice(Page *page, double /*hDPI*/, double /*vDPI*/, } else { writePS(" /ASCII85Decode filter\n"); } - writePS(" /RunLengthDecode filter\n"); + if (useLZW) { + writePS(" /LZWDecode filter\n"); + } else { + writePS(" /RunLengthDecode filter\n"); + } writePS(">>\n"); if (useBinary) { /* nothing to do */; @@ -4349,7 +4377,7 @@ void PSOutputDev::restoreTextPos(GfxState *state) { void PSOutputDev::stroke(GfxState *state) { doPath(state->getPath()); if (inType3Char && t3FillColorOnly) { - // if we're construct a cacheable Type 3 glyph, we need to do + // if we're constructing a cacheable Type 3 glyph, we need to do // everything in the fill color writePS("Sf\n"); } else { @@ -5651,7 +5679,7 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, GBool emitRect, addRect, extendRect; GooString *s; int n, numComps; - GBool useRLE, useASCII, useCompressed; + GBool useLZW, useRLE, useASCII, useCompressed; GfxSeparationColorSpace *sepCS; GfxColor color; GfxCMYK cmyk; @@ -5835,7 +5863,11 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, if (inlineImg) { // create an array str2 = new FixedLengthEncoder(str, len); - str2 = new RunLengthEncoder(str2); + if (getEnableLZW()) { + str2 = new LZWEncoder(str2); + } else { + str2 = new RunLengthEncoder(str2); + } if (useASCIIHex) { str2 = new ASCIIHexEncoder(str2); } else { @@ -5878,7 +5910,7 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, } } while (c != (useASCIIHex ? '>' : '~') && c != EOF); writePS((char *)(useASCIIHex ? ">\n" : "~>\n")); - // add an extra entry because the RunLengthDecode filter may + // add an extra entry because the LZWDecode/RunLengthDecode filter may // read past the end writePS("<>]\n"); writePS("0\n"); @@ -5956,7 +5988,7 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, if ((mode == psModeForm || inType3Char || preloadImagesForms) && uncompressPreloadedImages) { s = NULL; - useRLE = gFalse; + useLZW = useRLE = gFalse; useCompressed = gFalse; useASCII = gFalse; } else { @@ -5964,11 +5996,17 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, " "); if ((colorMap && colorMap->getColorSpace()->getMode() == csDeviceN) || inlineImg || !s) { - useRLE = gTrue; + if (getEnableLZW()) { + useLZW = gTrue; + useRLE = gFalse; + } else { + useRLE = gTrue; + useLZW = gFalse; + } useASCII = !(mode == psModeForm || inType3Char || preloadImagesForms); useCompressed = gFalse; } else { - useRLE = gFalse; + useLZW = useRLE = gFalse; useASCII = str->isBinary() && !(mode == psModeForm || inType3Char || preloadImagesForms); useCompressed = gTrue; @@ -5978,7 +6016,9 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, writePSFmt(" /ASCII{0:s}Decode filter\n", useASCIIHex ? "Hex" : "85"); } - if (useRLE) { + if (useLZW) { + writePS(" /LZWDecode filter\n"); + } else if (useRLE) { writePS(" /RunLengthDecode filter\n"); } if (useCompressed) { @@ -6011,8 +6051,10 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, str = new DeviceNRecoder(str, width, height, colorMap); } - // add RunLengthEncode and ASCIIHex/85 encode filters - if (useRLE) { + // add LZWEncode/RunLengthEncode and ASCIIHex/85 encode filters + if (useLZW) { + str = new LZWEncoder(str); + } else if (useRLE) { str = new RunLengthEncoder(str); } if (useASCII) { @@ -6033,7 +6075,7 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, n = 0; } else { // need to read the stream to count characters -- the length - // is data-dependent (because of ASCII and RLE filters) + // is data-dependent (because of ASCII and LZW/RLE filters) str->reset(); n = 0; while ((c = str->getChar()) != EOF) { @@ -6085,7 +6127,7 @@ void PSOutputDev::doImageL2(Object *ref, GfxImageColorMap *colorMap, #endif // delete encoders - if (useRLE || useASCII || inlineImg) { + if (useLZW || useRLE || useASCII || inlineImg) { delete str; } } @@ -6108,8 +6150,8 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, Stream *str2; GooString *s; int n, numComps; - GBool useRLE, useASCII, useCompressed; - GBool maskUseRLE, maskUseASCII, maskUseCompressed; + GBool useLZW, useRLE, useASCII, useCompressed; + GBool maskUseLZW, maskUseRLE, maskUseASCII, maskUseCompressed; GooString *maskFilters; GfxSeparationColorSpace *sepCS; GfxColor color; @@ -6117,8 +6159,8 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, int c; int col, i; - useRLE = useASCII = useCompressed = gFalse; // make gcc happy - maskUseRLE = maskUseASCII = maskUseCompressed = gFalse; // make gcc happy + useLZW = useRLE = useASCII = useCompressed = gFalse; // make gcc happy + maskUseLZW = maskUseRLE = maskUseASCII = maskUseCompressed = gFalse; // make gcc happy maskFilters = NULL; // make gcc happy // explicit masking @@ -6128,17 +6170,23 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, if ((mode == psModeForm || inType3Char || preloadImagesForms) && uncompressPreloadedImages) { s = NULL; - maskUseRLE = gFalse; + maskUseLZW = maskUseRLE = gFalse; maskUseCompressed = gFalse; maskUseASCII = gFalse; } else { s = maskStr->getPSFilter(3, " "); if (!s) { - maskUseRLE = gTrue; + if (getEnableLZW()) { + maskUseLZW = gTrue; + maskUseRLE = gFalse; + } else { + maskUseRLE = gTrue; + maskUseLZW = gFalse; + } maskUseASCII = !(mode == psModeForm || inType3Char || preloadImagesForms); maskUseCompressed = gFalse; } else { - maskUseRLE = gFalse; + maskUseLZW = maskUseRLE = gFalse; maskUseASCII = maskStr->isBinary() && !(mode == psModeForm || inType3Char || preloadImagesForms); maskUseCompressed = gTrue; @@ -6149,7 +6197,9 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, maskFilters->appendf(" /ASCII{0:s}Decode filter\n", useASCIIHex ? "Hex" : "85"); } - if (maskUseRLE) { + if (maskUseLZW) { + maskFilters->append(" /LZWDecode filter\n"); + } else if (maskUseRLE) { maskFilters->append(" /RunLengthDecode filter\n"); } if (maskUseCompressed) { @@ -6166,11 +6216,13 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, writePS(maskFilters->getCString()); writePS("pdfMask\n"); - // add RunLengthEncode and ASCIIHex/85 encode filters + // add LZWEncode/RunLengthEncode and ASCIIHex/85 encode filters if (maskUseCompressed) { maskStr = maskStr->getUndecodedStream(); } - if (maskUseRLE) { + if (maskUseLZW) { + maskStr = new LZWEncoder(maskStr); + } else if (maskUseRLE) { maskStr = new RunLengthEncoder(maskStr); } if (maskUseASCII) { @@ -6191,7 +6243,7 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, writePS("%-EOD-\n"); // delete encoders - if (maskUseRLE || maskUseASCII) { + if (maskUseLZW || maskUseRLE || maskUseASCII) { delete maskStr; } } @@ -6208,7 +6260,11 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, if (inlineImg) { // create an array str2 = new FixedLengthEncoder(str, len); - str2 = new RunLengthEncoder(str2); + if (getEnableLZW()) { + str2 = new LZWEncoder(str2); + } else { + str2 = new RunLengthEncoder(str2); + } if (useASCIIHex) { str2 = new ASCIIHexEncoder(str2); } else { @@ -6251,7 +6307,7 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, } } while (c != (useASCIIHex ? '>' : '~') && c != EOF); writePS((char *)(useASCIIHex ? ">\n" : "~>\n")); - // add an extra entry because the RunLengthDecode filter may + // add an extra entry because the LZWDecode/RunLengthDecode filter may // read past the end writePS("<>]\n"); writePS("0\n"); @@ -6337,7 +6393,7 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, if ((mode == psModeForm || inType3Char || preloadImagesForms) && uncompressPreloadedImages) { s = NULL; - useRLE = gFalse; + useLZW = useRLE = gFalse; useCompressed = gFalse; useASCII = gFalse; } else { @@ -6345,11 +6401,17 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, " "); if ((colorMap && colorMap->getColorSpace()->getMode() == csDeviceN) || inlineImg || !s) { - useRLE = gTrue; + if (getEnableLZW()) { + useLZW = gTrue; + useRLE = gFalse; + } else { + useRLE = gTrue; + useLZW = gFalse; + } useASCII = !(mode == psModeForm || inType3Char || preloadImagesForms); useCompressed = gFalse; } else { - useRLE = gFalse; + useLZW = useRLE = gFalse; useASCII = str->isBinary() && !(mode == psModeForm || inType3Char || preloadImagesForms); useCompressed = gTrue; @@ -6359,7 +6421,9 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, writePSFmt(" /ASCII{0:s}Decode filter\n", useASCIIHex ? "Hex" : "85"); } - if (useRLE) { + if (useLZW) { + writePS(" /LZWDecode filter\n"); + } else if (useRLE) { writePS(" /RunLengthDecode filter\n"); } if (useCompressed) { @@ -6435,8 +6499,10 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, str = str->getUndecodedStream(); } - // add RunLengthEncode and ASCIIHex/85 encode filters - if (useRLE) { + // add LZWEncode/RunLengthEncode and ASCIIHex/85 encode filters + if (useLZW) { + str = new LZWEncoder(str); + } else if (useRLE) { str = new RunLengthEncoder(str); } if (useASCII) { @@ -6459,7 +6525,7 @@ void PSOutputDev::doImageL3(Object *ref, GfxImageColorMap *colorMap, writePS("%-EOD-\n"); // delete encoders - if (useRLE || useASCII || inlineImg) { + if (useLZW || useRLE || useASCII || inlineImg) { delete str; } } diff --git a/poppler/PSOutputDev.h b/poppler/PSOutputDev.h index 7318caf..bd5b075 100644 --- a/poppler/PSOutputDev.h +++ b/poppler/PSOutputDev.h @@ -320,6 +320,7 @@ public: GBool getEmbedCIDTrueType() const { return embedCIDTrueType; } GBool getFontPassthrough() const { return fontPassthrough; } GBool getOptimizeColorSpace() const { return optimizeColorSpace; } + GBool getEnableLZW() const { return enableLZW; }; void setEmbedType1(GBool b) { embedType1 = b; } void setEmbedTrueType(GBool b) { embedTrueType = b; } void setEmbedCIDPostScript(GBool b) { embedCIDPostScript = b; } @@ -330,6 +331,7 @@ public: void setGenerateOPI(GBool b) { generateOPI = b; } void setUseASCIIHex(GBool b) { useASCIIHex = b; } void setUseBinary(GBool b) { useBinary = b; } + void setEnableLZW(GBool b) { enableLZW = b; } private: @@ -532,6 +534,7 @@ private: GBool generateOPI; // generate PostScript OPI comments? GBool useASCIIHex; // use ASCIIHex instead of ASCII85? GBool useBinary; // use binary instead of hex + GBool enableLZW; // enable LZW compression #if OPI_SUPPORT int opi13Nest; // nesting level of OPI 1.3 objects diff --git a/poppler/Stream.cc b/poppler/Stream.cc index 9617678..f2ca60b 100644 --- a/poppler/Stream.cc +++ b/poppler/Stream.cc @@ -5302,6 +5302,151 @@ GBool RunLengthEncoder::fillBuf() { } //------------------------------------------------------------------------ +// LZWEncoder +//------------------------------------------------------------------------ + +LZWEncoder::LZWEncoder(Stream *strA): + FilterStream(strA) +{ + inBufLen = 0; + outBufLen = 0; +} + +LZWEncoder::~LZWEncoder() { + if (str->isEncoder()) { + delete str; + } +} + +void LZWEncoder::reset() { + int i; + + str->reset(); + + // initialize code table + for (i = 0; i < 256; ++i) { + table[i].byte = i; + table[i].next = NULL; + table[i].children = NULL; + } + nextSeq = 258; + codeLen = 9; + + // initialize input buffer + inBufLen = str->doGetChars(sizeof(inBuf), inBuf); + + // initialize output buffer with a clear-table code + outBuf = 256; + outBufLen = 9; + needEOD = gFalse; +} + +int LZWEncoder::getChar() { + int ret; + + if (inBufLen == 0 && !needEOD && outBufLen == 0) { + return EOF; + } + if (outBufLen < 8 && (inBufLen > 0 || needEOD)) { + fillBuf(); + } + if (outBufLen >= 8) { + ret = (outBuf >> (outBufLen - 8)) & 0xff; + outBufLen -= 8; + } else { + ret = (outBuf << (8 - outBufLen)) & 0xff; + outBufLen = 0; + } + return ret; +} + +int LZWEncoder::lookChar() { + if (inBufLen == 0 && !needEOD && outBufLen == 0) { + return EOF; + } + if (outBufLen < 8 && (inBufLen > 0 || needEOD)) { + fillBuf(); + } + if (outBufLen >= 8) { + return (outBuf >> (outBufLen - 8)) & 0xff; + } else { + return (outBuf << (8 - outBufLen)) & 0xff; + } +} + +// On input, outBufLen < 8. +// This function generates, at most, 2 12-bit codes +// --> outBufLen < 8 + 12 + 12 = 32 +void LZWEncoder::fillBuf() { + LZWEncoderNode *p0, *p1; + int seqLen, code, i; + + if (needEOD) { + outBuf = (outBuf << codeLen) | 257; + outBufLen += codeLen; + needEOD = gFalse; + return; + } + + // find longest matching sequence (if any) + p0 = table + inBuf[0]; + seqLen = 1; + while (inBufLen > seqLen) { + for (p1 = p0->children; p1; p1 = p1->next) { + if (p1->byte == inBuf[seqLen]) { + break; + } + } + if (!p1) { + break; + } + p0 = p1; + ++seqLen; + } + code = (int)(p0 - table); + + // generate an output code + outBuf = (outBuf << codeLen) | code; + outBufLen += codeLen; + + // update the table + table[nextSeq].byte = seqLen < inBufLen ? inBuf[seqLen] : 0; + table[nextSeq].children = NULL; + if (table[code].children) { + table[nextSeq].next = table[code].children; + } else { + table[nextSeq].next = NULL; + } + table[code].children = table + nextSeq; + ++nextSeq; + + // update the input buffer + memmove(inBuf, inBuf + seqLen, inBufLen - seqLen); + inBufLen -= seqLen; + inBufLen += str->doGetChars(sizeof(inBuf) - inBufLen, inBuf + inBufLen); + + // increment codeLen; generate clear-table code + if (nextSeq == (1 << codeLen)) { + ++codeLen; + if (codeLen == 13) { + outBuf = (outBuf << 12) | 256; + outBufLen += 12; + for (i = 0; i < 256; ++i) { + table[i].next = NULL; + table[i].children = NULL; + } + nextSeq = 258; + codeLen = 9; + } + } + + // generate EOD next time + if (inBufLen == 0) { + needEOD = gTrue; + } +} + +//------------------------------------------------------------------------ // CMYKGrayEncoder //------------------------------------------------------------------------ diff --git a/poppler/Stream.h b/poppler/Stream.h index 00b2925..84a8cf9 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -1198,6 +1198,44 @@ private: }; //------------------------------------------------------------------------ +// LZWEncoder +//------------------------------------------------------------------------ + +struct LZWEncoderNode { + int byte; + LZWEncoderNode *next; // next sibling + LZWEncoderNode *children; // first child +}; + +class LZWEncoder: public FilterStream { +public: + + LZWEncoder(Stream *strA); + virtual ~LZWEncoder(); + virtual StreamKind getKind() { return strWeird; } + virtual void reset(); + virtual int getChar(); + virtual int lookChar(); + virtual GooString *getPSFilter(int psLevel, const char *indent) + { return NULL; } + virtual GBool isBinary(GBool last = gTrue) { return gTrue; } + virtual GBool isEncoder() { return gTrue; } + +private: + + LZWEncoderNode table[4096]; + int nextSeq; + int codeLen; + Guchar inBuf[4096]; + int inBufLen; + int outBuf; + int outBufLen; + GBool needEOD; + + void fillBuf(); +}; + +//------------------------------------------------------------------------ // CMYKGrayEncoder //------------------------------------------------------------------------