From 821a4f4ba60357b303371adfd38ed26ce121075e Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 3 Nov 2013 20:01:13 +1030 Subject: [PATCH 03/11] Add DeflateStream for compressing XObject streams in PDFWriter To avoid using #ifdef ENABLE_ZLIB everywhere the filter is used, when zlib is disabled a stub DeflateStream filter is built that passes data through unaltered. A DeflateStream::enabled() function is provided to check if a /FlateDecode filter needs to be added to the output stream. --- CMakeLists.txt | 13 ++++-- config.h.cmake | 5 ++- configure.ac | 86 +++++++++++++++++++++--------------- poppler/DeflateStream.cc | 103 ++++++++++++++++++++++++++++++++++++++++++++ poppler/DeflateStream.h | 49 +++++++++++++++++++++ poppler/Makefile.am | 13 +++++- poppler/PDFWriter.cc | 17 +++----- poppler/PDFWriter.h | 1 - poppler/Stream.cc | 37 +++++++++++++++- poppler/Stream.h | 26 +++++++++-- poppler/poppler-config.h.in | 6 +-- 11 files changed, 295 insertions(+), 61 deletions(-) create mode 100644 poppler/DeflateStream.cc create mode 100644 poppler/DeflateStream.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 72fc806..cb0303c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,8 @@ option(ENABLE_CPP "Compile poppler cpp wrapper." ON) option(ENABLE_LIBOPENJPEG "Use libopenjpeg for JPX streams." ON) set(ENABLE_CMS "auto" CACHE STRING "Use color management system. Possible values: auto, lcms1, lcms2. 'auto' prefers lcms2 over lcms1 if both are available. Unset to disable color management system.") option(ENABLE_LIBCURL "Build libcurl based HTTP support." OFF) -option(ENABLE_ZLIB "Build with zlib (not totally safe)." OFF) +option(ENABLE_ZLIB "Build with zlib." ON) +option(ENABLE_ZLIB_UNCOMPRESS "Use zlib to uncompress flate streams (not totally safe)." 
OFF) option(USE_FIXEDPOINT "Use fixed point arithmetic in the Splash backend" OFF) option(USE_FLOAT "Use single precision arithmetic in the Splash backend" OFF) if(WIN32) @@ -377,9 +378,14 @@ if(JPEG_FOUND) endif(JPEG_FOUND) if(ENABLE_ZLIB) set(poppler_SRCS ${poppler_SRCS} - poppler/FlateStream.cc + poppler/DeflateStream.cc ) set(poppler_LIBS ${poppler_LIBS} ${ZLIB_LIBRARIES}) +if(ENABLE_ZLIB_UNCOMPRESS) + set(poppler_SRCS ${poppler_SRCS} + poppler/FlateStream.cc + ) +endif(ENABLE_ZLIB_UNCOMPRESS) endif(ENABLE_ZLIB) if(ENABLE_LIBCURL) set(poppler_SRCS ${poppler_SRCS} @@ -658,7 +664,8 @@ show_end_message("use gtk-doc" "not supported with this CMake build system") show_end_message_yesno("use libjpeg" ENABLE_LIBJPEG) show_end_message_yesno("use libpng" ENABLE_LIBPNG) show_end_message_yesno("use libtiff" ENABLE_LIBTIFF) -show_end_message_yesno("use zlib" ENABLE_ZLIB) +show_end_message_yesno("use zlib compress" ENABLE_ZLIB) +show_end_message_yesno("use zlib uncompress" ENABLE_ZLIB_UNCOMPRESS) show_end_message_yesno("use curl" ENABLE_LIBCURL) show_end_message_yesno("use libopenjpeg" LIBOPENJPEG_FOUND) show_end_message_yesno("use cms" USE_CMS) diff --git a/config.h.cmake b/config.h.cmake index b707ba7..07248f6 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -18,9 +18,12 @@ /* Do not hardcode the library location */ #cmakedefine ENABLE_RELOCATABLE 1 -/* Use zlib instead of builtin zlib decoder. */ +/* Build against zlib. */ #cmakedefine ENABLE_ZLIB 1 +/* Use zlib instead of builtin zlib decoder to uncompress flate streams. */ +#cmakedefine ENABLE_ZLIB_UNCOMPRESS 1 + /* Use cairo for rendering. 
*/ #cmakedefine HAVE_CAIRO 1 diff --git a/configure.ac b/configure.ac index 88112cb..7b3db22 100644 --- a/configure.ac +++ b/configure.ac @@ -271,31 +271,46 @@ fi AC_CHECK_FUNCS(pread64 lseek64) dnl Test for zlib -AC_ARG_ENABLE([zlib], - [AS_HELP_STRING([--enable-zlib],[Build with zlib])], - [],[enable_zlib="no"]) +AC_ARG_ENABLE(zlib, + AC_HELP_STRING([--disable-zlib], + [Don't build against zlib.]), + enable_zlib=$enableval, + enable_zlib="try") + +AC_ARG_ENABLE([zlib_uncompress], + AS_HELP_STRING([--enable-zlib-uncompress], + [Use zlib to uncompress flate streams (not totally safe)]), + enable_zlib_uncompress=$enableval, + enable_zlib_uncompress="no") + if test x$enable_zlib = xyes; then - AC_CHECK_LIB([z], [inflate],, - AC_MSG_ERROR("*** zlib library not found ***")) - AC_CHECK_HEADERS([zlib.h],, - AC_MSG_ERROR("*** zlib headers not found ***")) + AC_CHECK_LIB([z], [inflate],, + AC_MSG_ERROR("*** zlib library not found ***")) + AC_CHECK_HEADERS([zlib.h],, + AC_MSG_ERROR("*** zlib headers not found ***")) elif test x$enable_zlib = xtry; then - AC_CHECK_LIB([z], [inflate], - [enable_zlib="yes"], - [enable_zlib="no"]) - AC_CHECK_HEADERS([zlib.h],, - [enable_zlib="no"]) + AC_CHECK_LIB([z], [inflate], + [enable_zlib="yes"], + [enable_zlib="no"]) + AC_CHECK_HEADERS([zlib.h],, + [enable_zlib="no"]) fi if test x$enable_zlib = xyes; then - ZLIB_LIBS="-lz" - AC_SUBST(ZLIB_LIBS) - AC_DEFINE(ENABLE_ZLIB) + ZLIB_LIBS="-lz" + AC_SUBST(ZLIB_LIBS) + AC_DEFINE(ENABLE_ZLIB, 1, [Build against zlib.]) + + if test x$enable_zlib_uncompress = xyes; then + AC_DEFINE(ENABLE_ZLIB_UNCOMPRESS, 1, + [Use zlib instead of builtin zlib decoder to uncompress flate streams.]) + fi +else + enable_zlib_uncompress="no" fi AM_CONDITIONAL(BUILD_ZLIB, test x$enable_zlib = xyes) -AH_TEMPLATE([ENABLE_ZLIB], - [Use zlib instead of builtin zlib decoder.]) +AM_CONDITIONAL(BUILD_ZLIB_UNCOMPRESS, test x$enable_zlib_uncompress = xyes) dnl Test for libcurl AC_ARG_ENABLE(libcurl, @@ -885,22 +900,23 @@ 
poppler-cpp-uninstalled.pc]) echo "" echo "Building poppler with support for:" -echo " font configuration: $with_font_configuration" -echo " splash output: $enable_splash_output" -echo " cairo output: $use_cairo" -echo " qt4 wrapper: $enable_poppler_qt4" -echo " qt5 wrapper: $enable_poppler_qt5" -echo " glib wrapper: $use_glib" -echo " introspection: $found_introspection" -echo " cpp wrapper: $enable_poppler_cpp" -echo " use gtk-doc: $enable_gtk_doc" -echo " use libjpeg: $enable_libjpeg" -echo " use libpng: $enable_libpng" -echo " use libtiff: $enable_libtiff" -echo " use zlib: $enable_zlib" -echo " use libcurl: $enable_libcurl" -echo " use libopenjpeg: $enable_libopenjpeg" -echo " use cms: $enable_cms" +echo " font configuration: $with_font_configuration" +echo " splash output: $enable_splash_output" +echo " cairo output: $use_cairo" +echo " qt4 wrapper: $enable_poppler_qt4" +echo " qt5 wrapper: $enable_poppler_qt5" +echo " glib wrapper: $use_glib" +echo " introspection: $found_introspection" +echo " cpp wrapper: $enable_poppler_cpp" +echo " use gtk-doc: $enable_gtk_doc" +echo " use libjpeg: $enable_libjpeg" +echo " use libpng: $enable_libpng" +echo " use libtiff: $enable_libtiff" +echo " use zlib compress: $enable_zlib" +echo " use zlib uncompress: $enable_zlib_uncompress" +echo " use libcurl: $enable_libcurl" +echo " use libopenjpeg: $enable_libopenjpeg" +echo " use cms: $enable_cms" if test x$enable_cms = xyes;then if test x$lcms1 = xyes;then echo " with lcms1" @@ -924,8 +940,8 @@ if test x$enable_libjpeg != xyes; then echo " Warning: Using libjpeg is recommended" fi -if test x$enable_zlib != xno; then - echo " Warning: Using zlib is not totally safe" +if test x$enable_zlib_uncompress != xno; then + echo " Warning: Using zlib for decompression is not totally safe" fi if test x$enable_libopenjpeg != xyes; then diff --git a/poppler/DeflateStream.cc b/poppler/DeflateStream.cc new file mode 100644 index 0000000..a713311 --- /dev/null +++ b/poppler/DeflateStream.cc 
@@ -0,0 +1,103 @@ +//======================================================================== +// +// DeflateStream.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2013 Adrian Johnson +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include +#include "DeflateStream.h" + +extern "C" { +#include +} + +#define BUFFER_SIZE 16384 /* size in bytes of each of the two staging buffers below */ + +struct DeflateStreamPrivate { + OutStream *outputStr; /* stream that receives the compressed bytes */ + GBool open; /* gTrue from construction until close() has run */ + unsigned char input_buf[BUFFER_SIZE]; /* uncompressed bytes queued by write() */ + unsigned char output_buf[BUFFER_SIZE]; /* deflate output waiting to be written to outputStr */ + z_stream zlib_stream; +}; + +DeflateStream::DeflateStream(OutStream *strA) +{ + priv = new DeflateStreamPrivate; + priv->outputStr = strA; + priv->zlib_stream.zalloc = Z_NULL; + priv->zlib_stream.zfree = Z_NULL; + priv->zlib_stream.opaque = Z_NULL; + deflateInit (&priv->zlib_stream, Z_DEFAULT_COMPRESSION); /* NOTE(review): the return code is ignored, so an init failure goes unnoticed here */ + priv->zlib_stream.next_in = priv->input_buf; + priv->zlib_stream.avail_in = 0; + priv->zlib_stream.next_out = priv->output_buf; + priv->zlib_stream.avail_out = BUFFER_SIZE; + priv->open = gTrue; +} + +DeflateStream::~DeflateStream() +{ + if (priv->open) + close(); + delete priv; +} + +void DeflateStream::close() +{ + compressBuffer(gTrue); /* gTrue makes compressBuffer() call deflate with Z_FINISH */ + deflateEnd (&priv->zlib_stream); + priv->open = gFalse; +} + +Goffset DeflateStream::getPos() +{ + return priv->outputStr->getPos(); +} + +void DeflateStream::compressBuffer(GBool flush) +{ + int ret; + GBool finished; + + do { + ret = deflate (&priv->zlib_stream, flush ? 
Z_FINISH : Z_NO_FLUSH); + if (flush || priv->zlib_stream.avail_out == 0) { + priv->outputStr->write(priv->output_buf, BUFFER_SIZE - priv->zlib_stream.avail_out); + priv->zlib_stream.next_out = priv->output_buf; + priv->zlib_stream.avail_out = BUFFER_SIZE; + } + if (ret == Z_STREAM_ERROR) /* unusable stream (e.g. already closed): drop the queued input instead of retrying forever */ + priv->zlib_stream.avail_in = 0; + finished = priv->zlib_stream.avail_in == 0; /* all queued input has been consumed */ + if (flush && ret != Z_STREAM_END && ret != Z_STREAM_ERROR) /* Z_FINISH must be repeated until Z_STREAM_END */ + finished = gFalse; + } while (!finished); + priv->zlib_stream.next_in = priv->input_buf; +} + +void DeflateStream::write(const Guchar *data, long length) +{ + long count; /* same width and signedness as length, so the clamp below cannot truncate */ + const unsigned char *p = data; + + while (length > 0) { /* a negative length is treated as nothing to write */ + count = BUFFER_SIZE - priv->zlib_stream.avail_in; /* free space left in input_buf */ + if (count > length) + count = length; + memcpy (priv->input_buf + priv->zlib_stream.avail_in, p, count); + p += count; + priv->zlib_stream.avail_in += count; + length -= count; + + if (priv->zlib_stream.avail_in == BUFFER_SIZE) + compressBuffer(gFalse); + } +} diff --git a/poppler/DeflateStream.h b/poppler/DeflateStream.h new file mode 100644 index 0000000..9cd3758 --- /dev/null +++ b/poppler/DeflateStream.h @@ -0,0 +1,49 @@ +//======================================================================== +// +// DeflateStream.h +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2013 Adrian Johnson +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef DEFLATESTREAM_H +#define DEFLATESTREAM_H + +#ifdef ENABLE_ZLIB + +#include "poppler-config.h" +#include +#include "Object.h" +#include "Stream.h" + +struct DeflateStreamPrivate; + +class DeflateStream : public OutStream { +public: + DeflateStream(OutStream *strA); + virtual ~DeflateStream(); + + // Close the stream + virtual void close(); + + virtual Goffset getPos(); + + virtual void write(const Guchar *data, long 
length); + + static GBool enabled() { return gTrue; }; +private: + DeflateStream(const DeflateStream &other); + DeflateStream& operator=(const DeflateStream &other); + void compressBuffer(GBool flush); + + DeflateStreamPrivate *priv; +}; + +#endif + +#endif diff --git a/poppler/Makefile.am b/poppler/Makefile.am index 6386c0e..5719b63 100644 --- a/poppler/Makefile.am +++ b/poppler/Makefile.am @@ -89,14 +89,22 @@ endif if BUILD_ZLIB zlib_sources = \ - FlateStream.h \ - FlateStream.cc + DeflateStream.h \ + DeflateStream.cc zlib_libs = \ $(ZLIB_LIBS) endif +if BUILD_ZLIB_UNCOMPRESS + +zlib_uncompress_sources = \ + FlateStream.h \ + FlateStream.cc + +endif + if BUILD_LIBCURL libcurl_libs = \ @@ -247,6 +255,7 @@ libpoppler_la_SOURCES = \ $(cairo_sources) \ $(libjpeg_sources) \ $(zlib_sources) \ + $(zlib_uncompress_sources) \ $(libjpeg2000_sources) \ $(curl_sources) \ Annot.cc \ diff --git a/poppler/PDFWriter.cc b/poppler/PDFWriter.cc index e9540fc..e37c82f 100644 --- a/poppler/PDFWriter.cc +++ b/poppler/PDFWriter.cc @@ -12,6 +12,7 @@ //======================================================================== #include "PDFWriter.h" +#include "DeflateStream.h" #include PDFWriter::PDFWriter(PDFDoc *docA) @@ -475,14 +476,6 @@ void PDFWriter::markPageObjects(Dict *pageDict, GBool markContent) } } -void PDFWriter::writeStream(Stream* str) -{ - str->reset(); - int c; - while ((c = str->getChar()) != EOF) - outputStr->put(c); -} - // Convert a Page object to an XObject (for the n-up > 1 case). The // XObject BBox is copied from the Page MediaBox. The XObject Matrix // incorporates the Page Rotate value. 
The XObject Resources and @@ -543,12 +536,15 @@ void PDFWriter::writeXObject(int pageNum) mat.m[0], mat.m[1], mat.m[2], mat.m[3], mat.m[4], mat.m[5]); } + if (DeflateStream::enabled()) + outputStr->printf("/Filter /FlateDecode\n"); outputStr->printf("/Length %d %d R\n", lengthRef.num, lengthRef.gen); outputStr->printf(">>\n"); // concatenate content streams into a single stream outputStr->printf("stream\n"); Goffset streamStart = outputStr->getPos(); + DeflateStream deflateStr(outputStr); page->getContents(&obj); if (obj.isArray()) { for (int i = 0; i < obj.arrayGetLength(); ++i) { @@ -560,16 +556,17 @@ void PDFWriter::writeXObject(int pageNum) break; } Stream *str = obj2.getStream(); - writeStream(str); + deflateStr.copyStream(str); obj2.free(); } } else if (obj.isStream()) { Stream *str = obj.getStream(); - writeStream(str); + deflateStr.copyStream(str); } else { error(errSyntaxError, -1, "Weird page contents"); } obj.free(); + deflateStr.close(); Goffset length = outputStr->getPos() - streamStart; outputStr->printf("\nendstream\n"); outputStr->printf("endobj\n"); diff --git a/poppler/PDFWriter.h b/poppler/PDFWriter.h index 92f4cfe..d4e5a4c 100644 --- a/poppler/PDFWriter.h +++ b/poppler/PDFWriter.h @@ -109,7 +109,6 @@ private: void beginIndirectObject(Ref *ref); void writeObject(Object *obj); void writePageObject(int pageNum, int copy, PDFRectangle *mediaSize); - void writeStream(Stream* str); void writeXObject(int pageNum); void writeSheetPageObject(int copy, PDFRectangle *mediaSize); void writeBlankPage(int copy, PDFRectangle *mediaSize); diff --git a/poppler/Stream.cc b/poppler/Stream.cc index f582bc4..0ca7889 100644 --- a/poppler/Stream.cc +++ b/poppler/Stream.cc @@ -67,7 +67,7 @@ #include "DCTStream.h" #endif -#ifdef ENABLE_ZLIB +#ifdef ENABLE_ZLIB_UNCOMPRESS #include "FlateStream.h" #endif @@ -368,6 +368,30 @@ OutStream::~OutStream () { } +void OutStream::put (char c) +{ + Guchar buf = c; + write(&buf, 1); +} + +void OutStream::printf (const char *format, 
...) +{ + va_list argptr; + va_start (argptr, format); + int len = vsnprintf((char*)buffer, BUFFER_SIZE, format, argptr); + va_end (argptr); /* argptr is indeterminate once vsnprintf has consumed it */ + if (len >= 0 && len < BUFFER_SIZE) { + write(buffer, len); + } else if (len >= BUFFER_SIZE) { /* output was truncated: format again into a buffer of the exact size */ + char *buf = (char*)gmalloc(len+1); + va_start (argptr, format); /* restart the argument list for the second pass */ + vsnprintf(buf, len+1, format, argptr); + va_end (argptr); + write((Guchar*)buf, len); + gfree(buf); + } /* len < 0 (formatting error): nothing is written */ +} + void OutStream::format(const char *format, ...) { va_list argptr; @@ -378,6 +402,15 @@ void OutStream::format(const char *format, ...) va_end (argptr); } +void OutStream::copyStream (Stream *str) +{ + int n; + str->reset(); + do { + n = str->doGetChars(BUFFER_SIZE, buffer); + write(buffer, n); + } while (n); +} //------------------------------------------------------------------------ // FileOutStream @@ -3872,7 +3905,7 @@ GBool DCTStream::isBinary(GBool last) { #endif -#ifndef ENABLE_ZLIB +#ifndef ENABLE_ZLIB_UNCOMPRESS //------------------------------------------------------------------------ // FlateStream //------------------------------------------------------------------------ diff --git a/poppler/Stream.h b/poppler/Stream.h index c659e2a..925b951 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -266,16 +266,19 @@ public: virtual void write (const Guchar *data, long length) = 0; // Put a char in the stream - virtual void put (char c) = 0; + virtual void put (char c); - virtual void printf (const char *format, ...) 
GCC_PRINTF_FORMAT(2,3); // GooString formatting virtual void format (const char *format, ...); + virtual void copyStream (Stream *str); + private: int ref; // reference count - + static const int BUFFER_SIZE = 4096; + unsigned char buffer[BUFFER_SIZE]; }; //------------------------------------------------------------------------ @@ -302,6 +305,21 @@ private: }; +#ifndef ENABLE_ZLIB + +class DeflateStream : public OutStream { +public: + DeflateStream(OutStream *strA) : outputStr(strA) {} + virtual ~DeflateStream() {} + virtual void close() {} + virtual Goffset getPos() { return outputStr->getPos(); } + virtual void write(const Guchar *data, long length) { outputStr->write(data, length); } + static GBool enabled() { return gFalse; }; +private: + OutStream *outputStr; +}; + +#endif //------------------------------------------------------------------------ // BaseStream @@ -949,7 +967,7 @@ private: #endif -#ifndef ENABLE_ZLIB +#ifndef ENABLE_ZLIB_UNCOMPRESS //------------------------------------------------------------------------ // FlateStream //------------------------------------------------------------------------ diff --git a/poppler/poppler-config.h.in b/poppler/poppler-config.h.in index c5e181c..0ee68b4 100644 --- a/poppler/poppler-config.h.in +++ b/poppler/poppler-config.h.in @@ -64,9 +64,9 @@ #undef ENABLE_LIBPNG #endif -/* Use zlib instead of builtin zlib decoder. */ -#ifndef ENABLE_ZLIB -#undef ENABLE_ZLIB +/* Use zlib instead of builtin zlib decoder for uncompressing flate streams. */ +#ifndef ENABLE_ZLIB_UNCOMPRESS +#undef ENABLE_ZLIB_UNCOMPRESS #endif /* Define to 1 if you have the header file, and it defines `DIR'. -- 1.8.3.2