From 6b321a74383f1064cb7cd64752369fddc3667c01 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 3 Nov 2013 20:01:13 +1030 Subject: [PATCH 3/8] Add DeflateStream for compressing XObject streams in PDFWriter --- configure.ac | 78 +++++++++++++++++++---------------------- poppler/DeflateStream.cc | 82 +++++++++++++++++++++++++++++++++++++++++++++ poppler/DeflateStream.h | 50 +++++++++++++++++++++++++++ poppler/Makefile.am | 4 ++- poppler/PDFWriter.cc | 21 ++++++++---- poppler/PDFWriter.h | 1 - poppler/Stream.cc | 28 ++++++++++++++-- poppler/Stream.h | 9 ++--- poppler/poppler-config.h.in | 6 ++-- 9 files changed, 218 insertions(+), 61 deletions(-) create mode 100644 poppler/DeflateStream.cc create mode 100644 poppler/DeflateStream.h diff --git a/configure.ac b/configure.ac index ee6b37b..2e13c52 100644 --- a/configure.ac +++ b/configure.ac @@ -271,31 +271,23 @@ fi AC_CHECK_FUNCS(pread64 lseek64) dnl Test for zlib +AC_CHECK_LIB([z], [inflate],, + AC_MSG_ERROR("*** zlib library not found ***")) +AC_CHECK_HEADERS([zlib.h],, + AC_MSG_ERROR("*** zlib headers not found ***")) +ZLIB_LIBS="-lz" +AC_SUBST(ZLIB_LIBS) + -AC_ARG_ENABLE([zlib], +AC_ARG_ENABLE([zlib-uncompress], - [AS_HELP_STRING([--enable-zlib],[Build with zlib])], - [],[enable_zlib="no"]) -if test x$enable_zlib = xyes; then - AC_CHECK_LIB([z], [inflate],, - AC_MSG_ERROR("*** zlib library not found ***")) - AC_CHECK_HEADERS([zlib.h],, - AC_MSG_ERROR("*** zlib headers not found ***")) -elif test x$enable_zlib = xtry; then - AC_CHECK_LIB([z], [inflate], - [enable_zlib="yes"], - [enable_zlib="no"]) - AC_CHECK_HEADERS([zlib.h],, - [enable_zlib="no"]) -fi - -if test x$enable_zlib = xyes; then - ZLIB_LIBS="-lz" - AC_SUBST(ZLIB_LIBS) - AC_DEFINE(ENABLE_ZLIB) -fi - -AM_CONDITIONAL(BUILD_ZLIB, test x$enable_zlib = xyes) -AH_TEMPLATE([ENABLE_ZLIB], - [Use zlib instead of builtin zlib decoder.]) + [AS_HELP_STRING([--enable-zlib-uncompress],[Use zlib to uncompress flate streams])], + [],[enable_zlib_uncompress="no"]) +if test x$enable_zlib_uncompress = 
xyes; then + AC_DEFINE(ENABLE_ZLIB_UNCOMPRESS) +fi + +AM_CONDITIONAL(BUILD_ZLIB_UNCOMPRESS, test x$enable_zlib_uncompress = xyes) +AH_TEMPLATE([ENABLE_ZLIB_UNCOMPRESS], + [Use zlib instead of builtin zlib decoder to uncompress flate streams.]) dnl Test for libcurl AC_ARG_ENABLE(libcurl, @@ -885,22 +877,22 @@ poppler-cpp-uninstalled.pc]) echo "" echo "Building poppler with support for:" -echo " font configuration: $with_font_configuration" -echo " splash output: $enable_splash_output" -echo " cairo output: $use_cairo" -echo " qt4 wrapper: $enable_poppler_qt4" -echo " qt5 wrapper: $enable_poppler_qt5" -echo " glib wrapper: $use_glib" -echo " introspection: $found_introspection" -echo " cpp wrapper: $enable_poppler_cpp" -echo " use gtk-doc: $enable_gtk_doc" -echo " use libjpeg: $enable_libjpeg" -echo " use libpng: $enable_libpng" -echo " use libtiff: $enable_libtiff" -echo " use zlib: $enable_zlib" -echo " use libcurl: $enable_libcurl" -echo " use libopenjpeg: $enable_libopenjpeg" -echo " use cms: $enable_cms" +echo " font configuration: $with_font_configuration" +echo " splash output: $enable_splash_output" +echo " cairo output: $use_cairo" +echo " qt4 wrapper: $enable_poppler_qt4" +echo " qt5 wrapper: $enable_poppler_qt5" +echo " glib wrapper: $use_glib" +echo " introspection: $found_introspection" +echo " cpp wrapper: $enable_poppler_cpp" +echo " use gtk-doc: $enable_gtk_doc" +echo " use libjpeg: $enable_libjpeg" +echo " use libpng: $enable_libpng" +echo " use libtiff: $enable_libtiff" +echo " use zlib uncompress: $enable_zlib_uncompress" +echo " use libcurl: $enable_libcurl" +echo " use libopenjpeg: $enable_libopenjpeg" +echo " use cms: $enable_cms" if test x$enable_cms = xyes;then if test x$lcms1 = xyes;then echo " with lcms1" @@ -924,8 +916,8 @@ if test x$enable_libjpeg != xyes; then echo " Warning: Using libjpeg is recommended" fi -if test x$enable_zlib != xno; then - echo " Warning: Using zlib is not totally safe" +if test x$enable_zlib_uncompress != xno; then + echo " 
Warning: Using zlib for decompression is not totally safe" +fi + if test x$enable_libopenjpeg != xyes; then diff --git a/poppler/DeflateStream.cc b/poppler/DeflateStream.cc new file mode 100644 index 0000000..6993c24 --- /dev/null +++ b/poppler/DeflateStream.cc @@ -0,0 +1,82 @@ +//======================================================================== +// +// DeflateStream.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2013 Adrian Johnson +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include +#include "DeflateStream.h" + +DeflateStream::DeflateStream(OutStream *strA) +{ + outputStr = strA; + zlib_stream.zalloc = Z_NULL; + zlib_stream.zfree = Z_NULL; + zlib_stream.opaque = Z_NULL; + deflateInit (&zlib_stream, Z_DEFAULT_COMPRESSION); /* NOTE(review): the deflateInit result is not checked */ + zlib_stream.next_in = input_buf; + zlib_stream.avail_in = 0; + zlib_stream.next_out = output_buf; + zlib_stream.avail_out = BUFFER_SIZE; + open = gTrue; +} + +DeflateStream::~DeflateStream() { + if (open) + close(); +} + +void DeflateStream::close() +{ + compressBuffer(gTrue); + deflateEnd (&zlib_stream); + open = gFalse; +} + +void DeflateStream::compressBuffer(GBool flush) +{ + int ret; + GBool finished; + + do { + ret = deflate (&zlib_stream, flush ? 
Z_FINISH : Z_NO_FLUSH); + if (flush || zlib_stream.avail_out == 0) { /* hand compressed bytes to outputStr when finishing or when output_buf is full */ + outputStr->write(output_buf, BUFFER_SIZE - zlib_stream.avail_out); + zlib_stream.next_out = output_buf; + zlib_stream.avail_out = BUFFER_SIZE; + } + finished = gTrue; + if (zlib_stream.avail_in != 0) + finished = gFalse; + if (flush && ret != Z_STREAM_END) + finished = gFalse; + } while (!finished); /* NOTE(review): when flushing, the loop only ends on Z_STREAM_END; confirm a zlib error cannot make it spin */ + zlib_stream.next_in = input_buf; +} + +void DeflateStream::write(const Guchar *data, long length) +{ + unsigned int count; + const unsigned char *p = data; + + while (length) { + count = length; + if (count > BUFFER_SIZE - zlib_stream.avail_in) + count = BUFFER_SIZE - zlib_stream.avail_in; + memcpy (input_buf + zlib_stream.avail_in, p, count); + p += count; + zlib_stream.avail_in += count; + length -= count; + + if (zlib_stream.avail_in == BUFFER_SIZE) /* input_buf is full */ + compressBuffer(gFalse); + } +} + diff --git a/poppler/DeflateStream.h b/poppler/DeflateStream.h new file mode 100644 index 0000000..0097b70 --- /dev/null +++ b/poppler/DeflateStream.h @@ -0,0 +1,50 @@ +//======================================================================== +// +// DeflateStream.h +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2013 Adrian Johnson +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef DEFLATESTREAM_H +#define DEFLATESTREAM_H + +#include "poppler-config.h" +#include +#include "Object.h" +#include "Stream.h" + +extern "C" { +#include +} + +class DeflateStream : public OutStream { +public: + DeflateStream(OutStream *strA); + virtual ~DeflateStream(); + + // Close the stream: runs deflate with Z_FINISH over the buffered input, then calls deflateEnd + virtual void close(); + + virtual Goffset getPos() { return outputStr->getPos(); } + + virtual void write(const Guchar *data, long length); + +private: + void compressBuffer(GBool flush); + + OutStream *outputStr; + GBool 
open; + static const int BUFFER_SIZE = 16384; /* size of input_buf and of output_buf */ + unsigned char input_buf[BUFFER_SIZE]; + unsigned char output_buf[BUFFER_SIZE]; + z_stream zlib_stream; + +}; + +#endif diff --git a/poppler/Makefile.am b/poppler/Makefile.am index 6386c0e..ad44c53 100644 --- a/poppler/Makefile.am +++ b/poppler/Makefile.am @@ -86,7 +86,7 @@ libjpeg2000_sources = \ endif -if BUILD_ZLIB +if BUILD_ZLIB_UNCOMPRESS zlib_sources = \ FlateStream.h \ @@ -175,6 +175,7 @@ poppler_include_HEADERS = \ CMap.h \ DateInfo.h \ Decrypt.h \ + DeflateStream.h \ Dict.h \ Error.h \ FileSpec.h \ @@ -259,6 +260,7 @@ libpoppler_la_SOURCES = \ CMap.cc \ DateInfo.cc \ Decrypt.cc \ + DeflateStream.cc \ Dict.cc \ Error.cc \ FileSpec.cc \ diff --git a/poppler/PDFWriter.cc b/poppler/PDFWriter.cc index 13e67cc..f5e55da 100644 --- a/poppler/PDFWriter.cc +++ b/poppler/PDFWriter.cc @@ -12,6 +12,7 @@ //======================================================================== #include "PDFWriter.h" +#include "DeflateStream.h" #include PDFWriter::PDFWriter(PDFDoc *docA) @@ -459,12 +460,15 @@ void PDFWriter::writePageObject(int pageNum, int copy, PDFRectangle *mediaSize) outputStr->printf("endobj\n"); } -void PDFWriter::writeStream(Stream* str) +static void copyStream(Stream *inStr, OutStream *outStr) { - str->reset(); - int c; - while ((c = str->getChar()) != EOF) - outputStr->put(c); + int n; + Guchar buf[4096]; + inStr->reset(); + do { + n = inStr->doGetChars(sizeof(buf), buf); + outStr->write(buf, n); + } while (n); /* the loop ends once doGetChars returns 0 */ } // Convert a Page object to an XObject (for the n-up > 1 case). 
The @@ -528,12 +532,14 @@ void PDFWriter::writeXObject(int pageNum) mat.m[0], mat.m[1], mat.m[2], mat.m[3], mat.m[4], mat.m[5]); } + outputStr->printf("/Filter /FlateDecode\n"); outputStr->printf("/Length %d %d R\n", lengthRef.num, lengthRef.gen); outputStr->printf(">>\n"); // concatenate content streams into a single stream outputStr->printf("stream\n"); Goffset streamStart = outputStr->getPos(); + DeflateStream deflateStr(outputStr); page->getContents(&obj); if (obj.isArray()) { for (int i = 0; i < obj.arrayGetLength(); ++i) { @@ -545,15 +551,16 @@ void PDFWriter::writeXObject(int pageNum) break; } Stream *str = obj2.getStream(); - writeStream(str); + copyStream(str, &deflateStr); obj2.free(); } } else if (obj.isStream()) { Stream *str = obj.getStream(); - writeStream(str); + copyStream(str, &deflateStr); } else { error(errSyntaxError, -1, "Weird page contents"); } + deflateStr.close(); Goffset length = outputStr->getPos() - streamStart; outputStr->printf("\nendstream\n"); outputStr->printf("endobj\n"); diff --git a/poppler/PDFWriter.h b/poppler/PDFWriter.h index 6799ab3..200c955 100644 --- a/poppler/PDFWriter.h +++ b/poppler/PDFWriter.h @@ -99,7 +99,6 @@ private: void beginIndirectObject(Ref *ref); void writeObject(Object *obj); void writePageObject(int pageNum, int copy, PDFRectangle *mediaSize); - void writeStream(Stream* str); void writeXObject(int pageNum); void writeSheetPageObject(int copy, PDFRectangle *mediaSize); void writeBlankPage(int copy, PDFRectangle *mediaSize); diff --git a/poppler/Stream.cc b/poppler/Stream.cc index 167e4df..9b7b2e8 100644 --- a/poppler/Stream.cc +++ b/poppler/Stream.cc @@ -67,7 +67,7 @@ #include "DCTStream.h" #endif -#ifdef ENABLE_ZLIB +#ifdef ENABLE_ZLIB_UNCOMPRESS #include "FlateStream.h" #endif @@ -368,6 +368,30 @@ OutStream::~OutStream () { } +void OutStream::put (char c) +{ + Guchar buf = c; + write(&buf, 1); +} + +void OutStream::printf (const char *format, ...) 
+{ /* Formats into printf_buf and passes the result to write(); output too long for printf_buf is formatted again into a heap buffer. */ + va_list argptr; + va_start (argptr, format); + int len = vsnprintf((char*)printf_buf, BUFFER_SIZE, format, argptr); + va_end (argptr); /* a va_list must not be reused after vsnprintf has consumed it */ + if (len >= BUFFER_SIZE) { /* truncated: len is the size the full output needs */ + char *buf = (char*)gmalloc(len+1); + va_start (argptr, format); + vsnprintf(buf, len+1, format, argptr); + va_end (argptr); + write((Guchar*)buf, len); /* write the complete heap copy, not the truncated printf_buf */ + gfree(buf); + } else if (len >= 0) { /* a negative len is a vsnprintf error: write nothing */ + write(printf_buf, len); + } +} + void OutStream::format(const char *format, ...) { va_list argptr; @@ -3872,7 +3896,7 @@ GBool DCTStream::isBinary(GBool last) { #endif -#ifndef ENABLE_ZLIB +#ifndef ENABLE_ZLIB_UNCOMPRESS //------------------------------------------------------------------------ // FlateStream //------------------------------------------------------------------------ diff --git a/poppler/Stream.h b/poppler/Stream.h index c659e2a..ae089b0 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -266,16 +266,17 @@ public: virtual void write (const Guchar *data, long length) = 0; // Put a char in the stream - virtual void put (char c) = 0; + virtual void put (char c); - virtual void printf (const char *format, ...) GCC_PRINTF_FORMAT(2,3) = 0; + virtual void printf (const char *format, ...) GCC_PRINTF_FORMAT(2,3); // GooString formatting virtual void format (const char *format, ...); private: int ref; // reference count - + static const int BUFFER_SIZE = 4096; + unsigned char printf_buf[BUFFER_SIZE]; }; //------------------------------------------------------------------------ @@ -949,7 +950,7 @@ private: #endif -#ifndef ENABLE_ZLIB +#ifndef ENABLE_ZLIB_UNCOMPRESS //------------------------------------------------------------------------ // FlateStream //------------------------------------------------------------------------ diff --git a/poppler/poppler-config.h.in b/poppler/poppler-config.h.in index c5e181c..0ee68b4 100644 --- a/poppler/poppler-config.h.in +++ b/poppler/poppler-config.h.in @@ -64,9 +64,9 @@ #undef ENABLE_LIBPNG #endif -/* Use zlib instead of builtin zlib decoder. 
*/ -#ifndef ENABLE_ZLIB -#undef ENABLE_ZLIB +/* Use zlib instead of builtin zlib decoder for uncompressing flate streams. */ +#ifndef ENABLE_ZLIB_UNCOMPRESS +#undef ENABLE_ZLIB_UNCOMPRESS #endif /* Define to 1 if you have the header file, and it defines `DIR'. -- 1.8.3.2