From 4ef78898a8ff1c72ef8b846e118b681ead8c76e9 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Wed, 21 Aug 2013 22:22:28 +0930 Subject: [PATCH 6/7] pdfimages: support ccitt output --- poppler/Stream.h | 5 +++++ utils/ImageOutputDev.cc | 35 +++++++++++++++++++++++++++++++++++ utils/ImageOutputDev.h | 4 ++++ utils/pdfimages.1 | 39 ++++++++++++++++++++++++++++++++++++++- utils/pdfimages.cc | 4 ++++ 5 files changed, 86 insertions(+), 1 deletion(-) diff --git a/poppler/Stream.h b/poppler/Stream.h index 9b40fd1..00b2925 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -797,6 +797,11 @@ public: virtual void unfilteredReset (); + int getEncoding() { return encoding; } + GBool getEndOfLine() { return endOfLine; } + int getColumns() { return columns; } + GBool getBlackIs1() { return black; } + private: void ccittReset(GBool unfiltered); diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index ebdd644..ae7d309 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -454,6 +454,41 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JBIG2 embedded file writeRawImage(str, "jb2e"); + } else if (dumpCCITT && str->getKind() == strCCITTFax && !inlineImg) { + // write CCITT parameters + CCITTFaxStream *ccittStr = static_cast<CCITTFaxStream *>(str); + FILE *f; + setFilename("params"); + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + if (ccittStr->getEncoding() < 0) + fprintf(f, "-4 "); + else if (ccittStr->getEncoding() == 0) + fprintf(f, "-1 "); + else + fprintf(f, "-2 "); + + if (ccittStr->getEndOfLine()) + fprintf(f, "-A "); + else + fprintf(f, "-P "); + + fprintf(f, "-X %d ", ccittStr->getColumns()); + + if (ccittStr->getBlackIs1()) + fprintf(f, "-W "); + else + fprintf(f, "-B "); + + fprintf(f, "-M\n"); // PDF uses MSB first + + fclose(f); + + // dump CCITT file + writeRawImage(str, "ccitt"); + } else if (outputPNG) { // output in PNG format diff --git 
a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 75063a2..8d0785c 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -85,6 +85,9 @@ public: // Use JBIG2 format for JBIG2 files void enableJBig2(GBool jbig2) { dumpJBIG2 = jbig2; } + // Use CCITT format for CCITT files + void enableCCITT(GBool ccitt) { dumpCCITT = ccitt; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -162,6 +165,7 @@ private: GBool dumpJPEG; // set to dump native JPEG files GBool dumpJP2; // set to dump native JPEG2000 files GBool dumpJBIG2; // set to dump native JBIG2 files + GBool dumpCCITT; // set to dump native CCITT files GBool outputPNG; // set to output in PNG format GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 23530c5..5a87573 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -50,12 +50,49 @@ Write images in JPEG2000 format as JP2 files instead of the default format. The .B \-jbig2 Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is indentical to the JBIG2 data in the PDF. .TP +.B \-ccitt +Write images in CCITT format as CCITT files instead of the default +format. The CCITT file is identical to the CCITT data stored in the +PDF. PDF files contain additional parameters specifying +how to decode the CCITT data. These parameters are translated to +fax2tiff input options and written to a .params file with the same image +number. 
The parameters are: +.RS +.TP +.B \-1 +1D Group 3 encoding +.TP +.B \-2 +2D Group 3 encoding +.TP +.B \-4 +Group 4 encoding +.TP +.B \-A +Beginning of line is aligned on a byte boundary +.TP +.B \-P +Beginning of line is not aligned on a byte boundary +.TP +.B \-X n +The image width in pixels +.TP +.B \-W +Encoding uses 1 for black and 0 for white +.TP +.B \-B +Encoding uses 0 for black and 1 for white +.TP +.B \-M +Input data fills from most significant bit to least significant bit. +.RE +.TP .B \-list Instead of writing the images, list the images along with various information for each image. Do not specify an .IR image-root with this option. .IP -The following information is listed for each font: +The following information is listed for each image: .RS .TP .B page diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index c83f300..5d74738 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -55,6 +55,7 @@ static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool dumpJP2 = gFalse; static GBool dumpJBIG2 = gFalse; +static GBool dumpCCITT = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -81,6 +82,8 @@ static const ArgDesc argDesc[] = { "write JPEG2000 images as JP2 files"}, {"-jbig2", argFlag, &dumpJBIG2, 0, "write JBIG2 images as JBIG2 files"}, + {"-ccitt", argFlag, &dumpCCITT, 0, + "write CCITT images as CCITT files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -191,6 +194,7 @@ int main(int argc, char *argv[]) { imgOut->enableJpeg(dumpJPEG); imgOut->enableJpeg2000(dumpJP2); imgOut->enableJBig2(dumpJBIG2); + imgOut->enableCCITT(dumpCCITT); doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } -- 1.8.1.2