From e0da1710a26e48fb1fc49418529e99eb409a64e3 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 18 Aug 2013 17:29:00 +0930 Subject: [PATCH 3/5] pdfimages: add support for png and tiff output --- utils/ImageOutputDev.cc | 67 ++++++++++++++++++++++++++++++++++++++++++++++--- utils/ImageOutputDev.h | 17 +++++++++++-- utils/pdfimages.1 | 22 ++++++++++------ utils/pdfimages.cc | 19 +++++++++++--- 4 files changed, 110 insertions(+), 15 deletions(-) diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index bd8dc35..ea073ad 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -42,13 +42,15 @@ #include #include "goo/gmem.h" #include "goo/NetPBMWriter.h" +#include "goo/PNGWriter.h" +#include "goo/TiffWriter.h" #include "Error.h" #include "GfxState.h" #include "Object.h" #include "Stream.h" #include "ImageOutputDev.h" -ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA, GBool listImagesExtraA) { +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA, GBool listImagesExtraA) { listImages = listImagesA; listImagesExtra = listImagesExtraA; if (listImagesExtra) @@ -57,7 +59,9 @@ ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEG fileRoot = copyString(fileRootA); fileName = (char *)gmalloc(strlen(fileRoot) + 45); } - dumpJPEG = dumpJPEGA; + outputPNG = gFalse; + outputTiff = gFalse; + dumpJPEG = gFalse; pageNames = pageNamesA; imgNum = 0; pageNum = 0; @@ -368,6 +372,21 @@ void ImageOutputDev::writeImageFile(ImgWriter *writer, ImageFormat format, const writer->writeRow(&row); break; + case imgGray: + p = imgStr->getLine(); + rowp = row; + for (int x = 0; x < width; ++x) { + if (p) { + colorMap->getGray(p, &gray); + *rowp++ = colToByte(gray); + p += colorMap->getNumPixelComps(); + } else { + *rowp++ = 0; + } + } + writer->writeRow(&row); + break; + case imgMonochrome: int size = (width + 7)/8; for (int x = 0; x < size; x++) @@ -400,10 
+419,52 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JPEG file writeRawImage(str, "jpg"); + } else if (outputPNG) { + // output in PNG format + +#if ENABLE_LIBPNG + ImgWriter *writer; + + if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { + writer = new PNGWriter(PNGWriter::MONOCHROME); + format = imgMonochrome; + } else if (colorMap->getColorSpace()->getMode() == csDeviceGray || + colorMap->getColorSpace()->getMode() == csCalGray) { + writer = new PNGWriter(PNGWriter::GRAY); + format = imgGray; + } else { + writer = new PNGWriter(PNGWriter::RGB); + format = imgRGB; + } + + writeImageFile(writer, format, "png", str, width, height, colorMap); +#endif + + } else if (outputTiff) { + // output in TIFF format + +#if ENABLE_LIBTIFF + ImgWriter *writer; + + if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { + writer = new TiffWriter(TiffWriter::MONOCHROME); + format = imgMonochrome; + } else if (colorMap->getColorSpace()->getMode() == csDeviceGray || + colorMap->getColorSpace()->getMode() == csCalGray) { + writer = new TiffWriter(TiffWriter::GRAY); + format = imgGray; + } else { + writer = new TiffWriter(TiffWriter::RGB); + format = imgRGB; + } + + writeImageFile(writer, format, "tif", str, width, height, colorMap); +#endif + } else { + // output in PPM/PBM format ImgWriter *writer; - // dump PBM file if (!colorMap || (colorMap->getNumPixelComps() == 1 && colorMap->getBits() == 1)) { writer = new NetPBMWriter(NetPBMWriter::MONOCHROME); format = imgMonochrome; diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 5d8536c..0cfd9ff 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -55,19 +55,30 @@ public: }; enum ImageFormat { imgRGB, + imgGray, imgMonochrome }; // Create an OutputDev which will write images to files named // -NNN. or -PPP-NNN., if // is set. Normally, all images are written as PBM - // (.pbm) or PPM (.ppm) files. 
If is set, JPEG images + // (.pbm) or PPM (.ppm) files unless PNG or Tiff output is enabled + // (PNG is used if both are enabled). If Jpeg is enabled, JPEG images // are written as JPEG (.jpg) files. - ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA, GBool listImagesExtraA); + ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA, GBool listImagesExtraA); // Destructor. virtual ~ImageOutputDev(); + // Use PNG format for output + void enablePNG(GBool png) { outputPNG = png; } + + // Use TIFF format for output + void enableTiff(GBool tiff) { outputTiff = tiff; } + + // Use Jpeg format for Jpeg files + void enableJpeg(GBool jpeg) { dumpJPEG = jpeg; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -144,6 +155,8 @@ private: GBool listImages; // list images instead of dumping GBool listImagesExtra; // list images with extra info instead of dumping GBool dumpJPEG; // set to dump native JPEG files + GBool outputPNG; // set to output in PNG format + GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names int pageNum; // current page number int imgNum; // current image number diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 408e685..8c00094 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -10,17 +10,22 @@ pdfimages \- Portable Document Format (PDF) image extractor .SH DESCRIPTION .B Pdfimages saves images from a Portable Document Format (PDF) file as Portable -Pixmap (PPM), Portable Bitmap (PBM), or JPEG files. +Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), +Tagged Image File Format (TIFF), or JPEG files. .PP Pdfimages reads the PDF file .IR PDF-file , -scans one or more pages, and writes one PPM, PBM, or JPEG file for each image, +scans one or more pages, and writes one file for each image, .IR image-root - nnn . 
xxx , where .I nnn is the image number and .I xxx -is the image type (.ppm, .pbm, .jpg). +is the image type (.ppm, .pbm, .png, .tif, or .jpg). +.PP +The default output format is PBM (for monochrome images) or PPM for non-monochrome. The +\-png or \-tiff options change the default output to PNG or TIFF respectively. In addition the \-j option +will cause JPEG images in the PDF file to be written in JPEG format. .SH OPTIONS .TP .BI \-f " number" @@ -29,11 +34,14 @@ Specifies the first page to scan. .BI \-l " number" Specifies the last page to scan. .TP +.B \-png +Change the default output format to PNG. +.TP +.B \-tiff +Change the default output format to TIFF. +.TP .B \-j -Normally, all images are written as PBM (for monochrome images) or PPM -(for non-monochrome images) files. With this option, images in DCT -format are saved as JPEG files. All non-DCT images are saved in -PBM/PPM format as usual. +Write images in JPEG format as JPEG files instead of the default format. The JPEG file is identical to the JPEG data stored in the PDF. .TP .B \-list Instead of writing the images, list the images along with various information for each image. 
Do not specify an diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 6f41392..d8315ea 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -51,6 +51,8 @@ static int firstPage = 1; static int lastPage = 0; static GBool listImages = gFalse; static GBool listImagesExtra = gFalse; +static GBool enablePNG = gFalse; +static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; @@ -64,6 +66,14 @@ static const ArgDesc argDesc[] = { "first page to convert"}, {"-l", argInt, &lastPage, 0, "last page to convert"}, +#if ENABLE_LIBPNG + {"-png", argFlag, &enablePNG, 0, + "write image in PNG format"}, +#endif +#if ENABLE_LIBTIFF + {"-tiff", argFlag, &enableTiff, 0, + "write image in TIFF format"}, +#endif {"-j", argFlag, &dumpJPEG, 0, "write JPEG images as JPEG files"}, {"-list", argFlag, &listImages, 0, @@ -171,10 +181,13 @@ int main(int argc, char *argv[]) { lastPage = doc->getNumPages(); // write image files - imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages, listImagesExtra); + imgOut = new ImageOutputDev(imgRoot, pageNames, listImages, listImagesExtra); if (imgOut->isOk()) { - doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, - gTrue, gFalse, gFalse); + imgOut->enablePNG(enablePNG); + imgOut->enableTiff(enableTiff); + imgOut->enableJpeg(dumpJPEG); + doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, + gTrue, gFalse, gFalse); } delete imgOut; -- 1.8.1.2