From 25da41d5152479ada5cd57d0bb4a87214bf4ece8 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Mon, 13 Feb 2012 22:05:18 +1030 Subject: [PATCH] pdfimages: add -list option to list all images --- utils/ImageOutputDev.cc | 205 +++++++++++++++++++++++++++++++++++++++++------ utils/ImageOutputDev.h | 20 +++++- utils/pdfimages.1 | 101 +++++++++++++++++++++++ utils/pdfimages.cc | 12 ++- 4 files changed, 308 insertions(+), 30 deletions(-) diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index c043641..5332cc5 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -44,19 +44,29 @@ #include "Stream.h" #include "ImageOutputDev.h" -ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA) { - fileRoot = copyString(fileRootA); - fileName = (char *)gmalloc(strlen(fileRoot) + 45); +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) { + listImages = listImagesA; + if (!listImages) { + fileRoot = copyString(fileRootA); + fileName = (char *)gmalloc(strlen(fileRoot) + 45); + } dumpJPEG = dumpJPEGA; pageNames = pageNamesA; imgNum = 0; pageNum = 0; ok = gTrue; + if (listImages) { + printf("page num type width height color comp bpc enc interp object ID\n"); + printf("---------------------------------------------------------------------\n"); + } } + ImageOutputDev::~ImageOutputDev() { - gfree(fileName); - gfree(fileRoot); + if (!listImages) { + gfree(fileName); + gfree(fileRoot); + } } void ImageOutputDev::setFilename(const char *fileExt) { @@ -67,18 +77,125 @@ void ImageOutputDev::setFilename(const char *fileExt) { } } -GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, - double *pmat, int paintType, int tilingType, Dict *resDict, - double *mat, double *bbox, - int x0, int y0, int x1, int y1, - double xStep, double yStep) { - return gTrue; - // do nothing -- this avoids the potentially slow loop in Gfx.cc +void 
ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, GBool inlineImg, + ImageType imageType) { + Object obj; + const char *type; + const char *colorspace; + const char *enc; + int components, bpc; + + printf("%4d %5d ", pageNum, imgNum); + type = ""; + switch (imageType) { + case imgImage: + type = "image"; + break; + case imgStencil: + type = "stencil"; + break; + case imgMask: + type = "mask"; + break; + case imgSmask: + type = "smask"; + break; + } + printf("%-7s %5d %5d ", type, width, height); + + colorspace = "-"; + /* masks and stencils default to ncomps = 1 and bpc = 1 */ + components = 1; + bpc = 1; + if (colorMap && colorMap->isOk()) { + switch (colorMap->getColorSpace()->getMode()) { + case csDeviceGray: + case csCalGray: + colorspace = "gray"; + break; + case csDeviceRGB: + case csCalRGB: + colorspace = "rgb"; + break; + case csDeviceCMYK: + colorspace = "cmyk"; + break; + case csLab: + colorspace = "lab"; + break; + case csICCBased: + colorspace = "icc"; + break; + case csIndexed: + colorspace = "index"; + break; + case csSeparation: + colorspace = "sep"; + break; + case csDeviceN: + colorspace = "devn"; + break; + case csPattern: + default: + colorspace = "-"; + break; + } + components = colorMap->getNumPixelComps(); + bpc = colorMap->getBits(); + } + printf("%-5s %2d %2d ", colorspace, components, bpc); + + switch (str->getKind()) { + case strCCITTFax: + enc = "ccitt"; + break; + case strDCT: + enc = "jpeg"; + break; + case strJPX: + enc = "jpx"; + break; + case strJBIG2: + enc = "jbig2"; + break; + case strFile: + case strFlate: + case strCachedFile: + case strASCIIHex: + case strASCII85: + case strLZW: + case strRunLength: + case strWeird: + default: + enc = "image"; + break; + } + printf("%-5s ", enc); + + printf("%-3s ", interpolate ? 
"yes" : "no"); + + if (inlineImg) { + printf("[inline]\n"); + } else if (ref->isRef()) { + const Ref imageRef = ref->getRef(); + if (imageRef.gen >= 100000) { + printf("[none]\n"); + } else { + printf(" %6d %2d\n", imageRef.num, imageRef.gen); + } + } else { + printf("[none]\n"); + } + + ++imgNum; } -void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, - int width, int height, GBool invert, - GBool interpolate, GBool inlineImg) { +void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg) { FILE *f; int c; int size, i; @@ -132,10 +249,10 @@ void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, } } -void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, - int width, int height, - GfxImageColorMap *colorMap, - GBool interpolate, int *maskColors, GBool inlineImg) { +void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg) { FILE *f; ImageStream *imgStr; Guchar *p; @@ -250,13 +367,46 @@ void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, } } +GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, + double *pmat, int paintType, int tilingType, Dict *resDict, + double *mat, double *bbox, + int x0, int y0, int x1, int y1, + double xStep, double yStep) { + return gTrue; + // do nothing -- this avoids the potentially slow loop in Gfx.cc +} + +void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg) { + if (listImages) + listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgStencil); /* a stand-alone image mask paints the current color: list it as "stencil"; "mask" is reserved for the mask of a masked image */ + else + writeMask(state, ref, str, width, height, invert, interpolate, inlineImg); +} + +void ImageOutputDev::drawImage(GfxState *state, 
Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg) { + if (listImages) + listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage); + else + writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); +} + void ImageOutputDev::drawMaskedImage( GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, GBool interpolate, Stream *maskStr, int maskWidth, int maskHeight, GBool maskInvert, GBool maskInterpolate) { - drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); - drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert, - maskInterpolate, gFalse); + if (listImages) { + listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); + listImage(state, ref, maskStr, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask); /* describe the mask stream itself so the enc column reflects the mask, not the base image */ + } else { + drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); + drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert, + maskInterpolate, gFalse); + } } void ImageOutputDev::drawSoftMaskedImage( @@ -264,7 +414,12 @@ void ImageOutputDev::drawSoftMaskedImage( int width, int height, GfxImageColorMap *colorMap, GBool interpolate, Stream *maskStr, int maskWidth, int maskHeight, GfxImageColorMap *maskColorMap, GBool maskInterpolate) { - drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); - drawImage(state, ref, maskStr, maskWidth, maskHeight, - maskColorMap, maskInterpolate, NULL, gFalse); + if (listImages) { + listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); + listImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate, gFalse, imgSmask); /* the soft mask has its own dimensions: report maskHeight, not the base image height */ + } else { + drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); + drawImage(state, ref, maskStr, maskWidth, maskHeight, + 
maskColorMap, maskInterpolate, NULL, gFalse); + } } diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index fabe6b5..90aea2d 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -44,13 +44,19 @@ class GfxState; class ImageOutputDev: public OutputDev { public: + enum ImageType { + imgImage, + imgStencil, + imgMask, + imgSmask, + }; // Create an OutputDev which will write images to files named // -NNN. or -PPP-NNN., if // is set. Normally, all images are written as PBM // (.pbm) or PPM (.ppm) files. If is set, JPEG images // are written as JPEG (.jpg) files. - ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA); + ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA); // Destructor. virtual ~ImageOutputDev(); @@ -115,10 +121,22 @@ public: private: // Sets the output filename with a given file extension void setFilename(const char *fileExt); + void listImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, GBool inlineImg, + ImageType imageType); + void writeMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg); + void writeImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg); char *fileRoot; // root of output file names char *fileName; // buffer for output file names + GBool listImages; // list images instead of dumping GBool dumpJPEG; // set to dump native JPEG files GBool pageNames; // set to include page number in file names int pageNum; // current page number diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 96116fa..955d8b3 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -35,6 +35,107 @@ Normally, all images are written as PBM (for monochrome images) or PPM format are saved as JPEG files. 
All non-DCT images are saved in PBM/PPM format as usual. .TP +.B \-list +Instead of writing the images, list the images along with various information for each image. Do not specify an +.IR image-root +with this option. +.IP +The following information is listed for each image: +.RS +.TP +.B page +the page number containing the image +.TP +.B num +the image number +.TP +.B type +the image type: +.PP +.RS +image - an opaque image +.RE +.RS +mask - a monochrome mask image +.RE +.RS +smask - a soft-mask image +.RE +.RS +stencil - a monochrome mask image used for painting a color or pattern +.RE +.PP +Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask. +The mask/smask used as part of a transparent image always immediately follows the image in the image list. +.TP +.B width +image width (in pixels) +.TP +.B height +image height (in pixels) +.PP +Note: the image width/height is the size of the embedded image, not the size the image will be rendered at. +.TP +.B color +image color space: +.PP +.RS +gray - Gray +.RE +.RS +rgb - RGB +.RE +.RS +cmyk - CMYK +.RE +.RS +lab - L*a*b* +.RE +.RS +icc - ICC Based +.RE +.RS +index - Indexed Color +.RE +.RS +sep - Separation +.RE +.RS +devn - DeviceN +.RE +.TP +.B comp +number of color components +.TP +.B bpc +bits per component +.TP +.B enc +encoding: +.PP +.RS +image - raster image (may be Flate or LZW compressed but does not use an image encoding) +.RE +.RS +jpeg - Joint Photographic Experts Group +.RE +.RS +jpx - JPEG2000 +.RE +.RS +jbig2 - Joint Bi-Level Image Experts Group +.RE +.RS +ccitt - CCITT Group 3 or Group 4 Fax +.RE +.TP +.B interp +"yes" if interpolation is to be performed when scaling up the image +.TP +.B object ID +the image dictionary object ID (number and generation) +.RE +.TP .BI \-opw " password" Specify the owner password for the PDF file. Providing this will bypass all security restrictions. 
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 2383b6b..fdbd64d 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -48,6 +48,7 @@ static int firstPage = 1; static int lastPage = 0; +static GBool listImages = gFalse; static GBool dumpJPEG = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; @@ -63,6 +64,8 @@ static const ArgDesc argDesc[] = { "last page to convert"}, {"-j", argFlag, &dumpJPEG, 0, "write JPEG images as JPEG files"}, + {"-list", argFlag, &listImages, 0, + "print list of images instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)"}, {"-upw", argString, userPassword, sizeof(userPassword), @@ -87,7 +90,7 @@ static const ArgDesc argDesc[] = { int main(int argc, char *argv[]) { PDFDoc *doc; GooString *fileName; - char *imgRoot; + char *imgRoot = NULL; GooString *ownerPW, *userPW; ImageOutputDev *imgOut; GBool ok; @@ -97,7 +100,7 @@ int main(int argc, char *argv[]) { // parse args ok = parseArgs(argDesc, &argc, argv); - if (!ok || argc != 3 || printVersion || printHelp) { + if (!ok || (listImages && argc != 2) || (!listImages && argc != 3) || printVersion || printHelp) { fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION); fprintf(stderr, "%s\n", popplerCopyright); fprintf(stderr, "%s\n", xpdfCopyright); @@ -109,7 +112,8 @@ int main(int argc, char *argv[]) { goto err0; } fileName = new GooString(argv[1]); - imgRoot = argv[2]; + if (!listImages) + imgRoot = argv[2]; // read config file globalParams = new GlobalParams(); @@ -163,7 +167,7 @@ int main(int argc, char *argv[]) { lastPage = doc->getNumPages(); // write image files - imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG); + imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages); if (imgOut->isOk()) { doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); -- 1.7.5.4