From c8b44fc3971accfd5fdfd96f92097bac9f0b5bb0 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sat, 17 Aug 2013 15:17:11 +0930 Subject: [PATCH 1/2] pdfimages: add -list-extra option to print size, ratio, and ppi --- utils/ImageOutputDev.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++++-- utils/ImageOutputDev.h | 3 +- utils/pdfimages.1 | 16 +++++++++ utils/pdfimages.cc | 9 ++++-- 4 files changed, 107 insertions(+), 7 deletions(-) diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 8b18d2b..55f1ce1 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -20,7 +20,7 @@ // Copyright (C) 2009 Carlos Garcia Campos // Copyright (C) 2009 William Bader // Copyright (C) 2010 Jakob Voss -// Copyright (C) 2012 Adrian Johnson +// Copyright (C) 2012, 2013 Adrian Johnson // Copyright (C) 2013 Thomas Fischer // // To see a description of the changes please see the Changelog file that @@ -39,6 +39,7 @@ #include #include #include +#include #include "goo/gmem.h" #include "Error.h" #include "GfxState.h" @@ -46,8 +47,11 @@ #include "Stream.h" #include "ImageOutputDev.h" -ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) { +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA, GBool listImagesExtraA) { listImages = listImagesA; + listImagesExtra = listImagesExtraA; + if (listImagesExtra) + listImages = gTrue; if (!listImages) { fileRoot = copyString(fileRootA); fileName = (char *)gmalloc(strlen(fileRoot) + 45); @@ -57,7 +61,10 @@ ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEG imgNum = 0; pageNum = 0; ok = gTrue; - if (listImages) { + if (listImagesExtra) { + printf("page num type width height color comp bpc enc interp x-ppi y-ppi size ratio object ID\n"); + printf("--------------------------------------------------------------------------------------------\n"); + } else if (listImages) { printf("page num type width 
height color comp bpc enc interp object ID\n"); printf("---------------------------------------------------------------------\n"); } @@ -79,6 +86,34 @@ void ImageOutputDev::setFilename(const char *fileExt) { } } + +// Print a floating point number between 0 - 9999 using 4 characters +// eg '1.23', '12.3', ' 123', '1234' +// +// We need to be careful to handle the cases where rounding adds an +// extra digit before the decimal. eg printf("%4.2f", 9.99999) +// outputs "10.00" instead of "9.99". +static void printNumber(double d) +{ + char buf[10]; + + if (d < 10.0) { + sprintf(buf, "%4.2f", d); + buf[4] = 0; + printf("%s", buf); + } else if (d < 100.0) { + sprintf(buf, "%4.1f", d); + if (!isdigit(buf[3])) { + buf[3] = 0; + printf(" %s", buf); + } else { + printf("%s", buf); + } + } else { + printf("%4.0f", d); + } +} + void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, @@ -178,6 +213,51 @@ void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, printf("%-3s ", interpolate ? 
"yes" : "no"); + if (listImagesExtra) { + double *mat = state->getCTM(); + int xppi = fabs(width*72.0/mat[0]) + 0.5; + int yppi = fabs(height*72.0/mat[3]) + 0.5; + Goffset embedSize = str->getBaseStream()->getLength(); + long long size = 0; + double ratio = -1.0; + if (colorMap && colorMap->isOk()) + size = ((long long)width * height * colorMap->getNumPixelComps() * colorMap->getBits())/8; + else + size = (long long)width*height/8; // ImageMask + + if (size > 0) + ratio = 100.0*embedSize/size; + + printf("%5d %5d ", xppi, yppi); + + if (size <= 9999) { + printf("%4lldB", size); + } else { + double d = size/1024.0; + if (d <= 9999.0) { + printNumber(d); + putchar('K'); + } else { + d /= 1024.0; + if (d <= 9999.0) { + printNumber(d); + putchar('M'); + } else { + d /= 1024.0; + printNumber(d); + putchar('G'); + } + } + } + + if (ratio > 9.9) + printf(" %3.0f%% ", ratio); + else if (ratio >= 0.0) + printf(" %3.1f%% ", ratio); + else + printf(" - "); + } + if (inlineImg) { printf("[inline]\n"); } else if (ref->isRef()) { diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 13911ed..878bdb9 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -58,7 +58,7 @@ public: // is set. Normally, all images are written as PBM // (.pbm) or PPM (.ppm) files. If is set, JPEG images // are written as JPEG (.jpg) files. - ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA); + ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA, GBool listImagesExtraA); // Destructor. 
virtual ~ImageOutputDev(); @@ -139,6 +139,7 @@ private: char *fileRoot; // root of output file names char *fileName; // buffer for output file names GBool listImages; // list images instead of dumping + GBool listImagesExtra; // list images with extra info instead of dumping GBool dumpJPEG; // set to dump native JPEG files GBool pageNames; // set to include page number in file names int pageNum; // current page number diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index 955d8b3..408e685 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -136,6 +136,23 @@ ccitt - CCITT Group 3 or Group 4 Fax the font dictionary object ID (number and generation) .RE .TP +.B \-list\-extra +Like \-list, but includes the following additional columns: +.RS +.TP +.B x\-ppi +The horizontal resolution of the image (in pixels per inch) when rendered on the pdf page. +.TP +.B y\-ppi +The vertical resolution of the image (in pixels per inch) when rendered on the pdf page. +.TP +.B size +The size of the embedded image in the pdf file. The following suffixes are used: 'B' bytes, 'K' kilobytes, 'M' megabytes, and 'G' gigabytes. +.TP +.B ratio +The compression ratio of the embedded image. +.RE +.TP .BI \-opw " password" Specify the owner password for the PDF file. Providing this will bypass all security restrictions. 
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index 82c301c..6f41392 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -18,7 +18,7 @@ // Copyright (C) 2007-2008, 2010 Albert Astals Cid // Copyright (C) 2010 Hib Eris // Copyright (C) 2010 Jakob Voss -// Copyright (C) 2012 Adrian Johnson +// Copyright (C) 2012, 2013 Adrian Johnson // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -50,6 +50,7 @@ static int firstPage = 1; static int lastPage = 0; static GBool listImages = gFalse; +static GBool listImagesExtra = gFalse; static GBool dumpJPEG = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; @@ -67,6 +68,8 @@ static const ArgDesc argDesc[] = { "write JPEG images as JPEG files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, + {"-list-extra", argFlag, &listImagesExtra, 0, + "print list of images with extra information instead of saving"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)"}, {"-upw", argString, userPassword, sizeof(userPassword), @@ -101,7 +104,7 @@ int main(int argc, char *argv[]) { // parse args ok = parseArgs(argDesc, &argc, argv); - if (!ok || (listImages && argc != 2) || (!listImages && argc != 3) || printVersion || printHelp) { + if (!ok || ((listImages||listImagesExtra) && argc != 2) || (!(listImages||listImagesExtra) && argc != 3) || printVersion || printHelp) { fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION); fprintf(stderr, "%s\n", popplerCopyright); fprintf(stderr, "%s\n", xpdfCopyright); @@ -168,7 +171,7 @@ int main(int argc, char *argv[]) { lastPage = doc->getNumPages(); // write image files - imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages); + imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages, listImagesExtra); if (imgOut->isOk()) { doc->displayPages(imgOut, 
firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); -- 1.8.1.2