From 41a7018cacc3ec38541908681cd6802f052a4511 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 18 Aug 2013 20:37:01 +0930 Subject: [PATCH 5/5] pdfimages: support JBIG2 output --- poppler/JBIG2Stream.h | 1 + utils/ImageOutputDev.cc | 25 +++++++++++++++++++++++++ utils/ImageOutputDev.h | 4 ++++ utils/pdfimages.1 | 13 ++++++++----- utils/pdfimages.cc | 4 ++++ 5 files changed, 42 insertions(+), 5 deletions(-) diff --git a/poppler/JBIG2Stream.h b/poppler/JBIG2Stream.h index be1b3bd..0ee2518 100644 --- a/poppler/JBIG2Stream.h +++ b/poppler/JBIG2Stream.h @@ -56,6 +56,7 @@ public: virtual int lookChar(); virtual GooString *getPSFilter(int psLevel, const char *indent); virtual GBool isBinary(GBool last = gTrue); + virtual Object *getGlobalsStream() { return &globalsStream; } private: virtual GBool hasGetChars() { return true; } diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc index 38c6849..1a68d0e 100644 --- a/utils/ImageOutputDev.cc +++ b/utils/ImageOutputDev.cc @@ -48,6 +48,7 @@ #include "GfxState.h" #include "Object.h" #include "Stream.h" +#include "JBIG2Stream.h" #include "ImageOutputDev.h" ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool listImagesA, GBool listImagesExtraA) { @@ -420,6 +421,30 @@ void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, // dump JPEG2000 file writeRawImage(str, "jp2"); + } else if (dumpJBIG2 && str->getKind() == strJBIG2 && !inlineImg) { + // dump JBIG2 globals stream if available + JBIG2Stream *jb2Str = static_cast<JBIG2Stream *>(str); + Object *globals = jb2Str->getGlobalsStream(); + if (globals->isStream()) { + FILE *f; + int c; + Stream *str = globals->getStream(); + + setFilename("jb2g"); + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + str->reset(); + while ((c = str->getChar()) != EOF) + fputc(c, f); + str->close(); + fclose(f); + } + + // dump JBIG2 embedded file + writeRawImage(str, "jb2e"); + } else if 
(outputPNG) { // output in PNG format diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h index 25852d0..f4e97ab 100644 --- a/utils/ImageOutputDev.h +++ b/utils/ImageOutputDev.h @@ -82,6 +82,9 @@ public: // Use Jpeg2000 format for Jpeg2000 files void enableJpeg2000(GBool jp2) { dumpJP2 = jp2; } + // Use JBIG2 format for JBIG2 files + void enableJBig2(GBool jbig2) { dumpJBIG2 = jbig2; } + // Check if file was successfully created. virtual GBool isOk() { return ok; } @@ -159,6 +162,7 @@ private: GBool listImagesExtra; // list images with extra info instead of dumping GBool dumpJPEG; // set to dump native JPEG files GBool dumpJP2; // set to dump native JPEG2000 files + GBool dumpJBIG2; // set to dump native JBIG2 files GBool outputPNG; // set to output in PNG format GBool outputTiff; // set to output in TIFF format GBool pageNames; // set to include page number in file names diff --git a/utils/pdfimages.1 b/utils/pdfimages.1 index bdd99bb..38b2056 100644 --- a/utils/pdfimages.1 +++ b/utils/pdfimages.1 @@ -11,7 +11,7 @@ pdfimages \- Portable Document Format (PDF) image extractor .B Pdfimages saves images from a Portable Document Format (PDF) file as Portable Pixmap (PPM), Portable Bitmap (PBM), Portable Network Graphics (PNG), -Tagged Image File Format (TIFF), JPEG, or JPEG2000 files. +Tagged Image File Format (TIFF), JPEG, JPEG2000, or JBIG2 files. .PP Pdfimages reads the PDF file .IR PDF-file , @@ -21,12 +21,12 @@ where .I nnn is the image number and .I xxx -is the image type (.ppm, .pbm, .png, .tif, .jpg, or jp2). +is the image type (.ppm, .pbm, .png, .tif, .jpg, .jp2, .jb2e, or .jb2g). .PP The default output format is PBM (for monochrome images) or PPM for non-monochrome. The -\-png or \-tiff options change to default output to PNG or TIFF respectively. In addition the \-j and -\-jp2 options will cause JPEG and JPEG2000, respectively, images in the PDF file to be written in their -native format. 
+\-png or \-tiff options change the default output to PNG or TIFF respectively. In addition the \-j, +\-jp2, and \-jbig2 options will cause JPEG, JPEG2000, and JBIG2, respectively, images in the PDF file +to be written in their native format. .SH OPTIONS .TP .BI \-f " number" @@ -47,6 +47,9 @@ Write images in JPEG format as JPEG files instead of the default format. The JPE .B \-jp2 Write images in JPEG2000 format as JP2 files instead of the default format. The JP2 file is identical to the JPEG2000 data stored in the PDF. .TP +.B \-jbig2 +Write images in JBIG2 format as JBIG2 files instead of the default format. JBIG2 data in PDF is of the embedded type. The embedded type of JBIG2 has an optional separate file containing global data. The embedded data is written with the extension .jb2e and the global data (if available) will be written to the same image number with the extension .jb2g. The content of both these files is identical to the JBIG2 data in the PDF. +.TP .B \-list Instead of writing the images, list the images along with various information for each image. 
Do not specify an .IR image-root diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc index bc15df0..5f7f292 100644 --- a/utils/pdfimages.cc +++ b/utils/pdfimages.cc @@ -55,6 +55,7 @@ static GBool enablePNG = gFalse; static GBool enableTiff = gFalse; static GBool dumpJPEG = gFalse; static GBool dumpJP2 = gFalse; +static GBool dumpJBIG2 = gFalse; static GBool pageNames = gFalse; static char ownerPassword[33] = "\001"; static char userPassword[33] = "\001"; @@ -75,6 +76,8 @@ static const ArgDesc argDesc[] = { "write JPEG images as JPEG files"}, {"-jp2", argFlag, &dumpJP2, 0, "write JPEG2000 images as JP2 files"}, + {"-jbig2", argFlag, &dumpJBIG2, 0, + "write JBIG2 images as JBIG2 files"}, {"-list", argFlag, &listImages, 0, "print list of images instead of saving"}, {"-list-extra", argFlag, &listImagesExtra, 0, @@ -186,6 +189,7 @@ int main(int argc, char *argv[]) { imgOut->enableTiff(enableTiff); imgOut->enableJpeg(dumpJPEG); imgOut->enableJpeg2000(dumpJP2); + imgOut->enableJBig2(dumpJBIG2); doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse, gFalse); } -- 1.8.1.2