diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 3c74c6e..c45f246 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -15,6 +15,7 @@
 //
 // Copyright (C) 2007-2008, 2010 Albert Astals Cid
 // Copyright (C) 2010 Hib Eris
+// Copyright (C) 2010 Mike Slegeir
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -44,6 +45,8 @@
 #include "PDFDoc.h"
 #include "PDFDocFactory.h"
 #include "HtmlOutputDev.h"
+#include "SplashOutputDev.h"
+#include "splash/SplashBitmap.h"
 #include "PSOutputDev.h"
 #include "GlobalParams.h"
 #include "Error.h"
@@ -54,6 +57,10 @@
 # define GHOSTSCRIPT "gs"
 #endif
 
+#ifdef _WIN32
+# define strcasecmp stricmp
+#endif
+
 static int firstPage = 1;
 static int lastPage = 0;
 static GBool rawOrder = gTrue;
@@ -62,7 +69,8 @@ static GBool printHelp = gFalse;
 GBool printHtml = gFalse;
 GBool complexMode=gFalse;
 GBool ignore=gFalse;
-//char extension[5]=".png";
+GBool useSplash=gTrue;
+char extension[5]="png";
 double scale=1.5;
 GBool noframes=gFalse;
 GBool stout=gFalse;
@@ -74,7 +82,7 @@ GBool showHidden = gFalse;
 GBool noMerge = gFalse;
 static char ownerPassword[33] = "";
 static char userPassword[33] = "";
-static char gsDevice[33] = "png16m";
+static char gsDevice[33] = "none";
 static GBool printVersion = gFalse;
 
 static GooString* getInfoString(Dict *infoDict, char *key);
@@ -117,6 +125,8 @@ static const ArgDesc argDesc[] = {
    "output text encoding name"},
   {"-dev", argString, gsDevice, sizeof(gsDevice),
    "output device name for Ghostscript (png16m, jpeg etc)"},
+  {"-fmt", argString, extension, sizeof(extension),
+   "image file format for Splash output (png or jpg)"},
   {"-v", argFlag, &printVersion, 0,
    "print copyright and version info"},
   {"-opw", argString, ownerPassword, sizeof(ownerPassword),
@@ -128,18 +138,48 @@ static const ArgDesc argDesc[] = {
   {NULL}
 };
 
+// SplashOutputDev variant that rasterizes a page without its text: every
+// text-related hook below is overridden with an empty body.  main() uses it
+// to render the per-page background images in complex mode.
+class SplashOutputDevNoText : public SplashOutputDev {
+public:
+  // Forwards all arguments unchanged to the SplashOutputDev constructor.
+  SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA,
+        GBool reverseVideoA, SplashColorPtr paperColorA,
+        GBool bitmapTopDownA = gTrue,
+        GBool allowAntialiasA = gTrue) : SplashOutputDev(colorModeA,
+            bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA,
+            allowAntialiasA) { }
+  virtual ~SplashOutputDevNoText() { }
+
+  // Text hooks: deliberately do nothing so no glyphs reach the bitmap.
+  void drawChar(GfxState *state, double x, double y,
+      double dx, double dy,
+      double originX, double originY,
+      CharCode code, int nBytes, Unicode *u, int uLen) { }
+  GBool beginType3Char(GfxState *state, double x, double y,
+      double dx, double dy,
+      CharCode code, Unicode *u, int uLen) { return gFalse; }
+  void endType3Char(GfxState *state) { }
+  void beginTextObject(GfxState *state) { }
+  GBool deviceHasTextClip(GfxState *state) { return gFalse; }
+  void endTextObject(GfxState *state) { }
+  GBool interpretType3Chars() { return gFalse; }
+};
+
 int main(int argc, char *argv[]) {
   PDFDoc *doc = NULL;
   GooString *fileName = NULL;
   GooString *docTitle = NULL;
   GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
   GooString *htmlFileName = NULL;
+  GooString *imgFileName = NULL;
   GooString *psFileName = NULL;
   HtmlOutputDev *htmlOut = NULL;
+  SplashOutputDev *splashOut = NULL;
   PSOutputDev *psOut = NULL;
   GBool ok;
   char *p;
-  char extension[16] = "png";
   GooString *ownerPW, *userPW;
   Object info;
   char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
@@ -289,16 +329,21 @@ int main(int argc, char *argv[]) {
   info.free();
   if( !docTitle ) docTitle = new GooString(htmlFileName);
 
-  /* determine extensions of output backgroun images */
-  {int i;
-  for(i = 0; extsList[i]; i++)
-  {
-    if( strstr(gsDevice, extsList[i]) != (char *) NULL )
-    {
-      strncpy(extension, extsList[i], sizeof(extension));
-      break;
-    }
-  }}
+  // Any -dev value other than the default "none" selects the Ghostscript path
+  if( strcmp("none", gsDevice) ) {
+    useSplash = gFalse;
+    /* determine extensions of output background images */
+    int i;
+    for(i = 0; extsList[i]; i++)
+    {
+      if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+      {
+        strncpy(extension, extsList[i], sizeof(extension));
+        extension[sizeof(extension) - 1] = '\0'; // strncpy does not terminate on truncation
+        break;
+      }
+    }
+  }
 
   rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
 
@@ -342,55 +387,87 @@ int main(int argc, char *argv[]) {
   }
 
   if( complexMode && !xml && !ignore ) {
-    int h=xoutRound(htmlOut->getPageHeight()/scale);
-    int w=xoutRound(htmlOut->getPageWidth()/scale);
-    //int h=xoutRound(doc->getPageHeight(1)/scale);
-    //int w=xoutRound(doc->getPageWidth(1)/scale);
-
-    psFileName = new GooString(htmlFileName->getCString());
-    psFileName->append(".ps");
-
-    psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
-        doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
-    psOut->setDisplayText(gFalse);
-    doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
-        gTrue, gFalse, gFalse);
-    delete psOut;
+    if(useSplash) {
+      // White paper color
+      SplashColor color;
+      color[0] = color[1] = color[2] = 255;
+      // If the user specified "jpg" use JPEG, otherwise PNG
+      SplashImageFileFormat format = strcasecmp(extension, "jpg") ?
+          splashFormatPng : splashFormatJpeg;
+      // Render at the same resolution as the Ghostscript path below
+      // (-r 72*scale) so both back ends produce images of the same size.
+      const int dpi = static_cast<int>(72*scale);
+
+      splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color);
+      splashOut->startDoc(doc->getXRef());
+
+      for (int pg = firstPage; pg <= lastPage; ++pg) {
+        doc->displayPage(splashOut, pg, dpi, dpi, 0, gTrue, gFalse, gFalse);
+        SplashBitmap *bitmap = splashOut->getBitmap();
+
+        // One image per page: <htmlFileName><3-digit page>.<extension>
+        imgFileName = GooString::format("{0:s}{1:03d}.{2:s}",
+            htmlFileName->getCString(), pg, extension);
 
-    /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
+        bitmap->writeImgFile(format, imgFileName->getCString(), dpi, dpi);
+
+        delete imgFileName;
+      }
+
+      delete splashOut;
+    } else {
+      // Ghostscript path: dump the pages to a temporary PostScript file with
+      // text display turned off, then let gs rasterize it.
+      int h=xoutRound(htmlOut->getPageHeight()/scale);
+      int w=xoutRound(htmlOut->getPageWidth()/scale);
+      //int h=xoutRound(doc->getPageHeight(1)/scale);
+      //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+      psFileName = new GooString(htmlFileName->getCString());
+      psFileName->append(".ps");
+
+      psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+          doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h);
+      psOut->setDisplayText(gFalse);
+      doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
+          gTrue, gFalse, gFalse);
+      delete psOut;
+
+      /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
       psFileName->getCString());*/
-
-    GooString *gsCmd = new GooString(GHOSTSCRIPT);
-    GooString *tw, *th, *sc;
-    gsCmd->append(" -sDEVICE=");
-    gsCmd->append(gsDevice);
-    gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
-    sc = GooString::fromInt(static_cast<int>(72*scale));
-    gsCmd->append(sc);
-    gsCmd->append(" -sOutputFile=");
-    gsCmd->append("\"");
-    gsCmd->append(htmlFileName);
-    gsCmd->append("%03d.");
-    gsCmd->append(extension);
-    gsCmd->append("\" -g");
-    tw = GooString::fromInt(static_cast<int>(scale*w));
-    gsCmd->append(tw);
-    gsCmd->append("x");
-    th = GooString::fromInt(static_cast<int>(scale*h));
-    gsCmd->append(th);
-    gsCmd->append(" -q \"");
-    gsCmd->append(psFileName);
-    gsCmd->append("\"");
-//    printf("running: %s\n", gsCmd->getCString());
-    if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
-      error(-1, "Failed to launch Ghostscript!\n");
+
+      GooString *gsCmd = new GooString(GHOSTSCRIPT);
+      GooString *tw, *th, *sc;
+      gsCmd->append(" -sDEVICE=");
+      gsCmd->append(gsDevice);
+      gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+      sc = GooString::fromInt(static_cast<int>(72*scale));
+      gsCmd->append(sc);
+      gsCmd->append(" -sOutputFile=");
+      gsCmd->append("\"");
+      gsCmd->append(htmlFileName);
+      gsCmd->append("%03d.");
+      gsCmd->append(extension);
+      gsCmd->append("\" -g");
+      tw = GooString::fromInt(static_cast<int>(scale*w));
+      gsCmd->append(tw);
+      gsCmd->append("x");
+      th = GooString::fromInt(static_cast<int>(scale*h));
+      gsCmd->append(th);
+      gsCmd->append(" -q \"");
+      gsCmd->append(psFileName);
+      gsCmd->append("\"");
+      // printf("running: %s\n", gsCmd->getCString());
+      if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+        error(-1, "Failed to launch Ghostscript!\n");
+      }
+      unlink(psFileName->getCString());
+      delete tw;
+      delete th;
+      delete sc;
+      delete gsCmd;
+      delete psFileName;
     }
-    unlink(psFileName->getCString());
-    delete tw;
-    delete th;
-    delete sc;
-    delete gsCmd;
-    delete psFileName;
   }
 
   delete htmlOut;