diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index 3c74c6e..dc064a5 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -15,6 +15,7 @@ // // Copyright (C) 2007-2008, 2010 Albert Astals Cid // Copyright (C) 2010 Hib Eris +// Copyright (C) 2010 Mike Slegeir // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -44,16 +45,13 @@ #include "PDFDoc.h" #include "PDFDocFactory.h" #include "HtmlOutputDev.h" -#include "PSOutputDev.h" +#include "SplashOutputDev.h" +#include "splash/SplashBitmap.h" #include "GlobalParams.h" #include "Error.h" #include "DateInfo.h" #include "goo/gfile.h" -#ifndef GHOSTSCRIPT -# define GHOSTSCRIPT "gs" -#endif - static int firstPage = 1; static int lastPage = 0; static GBool rawOrder = gTrue; @@ -62,7 +60,7 @@ static GBool printHelp = gFalse; GBool printHtml = gFalse; GBool complexMode=gFalse; GBool ignore=gFalse; -//char extension[5]=".png"; +char extension[4]="png"; double scale=1.5; GBool noframes=gFalse; GBool stout=gFalse; @@ -74,7 +72,6 @@ GBool showHidden = gFalse; GBool noMerge = gFalse; static char ownerPassword[33] = ""; static char userPassword[33] = ""; -static char gsDevice[33] = "png16m"; static GBool printVersion = gFalse; static GooString* getInfoString(Dict *infoDict, char *key); @@ -115,8 +112,8 @@ static const ArgDesc argDesc[] = { "do not merge paragraphs"}, {"-enc", argString, textEncName, sizeof(textEncName), "output text encoding name"}, - {"-dev", argString, gsDevice, sizeof(gsDevice), - "output device name for Ghostscript (png16m, jpeg etc)"}, + {"-fmt", argString, extension, sizeof(extension), + "image file format: png or jpg"}, {"-v", argFlag, &printVersion, 0, "print copyright and version info"}, {"-opw", argString, ownerPassword, sizeof(ownerPassword), @@ -128,21 +125,42 @@ static const ArgDesc argDesc[] = { {NULL} }; +class SplashOutputDevNoText : public SplashOutputDev { +public: + SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA, + GBool reverseVideoA, SplashColorPtr paperColorA, + GBool bitmapTopDownA = gTrue, + GBool allowAntialiasA = gTrue) : SplashOutputDev(colorModeA, + bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA, + allowAntialiasA) { } + virtual ~SplashOutputDevNoText() { } + + void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen) { } + GBool beginType3Char(GfxState *state, double x, double y, + double dx, double dy, + CharCode code, Unicode *u, int uLen) { return false; } + void endType3Char(GfxState *state) { } + void beginTextObject(GfxState *state) { } + GBool deviceHasTextClip(GfxState *state) { return false; } + void endTextObject(GfxState *state) { } +}; + int main(int argc, char *argv[]) { PDFDoc *doc = NULL; GooString *fileName = NULL; GooString *docTitle = NULL; GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL; GooString *htmlFileName = NULL; - GooString *psFileName = NULL; + GooString *imgFileName = NULL; HtmlOutputDev *htmlOut = NULL; - PSOutputDev *psOut = NULL; + SplashOutputDev *splashOut = NULL; GBool ok; char *p; - char extension[16] = "png"; GooString *ownerPW, *userPW; Object info; - char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL}; // parse args ok = parseArgs(argDesc, &argc, argv); @@ -289,17 +307,6 @@ int main(int argc, char *argv[]) { info.free(); if( !docTitle ) docTitle = new GooString(htmlFileName); - /* determine extensions of output backgroun images */ - {int i; - for(i = 0; extsList[i]; i++) - { - if( strstr(gsDevice, extsList[i]) != (char *) NULL ) - { - strncpy(extension, extsList[i], sizeof(extension)); - break; - } - }} - rawOrder = complexMode; // todo: figure out what exactly rawOrder do :) // write text file @@ -342,55 +349,37 @@ int main(int argc, char *argv[]) { } if( complexMode && !xml && !ignore ) { - int h=xoutRound(htmlOut->getPageHeight()/scale); - int w=xoutRound(htmlOut->getPageWidth()/scale); - //int h=xoutRound(doc->getPageHeight(1)/scale); - //int w=xoutRound(doc->getPageWidth(1)/scale); - - psFileName = new GooString(htmlFileName->getCString()); - psFileName->append(".ps"); - - psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(), - doc->getCatalog(), NULL, firstPage, lastPage, psModePS, w, h); - psOut->setDisplayText(gFalse); - doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0, - gTrue, gFalse, gFalse); - delete psOut; - - /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h, - psFileName->getCString());*/ + // White paper color + SplashColor color; + color[0] = color[1] = color[2] = 255; + // If the user specified "jpg" use JPEG, otherwise PNG + SplashImageFileFormat format = strcasecmp(extension, "jpg") ? + splashFormatPng : splashFormatJpeg; - GooString *gsCmd = new GooString(GHOSTSCRIPT); - GooString *tw, *th, *sc; - gsCmd->append(" -sDEVICE="); - gsCmd->append(gsDevice); - gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r"); - sc = GooString::fromInt(static_cast(72*scale)); - gsCmd->append(sc); - gsCmd->append(" -sOutputFile="); - gsCmd->append("\""); - gsCmd->append(htmlFileName); - gsCmd->append("%03d."); - gsCmd->append(extension); - gsCmd->append("\" -g"); - tw = GooString::fromInt(static_cast(scale*w)); - gsCmd->append(tw); - gsCmd->append("x"); - th = GooString::fromInt(static_cast(scale*h)); - gsCmd->append(th); - gsCmd->append(" -q \""); - gsCmd->append(psFileName); - gsCmd->append("\""); -// printf("running: %s\n", gsCmd->getCString()); - if( !executeCommand(gsCmd->getCString()) && !errQuiet) { - error(-1, "Failed to launch Ghostscript!\n"); + splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color); + splashOut->startDoc(doc->getXRef()); + + for (int pg = firstPage; pg <= lastPage; ++pg) { + int pg_w = doc->getPageMediaWidth(pg) / scale; + int pg_h = doc->getPageMediaHeight(pg) / scale; + if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) { + int tmp = pg_w; + pg_w = pg_h; + pg_h = tmp; + } + + doc->displayPage(splashOut, pg, 72, 72, 0, gTrue, gFalse, gFalse); + SplashBitmap *bitmap = splashOut->getBitmap(); + + imgFileName = GooString::format("{0:s}{1:03d}.{2:s}", + htmlFileName->getCString(), pg, extension); + + bitmap->writeImgFile(format, imgFileName->getCString(), 72, 72); + + delete imgFileName; } - unlink(psFileName->getCString()); - delete tw; - delete th; - delete sc; - delete gsCmd; - delete psFileName; + + delete splashOut; } delete htmlOut;