From f9324b3f2bf64574bf20e37686624b8600c3ecc4 Mon Sep 17 00:00:00 2001 From: Corentin Allemand Date: Wed, 31 Oct 2012 17:18:37 +0100 Subject: [PATCH 1/2] Add with media params --- utils/HtmlOutputDev.cc | 4 +++- utils/pdftohtml.cc | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index e4bd0b1..7d60975 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -97,6 +97,7 @@ extern GBool stout; extern GBool xml; extern GBool showHidden; extern GBool noMerge; +extern GBool withMedia; extern double wordBreakThreshold; @@ -877,7 +878,8 @@ void HtmlPage::dumpComplex(FILE *file, int page){ fprintf(pageFile,"
\n", page, pageWidth, pageHeight); - if( !ignore ) + + if( !ignore && !withMedia) { fprintf(pageFile, "\"background\n", diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index 97372be..72e784f 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -84,6 +84,7 @@ double wordBreakThreshold=10; // 10%, below converted into a coefficient - 0.1 GBool showHidden = gFalse; GBool noMerge = gFalse; GBool fontFullName = gFalse; +GBool withMedia = gFalse; static char ownerPassword[33] = ""; static char userPassword[33] = ""; static GBool printVersion = gFalse; @@ -141,7 +142,9 @@ static const ArgDesc argDesc[] = { {"-wbt", argFP, &wordBreakThreshold, 0, "word break threshold (default 10 percent)"}, {"-fontfullname", argFlag, &fontFullName, 0, - "outputs font full name"}, + "outputs font full name"}, + {"-withmedia", argFlag, &withMedia, 0, + "extract all media"}, {NULL} }; @@ -383,7 +386,7 @@ int main(int argc, char *argv[]) { htmlOut->dumpDocOutline(doc); } - if ((complexMode || singleHtml) && !xml && !ignore) { + if ((complexMode || singleHtml) && !xml && !ignore && !withMedia) { #ifdef HAVE_SPLASH GooString *imgFileName = NULL; // White paper color -- 1.7.10.2 (Apple Git-33) From 1d677d4f2c4106bc73e25be5294161b02bbd3810 Mon Sep 17 00:00:00 2001 From: Corentin Allemand Date: Wed, 31 Oct 2012 18:16:19 +0100 Subject: [PATCH 2/2] Extract images with "-withMedia" parameter --- utils/HtmlOutputDev.cc | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index 7d60975..916ec91 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -889,6 +889,26 @@ void HtmlPage::dumpComplex(FILE *file, int page){ delete tmp; + if(!ignore && withMedia){ + int listlen=imgList->getLength(); + GooString *imgfname; + for (int i = 0; i < listlen; i++) { + HtmlImage *img = (HtmlImage*)imgList->del(0); + imgfname=basename(img->fName); + fprintf(pageFile, + "
", + xoutRound(img->yMin), + xoutRound(img->xMin), + xoutRound(img->xMax-img->xMin), + xoutRound(img->yMax-img->yMin)); + + fprintf(pageFile,"xMax-img->xMin),xoutRound(img->yMax-img->yMin)); + fprintf(pageFile,"src=\"%s\"/>",imgfname->getCString()); + fputs("
\n", pageFile); + delete img; + } + } + for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){ if (tmp1->htext){ fprintf(pageFile, @@ -906,6 +926,9 @@ void HtmlPage::dumpComplex(FILE *file, int page){ } } + + + fputs("
\n", pageFile); if( !noframes ) @@ -1466,7 +1489,7 @@ void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, GBool invert, GBool interpolate, GBool inlineImg) { - if (ignore||(complexMode && !xml)) { + if (ignore|| !(complexMode && withMedia)) { OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); return; } @@ -1488,7 +1511,7 @@ void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, GBool interpolate, int *maskColors, GBool inlineImg) { - if (ignore||(complexMode && !xml)) { + if (ignore || !(complexMode && withMedia)) { OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); return; -- 1.7.10.2 (Apple Git-33)