\n", f); + fputs(str->getCString(),f); + delete str; + if (reFlow) + fputs("
\n", f); + else + fputs(" tag. With
+this flag off, paragraph lines are separated by <br> tags and paragraphs are
+also separated by <br> tags.
+.TP
.B \-xml
output for XML post-processing
.TP
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
index 41312de..8be7974 100644
--- a/utils/pdftohtml.cc
+++ b/utils/pdftohtml.cc
@@ -69,6 +69,7 @@ static GBool errQuiet=gFalse;
static GBool noDrm=gFalse;
GBool showHidden = gFalse;
+GBool reFlow = gFalse;
GBool noMerge = gFalse;
static char ownerPassword[33] = "";
static char userPassword[33] = "";
@@ -107,12 +108,14 @@ static const ArgDesc argDesc[] = {
"zoom the pdf document (default 1.5)"},
{"-xml", argFlag, &xml, 0,
"output for XML post-processing"},
+ {"-reflow", argFlag, &reFlow, 0,
+ "output reflow paragraphs"},
{"-hidden", argFlag, &showHidden, 0,
"output hidden text"},
{"-nomerge", argFlag, &noMerge, 0,
"do not merge paragraphs"},
{"-enc", argString, textEncName, sizeof(textEncName),
- "output text encoding name"},
+ "output text encoding name (UTF-8, Latin1 etc"},
{"-dev", argString, gsDevice, sizeof(gsDevice),
"output device name for Ghostscript (png16m, jpeg etc)"},
{"-v", argFlag, &printVersion, 0,
@@ -250,7 +253,6 @@ int main(int argc, char *argv[]) {
{
complexMode = gTrue;
noframes = gTrue;
- noMerge = gTrue;
}
// get page range