From 9802e36fd612f3f008ba74abcfb7dac6320b770b Mon Sep 17 00:00:00 2001 From: Igor Slepchin Date: Fri, 10 Feb 2012 00:41:22 -0500 Subject: [PATCH 1/2] Generate outlines in pdftohtml in -xml mode. ---
Reviewer notes (free text after the "---" marker; not part of the commit message):
  * dumpDocOutline() no longer returns early just because xml is set. When
    xml is set or complexMode is off, the outline goes to the `page` stream.
  * The HTML walker is renamed newOutlineLevel -> newHtmlOutlineLevel and is
    only run when xml is off; with xml on, the new newXmlOutlineLevel() runs
    instead and recurses into each item's kids.
  * The GoTo-destination lookup that used to sit inline in the HTML walker
    moves to getOutlinePageNum(). It starts from -1 and returns that when the
    item has no GoTo action, the link is not ok, or no destination is found;
    both walkers only use the page number when the result is > 0.
  * After either walker, dumpDocOutline() now returns gTrue rather than the
    HTML walker's result.
  * pdftohtml.cc drops its `if (!xml)` guard and always calls dumpDocOutline().
  * NOTE(review): in this copy some string literals look cut short (e.g. the
    fprintf in newXmlOutlineLevel passes `page` and the title to a format of
    just "%s\n"), `dynamic_cast(action)` has no target type, and the
    pdf2xml.dtd hunk shows only bare +/- markers. Presumably angle-bracketed
    text was lost in transit -- confirm against the original patch before
    applying.
utils/HtmlOutputDev.cc | 130 +++++++++++++++++++++++++++++++++++------------- utils/HtmlOutputDev.h | 7 ++- utils/pdf2xml.dtd | 7 ++- utils/pdftohtml.cc | 5 +-- 4 files changed, 108 insertions(+), 41 deletions(-) diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc index 43fc21a..0640f51 100644 --- a/utils/HtmlOutputDev.cc +++ b/utils/HtmlOutputDev.cc @@ -1563,7 +1563,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc) GBool bClose = gFalse; Catalog *catalog = doc->getCatalog(); - if (!ok || xml) + if (!ok) return gFalse; Outline *outline = doc->getOutline(); @@ -1574,7 +1574,7 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc) if (!outlines) return gFalse; - if (!complexMode && !xml) + if (!complexMode || xml) { output = page; } @@ -1609,21 +1609,30 @@ GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc) } } - GBool done = newOutlineLevel(output, outlines, catalog); - if (done && !complexMode) - fputs("
\n", output); - - if (bClose) + if (!xml) { - fputs("\n\n", output); - fclose(output); + GBool done = newHtmlOutlineLevel(output, outlines, catalog); + if (done && !complexMode) + fputs("
\n", output); + + if (bClose) + { + fputs("\n\n", output); + fclose(output); + } } - return done; + else + newXmlOutlineLevel(output, outlines, catalog); + + return gTrue; #endif } -GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level) +GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level) { +#ifdef DISABLE_OUTLINE + return gFalse; +#else GBool atLeastOne = gFalse; if (level == 1) @@ -1639,29 +1648,10 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), item->getTitleLength()); - // get corresponding link GooString *linkName = NULL;; - LinkAction *action = item->getAction(); - LinkGoTo *link = NULL; - if (action && action->getKind() == actionGoTo) - link = dynamic_cast(action); - if (link && link->isOk()) { - LinkDest *linkdest=NULL; - if (link->getDest()!=NULL) - linkdest=link->getDest()->copy(); - else if (link->getNamedDest()!=NULL) - linkdest=catalog->findDest(link->getNamedDest()); - - if (linkdest) { - int page; - if (linkdest->isPageRef()) { - Ref pageref=linkdest->getPageRef(); - page=catalog->findPage(pageref.num,pageref.gen); - } else { - page=linkdest->getPageNum(); - } - delete linkdest; - + int page = getOutlinePageNum(item); + if (page > 0) + { /* complex simple frames file-4.html files.html#4 noframes file.html#4 file.html#4 @@ -1682,7 +1672,6 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c } } delete str; - } } fputs("
  • ",output); @@ -1700,7 +1689,7 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c if (item->hasKids()) { fputs("\n",output); - newOutlineLevel(output, item->getKids(), catalog, level+1); + newHtmlOutlineLevel(output, item->getKids(), catalog, level+1); } item->close(); fputs("
  • \n",output); @@ -1708,4 +1697,75 @@ GBool HtmlOutputDev::newOutlineLevel(FILE *output, GooList *outlines, Catalog* c fputs("\n",output); return atLeastOne; +#endif +} + +void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog) +{ +#ifndef DISABLE_OUTLINE + fputs("\n", output); + + for (int i = 0; i < outlines->getLength(); i++) + { + OutlineItem *item = (OutlineItem*)outlines->get(i); + GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), + item->getTitleLength()); + int page = getOutlinePageNum(item); + if (page > 0) + { + fprintf(output, "%s\n", + page, titleStr->getCString()); + } + else + { + fprintf(output, "%s\n", titleStr->getCString()); + } + delete titleStr; + + item->open(); + if (item->hasKids()) + { + newXmlOutlineLevel(output, item->getKids(), catalog); + } + item->close(); + } + + fputs("\n", output); +#endif +} + +#ifndef DISABLE_OUTLINE +int HtmlOutputDev::getOutlinePageNum(OutlineItem *item) +{ + LinkAction *action = item->getAction(); + LinkGoTo *link = NULL; + LinkDest *linkdest = NULL; + int pagenum = -1; + + if (!action || action->getKind() != actionGoTo) + return pagenum; + + link = dynamic_cast(action); + + if (!link || !link->isOk()) + return pagenum; + + if (link->getDest()) + linkdest = link->getDest()->copy(); + else if (link->getNamedDest()) + linkdest = catalog->findDest(link->getNamedDest()); + + if (!linkdest) + return pagenum; + + if (linkdest->isPageRef()) { + Ref pageref = linkdest->getPageRef(); + pagenum = catalog->findPage(pageref.num, pageref.gen); + } else { + pagenum = linkdest->getPageNum(); + } + + delete linkdest; + return pagenum; } +#endif diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h index 2fe9ad5..64132f7 100644 --- a/utils/HtmlOutputDev.h +++ b/utils/HtmlOutputDev.h @@ -60,6 +60,7 @@ class GfxState; class GooString; class PDFDoc; +class OutlineItem; //------------------------------------------------------------------------ // HtmlString 
//------------------------------------------------------------------------ @@ -317,7 +318,11 @@ private: GooString* getLinkDest(AnnotLink *link,Catalog *catalog); void dumpMetaVars(FILE *); void doFrame(int firstPage); - GBool newOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1); + GBool newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1); + void newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog); +#ifndef DISABLE_OUTLINE + int getOutlinePageNum(OutlineItem *item); +#endif void drawJpegImage(GfxState *state, Stream *str); FILE *fContentsFrame; diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd index 9cd3880..389676c 100644 --- a/utils/pdf2xml.dtd +++ b/utils/pdf2xml.dtd @@ -1,5 +1,5 @@ - + + + + diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc index fea91a4..fce2b7b 100644 --- a/utils/pdftohtml.cc +++ b/utils/pdftohtml.cc @@ -403,10 +403,7 @@ int main(int argc, char *argv[]) { { doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0, gTrue, gFalse, gFalse); - if (!xml) - { - htmlOut->dumpDocOutline(doc); - } + htmlOut->dumpDocOutline(doc); } if ((complexMode || singleHtml) && !xml && !ignore) { -- 1.7.7.6