From 83106788346b8fff32cfa7ec8f6e3e56b1ed1619 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 26 Nov 2017 20:43:15 +1030 Subject: [PATCH 7/8] cairo: when interactive and source has outline, copy the outline to the output Bug #103912 --- poppler/CairoOutputDev.cc | 176 +++++++++++++++++++++++++++++++------ poppler/CairoOutputDev.h | 20 ++++- poppler/UTF.cc | 37 ++++++++ poppler/UTF.h | 7 ++ qt5/tests/check_utf_conversion.cpp | 8 ++ utils/pdftocairo.1 | 2 +- utils/pdftocairo.cc | 1 + 7 files changed, 220 insertions(+), 31 deletions(-) diff --git a/poppler/CairoOutputDev.cc b/poppler/CairoOutputDev.cc index 1fd0fb48..30462cc5 100644 --- a/poppler/CairoOutputDev.cc +++ b/poppler/CairoOutputDev.cc @@ -70,6 +70,7 @@ #include "UnicodeMap.h" #include "UTF.h" #include "JBIG2Stream.h" +#include "Outline.h" //------------------------------------------------------------------------ // #define LOG_CAIRO @@ -176,6 +177,7 @@ CairoOutputDev::CairoOutputDev() { pdfPageNum = 0; cairoPageNum = 0; forwardLinkCount = 0; + firstPage = gTrue; // the SA parameter supposedly defaults to false, but Acrobat // apparently hardwires it to true @@ -290,6 +292,8 @@ void CairoOutputDev::startDoc(PDFDoc *docA, destsMap.clear(); pdfPageRefToCairoPageNumMap.clear(); + outlineTree.clear(); + pdfPageRefToOutlineNodeMap.clear(); cairoPageNum = 0; forwardLinkCount = 0; firstPage = gTrue; @@ -311,14 +315,66 @@ void CairoOutputDev::textStringToQuotedUtf8(GooString *text, GooString *s) } else { s->append(utf8[i]); } + } s->append("'"); gfree(utf8); } -// Initialization that needs to be performed after setCairo() is called. 
+// Recursively copy the source outline items in list into outlineTree (in
+// pre-order, so a parent always precedes its children).  Items whose GoTo
+// action resolves to a page reference are also recorded in
+// pdfPageRefToOutlineNodeMap so that startPage() can mark them for output.
+void CairoOutputDev::addOutlineNodes(GfxState *state, GooList *list, OutlineNode *parentNode)
+{
+  for (int i = 0; i < list->getLength(); i++) {
+    OutlineItem *item = static_cast<OutlineItem*>(list->get(i));
+    OutlineNode *node = new OutlineNode;
+    outlineTree.push_back(std::unique_ptr<OutlineNode>(node)); // takes ownership
+    node->parent = parentNode;
+    char *name = ucs4ToUtf8(item->getTitle(), item->getTitleLength());
+    node->name.Set(name);
+    gfree(name); // ucs4ToUtf8() allocates with gmalloc()
+    node->action = item->getAction();
+    node->isOpen = item->isOpen();
+    node->id = 0;
+    node->hasRef = gFalse;
+    node->emitNode = gFalse;
+    node->emitAction = gFalse;
+    if (node->action && node->action->getKind() == actionGoTo) {
+      LinkGoTo *act = static_cast<LinkGoTo*>(node->action);
+      if (act->isOk()) {
+        // A named destination is looked up in the catalog and the result is
+        // deleted below; a direct destination is not deleted here.
+        GooString *namedDest = act->getNamedDest();
+        const bool freeDest = namedDest != nullptr;
+        LinkDest *linkDest = freeDest ? doc->getCatalog()->findDest(namedDest)
+                                      : act->getDest();
+        if (linkDest) {
+          if (linkDest->isOk() && linkDest->isPageRef()) {
+            node->ref = linkDest->getPageRef();
+            node->hasRef = gTrue;
+            pdfPageRefToOutlineNodeMap.insert(std::pair<Ref, OutlineNode*>(node->ref, node));
+          }
+          if (freeDest)
+            delete linkDest;
+        }
+      }
+    }
+    // open() runs before the kids are queried (presumably it loads them).
+    item->open();
+    if (item->hasKids()) {
+      GooList *kids = item->getKids();
+      if (kids)
+        addOutlineNodes (state, kids, node);
+    }
+  }
+}
+
+// Document setup that needs to be performed after setCairo() is called.
void CairoOutputDev::startFirstPage(int pageNum, GfxState *state, XRef *xrefA) { + firstPage = gFalse; if (xrefA != NULL) { xref = xrefA; } @@ -346,6 +402,14 @@ void CairoOutputDev::startFirstPage(int pageNum, GfxState *state, XRef *xrefA) std::unique_ptr(dest))); } } + + // Create outline tree + Outline *outline = doc->getOutline(); + if (outline) { + GooList *list = outline->getItems(); + if (list) + addOutlineNodes (state, list, nullptr); + } } } @@ -394,8 +458,6 @@ void CairoOutputDev::startPage(int pageNum, GfxState *state, XRef *xrefA) { #endif // build list of link annotations - if (annotations) - delete annotations; Object obj = doc->getPage(pageNum)->getAnnotsObject(xref); annotations = new Annots(doc, pageNum, &obj); @@ -430,6 +492,22 @@ void CairoOutputDev::startPage(int pageNum, GfxState *state, XRef *xrefA) { #endif } } + + // Check if any outlines required for this page + auto range = pdfPageRefToOutlineNodeMap.equal_range(*ref); + for (auto &it = range.first; it !=range.second; ++it) { + OutlineNode *node = it->second; + node->emitNode = gTrue; + node->emitAction = gTrue; + // walk back up tree marking all parent nodes as also required + while (node->parent) { + node = node->parent; + if (!node->emitNode) + node->emitNode = gTrue; + else + break; + } + } } } @@ -439,14 +517,43 @@ void CairoOutputDev::endPage(GfxState *state) { text->coalesce(gTrue, 0, gFalse); } - if (interactive && annotations && isPDF()) { - for (int i = 0; i < annotations->getNumAnnots(); i++) { - Annot *annot = annotations->getAnnot(i); - if (annot->getType() == Annot::typeLink) { - AnnotLink *linkAnnot = static_cast(annot); - beginLinkTag(state, linkAnnot); + if (interactive && isPDF()) { + // Write any remaining links + if (annotations) { + for (int i = 0; i < annotations->getNumAnnots(); i++) { + Annot *annot = annotations->getAnnot(i); + if (annot->getType() == Annot::typeLink) { + AnnotLink *linkAnnot = static_cast(annot); + beginLinkTag(state, linkAnnot); #if CAIRO_VERSION 
>= CAIRO_VERSION_ENCODE(1, 15, 8)
-        cairo_tag_end (cairo, "Link");
+          cairo_tag_end (cairo, "Link");
+#endif
+        }
+      }
+    }
+  }
+
+  delete annotations;
+  annotations = nullptr;
+}
+
+// Add every outline node marked for output to the PDF surface.  outlineTree
+// is in pre-order, so a parent has received its cairo id before its children.
+void CairoOutputDev::emitOutlines(cairo_surface_t *surface)
+{
+  if (interactive && cairo_surface_get_type (surface) == CAIRO_SURFACE_TYPE_PDF) {
+    for (auto const &node : outlineTree) {
+      if (node->emitNode) {
+#if defined(CAIRO_HAS_PDF_SURFACE) && CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 15, 8)
+        // The outline types/constants are part of the same guarded cairo PDF API.
+        GooString attribs;
+        if (node->emitAction)
+          appendLinkAttributes(attribs, node->action, 0);
+        const int parentId = node->parent ? node->parent->id : CAIRO_PDF_OUTLINE_ROOT;
+        const cairo_pdf_outline_flags_t flags =
+          node->isOpen ? CAIRO_PDF_OUTLINE_FLAG_OPEN : (cairo_pdf_outline_flags_t)0;
+        node->id = cairo_pdf_surface_add_outline (surface, parentId, node->name.getCString(),
+                                                  attribs.getCString(), flags);
 #endif
       }
     }
@@ -3441,6 +3548,9 @@ void CairoOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
 
 void CairoOutputDev::findLinkObject(const StructElement *elem)
 {
+  if (!annotations)
+    return;
+
   if (elem->isObjectRef()) {
     Ref ref = elem->getObjectRef();
     for (int i = 0; i < annotations->getNumAnnots(); i++) {
@@ -3529,29 +3639,19 @@ void CairoOutputDev::appendLinkDestXY(GooString *s, LinkDest *dest, double destP
                destPageHeight ? destPageHeight - y : y);
 }
 
-void CairoOutputDev::beginLinkTag(GfxState *state, AnnotLink *linkAnnot)
+// Append the attributes describing action to attrib.  The "rect" attribute
+// needs the link annotation and is therefore written by beginLinkTag();
+// pageHeight is unused here and only kept so that the definition still
+// matches the declaration in CairoOutputDev.h.
+// NOTE(review): the lines between the two hunks are not visible in this
+// patch; confirm that they no longer reference state or linkAnnot.
+void CairoOutputDev::appendLinkAttributes(GooString& attrib, LinkAction *action, double pageHeight)
 {
-  GooString attrib;
-  attrib.append("rect=[");
-  AnnotQuadrilaterals *quads = linkAnnot->getQuadrilaterals();
-  if (quads && quads->getQuadrilateralsLength() > 0) {
-    for (int i = 0; i < quads->getQuadrilateralsLength(); i++) {
-      cairo_rectangle_t rect;
-      quadToCairoRect(quads, i, state->getPageHeight(), &rect);
-      attrib.appendf("{0:g} {1:g} {2:g} {3:g} ", rect.x, rect.y, rect.width, rect.height);
-    }
-  } else {
-    double x1, x2, y1, y2;
-    linkAnnot->getRect(&x1, &y1, &x2, &y2);
-    attrib.appendf("{0:g} {1:g} {2:g} {3:g} ",
-                   x1,
-                   state->getPageHeight() - y2,
-                   x2 - x1,
-                   y2 - y1);
-  }
-  attrib.append("] ");
+  (void)pageHeight;
+  // Guard against a missing action instead of dereferencing it below.
+  if (!action)
+    return;
 
-  LinkAction *action = linkAnnot->getAction();
   if (action->getKind() == actionGoTo) {
     LinkGoTo *act = static_cast<LinkGoTo*>(action);
     if (act->isOk()) {
@@ -3584,6 +3684,40 @@ void CairoOutputDev::beginLinkTag(GfxState *state, AnnotLink *linkAnnot)
       attrib.appendf("uri='{0:t}'", act->getURI());
     }
   }
+}
+
+// Open a "Link" tag for linkAnnot.  The tag attributes are the clickable
+// rectangle(s) of the annotation followed by the attributes of its action.
+void CairoOutputDev::beginLinkTag(GfxState *state, AnnotLink *linkAnnot)
+{
+  GooString attrib;
+  const double pageHeight = state->getPageHeight();
+
+  // y is flipped using the page height; quadrilaterals, when present, are
+  // used instead of the plain annotation rectangle.
+  if (pageHeight > 0) {
+    attrib.append("rect=[");
+    AnnotQuadrilaterals *quads = linkAnnot->getQuadrilaterals();
+    if (quads && quads->getQuadrilateralsLength() > 0) {
+      for (int i = 0; i < quads->getQuadrilateralsLength(); i++) {
+        cairo_rectangle_t rect;
+        quadToCairoRect(quads, i, pageHeight, &rect);
+        attrib.appendf("{0:g} {1:g} {2:g} {3:g} ", rect.x, rect.y, rect.width, rect.height);
+      }
+    } else {
+      double x1, x2, y1, y2;
+      linkAnnot->getRect(&x1, &y1, &x2, &y2);
+      attrib.appendf("{0:g} {1:g} {2:g} {3:g} ",
+                     x1,
+                     pageHeight - y2,
+                     x2 - x1,
+                     y2 - y1);
+    }
+    attrib.append("] ");
+  }
+
+  appendLinkAttributes (attrib, linkAnnot->getAction(), pageHeight);
+
 #if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 15, 8)
   cairo_tag_begin (cairo, "Link",
attrib.getCString());
 #endif
diff --git a/poppler/CairoOutputDev.h b/poppler/CairoOutputDev.h
index 4c33202a..e0fb7b02 100644
--- a/poppler/CairoOutputDev.h
+++ b/poppler/CairoOutputDev.h
@@ -142,6 +142,8 @@ public:
   // End a page.
   void endPage(GfxState *state) override;
 
+  void emitOutlines(cairo_surface_t *surface);
+
   //----- save/restore graphics state
   void saveState(GfxState *state) override;
   void restoreState(GfxState *state) override;
@@ -303,6 +305,7 @@ protected:
   void quadToCairoRect(AnnotQuadrilaterals *quads, int idx, double destPageHeight, cairo_rectangle_t *rect);
   void appendLinkDestRef(GooString *s, LinkDest *dest);
   void appendLinkDestXY(GooString *s, LinkDest *dest, double destPageHeight);
+  void appendLinkAttributes(GooString& attrib, LinkAction *action, double pageHeight);
   void beginLinkTag(GfxState *state, AnnotLink *linkAnnot);
   void beginLink(GfxState *state, const StructElement *linkElem);
 #if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 14, 0)
@@ -311,7 +314,7 @@ protected:
   static void setContextAntialias(cairo_t *cr, cairo_antialias_t antialias);
   static void textStringToQuotedUtf8(GooString *text, GooString *s);
   GBool isPDF();
-
+  void startFirstPage(GfxState *state);
 
   GfxRGB fill_color, stroke_color;
   cairo_pattern_t *fill_pattern, *stroke_pattern;
@@ -405,8 +408,23 @@ protected:
     }
   };
 
+  struct OutlineNode {       // one item of the source document outline
+    OutlineNode *parent;     // enclosing item; nullptr for top-level items
+    GooString name;          // item title converted to UTF-8
+    LinkAction *action;      // action of the source item; may be null
+    GBool isOpen;            // open state reported by the source item
+    int id;                  // id returned by cairo once emitted; 0 before that
+    GBool hasRef;            // true when ref holds a destination page ref
+    Ref ref;                 // page ref of the GoTo destination (see hasRef)
+    GBool emitNode;          // node must be written to the output outline
+    GBool emitAction;        // the node's own destination page was output
+  };
+  void addOutlineNodes(GfxState *state, GooList *list, OutlineNode *parentNode);
+
   std::map<Ref, std::map<std::unique_ptr<GooString>,std::unique_ptr<LinkDest>,GooStringCompare>, RefCompare > destsMap;
   std::map<Ref, int, RefCompare> pdfPageRefToCairoPageNumMap;
+  std::vector<std::unique_ptr<OutlineNode> > outlineTree;
+  std::multimap<Ref, OutlineNode*, RefCompare> pdfPageRefToOutlineNodeMap;
   int forwardLinkCount;
 };
 
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 36b44062..acc93c97 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -447,3 +447,40 @@ char *TextStringToUtf8(GooString *textStr)
   }
   return utf8;
 }
+
+// Count
number of UTF-8 bytes required to convert a UCS-4 string to
+// UTF-8 (excluding terminating NULL).
+int ucs4CountUtf8Bytes(const Unicode *ucs4, int ucs4Len)
+{
+  int n = 0;
+  for (int i = 0; i < ucs4Len; i++) {
+    const Unicode c = ucs4[i];
+    if (c < 0x80)
+      n++;
+    else if (c < 0x800)
+      n += 2;
+    else if (c < 0x10000)
+      n += 3;
+    else if (c <= UCS4_MAX)
+      n += 4;
+    else
+      n += 3; // replace with REPLACEMENT_CHAR
+  }
+  return n;
+}
+
+// Convert a UCS-4 string to UTF-8.  Returns a gmalloc()ed, NUL-terminated
+// string; code points above UCS4_MAX are written as U+FFFD, as counted above.
+char *ucs4ToUtf8(const Unicode *ucs4, int ucs4Len)
+{
+  int len = ucs4CountUtf8Bytes(ucs4, ucs4Len);
+  char *utf8 = (char*)gmalloc(len + 1);
+  char *p = utf8;
+  for (int i = 0; i < ucs4Len; i++) {
+    int count = mapUTF8(ucs4[i] <= UCS4_MAX ? ucs4[i] : 0xFFFD, p, len);
+    p += count;
+    len -= count;
+  }
+  *p = 0;
+  return utf8;
+}
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 3f62c528..41874cee 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -87,4 +87,11 @@ char *utf16ToUtf8(const uint16_t *utf16, int maxUtf16 = INT_MAX);
 // returns UTF-8 string.
 char *TextStringToUtf8(GooString *textStr);
 
+// Count number of UTF-8 bytes required to convert a UCS-4 string to
+// UTF-8 (excluding terminating NULL).
+int ucs4CountUtf8Bytes(const Unicode *ucs4, int ucs4Len); + +// Convert a UCS-4 string to UTF-8 +char *ucs4ToUtf8(const Unicode *ucs4, int ucs4Len); + #endif diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp index b8eb03a5..d566f05d 100644 --- a/qt5/tests/check_utf_conversion.cpp +++ b/qt5/tests/check_utf_conversion.cpp @@ -80,6 +80,14 @@ void TestUTFConversion::testUTF() QVERIFY( compare(utf8String, str) ); free (utf8String); + // UCS-4 to UTF-8 + + QVector ucs4 = s.toUcs4(); + QCOMPARE( ucs4CountUtf8Bytes(ucs4.data(), ucs4.length()), (int)strlen(str) ); + utf8String = ucs4ToUtf8(ucs4.data(), ucs4.length()); + QVERIFY( compare(utf8String, str) ); + free (utf8String); + free(str); } diff --git a/utils/pdftocairo.1 b/utils/pdftocairo.1 index cd966127..2f3b5e32 100644 --- a/utils/pdftocairo.1 +++ b/utils/pdftocairo.1 @@ -224,7 +224,7 @@ write this information to the output file (PDF only). .B \-inter If the input file contains interactive features, where supported by cairo, write this information to the output file (PDF only). Currently supported -features: page labels, thumbnail images. +features: page labels, outline, thumbnail images. .TP .B \-origpagesizes This option is the same as "\-paper match". diff --git a/utils/pdftocairo.cc b/utils/pdftocairo.cc index 1b53b330..50849350 100644 --- a/utils/pdftocairo.cc +++ b/utils/pdftocairo.cc @@ -1321,6 +1321,7 @@ int main(int argc, char *argv[]) { renderPage(doc, cairoOut, pg, pg_w, pg_h, output_w, output_h); endPage(imageFileName); } + cairoOut->emitOutlines(surface); delete cairoOut; endDocument(); -- 2.11.0