From 5b396af778864920aaeec24203c1432c11a90b5f Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Sat, 3 Aug 2013 09:05:21 +0930
Subject: [PATCH 1/2] pdfinfo: indicate if pdf contains javascript

Add a JSInfo helper class that reports whether a document contains
JavaScript. It checks the catalog's JavaScript names (Catalog::numJS),
the document, page and form field additional actions, and the actions
attached to link, screen and widget annotations on the scanned pages.
JavaScript and rendition actions that carry a script both count.

pdfinfo prints the result as a new "JavaScript: yes/no" line.

---
Review notes (ignored by git-am):
 - JSInfo::scanJS now checks Annot::actionPageInvisible for screen and
   widget annotations; the original checked actionPageVisible twice and
   never looked at the "page invisible" trigger.
 - Restored text that was lost in transit: the static_cast<...> target
   types in JSInfo.cc and the header name in the Form.h context line
   (presumably <set>; verify against poppler/Form.h).
 - Comment fixes in Annot.h ("when the when the") and Catalog.h (the
   member holds document-level, not page-level, additional actions).
 - Dropped the trailing comma after the last enumerator of the new enums
   and the stray ';' after the body of JSInfo::containsJS().
 - Hunk line counts are unchanged, but the blob ids on the "index" lines
   predate these edits and indentation was reconstructed; apply with
   whitespace tolerance (e.g. git apply --ignore-whitespace).
 - NOTE(review): the LinkAction objects returned by the new
   getAdditionalAction() helpers come from LinkAction::parseAction() and
   are never released by JSInfo::scanLinkAction(); confirm ownership and
   free them if they are caller-owned.

 CMakeLists.txt      |   2 +
 poppler/Annot.cc    |  27 ++++++++++
 poppler/Annot.h     |   8 +++
 poppler/Catalog.cc  |  26 ++++++++++
 poppler/Catalog.h   |  12 +++++
 poppler/Form.cc     |   4 ++
 poppler/Form.h      |   2 +
 poppler/JSInfo.cc   | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 poppler/JSInfo.h    |  48 ++++++++++++++++++
 poppler/Makefile.am |   2 +
 poppler/Page.cc     |  20 ++++++++
 poppler/Page.h      |  10 +++-
 utils/pdfinfo.1     |   3 ++
 utils/pdfinfo.cc    |   8 +++
 14 files changed, 314 insertions(+), 1 deletion(-)
 create mode 100644 poppler/JSInfo.cc
 create mode 100644 poppler/JSInfo.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0da8c6d..e62aa63 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -300,6 +300,7 @@ set(poppler_SRCS
   poppler/Hints.cc
   poppler/JArithmeticDecoder.cc
   poppler/JBIG2Stream.cc
+  poppler/JSInfo.cc
   poppler/Lexer.cc
   poppler/Link.cc
   poppler/Linearization.cc
@@ -456,6 +457,7 @@ if(ENABLE_XPDF_HEADERS)
     poppler/Hints.h
     poppler/JArithmeticDecoder.h
     poppler/JBIG2Stream.h
+    poppler/JSInfo.h
     poppler/Lexer.h
     poppler/Link.h
     poppler/Linearization.h
diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index f0cce6d..4c4f37a 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -234,6 +234,28 @@ static LinkAction* getAdditionalAction(Annot::AdditionalActionsType type, Object
   return linkAction;
 }
 
+static LinkAction* getFormAdditionalAction(Annot::FormAdditionalActionsType type, Object *additionalActions, PDFDoc *doc) {
+  Object additionalActionsObject;
+  LinkAction *linkAction = NULL;
+
+  if (additionalActions->fetch(doc->getXRef(), &additionalActionsObject)->isDict()) {
+    const char *key = (type == Annot::actionFieldModified ? "K" :
+                       type == Annot::actionFormatField ? "F" :
+                       type == Annot::actionValidateField ? "V" :
+                       type == Annot::actionCalculateField ? "C" : NULL);
+
+    Object actionObject;
+
+    if (additionalActionsObject.dictLookup(key, &actionObject)->isDict())
+      linkAction = LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
+    actionObject.free();
+  }
+
+  additionalActionsObject.free();
+
+  return linkAction;
+}
+
 //------------------------------------------------------------------------
 // AnnotBorderEffect
 //------------------------------------------------------------------------
@@ -3911,6 +3933,11 @@ LinkAction* AnnotWidget::getAdditionalAction(AdditionalActionsType type)
   return ::getAdditionalAction(type, &additionalActions, doc);
 }
 
+LinkAction* AnnotWidget::getFormAdditionalAction(FormAdditionalActionsType type)
+{
+  return ::getFormAdditionalAction(type, &additionalActions, doc);
+}
+
 // Grand unified handler for preparing text strings to be drawn into form
 // fields. Takes as input a text string (in PDFDocEncoding or UTF-16).
 // Converts some or all of this string to the appropriate encoding for the
diff --git a/poppler/Annot.h b/poppler/Annot.h
index 2865d23..ef2b1d0 100644
--- a/poppler/Annot.h
+++ b/poppler/Annot.h
@@ -536,6 +536,13 @@ public:
     actionPageInvisible ///< Performed when the page containing the annotation becomes invisible
   };
 
+  enum FormAdditionalActionsType {
+    actionFieldModified, ///< Performed when the user modifies the field
+    actionFormatField, ///< Performed before the field is formatted to display its value
+    actionValidateField, ///< Performed when the field value changes
+    actionCalculateField ///< Performed when the field needs to be recalculated
+  };
+
   Annot(PDFDoc *docA, PDFRectangle *rectA);
   Annot(PDFDoc *docA, Dict *dict);
   Annot(PDFDoc *docA, Dict *dict, Object *obj);
@@ -1305,6 +1312,7 @@ public:
   AnnotAppearanceCharacs *getAppearCharacs() { return appearCharacs; }
   LinkAction *getAction() { return action; }
   LinkAction *getAdditionalAction(AdditionalActionsType type);
+  LinkAction *getFormAdditionalAction(FormAdditionalActionsType type);
   Dict *getParent() { return parent; }
 
 private:
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 25a8997..beb74c3 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -126,6 +126,9 @@ Catalog::Catalog(PDFDoc *docA) {
   }
   optContentProps.free();
 
+  // actions
+  catDict.dictLookupNF("AA", &additionalActions);
+
   // get the ViewerPreferences dictionary
   catDict.dictLookup("ViewerPreferences", &viewerPreferences);
   catDict.free();
@@ -181,6 +184,7 @@ Catalog::~Catalog() {
   outline.free();
   acroForm.free();
   viewerPreferences.free();
+  additionalActions.free();
 #if MULTITHREADED
   gDestroyMutex(&mutex);
 #endif
@@ -1062,3 +1066,25 @@ NameTree *Catalog::getJSNameTree()
   return jsNameTree;
 }
 
+LinkAction* Catalog::getAdditionalAction(DocumentAdditionalActionsType type) {
+  Object additionalActionsObject;
+  LinkAction *linkAction = NULL;
+
+  if (additionalActions.fetch(doc->getXRef(), &additionalActionsObject)->isDict()) {
+    const char *key = (type == actionCloseDocument ? "WC" :
+                       type == actionSaveDocumentStart ? "WS" :
+                       type == actionSaveDocumentFinish ? "DS" :
+                       type == actionPrintDocumentStart ? "WP" :
+                       type == actionPrintDocumentFinish ? "DP" : NULL);
+
+    Object actionObject;
+
+    if (additionalActionsObject.dictLookup(key, &actionObject)->isDict())
+      linkAction = LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
+    actionObject.free();
+  }
+
+  additionalActionsObject.free();
+
+  return linkAction;
+}
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index a89d9aa..0486bf0 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -48,6 +48,7 @@ class Page;
 class PageAttrs;
 struct Ref;
 class LinkDest;
+class LinkAction;
 class PageLabelInfo;
 class Form;
 class OCGs;
@@ -202,6 +203,16 @@ public:
   PageMode getPageMode();
   PageLayout getPageLayout();
 
+  enum DocumentAdditionalActionsType {
+    actionCloseDocument, ///< Performed before closing the document
+    actionSaveDocumentStart, ///< Performed before saving the document
+    actionSaveDocumentFinish, ///< Performed after saving the document
+    actionPrintDocumentStart, ///< Performed before printing the document
+    actionPrintDocumentFinish ///< Performed after printing the document
+  };
+
+  LinkAction *getAdditionalAction(DocumentAdditionalActionsType type);
+
 private:
 
   // Get page label info.
@@ -237,6 +248,7 @@ private:
   PageLabelInfo *pageLabelInfo; // info about page labels
   PageMode pageMode; // page mode
   PageLayout pageLayout; // page layout
+  Object additionalActions; // document additional actions
 
   GBool cachePageTree(int page); // Cache first pages.
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 78c25e3..3070927 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -156,6 +156,10 @@ LinkAction *FormWidget::getActivationAction() {
   return widget ? widget->getAction() : NULL;
 }
 
+LinkAction *FormWidget::getAdditionalAction(Annot::FormAdditionalActionsType type) {
+  return widget ? widget->getFormAdditionalAction(type) : NULL;
+}
+
 FormWidgetButton::FormWidgetButton (PDFDoc *docA, Object *aobj, unsigned num, Ref ref, FormField *p) :
 	FormWidget(docA, aobj, num, ref, p)
 {
diff --git a/poppler/Form.h b/poppler/Form.h
index ef67748..6cc54a9 100644
--- a/poppler/Form.h
+++ b/poppler/Form.h
@@ -21,6 +21,7 @@
 #endif
 
 #include "Object.h"
+#include "Annot.h"
 
 #include <set>
 
@@ -101,6 +102,7 @@ public:
   bool isReadOnly() const;
 
   LinkAction *getActivationAction();
+  LinkAction *getAdditionalAction(Annot::FormAdditionalActionsType type);
 
   // return the unique ID corresponding to pageNum/fieldNum
   static int encodeID (unsigned pageNum, unsigned fieldNum);
diff --git a/poppler/JSInfo.cc b/poppler/JSInfo.cc
new file mode 100644
index 0000000..85fef90
--- /dev/null
+++ b/poppler/JSInfo.cc
@@ -0,0 +1,143 @@
+//========================================================================
+//
+// JSInfo.cc
+//
+// Copyright (C) 2013 Adrian Johnson
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+
+#include "config.h"
+#include "Object.h"
+#include "Dict.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "JSInfo.h"
+#include "Link.h"
+#include "Form.h"
+
+JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
+  doc = docA;
+  currentPage = firstPage + 1;
+}
+
+JSInfo::~JSInfo() {
+}
+
+
+void JSInfo::scanLinkAction(LinkAction *link) {
+  if (!link)
+    return;
+
+  if (link->getKind() == actionJavaScript) {
+    hasJS = gTrue;
+  }
+
+  if (link->getKind() == actionRendition) {
+    LinkRendition *linkr = static_cast<LinkRendition *>(link);
+    if (linkr->getScript())
+      hasJS = gTrue;
+  }
+}
+
+void JSInfo::scanJS(int nPages) {
+  Page *page;
+  Annots *annots;
+  Object obj1, obj2;
+  int lastPage;
+
+  hasJS = gFalse;
+
+  // Names
+  if (doc->getCatalog()->numJS() > 0) {
+    hasJS = gTrue;
+  }
+
+  // document actions
+  scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionCloseDocument));
+  scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionSaveDocumentStart));
+  scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionSaveDocumentFinish));
+  scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionPrintDocumentStart));
+  scanLinkAction(doc->getCatalog()->getAdditionalAction(Catalog::actionPrintDocumentFinish));
+
+  // form field actions
+  if (doc->getCatalog()->getFormType() == Catalog::AcroForm) {
+    Form *form = doc->getCatalog()->getForm();
+    for (int i = 0; i < form->getNumFields(); i++) {
+      FormField *field = form->getRootField(i);
+      for (int j = 0; j < field->getNumWidgets(); j++) {
+        FormWidget *widget = field->getWidget(j);
+        scanLinkAction(widget->getActivationAction());
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFieldModified));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFormatField));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionValidateField));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField));
+      }
+    }
+  }
+
+  // scan pages
+
+  if (currentPage > doc->getNumPages()) {
+    return;
+  }
+
+  lastPage = currentPage + nPages;
+  if (lastPage > doc->getNumPages() + 1) {
+    lastPage = doc->getNumPages() + 1;
+  }
+
+  for (int pg = currentPage; pg < lastPage; ++pg) {
+    page = doc->getPage(pg);
+    if (!page) continue;
+
+    // page actions (open, close)
+    scanLinkAction(page->getAdditionalAction(Page::actionOpenPage));
+    scanLinkAction(page->getAdditionalAction(Page::actionClosePage));
+
+    // annotation actions (links, screen, widget)
+    annots = page->getAnnots();
+    for (int i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getType() == Annot::typeLink) {
+        AnnotLink *annot = static_cast<AnnotLink *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+      } else if (annots->getAnnot(i)->getType() == Annot::typeScreen) {
+        AnnotScreen *annot = static_cast<AnnotScreen *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible));
+
+      } else if (annots->getAnnot(i)->getType() == Annot::typeWidget) {
+        AnnotWidget *annot = static_cast<AnnotWidget *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible));
+      }
+    }
+  }
+
+  currentPage = lastPage;
+}
+
+GBool JSInfo::containsJS() {
+  return hasJS;
+}
diff --git a/poppler/JSInfo.h b/poppler/JSInfo.h
new file mode 100644
index 0000000..b888e77
--- /dev/null
+++ b/poppler/JSInfo.h
@@ -0,0 +1,48 @@
+//========================================================================
+//
+// JSInfo.h
+//
+// Copyright (C) 2013 Adrian Johnson
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef JS_INFO_H
+#define JS_INFO_H
+
+#include "Object.h"
+#include "PDFDoc.h"
+#include "goo/gtypes.h"
+
+#include "Link.h"
+
+class PDFDoc;
+
+class JSInfo {
+public:
+
+  // Constructor.
+  JSInfo(PDFDoc *doc, int firstPage = 0);
+
+  // Destructor.
+  ~JSInfo();
+
+  // scan for JS in the PDF
+  void scanJS(int nPages);
+
+  // return true if PDF contains JavaScript
+  GBool containsJS();
+
+private:
+
+  PDFDoc *doc;
+  int currentPage;
+  GBool hasJS;
+
+  void scanLinkAction(LinkAction *link);
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index aa7c924..8629e6f 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -190,6 +190,7 @@ poppler_include_HEADERS = \
 	Hints.h \
 	JArithmeticDecoder.h \
 	JBIG2Stream.h \
+	JSInfo.h \
 	Lexer.h \
 	Linearization.h \
 	Link.h \
@@ -270,6 +271,7 @@ libpoppler_la_SOURCES = \
 	Hints.cc \
 	JArithmeticDecoder.cc \
 	JBIG2Stream.cc \
+	JSInfo.cc \
 	Lexer.cc \
 	Linearization.cc \
 	Link.cc \
diff --git a/poppler/Page.cc b/poppler/Page.cc
index e0a3b29..7825f80 100644
--- a/poppler/Page.cc
+++ b/poppler/Page.cc
@@ -823,3 +823,23 @@ void Page::getDefaultCTM(double *ctm, double hDPI, double vDPI,
   }
   delete state;
 }
+
+LinkAction* Page::getAdditionalAction(PageAdditionalActionsType type) {
+  Object additionalActionsObject;
+  LinkAction *linkAction = NULL;
+
+  if (actions.fetch(doc->getXRef(), &additionalActionsObject)->isDict()) {
+    const char *key = (type == actionOpenPage ? "O" :
+                       type == actionClosePage ? "C" : NULL);
+
+    Object actionObject;
+
+    if (additionalActionsObject.dictLookup(key, &actionObject)->isDict())
+      linkAction = LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
+    actionObject.free();
+  }
+
+  additionalActionsObject.free();
+
+  return linkAction;
+}
diff --git a/poppler/Page.h b/poppler/Page.h
index 1c9d0a9..95adf3a 100644
--- a/poppler/Page.h
+++ b/poppler/Page.h
@@ -44,6 +44,7 @@ class PDFDoc;
 class XRef;
 class OutputDev;
 class Links;
+class LinkAction;
 class Annots;
 class Annot;
 class Gfx;
@@ -211,6 +212,13 @@ public:
   // Get actions
   Object *getActions(Object *obj) { return actions.fetch(xref, obj); }
 
+  enum PageAdditionalActionsType {
+    actionOpenPage, ///< Performed when opening the page
+    actionClosePage ///< Performed when closing the page
+  };
+
+  LinkAction *getAdditionalAction(PageAdditionalActionsType type);
+
   Gfx *createGfx(OutputDev *out, double hDPI, double vDPI,
                  int rotate, GBool useMediaBox, GBool crop,
                  int sliceX, int sliceY, int sliceW, int sliceH,
@@ -267,7 +275,7 @@ private:
   Object contents; // page contents
   Object thumb; // page thumbnail
   Object trans; // page transition
-  Object actions; // page addiction actions
+  Object actions; // page additional actions
   double duration; // page duration
   GBool ok; // true if page is valid
 #if MULTITHREADED
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index a3ad1c3..134bd3f 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -48,6 +48,9 @@ tagged (yes/no)
 form (AcroForm / XFA / none)
 .RE
 .RS
+javascript (yes/no)
+.RE
+.RS
 page count
 .RE
 .RS
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index f297614..902200f 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -53,6 +53,7 @@
 #include "UTF.h"
 #include "Error.h"
 #include "DateInfo.h"
+#include "JSInfo.h"
 
 static void printInfoString(Dict *infoDict, const char *key, const char *text,
                             UnicodeMap *uMap);
@@ -246,6 +247,13 @@ int main(int argc, char *argv[]) {
     break;
   }
 
+  // print javascript info
+  {
+    JSInfo jsInfo(doc, firstPage - 1);
+    jsInfo.scanJS(lastPage - firstPage + 1);
+    printf("JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no");
+  }
+
   // print page count
   printf("Pages: %d\n", doc->getNumPages());
 
-- 
1.8.1.2