From f7aefa06ed0bb588a52a32b97d4e724f16b383d6 Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Sat, 3 Aug 2013 09:05:21 +0930
Subject: [PATCH 1/2] pdfinfo: indicate if pdf contains javascript

---
 poppler/Annot.cc    |   27 ++++++++
 poppler/Annot.h     |    8 +++
 poppler/Catalog.cc  |    4 ++
 poppler/Catalog.h   |    3 +
 poppler/Form.cc     |    4 ++
 poppler/Form.h      |    2 +
 poppler/JSInfo.cc   |  183 +++++++++++++++++++++++++++++++++++++++++++++++++++
 poppler/JSInfo.h    |   49 ++++++++++++++
 poppler/Makefile.am |    2 +
 poppler/Page.h      |    2 +-
 utils/pdfinfo.1     |    3 +
 utils/pdfinfo.cc    |    8 +++
 12 files changed, 294 insertions(+), 1 deletion(-)
 create mode 100644 poppler/JSInfo.cc
 create mode 100644 poppler/JSInfo.h

diff --git a/poppler/Annot.cc b/poppler/Annot.cc
index f0cce6d..4c4f37a 100644
--- a/poppler/Annot.cc
+++ b/poppler/Annot.cc
@@ -234,6 +234,28 @@ static LinkAction* getAdditionalAction(Annot::AdditionalActionsType type, Object
   return linkAction;
 }
 
+static LinkAction* getFormAdditionalAction(Annot::FormAdditionalActionsType type, Object *additionalActions, PDFDoc *doc) {
+  Object additionalActionsObject;
+  LinkAction *linkAction = NULL;
+
+  if (additionalActions->fetch(doc->getXRef(), &additionalActionsObject)->isDict()) {
+    const char *key = (type == Annot::actionFieldModified ?  "K" :
+                       type == Annot::actionFormatField ?    "F" :
+                       type == Annot::actionValidateField ?  "V" :
+                       type == Annot::actionCalculateField ? "C" : NULL);
+
+    Object actionObject;
+
+    if (additionalActionsObject.dictLookup(key, &actionObject)->isDict())
+      linkAction = LinkAction::parseAction(&actionObject, doc->getCatalog()->getBaseURI());
+    actionObject.free();
+  }
+
+  additionalActionsObject.free();
+
+  return linkAction;
+}
+
 //------------------------------------------------------------------------
 // AnnotBorderEffect
 //------------------------------------------------------------------------
@@ -3911,6 +3933,11 @@ LinkAction* AnnotWidget::getAdditionalAction(AdditionalActionsType type)
   return ::getAdditionalAction(type, &additionalActions, doc);
 }
 
+LinkAction* AnnotWidget::getFormAdditionalAction(FormAdditionalActionsType type)
+{
+  return ::getFormAdditionalAction(type, &additionalActions, doc);
+}
+
 // Grand unified handler for preparing text strings to be drawn into form
 // fields.  Takes as input a text string (in PDFDocEncoding or UTF-16).
 // Converts some or all of this string to the appropriate encoding for the
diff --git a/poppler/Annot.h b/poppler/Annot.h
index 2865d23..ef2b1d0 100644
--- a/poppler/Annot.h
+++ b/poppler/Annot.h
@@ -536,6 +536,13 @@ public:
     actionPageInvisible   ///< Performed when the page containing the annotation becomes invisible
   };
 
+  enum FormAdditionalActionsType {
+    actionFieldModified,  ///< Performed when the user modifies the field
+    actionFormatField,    ///< Performed before the field is formatted to display its value
+    actionValidateField,  ///< Performed when the field value changes
+    actionCalculateField  ///< Performed when the field needs to be recalculated
+  };
+
   Annot(PDFDoc *docA, PDFRectangle *rectA);
   Annot(PDFDoc *docA, Dict *dict);
   Annot(PDFDoc *docA, Dict *dict, Object *obj);
@@ -1305,6 +1312,7 @@ public:
   AnnotAppearanceCharacs *getAppearCharacs() { return appearCharacs; }
   LinkAction *getAction() { return action; }
   LinkAction *getAdditionalAction(AdditionalActionsType type);
+  LinkAction *getFormAdditionalAction(FormAdditionalActionsType type);
   Dict *getParent() { return parent; }
 
 private:
diff --git a/poppler/Catalog.cc b/poppler/Catalog.cc
index 25a8997..4119747 100644
--- a/poppler/Catalog.cc
+++ b/poppler/Catalog.cc
@@ -126,6 +126,9 @@ Catalog::Catalog(PDFDoc *docA) {
   }
   optContentProps.free();
 
+  // actions
+  catDict.dictLookup("AA", &actions);
+
   // get the ViewerPreferences dictionary
   catDict.dictLookup("ViewerPreferences", &viewerPreferences);
   catDict.free();
@@ -181,6 +184,7 @@ Catalog::~Catalog() {
   outline.free();
   acroForm.free();
   viewerPreferences.free();
+  actions.free();
 #if MULTITHREADED
   gDestroyMutex(&mutex);
 #endif
diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index a89d9aa..216309b 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -202,6 +202,8 @@ public:
   PageMode getPageMode();
   PageLayout getPageLayout();
 
+  Object *getActions() { return &actions; }
+
 private:
 
   // Get page label info.
@@ -237,6 +239,7 @@ private:
   PageLabelInfo *pageLabelInfo; // info about page labels
   PageMode pageMode;		// page mode
   PageLayout pageLayout;	// page layout
+  Object actions;		// document additional actions (catalog AA entry)
 
   GBool cachePageTree(int page); // Cache first pages.
   Object *findDestInTree(Object *tree, GooString *name, Object *obj);
diff --git a/poppler/Form.cc b/poppler/Form.cc
index 78c25e3..3070927 100644
--- a/poppler/Form.cc
+++ b/poppler/Form.cc
@@ -156,6 +156,10 @@ LinkAction *FormWidget::getActivationAction() {
   return widget ? widget->getAction() : NULL;
 }
 
+LinkAction *FormWidget::getAdditionalAction(Annot::FormAdditionalActionsType type) {
+  return widget ? widget->getFormAdditionalAction(type) : NULL;
+}
+
 FormWidgetButton::FormWidgetButton (PDFDoc *docA, Object *aobj, unsigned num, Ref ref, FormField *p) :
 	FormWidget(docA, aobj, num, ref, p)
 {
diff --git a/poppler/Form.h b/poppler/Form.h
index ef67748..6cc54a9 100644
--- a/poppler/Form.h
+++ b/poppler/Form.h
@@ -21,6 +21,7 @@
 #endif
 
 #include "Object.h"
+#include "Annot.h"
 
 #include <set>
 
@@ -101,6 +102,7 @@ public:
   bool isReadOnly() const;
 
   LinkAction *getActivationAction();
+  LinkAction *getAdditionalAction(Annot::FormAdditionalActionsType type);
 
   // return the unique ID corresponding to pageNum/fieldNum
   static int encodeID (unsigned pageNum, unsigned fieldNum);
diff --git a/poppler/JSInfo.cc b/poppler/JSInfo.cc
new file mode 100644
index 0000000..012c7c0
--- /dev/null
+++ b/poppler/JSInfo.cc
@@ -0,0 +1,183 @@
+//========================================================================
+//
+// JSInfo.cc
+//
+// Copyright (C) 2013 Adrian Johnson
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+
+#include "config.h"
+#include "Object.h"
+#include "Dict.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "JSInfo.h"
+#include "Link.h"
+#include "Form.h"
+
+JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
+  doc = docA;
+  currentPage = firstPage + 1;
+}
+
+JSInfo::~JSInfo() {
+}
+
+void JSInfo::scanAction(Object* obj) {
+  Object obj2, obj3;
+
+  if (obj->isDict()) {
+    obj->dictLookup("S", &obj2);
+    if (obj2.isName("JavaScript")) {
+      hasJS = gTrue;
+    }
+    if (obj2.isName("Rendition")) {
+      obj->dictLookup("JS", &obj3); // JS is an entry of the action dict; obj2 is only the S name
+      if (obj3.isString() || obj3.isStream()) {
+        hasJS = gTrue;
+      }
+      obj3.free();
+    }
+    obj2.free();
+  }
+}
+
+void JSInfo::scanLinkAction(LinkAction *link) {
+  if (!link)
+    return;
+
+  if (link->getKind() == actionJavaScript) {
+    hasJS = gTrue;
+  }
+
+  if (link->getKind() == actionRendition) {
+    LinkRendition *linkr = static_cast<LinkRendition *>(link);
+    if (linkr->getScript())
+      hasJS = gTrue;
+  }
+}
+
+void JSInfo::scanJS(int nPages) {
+  Page *page;
+  Annots *annots;
+  Object obj1, obj2;
+  Object *actions;
+  int lastPage;
+
+  hasJS = gFalse;
+
+  // document actions
+  actions = doc->getCatalog()->getActions();
+  if (actions->isDict()) {
+    actions->getDict()->lookup("WC", &obj2);
+    scanAction(&obj2);
+    obj2.free();
+
+    actions->getDict()->lookup("WS", &obj2);
+    scanAction(&obj2);
+    obj2.free();
+
+    actions->getDict()->lookup("DS", &obj2);
+    scanAction(&obj2);
+    obj2.free();
+
+    actions->getDict()->lookup("WP", &obj2);
+    scanAction(&obj2);
+    obj2.free();
+
+    actions->getDict()->lookup("DP", &obj2);
+    scanAction(&obj2);
+    obj2.free();
+  }
+
+  // form field actions
+  if (doc->getCatalog()->getFormType() == Catalog::AcroForm) {
+    Form *form = doc->getCatalog()->getForm();
+    for (int i = 0; i < form->getNumFields(); i++) {
+      FormField *field = form->getRootField(i);
+      for (int j = 0; j < field->getNumWidgets(); j++) {
+        FormWidget *widget = field->getWidget(j);
+        scanLinkAction(widget->getActivationAction());
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFieldModified));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFormatField));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionValidateField));
+        scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField));
+      }
+    }
+  }
+
+  // scan pages
+
+  if (currentPage > doc->getNumPages()) {
+    return;
+  }
+
+  lastPage = currentPage + nPages;
+  if (lastPage > doc->getNumPages() + 1) {
+    lastPage = doc->getNumPages() + 1;
+  }
+
+  for (int pg = currentPage; pg < lastPage; ++pg) {
+    page = doc->getPage(pg);
+    if (!page) continue;
+
+    // page actions (open, close)
+    page->getActions(&obj1);
+    if (obj1.isDict()) {
+      obj1.getDict()->lookup("O", &obj2);
+      scanAction(&obj2);
+      obj2.free();
+
+      obj1.getDict()->lookup("C", &obj2);
+      scanAction(&obj2);
+      obj2.free();
+    }
+    obj1.free();
+
+    // annotation actions (links, screen, widget)
+    annots = page->getAnnots();
+    for (int i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getType() == Annot::typeLink) {
+        AnnotLink *annot = static_cast<AnnotLink *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+      } else if (annots->getAnnot(i)->getType() == Annot::typeScreen) {
+        AnnotScreen *annot = static_cast<AnnotScreen *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible));
+
+      } else if (annots->getAnnot(i)->getType() == Annot::typeWidget) {
+        AnnotWidget *annot = static_cast<AnnotWidget *>(annots->getAnnot(i));
+        scanLinkAction(annot->getAction());
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible));
+      }
+    }
+  }
+
+  currentPage = lastPage;
+}
+
+GBool JSInfo::containsJS() {
+  return hasJS;
+}
diff --git a/poppler/JSInfo.h b/poppler/JSInfo.h
new file mode 100644
index 0000000..a4c7f93
--- /dev/null
+++ b/poppler/JSInfo.h
@@ -0,0 +1,49 @@
+//========================================================================
+//
+// JSInfo.h
+//
+// Copyright (C) 2013 Adrian Johnson
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef JS_INFO_H
+#define JS_INFO_H
+
+#include "Object.h"
+#include "PDFDoc.h"
+#include "goo/gtypes.h"
+
+#include "Link.h"
+
+class PDFDoc;
+
+class JSInfo {
+public:
+
+  // Constructor. Page scanning starts at page number firstPage + 1.
+  JSInfo(PDFDoc *doc, int firstPage = 0);
+
+  // Destructor.
+  ~JSInfo();
+
+  // scan document actions, form field actions and the next nPages pages for JS
+  void scanJS(int nPages);
+
+  // return true if the most recent scanJS() call found JavaScript
+  GBool containsJS();
+
+private:
+
+  PDFDoc *doc;
+  int currentPage;
+  GBool hasJS;
+
+  void scanAction(Object *obj);
+  void scanLinkAction(LinkAction *link);
+
+};
+
+#endif
diff --git a/poppler/Makefile.am b/poppler/Makefile.am
index aa7c924..8629e6f 100644
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -190,6 +190,7 @@ poppler_include_HEADERS =	\
 	Hints.h			\
 	JArithmeticDecoder.h	\
 	JBIG2Stream.h		\
+	JSInfo.h		\
 	Lexer.h			\
 	Linearization.h 	\
 	Link.h			\
@@ -270,6 +271,7 @@ libpoppler_la_SOURCES =		\
 	Hints.cc 		\
 	JArithmeticDecoder.cc 	\
 	JBIG2Stream.cc 		\
+	JSInfo.cc		\
 	Lexer.cc 		\
 	Linearization.cc 	\
 	Link.cc 		\
diff --git a/poppler/Page.h b/poppler/Page.h
index 1c9d0a9..8aaee59 100644
--- a/poppler/Page.h
+++ b/poppler/Page.h
@@ -267,7 +267,7 @@ private:
   Object contents;		// page contents
   Object thumb;			// page thumbnail
   Object trans;			// page transition
-  Object actions;		// page addiction actions
+  Object actions;		// page additional actions
   double duration;              // page duration
   GBool ok;			// true if page is valid
 #if MULTITHREADED
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index a3ad1c3..134bd3f 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -48,6 +48,9 @@ tagged (yes/no)
 form (AcroForm / XFA / none)
 .RE
 .RS
+javascript (yes/no)
+.RE
+.RS
 page count
 .RE
 .RS
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index f297614..902200f 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -53,6 +53,7 @@
 #include "UTF.h"
 #include "Error.h"
 #include "DateInfo.h"
+#include "JSInfo.h"
 
 static void printInfoString(Dict *infoDict, const char *key, const char *text,
 			    UnicodeMap *uMap);
@@ -246,6 +247,13 @@ int main(int argc, char *argv[]) {
     break;
   }
 
+  // print javascript info
+  {
+    JSInfo jsInfo(doc, firstPage - 1);
+    jsInfo.scanJS(lastPage - firstPage + 1);
+    printf("JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no");
+  }
+
   // print page count
   printf("Pages: %d\n", doc->getNumPages());
 
-- 
1.7.10.4