From 4734b1a2002f4e20e1f126e7b83a0c9756cf118b Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Sat, 3 Aug 2013 10:28:20 +0930
Subject: [PATCH 2/2] Add pdfinfo option to print out javascript

---
Review notes (text between "---" and the diffstat is ignored by git-am):
- scanAction(): look up /JS in the action dictionary (obj) instead of in
  the /S name object, write to `file` only when printing was requested
  (scanJS(int) sets file to NULL), and keep the obj3.free() call.
- printJS(): ignore NULL strings and release the buffer allocated by
  TextStringToUCS4().
- scan(): delete the string returned by Catalog::getJS() and do not
  dereference a NULL name.
- Hunk ranges and the diffstat were recomputed by hand.  Leading
  whitespace of context lines is reconstructed and must be checked
  against the tree (apply with --ignore-whitespace if needed).  The
  blob ids on the JSInfo.cc "index" line were not regenerated.

 poppler/Catalog.h |    1 +
 poppler/JSInfo.cc |  176 +++++++++++++++++++++++++++++++++++++++--------------
 poppler/JSInfo.h  |   16 +++++-
 utils/pdfinfo.1   |    3 +
 utils/pdfinfo.cc  |   10 ++++
 5 files changed, 157 insertions(+), 49 deletions(-)

diff --git a/poppler/Catalog.h b/poppler/Catalog.h
index 216309b..583452f 100644
--- a/poppler/Catalog.h
+++ b/poppler/Catalog.h
@@ -155,6 +155,7 @@ public:
 
   // Get the i'th JavaScript script (at the Document level) in the document
   GooString *getJS(int i);
+  GooString *getJSName(int i) { return getJSNameTree()->getName(i); }
 
   // Convert between page indices and page labels.
   GBool labelToIndex(GooString *label, int *index);
diff --git a/poppler/JSInfo.cc b/poppler/JSInfo.cc
index 012c7c0..f5ad3c7 100644
--- a/poppler/JSInfo.cc
+++ b/poppler/JSInfo.cc
@@ -9,6 +9,7 @@
 //
 //========================================================================
 
+#include <stdio.h>
 #include "config.h"
 
 #include "Object.h"
@@ -18,6 +19,9 @@
 #include "JSInfo.h"
 #include "Link.h"
 #include "Form.h"
+#include "UnicodeMap.h"
+#include "UTF.h"
+#include "goo/gmem.h"
 
 JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
   doc = docA;
@@ -27,85 +31,165 @@ JSInfo::JSInfo(PDFDoc *docA, int firstPage) {
 JSInfo::~JSInfo() {
 }
 
-void JSInfo::scanAction(Object* obj) {
+// Write a PDF text string to `file`, converted with `uniMap`.
+void JSInfo::printJS(GooString *js) {
+  Unicode *u = NULL;
+  char buf[8];
+  int i, n, len;
+
+  if (!js || !js->getCString())
+    return;
+
+  len = TextStringToUCS4(js, &u);
+  for (i = 0; i < len; i++) {
+    n = uniMap->mapUnicode(u[i], buf, sizeof(buf));
+    fwrite(buf, 1, n, file);
+  }
+  // TextStringToUCS4() allocates the array; release it.
+  gfree(u);
+}
+
+// Record (and, when printing, dump) the script of a raw action dictionary.
+void JSInfo::scanAction(Object* obj, const char *action) {
   Object obj2, obj3;
 
   if (obj->isDict()) {
     obj->dictLookup("S", &obj2);
-    if (obj2.isName("JavaScript")) {
-      hasJS = gTrue;
-    }
-    if (obj2.isName("Rendition")) {
-      obj2.dictLookup("JS", &obj3);
+    if (obj2.isName("JavaScript") || obj2.isName("Rendition")) {
+      // JS is a key of the action dictionary; obj2 only holds the /S name.
+      obj->dictLookup("JS", &obj3);
       if (obj3.isString() || obj3.isStream()) {
         hasJS = gTrue;
+        // scanJS(int) leaves file == NULL, so only write when printing.
+        if (print) {
+          fprintf(file, "%s%s:\n", action,
+                  obj2.isName("Rendition") ? " (Rendition)" : "");
+          if (obj3.isString()) {
+            printJS(obj3.getString());
+          } else {
+            GooString s;
+            obj3.getStream()->fillGooString(&s);
+            printJS(&s);
+          }
+          fputs("\n\n", file);
+        }
       }
       obj3.free();
     }
     obj2.free();
   }
 }
 
-void JSInfo::scanLinkAction(LinkAction *link) {
+void JSInfo::scanLinkAction(LinkAction *link, const char *action) {
   if (!link)
     return;
 
   if (link->getKind() == actionJavaScript) {
     hasJS = gTrue;
+    if (print) {
+      LinkJavaScript *linkjs = static_cast<LinkJavaScript *>(link);
+      GooString *s = linkjs->getScript();
+      if (s && s->getCString()) {
+        fprintf(file, "%s:\n", action);
+        printJS(s);
+        fputs("\n\n", file);
+      }
+    }
   }
 
   if (link->getKind() == actionRendition) {
     LinkRendition *linkr = static_cast<LinkRendition *>(link);
-    if (linkr->getScript())
+    if (linkr->getScript()) {
       hasJS = gTrue;
+      if (print) {
+        GooString *s = linkr->getScript();
+        if (s && s->getCString()) {
+          fprintf(file, "%s (Rendition):\n", action);
+          printJS(s);
+          fputs("\n\n", file);
+        }
+      }
+    }
   }
 }
 
 void JSInfo::scanJS(int nPages) {
+  print = gFalse;
+  file = NULL;
+  scan(nPages);
+}
+
+void JSInfo::scanJS(int nPages, FILE *fout, UnicodeMap *uMap) {
+  print = gTrue;
+  file = fout;
+  uniMap = uMap;
+  scan(nPages);
+}
+
+void JSInfo::scan(int nPages) {
   Page *page;
   Annots *annots;
   Object obj1, obj2;
   Object *actions;
   int lastPage;
+  Catalog *catalog = doc->getCatalog();
 
   hasJS = gFalse;
 
+  // Names
+  int numNames = catalog->numJS();
+  if (numNames > 0) {
+    hasJS = gTrue;
+    if (print) {
+      for (int i = 0; i < numNames; i++) {
+        GooString *name = catalog->getJSName(i);
+        // getJS() returns a new string (or NULL) that the caller owns.
+        GooString *js = catalog->getJS(i);
+        fprintf(file, "Name Dictionary \"%s\":\n",
+                name ? name->getCString() : "");
+        printJS(js);
+        fputs("\n\n", file);
+        delete js;
+      }
+    }
+  }
+
   // document actions
-  actions = doc->getCatalog()->getActions();
+  actions = catalog->getActions();
   if (actions->isDict()) {
     actions->getDict()->lookup("WC", &obj2);
-    scanAction(&obj2);
+    scanAction(&obj2, "Before Close Document");
     obj2.free();
 
     actions->getDict()->lookup("WS", &obj2);
-    scanAction(&obj2);
+    scanAction(&obj2, "Before Save Document");
     obj2.free();
 
     actions->getDict()->lookup("DS", &obj2);
-    scanAction(&obj2);
+    scanAction(&obj2, "After Save Document");
     obj2.free();
 
     actions->getDict()->lookup("WP", &obj2);
-    scanAction(&obj2);
+    scanAction(&obj2, "Before Print Document");
     obj2.free();
 
     actions->getDict()->lookup("DP", &obj2);
-    scanAction(&obj2);
+    scanAction(&obj2, "After Print Document");
     obj2.free();
   }
 
   // form field actions
-  if (doc->getCatalog()->getFormType() == Catalog::AcroForm) {
-    Form *form = doc->getCatalog()->getForm();
+  if (catalog->getFormType() == Catalog::AcroForm) {
+    Form *form = catalog->getForm();
     for (int i = 0; i < form->getNumFields(); i++) {
       FormField *field = form->getRootField(i);
       for (int j = 0; j < field->getNumWidgets(); j++) {
         FormWidget *widget = field->getWidget(j);
-        scanLinkAction(widget->getActivationAction());
-        scanLinkAction(widget->getAdditionalAction(Annot::actionFieldModified));
-        scanLinkAction(widget->getAdditionalAction(Annot::actionFormatField));
-        scanLinkAction(widget->getAdditionalAction(Annot::actionValidateField));
-        scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField));
+        scanLinkAction(widget->getActivationAction(), "Field Activated");
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFieldModified), "Field Modified");
+        scanLinkAction(widget->getAdditionalAction(Annot::actionFormatField), "Format Field");
+        scanLinkAction(widget->getAdditionalAction(Annot::actionValidateField), "Validate Field");
+        scanLinkAction(widget->getAdditionalAction(Annot::actionCalculateField), "Calculate Field");
       }
     }
   }
@@ -129,11 +213,11 @@ void JSInfo::scanJS(int nPages) {
     page->getActions(&obj1);
     if (obj1.isDict()) {
       obj1.getDict()->lookup("O", &obj2);
-      scanAction(&obj2);
+      scanAction(&obj2, "Page Open");
       obj2.free();
 
       obj1.getDict()->lookup("C", &obj2);
-      scanAction(&obj2);
+      scanAction(&obj2, "Page Close");
       obj2.free();
     }
     obj1.free();
@@ -143,34 +227,34 @@ void JSInfo::scanJS(int nPages) {
     for (int i = 0; i < annots->getNumAnnots(); ++i) {
       if (annots->getAnnot(i)->getType() == Annot::typeLink) {
         AnnotLink *annot = static_cast<AnnotLink *>(annots->getAnnot(i));
-        scanLinkAction(annot->getAction());
+        scanLinkAction(annot->getAction(), "Link Annotation Activated");
       } else if (annots->getAnnot(i)->getType() == Annot::typeScreen) {
         AnnotScreen *annot = static_cast<AnnotScreen *>(annots->getAnnot(i));
-        scanLinkAction(annot->getAction());
-        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAction(), "Screen Annotation Activated");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering), "Screen Annotation Cursor Enter");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving), "Screen Annotation Cursor Leave");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed), "Screen Annotation Mouse Pressed");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased), "Screen Annotation Mouse Released");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn), "Screen Annotation Focus In");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut), "Screen Annotation Focus Out");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening), "Screen Annotation Page Open");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing), "Screen Annotation Page Close");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible), "Screen Annotation Page Visible");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible), "Screen Annotation Page Invisible");
 
       } else if (annots->getAnnot(i)->getType() == Annot::typeWidget) {
         AnnotWidget *annot = static_cast<AnnotWidget *>(annots->getAnnot(i));
-        scanLinkAction(annot->getAction());
-        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
-        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible));
+        scanLinkAction(annot->getAction(), "Widget Annotation Activated");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorEntering), "Widget Annotation Cursor Enter");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionCursorLeaving), "Widget Annotation Cursor Leave");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMousePressed), "Widget Annotation Mouse Pressed");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionMouseReleased), "Widget Annotation Mouse Released");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusIn), "Widget Annotation Focus In");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionFocusOut), "Widget Annotation Focus Out");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageOpening), "Widget Annotation Page Open");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageClosing), "Widget Annotation Page Close");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageVisible), "Widget Annotation Page Visible");
+        scanLinkAction(annot->getAdditionalAction(Annot::actionPageInvisible), "Widget Annotation Page Invisible");
       }
     }
   }
diff --git a/poppler/JSInfo.h b/poppler/JSInfo.h
index a4c7f93..d0e10d6 100644
--- a/poppler/JSInfo.h
+++ b/poppler/JSInfo.h
@@ -12,11 +12,13 @@
 #ifndef JS_INFO_H
 #define JS_INFO_H
 
+#include <stdio.h>
 #include "Object.h"
 #include "PDFDoc.h"
 #include "goo/gtypes.h"
 
 #include "Link.h"
+#include "UnicodeMap.h"
 
 class PDFDoc;
 
@@ -32,6 +34,9 @@ public:
   // scan for JS in the PDF
   void scanJS(int nPages);
 
+  // scan and print JS in the PDF
+  void scanJS(int nPages, FILE *fout, UnicodeMap *uMap);
+
   // return true if PDF contains JavaScript
   GBool containsJS();
 
@@ -40,9 +45,14 @@ private:
   PDFDoc *doc;
   int currentPage;
   GBool hasJS;
-
-  void scanAction(Object *obj);
-  void scanLinkAction(LinkAction *link);
+  GBool print;
+  FILE *file;
+  UnicodeMap *uniMap;
+
+  void scan(int nPages);
+  void scanAction(Object *obj, const char *action);
+  void scanLinkAction(LinkAction *link, const char *action);
+  void printJS(GooString *js);
 
 };
 
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
index 134bd3f..cc967b2 100644
--- a/utils/pdfinfo.1
+++ b/utils/pdfinfo.1
@@ -93,6 +93,9 @@ TrimBox, and ArtBox.
 Prints document-level metadata.  (This is the "Metadata" stream from
 the PDF file's Catalog object.)
 .TP
+.B \-js
+Prints all JavaScript in the PDF.
+.TP
 .B \-rawdates
 Prints the raw (undecoded) date strings, directly from the PDF file.
 .TP
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index 902200f..44773a3 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -64,6 +64,7 @@ static int firstPage = 1;
 static int lastPage = 0;
 static GBool printBoxes = gFalse;
 static GBool printMetadata = gFalse;
+static GBool printJS = gFalse;
 static GBool rawDates = gFalse;
 static char textEncName[128] = "";
 static char ownerPassword[33] = "\001";
@@ -81,6 +82,8 @@ static const ArgDesc argDesc[] = {
    "print the page bounding boxes"},
   {"-meta",   argFlag,     &printMetadata,    0,
    "print the document metadata (XML)"},
+  {"-js",     argFlag,     &printJS,          0,
+   "print all JavaScript in the PDF"},
   {"-rawdates", argFlag,   &rawDates,         0,
    "print the undecoded date strings directly from the PDF file"},
   {"-enc",    argString,   textEncName,    sizeof(textEncName),
@@ -383,6 +386,13 @@ int main(int argc, char *argv[]) {
     delete metadata;
   }
 
+  // print javascript
+  if (printJS) {
+    JSInfo jsInfo(doc, firstPage - 1);
+    fputs("\n", stdout);
+    jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap);
+  }
+
   exitCode = 0;
 
   // clean up
-- 
1.7.10.4