From ebc74b179a06a45c0fac2eef008ff404f214bef8 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Mon, 4 Jul 2016 21:55:53 +0930 Subject: [PATCH] pdfinfo: Don't print pdf info when printing metadata, javascript, or structure Bug 96801 --- utils/pdfinfo.cc | 579 +++++++++++++++++++++++++++---------------------------- 1 file changed, 289 insertions(+), 290 deletions(-) diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc index b3e6513..bffd67f 100644 --- a/utils/pdfinfo.cc +++ b/utils/pdfinfo.cc @@ -61,13 +61,6 @@ #include "StructTreeRoot.h" #include "StructElement.h" -static void printInfoString(Dict *infoDict, const char *key, const char *text, - UnicodeMap *uMap); -static void printInfoDate(Dict *infoDict, const char *key, const char *text); -static void printISODate(Dict *infoDict, const char *key, const char *text); -static void printBox(const char *text, PDFRectangle *box); -static void printStruct(const StructElement *element, unsigned indent = 0); -static void printIndent(unsigned level); static int firstPage = 1; static int lastPage = 0; @@ -125,114 +118,184 @@ static const ArgDesc argDesc[] = { {NULL} }; -int main(int argc, char *argv[]) { - PDFDoc *doc; - GooString *fileName; - GooString *ownerPW, *userPW; - UnicodeMap *uMap; - Page *page; - Object info; - char buf[256]; - double w, h, wISO, hISO; - FILE *f; - GooString *metadata; - GBool ok; - int exitCode; - int pg, i; - GBool multiPage; - int r; - - exitCode = 99; +static void printInfoString(Dict *infoDict, const char *key, const char *text, + UnicodeMap *uMap) { + Object obj; + GooString *s1; + Unicode *u; + char buf[8]; + int i, n, len; - // parse args - ok = parseArgs(argDesc, &argc, argv); - if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) { - fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION); - fprintf(stderr, "%s\n", popplerCopyright); - fprintf(stderr, "%s\n", xpdfCopyright); - if (!printVersion) { - printUsage("pdfinfo", "", argDesc); + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text, stdout); + s1 = obj.getString(); + len = TextStringToUCS4(s1, &u); + for (i = 0; i < len; i++) { + n = uMap->mapUnicode(u[i], buf, sizeof(buf)); + fwrite(buf, 1, n, stdout); } - if (printVersion || printHelp) - exitCode = 0; - goto err0; + gfree(u); + fputc('\n', stdout); } + obj.free(); +} - if (printStructureText) - printStructure = gTrue; - - // read config file - globalParams = new GlobalParams(); +static void printInfoDate(Dict *infoDict, const char *key, const char *text) { + Object obj; + char *s; + int year, mon, day, hour, min, sec, tz_hour, tz_minute; + char tz; + struct tm tmStruct; + time_t time; + char buf[256]; - if (printEnc) { - printEncodings(); - delete globalParams; - exitCode = 0; - goto err0; + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text, stdout); + s = obj.getString()->getCString(); + // TODO do something with the timezone info + if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { + tmStruct.tm_year = year - 1900; + tmStruct.tm_mon = mon - 1; + tmStruct.tm_mday = day; + tmStruct.tm_hour = hour; + tmStruct.tm_min = min; + tmStruct.tm_sec = sec; + tmStruct.tm_wday = -1; + tmStruct.tm_yday = -1; + tmStruct.tm_isdst = -1; + // compute the tm_wday and tm_yday fields + time = timegm(&tmStruct); + if (time != (time_t)-1) { + int offset = (tz_hour*60 + tz_minute)*60; + if (tz == '-') + offset *= -1; + time -= offset; + localtime_r(&time, &tmStruct); + strftime(buf, sizeof(buf), "%c %Z", &tmStruct); + fputs(buf, stdout); + } else { + fputs(s, stdout); + } + } else { + fputs(s, stdout); + } + fputc('\n', stdout); } + obj.free(); +} - fileName = new GooString(argv[1]); - - if (textEncName[0]) { - globalParams->setTextEncoding(textEncName); - } +void printISODate(Dict *infoDict, const char *key, const char *text) +{ + Object obj; + char *s; + int year, mon, day, hour, min, sec, tz_hour, tz_minute; + char tz; - // get mapping to output encoding - if (!(uMap = globalParams->getTextEncoding())) { - error(errCommandLine, -1, "Couldn't get text encoding"); - delete fileName; - goto err1; + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text, stdout); + s = obj.getString()->getCString(); + if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { + fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec); + if (tz_hour == 0 && tz_minute == 0) { + fprintf(stdout, "Z"); + } else { + fprintf(stdout, "%c%02d", tz, tz_hour); + if (tz_minute) + fprintf(stdout, ":%02d", tz_minute); + } + } else { + fputs(s, stdout); + } + fputc('\n', stdout); } + obj.free(); +} - // open PDF file - if (ownerPassword[0] != '\001') { - ownerPW = new GooString(ownerPassword); - } else { - ownerPW = NULL; - } - if (userPassword[0] != '\001') { - userPW = new GooString(userPassword); - } else { - userPW = NULL; - } +static void printBox(const char *text, PDFRectangle *box) { + printf("%s%8.2f %8.2f %8.2f %8.2f\n", + text, box->x1, box->y1, box->x2, box->y2); +} - if (fileName->cmp("-") == 0) { - delete fileName; - fileName = new GooString("fd://0"); +static void printIndent(unsigned indent) { + while (indent--) { + putchar(' '); + putchar(' '); } +} - doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); - - if (userPW) { - delete userPW; +static void printAttribute(const Attribute *attribute, unsigned indent) +{ + printIndent(indent); + printf(" /%s ", attribute->getTypeName()); + if (attribute->getType() == Attribute::UserProperty) { + GooString *name = attribute->getName(); + printf("(%s) ", name->getCString()); + delete name; } - if (ownerPW) { - delete ownerPW; + attribute->getValue()->print(stdout); + if (attribute->getFormattedValue()) { + printf(" \"%s\"", attribute->getFormattedValue()); } - if (!doc->isOk()) { - exitCode = 1; - goto err2; + if (attribute->isHidden()) { + printf(" [hidden]"); } +} - // get page range - if (firstPage < 1) { - firstPage = 1; - } - if (lastPage == 0) { - multiPage = gFalse; - lastPage = 1; - } else { - multiPage = gTrue; +static void printStruct(const StructElement *element, unsigned indent) { + if (element->isObjectRef()) { + printIndent(indent); + printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen); + return; } - if (lastPage < 1 || lastPage > doc->getNumPages()) { - lastPage = doc->getNumPages(); + + if (printStructureText && element->isContent()) { + GooString *text = element->getText(gFalse); + printIndent(indent); + if (text) { + printf("\"%s\"\n", text->getCString()); + } else { + printf("(No content?)\n"); + } + delete text; } - if (lastPage < firstPage) { - error(errCommandLine, -1, - "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", - firstPage, lastPage); - goto err2; + + if (!element->isContent()) { + printIndent(indent); + printf("%s", element->getTypeName()); + if (element->getID()) { + printf(" <%s>", element->getID()->getCString()); + } + if (element->getTitle()) { + printf(" \"%s\"", element->getTitle()->getCString()); + } + if (element->getRevision() > 0) { + printf(" r%u", element->getRevision()); + } + if (element->isInline() || element->isBlock()) { + printf(" (%s)", element->isInline() ? "inline" : "block"); + } + if (element->getNumAttributes()) { + putchar(':'); + for (unsigned i = 0; i < element->getNumAttributes(); i++) { + putchar('\n'); + printAttribute(element->getAttribute(i), indent + 1); + } + } + + putchar('\n'); + for (unsigned i = 0; i < element->getNumChildren(); i++) { + printStruct(element->getChild(i), indent + 1); + } } +} + +void printInfo(PDFDoc *doc, UnicodeMap *uMap, long long filesize, GBool multiPage) { + Page *page; + Object info; + char buf[256]; + double w, h, wISO, hISO; + int pg, i; + int r; // print doc info doc->getDocInfo(&info); @@ -354,7 +417,7 @@ int main(int argc, char *argv[]) { } else { printf("Page rot: %d\n", r); } - } + } // print the boxes if (printBoxes) { @@ -391,232 +454,168 @@ int main(int argc, char *argv[]) { } // print file size -#ifdef VMS - f = fopen(fileName->getCString(), "rb", "ctx=stm"); -#else - f = fopen(fileName->getCString(), "rb"); -#endif - if (f) { - Gfseek(f, 0, SEEK_END); - printf("File size: %lld bytes\n", (long long)Gftell(f)); - fclose(f); - } + printf("File size: %lld bytes\n", filesize); // print linearization info printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no"); // print PDF version printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); +} - // print the metadata - if (printMetadata && (metadata = doc->readMetadata())) { - fputs("Metadata:\n", stdout); - fputs(metadata->getCString(), stdout); - fputc('\n', stdout); - delete metadata; - } - - // print javascript - if (printJS) { - JSInfo jsInfo(doc, firstPage - 1); - fputs("\n", stdout); - jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap); - } +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName; + GooString *ownerPW, *userPW; + UnicodeMap *uMap; + Object info; + FILE *f; + GBool ok; + int exitCode; + GBool multiPage; + + exitCode = 99; - // print the structure - const StructTreeRoot *structTree; - if (printStructure && (structTree = doc->getCatalog()->getStructTreeRoot())) { - fputs("Structure:\n", stdout); - for (unsigned i = 0; i < structTree->getNumChildren(); i++) { - printStruct(structTree->getChild(i), 1); + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) { + fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdfinfo", "", argDesc); } + if (printVersion || printHelp) + exitCode = 0; + goto err0; } - exitCode = 0; - - // clean up - err2: - uMap->decRefCnt(); - delete doc; - delete fileName; - err1: - delete globalParams; - err0: + if (printStructureText) + printStructure = gTrue; - // check for memory leaks - Object::memCheck(stderr); - gMemReport(stderr); + // read config file + globalParams = new GlobalParams(); - return exitCode; -} + if (printEnc) { + printEncodings(); + delete globalParams; + exitCode = 0; + goto err0; + } -static void printInfoString(Dict *infoDict, const char *key, const char *text, - UnicodeMap *uMap) { - Object obj; - GooString *s1; - Unicode *u; - char buf[8]; - int i, n, len; + fileName = new GooString(argv[1]); - if (infoDict->lookup(key, &obj)->isString()) { - fputs(text, stdout); - s1 = obj.getString(); - len = TextStringToUCS4(s1, &u); - for (i = 0; i < len; i++) { - n = uMap->mapUnicode(u[i], buf, sizeof(buf)); - fwrite(buf, 1, n, stdout); - } - gfree(u); - fputc('\n', stdout); + if (textEncName[0]) { + globalParams->setTextEncoding(textEncName); } - obj.free(); -} -static void printInfoDate(Dict *infoDict, const char *key, const char *text) { - Object obj; - char *s; - int year, mon, day, hour, min, sec, tz_hour, tz_minute; - char tz; - struct tm tmStruct; - time_t time; - char buf[256]; - - if (infoDict->lookup(key, &obj)->isString()) { - fputs(text, stdout); - s = obj.getString()->getCString(); - // TODO do something with the timezone info - if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { - tmStruct.tm_year = year - 1900; - tmStruct.tm_mon = mon - 1; - tmStruct.tm_mday = day; - tmStruct.tm_hour = hour; - tmStruct.tm_min = min; - tmStruct.tm_sec = sec; - tmStruct.tm_wday = -1; - tmStruct.tm_yday = -1; - tmStruct.tm_isdst = -1; - // compute the tm_wday and tm_yday fields - time = timegm(&tmStruct); - if (time != (time_t)-1) { - int offset = (tz_hour*60 + tz_minute)*60; - if (tz == '-') - offset *= -1; - time -= offset; - localtime_r(&time, &tmStruct); - strftime(buf, sizeof(buf), "%c %Z", &tmStruct); - fputs(buf, stdout); - } else { - fputs(s, stdout); - } - } else { - fputs(s, stdout); - } - fputc('\n', stdout); + // get mapping to output encoding + if (!(uMap = globalParams->getTextEncoding())) { + error(errCommandLine, -1, "Couldn't get text encoding"); + delete fileName; + goto err1; } - obj.free(); -} -void printISODate(Dict *infoDict, const char *key, const char *text) -{ - Object obj; - char *s; - int year, mon, day, hour, min, sec, tz_hour, tz_minute; - char tz; + // open PDF file + if (ownerPassword[0] != '\001') { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0] != '\001') { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } - if (infoDict->lookup(key, &obj)->isString()) { - fputs(text, stdout); - s = obj.getString()->getCString(); - if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { - fprintf(stdout, "%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec); - if (tz_hour == 0 && tz_minute == 0) { - fprintf(stdout, "Z"); - } else { - fprintf(stdout, "%c%02d", tz, tz_hour); - if (tz_minute) - fprintf(stdout, ":%02d", tz_minute); - } - } else { - fputs(s, stdout); - } - fputc('\n', stdout); + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); } - obj.free(); -} -static void printBox(const char *text, PDFRectangle *box) { - printf("%s%8.2f %8.2f %8.2f %8.2f\n", - text, box->x1, box->y1, box->x2, box->y2); -} + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); -static void printIndent(unsigned indent) { - while (indent--) { - putchar(' '); - putchar(' '); + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err2; } -} -static void printAttribute(const Attribute *attribute, unsigned indent) -{ - printIndent(indent); - printf(" /%s ", attribute->getTypeName()); - if (attribute->getType() == Attribute::UserProperty) { - GooString *name = attribute->getName(); - printf("(%s) ", name->getCString()); - delete name; + // get page range + if (firstPage < 1) { + firstPage = 1; } - attribute->getValue()->print(stdout); - if (attribute->getFormattedValue()) { - printf(" \"%s\"", attribute->getFormattedValue()); + if (lastPage == 0) { + multiPage = gFalse; + lastPage = 1; + } else { + multiPage = gTrue; } - if (attribute->isHidden()) { - printf(" [hidden]"); + if (lastPage < 1 || lastPage > doc->getNumPages()) { + lastPage = doc->getNumPages(); } -} - -static void printStruct(const StructElement *element, unsigned indent) { - if (element->isObjectRef()) { - printIndent(indent); - printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen); - return; + if (lastPage < firstPage) { + error(errCommandLine, -1, + "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", + firstPage, lastPage); + goto err2; } - if (printStructureText && element->isContent()) { - GooString *text = element->getText(gFalse); - printIndent(indent); - if (text) { - printf("\"%s\"\n", text->getCString()); - } else { - printf("(No content?)\n"); + if (printMetadata) { + // print the metadata + GooString *metadata = doc->readMetadata(); + if (metadata) { + fputs(metadata->getCString(), stdout); + fputc('\n', stdout); + delete metadata; } - delete text; - } - - if (!element->isContent()) { - printIndent(indent); - printf("%s", element->getTypeName()); - if (element->getID()) { - printf(" <%s>", element->getID()->getCString()); - } - if (element->getTitle()) { - printf(" \"%s\"", element->getTitle()->getCString()); - } - if (element->getRevision() > 0) { - printf(" r%u", element->getRevision()); - } - if (element->isInline() || element->isBlock()) { - printf(" (%s)", element->isInline() ? "inline" : "block"); - } - if (element->getNumAttributes()) { - putchar(':'); - for (unsigned i = 0; i < element->getNumAttributes(); i++) { - putchar('\n'); - printAttribute(element->getAttribute(i), indent + 1); - } + } else if (printJS) { + // print javascript + JSInfo jsInfo(doc, firstPage - 1); + jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap); + } else if (printStructure || printStructureText) { + // print structure + const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot(); + if (structTree) { + for (unsigned i = 0; i < structTree->getNumChildren(); i++) { + printStruct(structTree->getChild(i), 0); } + } + } else { + // print info + long long filesize = 0; - putchar('\n'); - for (unsigned i = 0; i < element->getNumChildren(); i++) { - printStruct(element->getChild(i), indent + 1); - } +#ifdef VMS + f = fopen(fileName->getCString(), "rb", "ctx=stm"); +#else + f = fopen(fileName->getCString(), "rb"); +#endif + if (f) { + Gfseek(f, 0, SEEK_END); + filesize = Gftell(f); + fclose(f); + } + printInfo(doc, uMap, filesize, multiPage); } + exitCode = 0; + + // clean up + err2: + uMap->decRefCnt(); + delete doc; + delete fileName; + err1: + delete globalParams; + err0: + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; } -- 2.1.4