From 6b46928977d569237af2a3672ecd7ec39b7291a7 Mon Sep 17 00:00:00 2001
From: Jakub Kucharski
Date: Mon, 8 Feb 2016 20:48:37 +0100
Subject: [PATCH] Added DocInfo setters & getters

---
 poppler/Object.h  |  7 ++++-
 poppler/PDFDoc.cc | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 poppler/PDFDoc.h  | 42 +++++++++++++++++++++++++++++
 poppler/XRef.cc   | 65 ++++++++++++++++++++++++++++++++++++++++++++
 poppler/XRef.h    | 15 +++++++++++
 5 files changed, 205 insertions(+), 4 deletions(-)

diff --git a/poppler/Object.h b/poppler/Object.h
index 1b06a3c..35eb0f8 100644
--- a/poppler/Object.h
+++ b/poppler/Object.h
@@ -21,6 +21,7 @@
 // Copyright (C) 2013 Thomas Freitag
 // Copyright (C) 2013 Adrian Johnson
 // Copyright (C) 2013 Adrian Perez de Castro
+// Copyright (C) 2016 Jakub Kucharski
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -225,6 +226,7 @@ public:
   int dictGetLength();
   void dictAdd(char *key, Object *val);
   void dictSet(const char *key, Object *val);
+  void dictRemove(const char *key);
   GBool dictIs(const char *dictType);
   Object *dictLookup(const char *key, Object *obj, int recursion = 0);
   Object *dictLookupNF(const char *key, Object *obj);
@@ -308,7 +310,10 @@ inline void Object::dictAdd(char *key, Object *val)
   { OBJECT_TYPE_CHECK(objDict); dict->add(key, val); }
 
 inline void Object::dictSet(const char *key, Object *val)
- { OBJECT_TYPE_CHECK(objDict); dict->set(key, val); }
+  { OBJECT_TYPE_CHECK(objDict); dict->set(key, val); }
+
+inline void Object::dictRemove(const char *key)
+  { OBJECT_TYPE_CHECK(objDict); dict->remove(key); }
 
 inline GBool Object::dictIs(const char *dictType)
   { OBJECT_TYPE_CHECK(objDict); return dict->is(dictType); }
diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc
index 46c4544..4543ab0 100644
--- a/poppler/PDFDoc.cc
+++ b/poppler/PDFDoc.cc
@@ -32,6 +32,7 @@
 // Copyright (C) 2013 Adam Reichold
 // Copyright (C) 2014 Bogdan Cristea
 // Copyright (C) 2015 Li Junling
+// Copyright (C) 2016 Jakub Kucharski
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -537,6 +538,80 @@ GBool PDFDoc::isLinearized(GBool tryingToReconstruct) {
   }
 }
 
+void PDFDoc::setDocInfoModified(Object *infoObj)
+{
+  Object infoObjRef;
+  getDocInfoNF(&infoObjRef);
+  xref->setModifiedObject(infoObj, infoObjRef.getRef());
+  infoObjRef.free();
+}
+
+void PDFDoc::setDocInfoStringEntry(const char *key, GooString *value)
+{
+  GBool removeEntry = !value || value->getLength() == 0;
+  if (removeEntry) {
+    // We own value, and on removal it is never stored in the dictionary:
+    // release it here so that no removal path leaks it.
+    delete value;
+  }
+
+  Object infoObj;
+  getDocInfo(&infoObj);
+
+  if (infoObj.isNull() && removeEntry) {
+    // No info dictionary, so no entry to remove.
+    return;
+  }
+
+  createDocInfoIfNoneExists(&infoObj);
+
+  Object gooStrObj;
+  if (removeEntry) {
+    gooStrObj.initNull();
+  } else {
+    gooStrObj.initString(value);
+  }
+
+  // gooStrObj is set to value or null by now. The latter will cause a removal.
+  infoObj.dictSet(key, &gooStrObj);
+
+  if (infoObj.dictGetLength() == 0) {
+    // Info dictionary is empty. Remove it altogether.
+    removeDocInfo();
+  } else {
+    setDocInfoModified(&infoObj);
+  }
+
+  infoObj.free();
+}
+
+GooString *PDFDoc::getDocInfoStringEntry(const char *key) {
+  Object infoObj;
+  getDocInfo(&infoObj);
+
+  if (!infoObj.isDict()) {
+    // No usable Info dictionary: dictLookup() requires a dict object.
+    infoObj.free();
+    return NULL;
+  }
+
+  Object entryObj;
+  infoObj.dictLookup(key, &entryObj);
+
+  GooString *result;
+
+  if (entryObj.isString()) {
+    result = new GooString(entryObj.getString());
+  } else {
+    result = NULL;
+  }
+
+  entryObj.free();
+  infoObj.free();
+
+  return result;
+}
+
 static GBool get_id (GooString *encodedidstring, GooString *id)
 {
   const char *encodedid = encodedidstring->getCString();
@@ -797,11 +872,10 @@ int PDFDoc::saveAs(GooString *name, PDFWriteMode mode) {
 int PDFDoc::saveAs(OutStream *outStr, PDFWriteMode mode) {
 
   // find if we have updated objects
-  GBool updated = gFalse;
-  for(int i=0; i<xref->getNumObjects(); i++) {
+  GBool updated = xref->isTrailerDictModified();
+  for(int i=0; !updated && i<xref->getNumObjects(); i++) {
     if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) {
       updated = gTrue;
-      break;
     }
   }
 
diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h
index 6c40f7b..c223f73 100644
--- a/poppler/PDFDoc.h
+++ b/poppler/PDFDoc.h
@@ -27,6 +27,7 @@
 // Copyright (C) 2013 Adrian Johnson
 // Copyright (C) 2013 Adam Reichold
 // Copyright (C) 2013 Adrian Perez de Castro
+// Copyright (C) 2016 Jakub Kucharski
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -225,6 +226,44 @@ public:
   Object *getDocInfo(Object *obj) { return xref->getDocInfo(obj); }
   Object *getDocInfoNF(Object *obj) { return xref->getDocInfoNF(obj); }
 
+  // Create and return the document's Info dictionary if none exists.
+  // Otherwise return the existing one.
+  Object *createDocInfoIfNoneExists(Object *obj) { return xref->createDocInfoIfNoneExists(obj); }
+  Object *createDocInfoIfNoneExistsNF(Object *obj) { return xref->createDocInfoIfNoneExistsNF(obj); }
+
+  // Remove the document's Info dictionary and update the trailer dictionary.
+  void removeDocInfo() { xref->removeDocInfo(); }
+
+  // Set doc info string entry. NULL or empty value will cause a removal.
+  // Takes ownership of value.
+  void setDocInfoStringEntry(const char *key, GooString *value);
+
+  // Set document's properties in document's Info dictionary.
+  // NULL or empty value will cause a removal.
+  // Takes ownership of value.
+  void setDocInfoTitle(GooString *title) { setDocInfoStringEntry("Title", title); }
+  void setDocInfoAuthor(GooString *author) { setDocInfoStringEntry("Author", author); }
+  void setDocInfoSubject(GooString *subject) { setDocInfoStringEntry("Subject", subject); }
+  void setDocInfoKeywords(GooString *keywords) { setDocInfoStringEntry("Keywords", keywords); }
+  void setDocInfoCreator(GooString *creator) { setDocInfoStringEntry("Creator", creator); }
+  void setDocInfoProducer(GooString *producer) { setDocInfoStringEntry("Producer", producer); }
+  void setDocInfoCreatDate(GooString *creatDate) { setDocInfoStringEntry("CreationDate", creatDate); }
+  void setDocInfoModDate(GooString *modDate) { setDocInfoStringEntry("ModDate", modDate); }
+
+  // Get document's properties from document's Info dictionary.
+  // Returns NULL on fail.
+  // Returned GooStrings should be freed by the caller.
+  GooString *getDocInfoStringEntry(const char *key);
+
+  GooString *getDocInfoTitle() { return getDocInfoStringEntry("Title"); }
+  GooString *getDocInfoAuthor() { return getDocInfoStringEntry("Author"); }
+  GooString *getDocInfoSubject() { return getDocInfoStringEntry("Subject"); }
+  GooString *getDocInfoKeywords() { return getDocInfoStringEntry("Keywords"); }
+  GooString *getDocInfoCreator() { return getDocInfoStringEntry("Creator"); }
+  GooString *getDocInfoProducer() { return getDocInfoStringEntry("Producer"); }
+  GooString *getDocInfoCreatDate() { return getDocInfoStringEntry("CreationDate"); }
+  GooString *getDocInfoModDate() { return getDocInfoStringEntry("ModDate"); }
+
   // Return the PDF version specified by the file.
   int getPDFMajorVersion() { return pdfMajorVersion; }
   int getPDFMinorVersion() { return pdfMinorVersion; }
@@ -309,6 +348,9 @@ private:
   Goffset getMainXRefEntriesOffset(GBool tryingToReconstruct = gFalse);
   long long strToLongLong(char *s);
 
+  // Mark the document's Info dictionary as modified.
+  void setDocInfoModified(Object *infoObj);
+
   GooString *fileName;
 #ifdef _WIN32
   wchar_t *fileNameU;
diff --git a/poppler/XRef.cc b/poppler/XRef.cc
index 1bbbdc5..86677ce 100644
--- a/poppler/XRef.cc
+++ b/poppler/XRef.cc
@@ -24,6 +24,7 @@
 // Copyright (C) 2012, 2013 Fabio D'Urso
 // Copyright (C) 2013, 2014 Adrian Johnson
 // Copyright (C) 2013 Pino Toscano
+// Copyright (C) 2016 Jakub Kucharski
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -286,6 +287,7 @@ void XRef::init() {
   entries = NULL;
   capacity = 0;
   size = 0;
+  trailerDictModified = gFalse;
   streamEnds = NULL;
   streamEndsLen = 0;
   objStrs = new PopplerCache(5);
@@ -1273,6 +1275,69 @@ Object *XRef::getDocInfoNF(Object *obj) {
   return trailerDict.dictLookupNF("Info", obj);
 }
 
+Object *XRef::createDocInfoIfNoneExists(Object *obj) {
+  getDocInfo(obj);
+
+  if (!obj->isNull()) {
+    return obj;
+  }
+
+  obj->initDict(this);
+
+  Ref ref = addIndirectObject(obj);
+
+  Object objRef;
+  objRef.initRef(ref.num, ref.gen);
+
+  trailerDict.dictSet("Info", &objRef);
+  setTrailerDictModified();
+
+  objRef.free();
+
+  return obj;
+}
+
+Object *XRef::createDocInfoIfNoneExistsNF(Object *obj) {
+  getDocInfoNF(obj);
+
+  if (!obj->isNull()) {
+    return obj;
+  }
+
+  Object infoObj;
+  infoObj.initDict(this);
+
+  Ref ref = addIndirectObject(&infoObj);
+  infoObj.free();
+
+  Object objRef;
+  objRef.initRef(ref.num, ref.gen);
+
+  trailerDict.dictSet("Info", &objRef);
+  setTrailerDictModified();
+
+  *obj = objRef;
+  objRef.free();
+
+  return obj;
+}
+
+void XRef::removeDocInfo() {
+  Object infoObjRef;
+  getDocInfoNF(&infoObjRef);
+
+  if (infoObjRef.isNull()) {
+    // The trailer has no Info entry, so there is nothing to remove.
+    return;
+  }
+
+  trailerDict.dictRemove("Info");
+  setTrailerDictModified();
+
+  removeIndirectObject(infoObjRef.getRef());
+  infoObjRef.free();
+}
+
 GBool XRef::getStreamEnd(Goffset streamStart, Goffset *streamEnd) {
   int a, b, m;
 
diff --git a/poppler/XRef.h b/poppler/XRef.h
index 70065d8..7a1cbdf 100644
--- a/poppler/XRef.h
+++ b/poppler/XRef.h
@@ -22,6 +22,7 @@
 // Copyright (C) 2012, 2013 Thomas Freitag
 // Copyright (C) 2012, 2013 Fabio D'Urso
 // Copyright (C) 2013 Adrian Johnson
+// Copyright (C) 2016 Jakub Kucharski
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
@@ -149,6 +150,14 @@ public:
   Object *getDocInfo(Object *obj);
   Object *getDocInfoNF(Object *obj);
 
+  // Create and return the document's Info dictionary if none exists.
+  // Otherwise return the existing one.
+  Object *createDocInfoIfNoneExists(Object *obj);
+  Object *createDocInfoIfNoneExistsNF(Object *obj);
+
+  // Remove the document's Info dictionary and update the trailer dictionary.
+  void removeDocInfo();
+
   // Return the number of objects in the xref table.
   int getNumObjects() { return size; }
 
@@ -175,6 +184,11 @@ public:
   XRefEntry *getEntry(int i, GBool complainIfMissing = gTrue);
   Object *getTrailerDict() { return &trailerDict; }
 
+  // Was the document's trailer dictionary modified?
+  GBool isTrailerDictModified() { return trailerDictModified; }
+  // Set the modification flag for the document's trailer dictionary true.
+  void setTrailerDictModified() { trailerDictModified = gTrue; }
+
   // Write access
   void setModifiedObject(Object* o, Ref r);
   Ref addIndirectObject (Object* o);
@@ -202,6 +216,7 @@ private:
   GBool ok;			// true if xref table is valid
   int errCode;			// error code (if <ok> is false)
   Object trailerDict;		// trailer dictionary
+  GBool trailerDictModified;
   Goffset *streamEnds;		// 'endstream' positions - only used in
 				//   damaged files
   int streamEndsLen;		// number of valid entries in streamEnds
-- 
2.7.1