diff --git poppler/PDFDoc.cc poppler/PDFDoc.cc index 631f9a70..3a7bd3e5 100644 --- poppler/PDFDoc.cc +++ poppler/PDFDoc.cc @@ -984,49 +984,52 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr) XRef *uxref; int c; //copy the original file BaseStream *copyStr = str->copy(); copyStr->reset(); while ((c = copyStr->getChar()) != EOF) { outStr->put(c); } copyStr->close(); delete copyStr; Guchar *fileKey; CryptAlgorithm encAlgorithm; int keyLength; xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength); uxref = new XRef(); uxref->add(0, 65535, 0, gFalse); xref->lock(); for(int i=0; igetNumObjects(); i++) { - if ((xref->getEntry(i)->type == xrefEntryFree) && - (xref->getEntry(i)->gen == 0)) //we skip the irrelevant free objects + XRefEntry *entry = xref->getEntry(i, DontLoadIfMissing); + if (entry->type == xrefEntryNone || + ((entry->type == xrefEntryFree) && (entry->gen == 0))) { + //we skip irrelevant objects continue; + } - if (xref->getEntry(i)->getFlag(XRefEntry::Updated)) { //we have an updated object + if (entry->getFlag(XRefEntry::Updated)) { //we have an updated object Ref ref; ref.num = i; - ref.gen = xref->getEntry(i)->type == xrefEntryCompressed ? 0 : xref->getEntry(i)->gen; - if (xref->getEntry(i)->type != xrefEntryFree) { + ref.gen = entry->type == xrefEntryCompressed ? 0 : entry->gen; + if (entry->type != xrefEntryFree) { Object obj1 = xref->fetch(ref.num, ref.gen, 1); Goffset offset = writeObjectHeader(&ref, outStr); writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen); writeObjectFooter(outStr); uxref->add(ref.num, ref.gen, offset, gTrue); } else { uxref->add(ref.num, ref.gen, 0, gFalse); } } } xref->unlock(); // because of "uxref->add(0, 65535, 0, gFalse);" uxref->getNumObjects() will // always be >= 1; if it is 1, it means there is nothing to update if (uxref->getNumObjects() == 1) { delete uxref; return; } Goffset uxrefOffset = outStr->getPos(); int numobjects = xref->getNumObjects(); @@ -1055,67 +1058,69 @@ void PDFDoc::saveIncrementalUpdate (OutStream* outStr) delete uxref; } void PDFDoc::saveCompleteRewrite (OutStream* outStr) { // Make sure that special flags are set, because we are going to read // all objects, including Unencrypted ones. xref->scanSpecialFlags(); Guchar *fileKey; CryptAlgorithm encAlgorithm; int keyLength; xref->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength); outStr->printf("%%PDF-%d.%d\r\n",pdfMajorVersion,pdfMinorVersion); XRef *uxref = new XRef(); uxref->add(0, 65535, 0, gFalse); xref->lock(); for(int i=0; igetNumObjects(); i++) { Ref ref; - XRefEntryType type = xref->getEntry(i)->type; - if (type == xrefEntryFree) { + XRefEntry *entry = xref->getEntry(i, DontLoadIfMissing); + if (entry->type == xrefEntryNone) + continue; + if (entry->type == xrefEntryFree) { ref.num = i; - ref.gen = xref->getEntry(i)->gen; + ref.gen = entry->gen; /* the XRef class adds a lot of irrelevant free entries, we only want the significant one and we don't want the one with num=0 because it has already been added (gen = 65535)*/ if (ref.gen > 0 && ref.num > 0) uxref->add(ref.num, ref.gen, 0, gFalse); - } else if (xref->getEntry(i)->getFlag(XRefEntry::DontRewrite)) { + } else if (entry->getFlag(XRefEntry::DontRewrite)) { // This entry must not be written, put a free entry instead (with incremented gen) ref.num = i; - ref.gen = xref->getEntry(i)->gen + 1; + ref.gen = entry->gen + 1; uxref->add(ref.num, ref.gen, 0, gFalse); - } else if (type == xrefEntryUncompressed){ + } else if (entry->type == xrefEntryUncompressed){ ref.num = i; - ref.gen = xref->getEntry(i)->gen; + ref.gen = entry->gen; Object obj1 = xref->fetch(ref.num, ref.gen, 1); Goffset offset = writeObjectHeader(&ref, outStr); // Write unencrypted objects in unencrypted form - if (xref->getEntry(i)->getFlag(XRefEntry::Unencrypted)) { + if (entry->getFlag(XRefEntry::Unencrypted)) { writeObject(&obj1, outStr, nullptr, cryptRC4, 0, 0, 0); } else { writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen); } writeObjectFooter(outStr); uxref->add(ref.num, ref.gen, offset, gTrue); - } else if (type == xrefEntryCompressed) { + } else if (entry->type == xrefEntryCompressed) { ref.num = i; ref.gen = 0; //compressed entries have gen == 0 Object obj1 = xref->fetch(ref.num, ref.gen, 1); Goffset offset = writeObjectHeader(&ref, outStr); writeObject(&obj1, outStr, fileKey, encAlgorithm, keyLength, ref.num, ref.gen); writeObjectFooter(outStr); uxref->add(ref.num, ref.gen, offset, gTrue); } } xref->unlock(); Goffset uxrefOffset = outStr->getPos(); writeXRefTableTrailer(uxrefOffset, uxref, gTrue /* write all entries */, uxref->getNumObjects(), outStr, gFalse /* complete rewrite */); delete uxref; } void PDFDoc::writeDictionnary (Dict* dict, OutStream* outStr, XRef *xRef, Guint numOffset, Guchar *fileKey, CryptAlgorithm encAlgorithm, int keyLength, int objNum, int objGen, std::set *alreadyWrittenDicts) { bool deleteSet = false; diff --git poppler/XRef.cc poppler/XRef.cc index 6eadc0a1..643cf0b1 100644 --- poppler/XRef.cc +++ poppler/XRef.cc @@ -1299,41 +1299,41 @@ GBool XRef::getStreamEnd(Goffset streamStart, Goffset *streamEnd) { m = (a + b) / 2; if (streamStart <= streamEnds[m]) { b = m; } else { a = m; } } *streamEnd = streamEnds[b]; return gTrue; } int XRef::getNumEntry(Goffset offset) { if (size > 0) { int res = 0; Goffset resOffset = getEntry(0)->offset; XRefEntry *e; for (int i = 1; i < size; ++i) { - e = getEntry(i, gFalse); + e = getEntry(i, LoadSilentlyIfMissing); if (e->type != xrefEntryFree && e->offset < offset && e->offset >= resOffset) { res = i; resOffset = e->offset; } } return res; } else return -1; } void XRef::add(int num, int gen, Goffset offs, GBool used) { xrefLocker(); if (num >= size) { if (num >= capacity) { entries = (XRefEntry *)greallocn(entries, num + 1, sizeof(XRefEntry)); capacity = num + 1; } for (int i = size; i < num + 1; ++i) { entries[i].offset = -1; @@ -1355,41 +1355,41 @@ void XRef::add(int num, int gen, Goffset offs, GBool used) { e->type = xrefEntryFree; e->offset = 0; } } void XRef::setModifiedObject (const Object* o, Ref r) { xrefLocker(); if (r.num < 0 || r.num >= size) { error(errInternal, -1,"XRef::setModifiedObject on unknown ref: {0:d}, {1:d}\n", r.num, r.gen); return; } XRefEntry *e = getEntry(r.num); e->obj = o->copy(); e->setFlag(XRefEntry::Updated, gTrue); setModified(); } Ref XRef::addIndirectObject (const Object *o) { int entryIndexToUse = -1; for (int i = 1; entryIndexToUse == -1 && i < size; ++i) { - XRefEntry *e = getEntry(i, false /* complainIfMissing */); + XRefEntry *e = getEntry(i, LoadSilentlyIfMissing); if (e->type == xrefEntryFree && e->gen != 65535) { entryIndexToUse = i; } } XRefEntry *e; if (entryIndexToUse == -1) { entryIndexToUse = size; add(entryIndexToUse, 0, 0, gFalse); e = getEntry(entryIndexToUse); } else { //reuse a free entry e = getEntry(entryIndexToUse); //we don't touch gen number, because it should have been //incremented when the object was deleted } e->type = xrefEntryUncompressed; e->obj = o->copy(); e->setFlag(XRefEntry::Updated, gTrue); setModified(); @@ -1594,65 +1594,65 @@ void XRef::readXRefUntil(int untilEntryNum, std::vector *xrefStreamObjsNum) followedPrev.push_back (prevXRefOffset); std::vector followedXRefStm; if (!readXRef(&prevXRefOffset, &followedXRefStm, xrefStreamObjsNum)) { prevXRefOffset = 0; } // if there was a problem with the xref table, or we haven't found the entry // we were looking for, try to reconstruct the xref if (!ok || (!prevXRefOffset && untilEntryNum != -1 && entries[untilEntryNum].type == xrefEntryNone)) { GBool wasReconstructed = false; if (!(ok = constructXRef(&wasReconstructed))) { errCode = errDamaged; break; } break; } } } -XRefEntry *XRef::getEntry(int i, GBool complainIfMissing) +XRefEntry *XRef::getEntry(int i, GetEntryOption option) { + static XRefEntry dummy = { 0, -1, xrefEntryNone, 0, Object() }; + if (i >= size || entries[i].type == xrefEntryNone) { + if (option == DontLoadIfMissing) + return &dummy; + if ((!xRefStream) && mainXRefEntriesOffset) { if (!parseEntry(mainXRefEntriesOffset + 20*i, &entries[i])) { error(errSyntaxError, -1, "Failed to parse XRef entry [{0:d}].", i); } } else { // Read XRef tables until the entry we're looking for is found readXRefUntil(i); // We might have reconstructed the xref // Check again i is in bounds if (unlikely(i >= size)) { - static XRefEntry dummy; - dummy.offset = 0; - dummy.gen = -1; - dummy.type = xrefEntryNone; - dummy.flags = 0; return &dummy; } if (entries[i].type == xrefEntryNone) { - if (complainIfMissing) { + if (option == LoadAndComplainIfMissing) { error(errSyntaxError, -1, "Invalid XRef entry"); } entries[i].type = xrefEntryFree; } } } return &entries[i]; } // Recursively sets the Unencrypted flag in all referenced xref entries void XRef::markUnencrypted(Object *obj) { Object obj1; switch (obj->getType()) { case objArray: { Array *array = obj->getArray(); for (int i = 0; i < array->getLength(); i++) { obj1 = array->getNF(i); diff --git poppler/XRef.h poppler/XRef.h index bf904d50..d9a1f719 100644 --- poppler/XRef.h +++ poppler/XRef.h @@ -43,40 +43,47 @@ #include "Stream.h" #include class Dict; class Stream; class Parser; class PopplerCache; //------------------------------------------------------------------------ // XRef //------------------------------------------------------------------------ enum XRefEntryType { xrefEntryFree, xrefEntryUncompressed, xrefEntryCompressed, xrefEntryNone }; +enum GetEntryOption +{ + LoadSilentlyIfMissing, // If entry is not in memory, try to load it from input stream (may trigger reconstruction) + LoadAndComplainIfMissing, // If entry is not in memory, print error and try to load it from input stream (may trigger reconstruction) + DontLoadIfMissing // If entry is not in memory, return an empty entry +}; + struct XRefEntry { Goffset offset; int gen; XRefEntryType type; int flags; Object obj; //if this entry was updated, obj will contains the updated object enum Flag { // Regular flags Updated, // Entry was modified // Special flags -- available only after xref->scanSpecialFlags() is run Unencrypted, // Entry is stored in unencrypted form (meaningless in unencrypted documents) DontRewrite // Entry must not be written back in case of full rewrite }; inline GBool getFlag(Flag flag) const { const int mask = (1 << (int)flag); return (flags & mask) != 0; } @@ -166,41 +173,41 @@ public: // Return the catalog object reference. int getRootNum() const { return rootNum; } int getRootGen() const { return rootGen; } // Get end position for a stream in a damaged file. // Returns false if unknown or file is not damaged. GBool getStreamEnd(Goffset streamStart, Goffset *streamEnd); // Retuns the entry that belongs to the offset int getNumEntry(Goffset offset); // Scans the document and sets special flags in all xref entries. One of those // flags is Unencrypted, which affects how the object is fetched. Therefore, // this function must be called before fetching unencrypted objects (e.g. // Encrypt dictionary, XRef streams). Note that the code that initializes // decryption doesn't need to call this function, because it runs before // decryption is enabled, and therefore the Unencrypted flag is ignored. void scanSpecialFlags(); // Direct access. - XRefEntry *getEntry(int i, GBool complainIfMissing = gTrue); + XRefEntry *getEntry(int i, GetEntryOption option = LoadAndComplainIfMissing); Object *getTrailerDict() { return &trailerDict; } // Was the XRef modified? GBool isModified() const { return modified; } // Set the modification flag for XRef to true. void setModified() { modified = gTrue; } // Write access void setModifiedObject(const Object* o, Ref r); Ref addIndirectObject (const Object* o); void removeIndirectObject(Ref r); void add(int num, int gen, Goffset offs, GBool used); // Output XRef table to stream void writeTableToFile(OutStream* outStr, GBool writeAllEntries); // Output XRef stream contents to GooString and fill trailerDict fields accordingly void writeStreamToBuffer(GooString *stmBuf, Dict *xrefDict, XRef *xref); // to be thread safe during write where changes are not allowed void lock();