From e44120078151e2c3d6c09026c82286fe51c12810 Mon Sep 17 00:00:00 2001 From: Hib Eris Date: Wed, 4 Jan 2012 17:56:00 +0100 Subject: [PATCH 1/3] Do not cache startxref position --- poppler/PDFDoc.cc | 5 +---- poppler/PDFDoc.h | 1 - 2 files changed, 1 insertions(+), 5 deletions(-) diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index 01d2759..cb55c2b 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -105,7 +105,6 @@ void PDFDoc::init() #ifndef DISABLE_OUTLINE outline = NULL; #endif - startXRefPos = ~(Guint)0; secHdlr = NULL; pageCache = NULL; } @@ -1380,7 +1379,7 @@ Guint PDFDoc::strToUnsigned(char *s) { // Read the 'startxref' position. Guint PDFDoc::getStartXRef() { - if (startXRefPos == ~(Guint)0) { + Guint startXRefPos; if (isLinearized()) { char buf[linearizationSearchSize+1]; @@ -1435,8 +1434,6 @@ Guint PDFDoc::getStartXRef() } } - } - return startXRefPos; } diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index 92cee78..f5a4444 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -302,7 +302,6 @@ private: //then the POSIX errno will be here. 
int fopenErrno; - Guint startXRefPos; // offset of last xref table }; #endif -- 1.7.7 From 244eacdebc47ad6946498d8f44dab2ed98faa027 Mon Sep 17 00:00:00 2001 From: Hib Eris Date: Wed, 4 Jan 2012 17:59:13 +0100 Subject: [PATCH 2/3] Return a BaseStream from makeSubStream --- poppler/Stream.cc | 8 ++++---- poppler/Stream.h | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/poppler/Stream.cc b/poppler/Stream.cc index ee53502..24d26fb 100644 --- a/poppler/Stream.cc +++ b/poppler/Stream.cc @@ -712,7 +712,7 @@ FileStream::~FileStream() { close(); } -Stream *FileStream::makeSubStream(Guint startA, GBool limitedA, +BaseStream *FileStream::makeSubStream(Guint startA, GBool limitedA, Guint lengthA, Object *dictA) { return new FileStream(f, startA, limitedA, lengthA, dictA); } @@ -840,7 +840,7 @@ CachedFileStream::~CachedFileStream() cc->decRefCnt(); } -Stream *CachedFileStream::makeSubStream(Guint startA, GBool limitedA, +BaseStream *CachedFileStream::makeSubStream(Guint startA, GBool limitedA, Guint lengthA, Object *dictA) { cc->incRefCnt(); @@ -935,7 +935,7 @@ MemStream::~MemStream() { } } -Stream *MemStream::makeSubStream(Guint startA, GBool limited, +BaseStream *MemStream::makeSubStream(Guint startA, GBool limited, Guint lengthA, Object *dictA) { MemStream *subStr; Guint newLength; @@ -993,7 +993,7 @@ EmbedStream::EmbedStream(Stream *strA, Object *dictA, EmbedStream::~EmbedStream() { } -Stream *EmbedStream::makeSubStream(Guint start, GBool limitedA, +BaseStream *EmbedStream::makeSubStream(Guint start, GBool limitedA, Guint lengthA, Object *dictA) { error(-1, "Internal: called makeSubStream() on EmbedStream"); return NULL; diff --git a/poppler/Stream.h b/poppler/Stream.h index cb08e3a..7938fc1 100644 --- a/poppler/Stream.h +++ b/poppler/Stream.h @@ -296,7 +296,7 @@ public: BaseStream(Object *dictA, Guint lengthA); virtual ~BaseStream(); - virtual Stream *makeSubStream(Guint start, GBool limited, + virtual BaseStream *makeSubStream(Guint start, 
GBool limited, Guint length, Object *dict) = 0; virtual void setPos(Guint pos, int dir = 0) = 0; virtual GBool isBinary(GBool last = gTrue) { return last; } @@ -446,7 +446,7 @@ public: FileStream(FILE *fA, Guint startA, GBool limitedA, Guint lengthA, Object *dictA); virtual ~FileStream(); - virtual Stream *makeSubStream(Guint startA, GBool limitedA, + virtual BaseStream *makeSubStream(Guint startA, GBool limitedA, Guint lengthA, Object *dictA); virtual StreamKind getKind() { return strFile; } virtual void reset(); @@ -504,7 +504,7 @@ public: CachedFileStream(CachedFile *ccA, Guint startA, GBool limitedA, Guint lengthA, Object *dictA); virtual ~CachedFileStream(); - virtual Stream *makeSubStream(Guint startA, GBool limitedA, + virtual BaseStream *makeSubStream(Guint startA, GBool limitedA, Guint lengthA, Object *dictA); virtual StreamKind getKind() { return strCachedFile; } virtual void reset(); @@ -546,7 +546,7 @@ public: MemStream(char *bufA, Guint startA, Guint lengthA, Object *dictA); virtual ~MemStream(); - virtual Stream *makeSubStream(Guint start, GBool limited, + virtual BaseStream *makeSubStream(Guint start, GBool limited, Guint lengthA, Object *dictA); virtual StreamKind getKind() { return strWeird; } virtual void reset(); @@ -591,7 +591,7 @@ public: EmbedStream(Stream *strA, Object *dictA, GBool limitedA, Guint lengthA); virtual ~EmbedStream(); - virtual Stream *makeSubStream(Guint start, GBool limitedA, + virtual BaseStream *makeSubStream(Guint start, GBool limitedA, Guint lengthA, Object *dictA); virtual StreamKind getKind() { return str->getKind(); } virtual void reset() {} -- 1.7.7 From c82ed96d3e4aa7365a1825a7fd2eb0e37fc48527 Mon Sep 17 00:00:00 2001 From: Hib Eris Date: Thu, 5 Jan 2012 11:42:07 +0100 Subject: [PATCH 3/3] Improve handling of corrupted documents Fixes bug #44488 --- poppler/PDFDoc.cc | 48 +++++++++++++++++++++++++++++++++++++++++++++++- poppler/PDFDoc.h | 1 + 2 files changed, 48 insertions(+), 1 deletions(-) diff --git 
a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index cb55c2b..0cbc7cd 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -222,6 +222,29 @@ PDFDoc::PDFDoc(BaseStream *strA, GooString *ownerPassword, ok = setup(ownerPassword, userPassword); } +Guint PDFDoc::SearchLastEOF(Guint maximum) { + char buf[256]; + Guint pos, offset = 0; + + str->reset(); + while (1) { + if (!str->getLine(buf, 256)) { + break; + } + + pos = str->getPos(); + if (pos > maximum) { + break; + } + + if (!strncmp(buf, "%%EOF", 6)) { + offset = pos; + } + } + + return offset; +} + GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) { str->setPos(0, -1); if (str->getPos() < 0) @@ -258,9 +281,32 @@ GBool PDFDoc::setup(GooString *ownerPassword, GooString *userPassword) { // read catalog catalog = new Catalog(xref); if (catalog && !catalog->isOk()) { + + // Maybe the document is corrupted by garbage at the end. + // Try to limit the document to a previous %%EOF mark. + BaseStream *oldstr = str; + XRef *oldxref = xref; + Catalog *oldcatalog = catalog; + Guint limit = SearchLastEOF(str->getLength()-1); + Object obj; + obj.initNull(); + str = str->makeSubStream(0, gTrue, limit, &obj); + if (setup(ownerPassword, userPassword)) { + error(-1, "Document limited to first %u bytes", limit); + delete oldcatalog; + delete oldxref; + delete oldstr; + return gTrue; + } else { + delete str; + } + catalog = oldcatalog; + xref = oldxref; + str = oldstr; + + // try one more time to construct the Catalog, maybe the problem is damaged XRef if (!wasReconstructed) { - // try one more time to contruct the Catalog, maybe the problem is damaged XRef delete catalog; delete xref; xref = new XRef(str, 0, 0, NULL, true); diff --git a/poppler/PDFDoc.h b/poppler/PDFDoc.h index f5a4444..f2de090 100644 --- a/poppler/PDFDoc.h +++ b/poppler/PDFDoc.h @@ -279,6 +279,7 @@ private: // linearized document (0 for non linearized documents).
Guint getMainXRefEntriesOffset(); Guint strToUnsigned(char *s); + Guint SearchLastEOF(Guint maximum); GooString *fileName; FILE *file; -- 1.7.7