From 6fee0dde82896d7cd0c6bb92e6bb164b4a67d57a Mon Sep 17 00:00:00 2001 From: Jakub Kucharski Date: Sun, 24 Jul 2016 22:30:30 +0200 Subject: [PATCH 1/2] introduced hex string as a new Object type and used it for file identifier File identifiers are usually written as hex strings (and this is how the PDF reference presents them in an example). Until now, poppler was reading hex strings properly, but was forgetting about the fact that a given string is a hex string, so e.g. file identifier was first read as a hex string and then printed as an ordinary string and thanks to that what was printed was actually junk. This commit fixes that. --- poppler/Lexer.cc | 3 ++- poppler/Object.cc | 15 +++++++++++++-- poppler/Object.h | 16 +++++++++++++--- poppler/PDFDoc.cc | 10 ++++++++++ 4 files changed, 38 insertions(+), 6 deletions(-) diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc index 952967a..ec45ad2 100644 --- a/poppler/Lexer.cc +++ b/poppler/Lexer.cc @@ -18,6 +18,7 @@ // Copyright (C) 2010 Carlos Garcia Campos // Copyright (C) 2012, 2013 Adrian Johnson // Copyright (C) 2013 Thomas Freitag +// Copyright (C) 2016 Jakub Kucharski // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -526,7 +527,7 @@ Object *Lexer::getObj(Object *obj, int objNum) { s->append(tokBuf, n); if (m == 1) s->append((char)(c2 << 4)); - obj->initString(s); + obj->initHexString(s); } break; diff --git a/poppler/Object.cc b/poppler/Object.cc index d06bb39..1bd7734 100644 --- a/poppler/Object.cc +++ b/poppler/Object.cc @@ -15,6 +15,7 @@ // // Copyright (C) 2008, 2010, 2012 Albert Astals Cid // Copyright (C) 2013 Adrian Johnson +// Copyright (C) 2016 Jakub Kucharski // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -54,12 +55,13 @@ static const char *objTypeNames[numObjTypes] = { "error", "eof", 
"none", - "integer64" + "integer64", + "hexstring" }; #ifdef DEBUG_MEM int Object::numAlloc[numObjTypes] = - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; #endif Object *Object::initArray(XRef *xref) { @@ -91,6 +93,7 @@ Object *Object::copy(Object *obj) { *obj = *this; switch (type) { case objString: + case objHexString: obj->string = string->copy(); break; case objName: @@ -125,6 +128,7 @@ Object *Object::fetch(XRef *xref, Object *obj, int recursion) { void Object::free() { switch (type) { case objString: + case objHexString: delete string; break; case objName: @@ -180,6 +184,13 @@ void Object::print(FILE *f) { fwrite(string->getCString(), 1, string->getLength(), f); fprintf(f, ")"); break; + case objHexString: + fprintf(f, "<"); + for (i = 0; i < string->getLength(); i++) { + fprintf(f, "%02x", string->getChar(i) & 0xff); + } + fprintf(f, ">"); + break; case objName: fprintf(f, "/%s", name); break; diff --git a/poppler/Object.h b/poppler/Object.h index 0c7767c..8889cdb 100644 --- a/poppler/Object.h +++ b/poppler/Object.h @@ -105,10 +105,12 @@ enum ObjType { objNone, // uninitialized object // poppler-only objects - objInt64 // integer with at least 64-bits + objInt64, // integer with at least 64-bits + + objHexString // hex string }; -#define numObjTypes 15 // total number of object types +#define numObjTypes 16 // total number of object types //------------------------------------------------------------------------ // Object @@ -138,6 +140,8 @@ public: { initObj(objReal); real = realA; return this; } Object *initString(GooString *stringA) { initObj(objString); string = stringA; return this; } + Object *initHexString(GooString *hexA) + { initObj(objHexString); string = hexA; return this; } Object *initName(const char *nameA) { initObj(objName); name = copyString(nameA); return this; } Object *initNull() @@ -178,6 +182,7 @@ public: GBool isReal() { return type == objReal; } GBool isNum() { return type == objInt || 
type == objReal || type == objInt64; } GBool isString() { return type == objString; } + GBool isHexString() { return type == objHexString; } GBool isName() { return type == objName; } GBool isNull() { return type == objNull; } GBool isArray() { return type == objArray; } @@ -213,6 +218,11 @@ public: // because the object it's not expected to have a NULL string. GooString *takeString() { OBJECT_TYPE_CHECK(objString); GooString *s = string; string = NULL; return s; } + GooString *getHexString() { OBJECT_TYPE_CHECK(objHexString); return string; } + // After takeHexString() the only method that should be called for the object is free() + // because the object it's not expected to have a NULL hex string. + GooString *takeHexString() { + OBJECT_TYPE_CHECK(objHexString); GooString *s = string; string = NULL; return s; } char *getName() { OBJECT_TYPE_CHECK(objName); return name; } Array *getArray() { OBJECT_TYPE_CHECK(objArray); return array; } Dict *getDict() { OBJECT_TYPE_CHECK(objDict); return dict; } @@ -271,7 +281,7 @@ private: int intg; // integer long long int64g; // 64-bit integer double real; // real - GooString *string; // string + GooString *string; // (hex) string char *name; // name Array *array; // array Dict *dict; // dictionary diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index f3383fc..8ffca85 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -1266,6 +1266,16 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO case objString: writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen); break; + case objHexString: + { + GooString *s = obj->getHexString(); + outStr->printf("<"); + for (int i = 0; i < s->getLength(); i++) { + outStr->printf("%02x", s->getChar(i) & 0xff); + } + outStr->printf(">"); + break; + } case objName: { GooString name(obj->getName()); -- 2.9.2