From ed04611aa8a75a4c20d418a14ce73ce4d3938723 Mon Sep 17 00:00:00 2001 From: Jakub Kucharski Date: Sun, 24 Jul 2016 22:30:30 +0200 Subject: [PATCH 1/2] introduced hex string as a new Object type and used it for file identifier File identifiers are usually written as hex strings (and this is how the PDF reference presents them in an example). Until now, poppler was reading hex strings properly, but was forgetting about the fact that a given string is a hex string, so e.g. a file identifier was first read as a hex string and then printed as an ordinary string, and as a result what was printed was actually junk. This commit fixes that. --- poppler/Lexer.cc | 2 +- poppler/Object.cc | 16 +++++++++++++++- poppler/Object.h | 12 +++++++++++- poppler/PDFDoc.cc | 10 ++++++++++ 4 files changed, 37 insertions(+), 3 deletions(-) diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc index 952967a..28cddfa 100644 --- a/poppler/Lexer.cc +++ b/poppler/Lexer.cc @@ -526,7 +526,7 @@ Object *Lexer::getObj(Object *obj, int objNum) { s->append(tokBuf, n); if (m == 1) s->append((char)(c2 << 4)); - obj->initString(s); + obj->initHexString(s); } break; diff --git a/poppler/Object.cc b/poppler/Object.cc index d06bb39..dcbd450 100644 --- a/poppler/Object.cc +++ b/poppler/Object.cc @@ -44,6 +44,7 @@ static const char *objTypeNames[numObjTypes] = { "integer", "real", "string", + "hexstring", "name", "null", "array", @@ -59,7 +60,7 @@ static const char *objTypeNames[numObjTypes] = { #ifdef DEBUG_MEM int Object::numAlloc[numObjTypes] = - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; #endif Object *Object::initArray(XRef *xref) { @@ -93,6 +94,9 @@ Object *Object::copy(Object *obj) { case objString: obj->string = string->copy(); break; + case objHexString: + obj->hex = hex->copy(); + break; case objName: obj->name = copyString(name); break; @@ -127,6 +131,9 @@ void Object::free() { case objString: delete string; break; + case objHexString: + delete hex; + 
break; case objName: gfree(name); break; @@ -180,6 +187,13 @@ void Object::print(FILE *f) { fwrite(string->getCString(), 1, string->getLength(), f); fprintf(f, ")"); break; + case objHexString: + fprintf(f, "<"); + for (i = 0; i < hex->getLength(); i++) { + fprintf(f, "%02x", hex->getChar(i) & 0xff); + } + fprintf(f, ">"); + break; case objName: fprintf(f, "/%s", name); break; diff --git a/poppler/Object.h b/poppler/Object.h index 0c7767c..50d618b 100644 --- a/poppler/Object.h +++ b/poppler/Object.h @@ -89,6 +89,7 @@ enum ObjType { objInt, // integer objReal, // real objString, // string + objHexString, // hex string objName, // name objNull, // null @@ -108,7 +109,7 @@ enum ObjType { objInt64 // integer with at least 64-bits }; -#define numObjTypes 15 // total number of object types +#define numObjTypes 16 // total number of object types //------------------------------------------------------------------------ // Object @@ -138,6 +139,8 @@ public: { initObj(objReal); real = realA; return this; } Object *initString(GooString *stringA) { initObj(objString); string = stringA; return this; } + Object *initHexString(GooString *hexA) + { initObj(objHexString); hex = hexA; return this; } Object *initName(const char *nameA) { initObj(objName); name = copyString(nameA); return this; } Object *initNull() @@ -178,6 +181,7 @@ public: GBool isReal() { return type == objReal; } GBool isNum() { return type == objInt || type == objReal || type == objInt64; } GBool isString() { return type == objString; } + GBool isHexString() { return type == objHexString; } GBool isName() { return type == objName; } GBool isNull() { return type == objNull; } GBool isArray() { return type == objArray; } @@ -213,6 +217,11 @@ public: // because the object it's not expected to have a NULL string. 
GooString *takeString() { OBJECT_TYPE_CHECK(objString); GooString *s = string; string = NULL; return s; } + GooString *getHexString() { OBJECT_TYPE_CHECK(objHexString); return hex; } + // After takeHexString() the only method that should be called for the object is free() + // because the object is not expected to have a NULL hex string. + GooString *takeHexString() { + OBJECT_TYPE_CHECK(objHexString); GooString *s = hex; hex = NULL; return s; } char *getName() { OBJECT_TYPE_CHECK(objName); return name; } Array *getArray() { OBJECT_TYPE_CHECK(objArray); return array; } Dict *getDict() { OBJECT_TYPE_CHECK(objDict); return dict; } @@ -272,6 +281,7 @@ private: long long int64g; // 64-bit integer double real; // real GooString *string; // string + GooString *hex; // hex string char *name; // name Array *array; // array Dict *dict; // dictionary diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index c91d6e4..f49e45a 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -1263,6 +1263,16 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO case objString: writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen); break; + case objHexString: + { + GooString *s = obj->getHexString(); + outStr->printf("<"); + for (int i = 0; i < s->getLength(); i++) { + outStr->printf("%02x", s->getChar(i) & 0xff); + } + outStr->printf(">"); + break; + } case objName: { GooString name(obj->getName()); -- 2.9.0