From 9fd2b8223676402b76be290bd798a6c24ed799cc Mon Sep 17 00:00:00 2001 From: Jakub Kucharski Date: Sun, 24 Jul 2016 22:30:30 +0200 Subject: [PATCH 1/2] introduced hex string as a new Object type and used it for file identifier File identifiers are usually written as hex strings (and this is how the PDF reference presents them in an example). Until now, poppler was reading hex strings properly, but was forgetting about the fact that a given string is a hex string, so e.g. file identifier was first read as a hex string and then printed as an ordinary string and thanks to that what was printed was actually junk. This commit fixes that. --- poppler/Lexer.cc | 3 ++- poppler/Object.cc | 19 +++++++++++++++++-- poppler/Object.h | 15 +++++++++++++-- poppler/PDFDoc.cc | 10 ++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/poppler/Lexer.cc b/poppler/Lexer.cc index 952967a..ec45ad2 100644 --- a/poppler/Lexer.cc +++ b/poppler/Lexer.cc @@ -18,6 +18,7 @@ // Copyright (C) 2010 Carlos Garcia Campos // Copyright (C) 2012, 2013 Adrian Johnson // Copyright (C) 2013 Thomas Freitag +// Copyright (C) 2016 Jakub Kucharski // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -526,7 +527,7 @@ Object *Lexer::getObj(Object *obj, int objNum) { s->append(tokBuf, n); if (m == 1) s->append((char)(c2 << 4)); - obj->initString(s); + obj->initHexString(s); } break; diff --git a/poppler/Object.cc b/poppler/Object.cc index d06bb39..681732d 100644 --- a/poppler/Object.cc +++ b/poppler/Object.cc @@ -15,6 +15,7 @@ // // Copyright (C) 2008, 2010, 2012 Albert Astals Cid // Copyright (C) 2013 Adrian Johnson +// Copyright (C) 2016 Jakub Kucharski // // To see a description of the changes please see the Changelog file that // came with your tarball or type make ChangeLog if you are building from git @@ -54,12 +55,13 @@ static const char *objTypeNames[numObjTypes] = { "error", "eof", 
"none", - "integer64" + "integer64", + "hexstring" }; #ifdef DEBUG_MEM int Object::numAlloc[numObjTypes] = - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; #endif Object *Object::initArray(XRef *xref) { @@ -93,6 +95,9 @@ Object *Object::copy(Object *obj) { case objString: obj->string = string->copy(); break; + case objHexString: + obj->hex = hex->copy(); + break; case objName: obj->name = copyString(name); break; @@ -127,6 +132,9 @@ void Object::free() { case objString: delete string; break; + case objHexString: + delete hex; + break; case objName: gfree(name); break; @@ -180,6 +188,13 @@ void Object::print(FILE *f) { fwrite(string->getCString(), 1, string->getLength(), f); fprintf(f, ")"); break; + case objHexString: + fprintf(f, "<"); + for (i = 0; i < hex->getLength(); i++) { + fprintf(f, "%02x", hex->getChar(i) & 0xff); + } + fprintf(f, ">"); + break; case objName: fprintf(f, "/%s", name); break; diff --git a/poppler/Object.h b/poppler/Object.h index 0c7767c..ca15541 100644 --- a/poppler/Object.h +++ b/poppler/Object.h @@ -105,10 +105,12 @@ enum ObjType { objNone, // uninitialized object // poppler-only objects - objInt64 // integer with at least 64-bits + objInt64, // integer with at least 64-bits + + objHexString // hex string }; -#define numObjTypes 15 // total number of object types +#define numObjTypes 16 // total number of object types //------------------------------------------------------------------------ // Object @@ -138,6 +140,8 @@ public: { initObj(objReal); real = realA; return this; } Object *initString(GooString *stringA) { initObj(objString); string = stringA; return this; } + Object *initHexString(GooString *hexA) + { initObj(objHexString); hex = hexA; return this; } Object *initName(const char *nameA) { initObj(objName); name = copyString(nameA); return this; } Object *initNull() @@ -178,6 +182,7 @@ public: GBool isReal() { return type == objReal; } GBool isNum() { return type == objInt || type 
== objReal || type == objInt64; } GBool isString() { return type == objString; } + GBool isHexString() { return type == objHexString; } GBool isName() { return type == objName; } GBool isNull() { return type == objNull; } GBool isArray() { return type == objArray; } @@ -213,6 +218,11 @@ public: // because the object it's not expected to have a NULL string. GooString *takeString() { OBJECT_TYPE_CHECK(objString); GooString *s = string; string = NULL; return s; } + GooString *getHexString() { OBJECT_TYPE_CHECK(objHexString); return hex; } + // After takeHexString() the only method that should be called for the object is free() + // because the object is not expected to have a NULL hex string. + GooString *takeHexString() { + OBJECT_TYPE_CHECK(objHexString); GooString *s = hex; hex = NULL; return s; } char *getName() { OBJECT_TYPE_CHECK(objName); return name; } Array *getArray() { OBJECT_TYPE_CHECK(objArray); return array; } Dict *getDict() { OBJECT_TYPE_CHECK(objDict); return dict; } @@ -272,6 +282,7 @@ private: long long int64g; // 64-bit integer double real; // real GooString *string; // string + GooString *hex; // hex string char *name; // name Array *array; // array Dict *dict; // dictionary diff --git a/poppler/PDFDoc.cc b/poppler/PDFDoc.cc index c91d6e4..f49e45a 100644 --- a/poppler/PDFDoc.cc +++ b/poppler/PDFDoc.cc @@ -1263,6 +1263,16 @@ void PDFDoc::writeObject (Object* obj, OutStream* outStr, XRef *xRef, Guint numO case objString: writeString(obj->getString(), outStr, fileKey, encAlgorithm, keyLength, objNum, objGen); break; + case objHexString: + { + GooString *s = obj->getHexString(); + outStr->printf("<"); + for (int i = 0; i < s->getLength(); i++) { + outStr->printf("%02x", s->getChar(i) & 0xff); + } + outStr->printf(">"); + break; + } case objName: { GooString name(obj->getName()); -- 2.9.0