From 16e51366c78905c16fc6788d1f5d9d549ba9611e Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Tue, 21 Feb 2012 22:39:16 +1030
Subject: [PATCH] pdfinfo: decode utf-16 surrogate pairs

Bug 23075
---
 utils/pdfinfo.cc |   11 ++++++++++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
index e0a6f7a..a2c86ee 100644
--- a/utils/pdfinfo.cc
+++ b/utils/pdfinfo.cc
@@ -379,7 +379,7 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
   Object obj;
   GooString *s1;
   GBool isUnicode;
-  Unicode u;
+  Unicode u, u2;
   char buf[8];
   int i, n;
 
@@ -399,6 +399,15 @@ static void printInfoString(Dict *infoDict, const char *key, const char *text,
         u = ((s1->getChar(i) & 0xff) << 8) |
             (s1->getChar(i+1) & 0xff);
         i += 2;
+        if (u >= 0xd800 && u <= 0xdbff && i + 1 < obj.getString()->getLength()) {
+          // UTF-16 surrogate pair: u is the high half, u2 the low half.
+          // code point = 0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00)
+          u2 = ((s1->getChar(i) & 0xff) << 8) |
+               (s1->getChar(i+1) & 0xff);
+          i += 2;
+          if (u2 >= 0xdc00 && u2 <= 0xdfff)
+            u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00);
+        }
       } else {
         u = pdfDocEncoding[s1->getChar(i) & 0xff];
         ++i;
-- 
1.7.5.4