From 462b37a73aa06d8434d75808a25f8a257722c203 Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Sun, 28 Oct 2012 12:13:22 +1030
Subject: [PATCH] Don't allow invalid unicode to be passed to backends

---
NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed onto two lines. Hunk headers, line counts and code tokens are
unchanged, but the leading whitespace of every hunk line is a reconstruction.
The first hunk is a whitespace-only change (presumably tab -> spaces; confirm
against the target tree). Apply with `git apply --ignore-whitespace` (or
`git am --ignore-whitespace`) unless the indentation has been verified.

What the change does: UnicodeIsValid() accepts a value only if it is below
0x110000, is not in 0xD800-0xDFFF, is not in 0xFDD0-0xFDEF, and does not have
0xFFFE or 0xFFFF as its low 16 bits. Each call site touched below stores
0xfffd in place of a value that fails this check.

 poppler/CharCodeToUnicode.cc |   11 +++++++++--
 poppler/UTF.cc               |   11 +++++++++++
 poppler/UTF.h                |    4 ++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/poppler/CharCodeToUnicode.cc b/poppler/CharCodeToUnicode.cc
index 4298090..c396302 100644
--- a/poppler/CharCodeToUnicode.cc
+++ b/poppler/CharCodeToUnicode.cc
@@ -439,7 +439,7 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
       for (i = oldLen; i < mapLen; ++i) {
         map[i] = 0;
       }
-	}
+    }
   }
   if (n <= 4) {
     if (!parseHex(uStr, n, &u)) {
@@ -447,6 +447,9 @@ void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
       return;
     }
     map[code] = u + offset;
+    if (!UnicodeIsValid(map[code])) {
+      map[code] = 0xfffd;
+    }
   } else {
     if (sMapLen >= sMapSize) {
       sMapSize = sMapSize + 16;
@@ -593,7 +596,11 @@ void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
     sMap[i].len = len;
     sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
     for (j = 0; j < len; ++j) {
-      sMap[i].u[j] = u[j];
+      if (UnicodeIsValid(u[j])) {
+        sMap[i].u[j] = u[j];
+      } else {
+        sMap[i].u[j] = 0xfffd;
+      }
     }
   }
 }
diff --git a/poppler/UTF.cc b/poppler/UTF.cc
index 675ac68..42c7836 100644
--- a/poppler/UTF.cc
+++ b/poppler/UTF.cc
@@ -26,6 +26,14 @@
 #include "PDFDocEncoding.h"
 #include "UTF.h"
 
+bool UnicodeIsValid(Unicode ucs4)
+{
+  return (ucs4 < 0x110000) &&
+    ((ucs4 & 0xfffff800) != 0xd800) &&
+    (ucs4 < 0xfdd0 || ucs4 > 0xfdef) &&
+    ((ucs4 & 0xfffe) != 0xfffe);
+}
+
 int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
 {
   int i, n, len;
@@ -64,6 +72,9 @@ int UTF16toUCS4(const Unicode *utf16, int utf16Len, Unicode **ucs4)
     } else {
       u[n] = utf16[i];
     }
+    if (!UnicodeIsValid(u[n])) {
+      u[n] = 0xfffd;
+    }
     n++;
   }
   *ucs4 = u;
diff --git a/poppler/UTF.h b/poppler/UTF.h
index 1111c37..248c168 100644
--- a/poppler/UTF.h
+++ b/poppler/UTF.h
@@ -32,4 +32,8 @@ int UTF16toUCS4(const Unicode *utf16, int utf16_len, Unicode **ucs4_out);
 // returns number of UCS-4 characters
 int TextStringToUCS4(GooString *textStr, Unicode **ucs4);
 
+// check if UCS-4 character is valid
+bool UnicodeIsValid(Unicode ucs4);
+
+
 #endif
-- 
1.7.10.4