--- poppler/GfxFont.cc	2006/11/14 22:07:55	1.1
+++ poppler/GfxFont.cc	2006/11/15 08:27:39
@@ -773,42 +773,56 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
     }
   }
 
+  // construct the char code -> Unicode mapping object
+  ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
+
   // pass 2: try to fill in the missing chars, looking for names of
   // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
   // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
   // decimal digits
-  if (missing && globalParams->getMapNumericCharNames()) {
+  if (missing) {
     for (code = 0; code < 256; ++code) {
       if ((charName = enc[code]) && !toUnicode[code] &&
           strcmp(charName, ".notdef")) {
-        n = strlen(charName);
-        code2 = -1;
-        if (hex && n == 3 && isalpha(charName[0]) &&
-            isxdigit(charName[1]) && isxdigit(charName[2])) {
-          sscanf(charName+1, "%x", &code2);
-        } else if (hex && n == 2 &&
-                   isxdigit(charName[0]) && isxdigit(charName[1])) {
-          sscanf(charName, "%x", &code2);
-        } else if (!hex && n >= 2 && n <= 4 &&
-                   isdigit(charName[0]) && isdigit(charName[1])) {
-          code2 = atoi(charName);
-        } else if (n >= 3 && n <= 5 &&
-                   isdigit(charName[1]) && isdigit(charName[2])) {
-          code2 = atoi(charName+1);
-        } else if (n >= 4 && n <= 6 &&
-                   isdigit(charName[2]) && isdigit(charName[3])) {
-          code2 = atoi(charName+2);
-        }
-        if (code2 >= 0 && code2 <= 0xff) {
-          toUnicode[code] = (Unicode)code2;
+        m = sizeof(uBuf)/sizeof(Unicode);
+        if (globalParams->getMapNumericCharNames() &&
+            (n = parseNumericCharCode(charName, hex, uBuf, m))) {
+          ctu->setMapping((CharCode)code, uBuf, n);
+        } else if (strchr(charName, '_')) { // possibly a ligature?
+          // names of the form A_a (e.g. f_i, T_h, l_quotesingle):
+          // split a private copy at each '_' and map every component
+          char *lig_copy = charName = copyString(charName);
+          n = 0;
+          do {
+            if ((buf = strchr(charName, '_')))
+              *buf = 0;
+            if (!strcmp(charName, ".notdef"))
+              ;
+            else if ((uBuf[n] = globalParams->mapNameToUnicode(charName)))
+              n++;
+            else if (globalParams->getMapNumericCharNames() &&
+                     (i = parseNumericCharCode(charName, hex, uBuf + n, m - n)))
+              n += i;
+            else
+              fprintf(stderr, "Could not parse ligature component in charref "
+                      "for nameToUnicode: %s\n", charName);
+            // advance only when a separator was found; forming 'buf + 1'
+            // from a null pointer is undefined behavior
+            if (buf)
+              charName = buf + 1;
+          } while (buf && n < m);
+          if (n >= 1) {
+            ctu->setMapping((CharCode)code, uBuf, n);
+          }
+          gfree(lig_copy);
+        } else {
+          fprintf(stderr, "Could not parse charref for nameToUnicode: %s\n",
+                  charName);
         }
       }
     }
   }
 
-  // construct the char code -> Unicode mapping object
-  ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
-
   // merge in a ToUnicode CMap, if there is one -- this overwrites
   // existing entries in ctu, i.e., the ToUnicode CMap takes
   // precedence, but the other encoding info is allowed to fill in any
@@ -938,6 +952,54 @@ Gfx8BitFont::~Gfx8BitFont() {
   }
 }
 
+// Parse <charName> as a numeric character reference.  Accepted
+// forms: 'Axx' and 'xx' (only when <hex> is set; 'A' is a letter and
+// 'xx' is two hex digits), 'nn' (only when <hex> is clear), 'Ann'
+// and 'ABnn' ('A'/'B' are arbitrary characters; the name's length
+// and the first two digits of 'nn' are checked), and 'uniXXXX'
+// (four hex digits).  On a match the value is stored in uBuf[0] and
+// 1 is returned; otherwise, or if <uLen> is less than 1, returns 0.
+int Gfx8BitFont::parseNumericCharCode(char *charName, GBool hex,
+                                      Unicode *uBuf, int uLen) {
+  // the <ctype.h> classifiers are only defined for values that fit
+  // in an unsigned char, so test the bytes through this view
+  const unsigned char *s = (const unsigned char *)charName;
+  int n, code2;
+
+  if (uLen < 1)
+    return 0;
+  n = strlen(charName);
+  code2 = -1;
+  // the digits are validated before conversion, so strtol() parses
+  // exactly the checked characters (and, unlike sscanf("%x") into an
+  // int, involves no signed/unsigned pointer mismatch)
+  if (hex && n == 3 && isalpha(s[0]) &&
+      isxdigit(s[1]) && isxdigit(s[2])) {
+    code2 = (int)strtol(charName + 1, NULL, 16);
+  } else if (hex && n == 2 &&
+             isxdigit(s[0]) && isxdigit(s[1])) {
+    code2 = (int)strtol(charName, NULL, 16);
+  } else if (!hex && n >= 2 && n <= 4 &&
+             isdigit(s[0]) && isdigit(s[1])) {
+    code2 = atoi(charName);
+  } else if (n >= 3 && n <= 5 &&
+             isdigit(s[1]) && isdigit(s[2])) {
+    code2 = atoi(charName+1);
+  } else if (n >= 4 && n <= 6 &&
+             isdigit(s[2]) && isdigit(s[3])) {
+    code2 = atoi(charName+2);
+  } else if (n == 7 && !strncmp(charName, "uni", 3) &&
+             isxdigit(s[3]) && isxdigit(s[4]) &&
+             isxdigit(s[5]) && isxdigit(s[6])) {
+    code2 = (int)strtol(charName + 3, NULL, 16);
+  }
+  if (code2 != -1) {
+    uBuf[0] = (Unicode)code2;
+    return 1;
+  }
+  return 0;
+}
+
 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
                              Unicode *u, int uSize, int *uLen,
                              double *dx, double *dy, double *ox, double *oy) {
--- poppler/GfxFont.h	2006/11/14 23:02:19	1.1
+++ poppler/GfxFont.h	2006/11/15 07:54:00
@@ -271,6 +271,11 @@ public:
 
 private:
 
+  // Parse a character name as a numeric reference.  Stores at most
+  // one value in <uBuf> and returns the number of values stored
+  // (0 if the name was not recognized or <uLen> is less than 1).
+  int parseNumericCharCode(char *charName, GBool hex, Unicode *uBuf, int uLen);
+
   char *enc[256];               // char code --> char name
   char encFree[256];            // boolean for each char name: if set,
                                 // the string is malloc'ed