--- GfxFont.cc	2006/11/11 21:19:19	1.1
+++ GfxFont.cc	2006/11/12 00:37:14
@@ -457,7 +457,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
   FoFiType1C *ffT1C;
   int code, code2;
   char *charName;
-  GBool missing, hex;
+  GBool missing, hex, missing_lig;
   Unicode toUnicode[256];
   CharCodeToUnicode *utu, *ctu2;
   Unicode uBuf[8];
@@ -743,7 +743,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
   //----- build the mapping to Unicode -----
 
   // pass 1: use the name-to-Unicode mapping table
-  missing = hex = gFalse;
+  missing = hex = missing_lig = gFalse;
   for (code = 0; code < 256; ++code) {
     if ((charName = enc[code])) {
       if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
@@ -766,6 +766,10 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
               (charName[1] >= 'A' && charName[1] <= 'F')))) {
           hex = gTrue;
         }
+        // an unmapped name containing '_' is a ligature candidate for pass 3
+        if (strchr(charName, '_')) {
+          missing_lig = gTrue;
+        }
         missing = gTrue;
       }
     } else {
@@ -777,6 +781,7 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
   // the form 'Axx', 'xx', 'Ann', 'ABnn', or 'nn', where 'A' and 'B'
   // are any letters, 'xx' is two hex digits, and 'nn' is 2-4
   // decimal digits
+  // also look for names of the form 'uniXXXX' where 'XXXX' is 4 hex digits
   if (missing && globalParams->getMapNumericCharNames()) {
     for (code = 0; code < 256; ++code) {
       if ((charName = enc[code]) && !toUnicode[code] &&
@@ -798,8 +803,12 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
         } else if (n >= 4 && n <= 6 &&
                    isdigit(charName[2]) && isdigit(charName[3])) {
           code2 = atoi(charName+2);
+        } else if (n == 7 && !strncmp(charName, "uni", 3) &&
+                   isxdigit(charName[3]) && isxdigit(charName[4]) &&
+                   isxdigit(charName[5]) && isxdigit(charName[6])) {
+          code2 = (int)strtol(charName + 3, NULL, 16);
         }
-        if (code2 >= 0 && code2 <= 0xff) {
+        if (code2 != -1) {
           toUnicode[code] = (Unicode)code2;
         }
       }
@@ -809,6 +818,37 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, cha
   // construct the char code -> Unicode mapping object
   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 
+  // pass 3: look for ligatures, names of the form A_a (e.g. f_i,
+  // T_h, l_quotesingle).  Each '_'-separated component is looked up in
+  // the name-to-Unicode table and the hits are stored as one
+  // multi-character mapping; components with no mapping are skipped.
+  if (missing_lig) {
+    for (code = 0; code < 256; ++code) {
+      if (enc[code] && strchr(enc[code], '_') && !toUnicode[code] &&
+          !ctu->mapToUnicode((CharCode)code, uBuf, 8)) {
+        // split a private copy in place: each '_' is overwritten with NUL
+        char *lig_copy = charName = copyString(enc[code]);
+        n = 0;
+        // a name may have any number of components, so stop once uBuf
+        // is full instead of writing past its end
+        while (charName && n < (int)(sizeof(uBuf) / sizeof(uBuf[0]))) {
+          if ((buf = strchr(charName, '_'))) {
+            *buf = 0;
+          }
+          if ((uBuf[n] = globalParams->mapNameToUnicode(charName))) {
+            ++n;
+          }
+          // step past the separator; after the last component there is none
+          charName = buf ? buf + 1 : NULL;
+        }
+        if (n >= 1) {
+          ctu->setMapping((CharCode)code, uBuf, n);
+        }
+        gfree(lig_copy);
+      }
+    }
+  }
+
   // merge in a ToUnicode CMap, if there is one -- this overwrites
   // existing entries in ctu, i.e., the ToUnicode CMap takes
   // precedence, but the other encoding info is allowed to fill in any