diff --git a/fofi/FoFiTrueType.cc b/fofi/FoFiTrueType.cc index 77a7ad2..d702123 100644 --- a/fofi/FoFiTrueType.cc +++ b/fofi/FoFiTrueType.cc @@ -321,10 +321,10 @@ int FoFiTrueType::findCmap(int platform, int encoding) { return -1; } -Gushort FoFiTrueType::mapCodeToGID(int i, int c) { +Gushort FoFiTrueType::mapCodeToGID(int i, Guint c) { Gushort gid; - int segCnt, segEnd, segStart, segDelta, segOffset; - int cmapFirst, cmapLen; + Guint segCnt, segEnd, segStart, segDelta, segOffset; + Guint cmapFirst, cmapLen; int pos, a, b, m; GBool ok; @@ -335,7 +335,7 @@ Gushort FoFiTrueType::mapCodeToGID(int i, int c) { pos = cmaps[i].offset; switch (cmaps[i].fmt) { case 0: - if (c < 0 || c >= cmaps[i].len - 6) { + if (c >= cmaps[i].len - 6) { return 0; } gid = getU8(cmaps[i].offset + 6 + c, &ok); @@ -384,6 +384,31 @@ Gushort FoFiTrueType::mapCodeToGID(int i, int c) { } gid = getU16BE(pos + 10 + 2 * (c - cmapFirst), &ok); break; + case 12: + segCnt = getU32BE(pos + 12, &ok); + a = -1; + b = segCnt - 1; + segEnd = getU32BE(pos + 16 + 12*b+4, &ok); + if (c > segEnd) { + return 0; + } + // invariant: seg[a].end < code <= seg[b].end + while (b - a > 1 && ok) { + m = (a + b) / 2; + segEnd = getU32BE(pos + 16 + 12*m+4, &ok); + if (segEnd < c) { + a = m; + } else { + b = m; + } + } + segStart = getU32BE(pos + 16 + 12*b, &ok); + segDelta = getU32BE(pos + 16 + 12*b+8, &ok); + if (c < segStart) { + return 0; + } + gid = segDelta + (c-segStart); + break; default: return 0; } diff --git a/fofi/FoFiTrueType.h b/fofi/FoFiTrueType.h index 2ef5ebc..af6dac2 100644 --- a/fofi/FoFiTrueType.h +++ b/fofi/FoFiTrueType.h @@ -54,7 +54,7 @@ public: int findCmap(int platform, int encoding); // Return the GID corresponding to according to the th cmap. - Gushort mapCodeToGID(int i, int c); + Gushort mapCodeToGID(int i, Guint c); // map gid to vertical glyph gid if exist. // if not exist return original gid diff --git a/poppler/CMap.cc b/poppler/CMap.cc index 65f4766..c812404 100644 --- a/poppler/CMap.cc +++ b/poppler/CMap.cc @@ -354,6 +354,40 @@ CID CMap::getCID(char *s, int len, int *nUsed) { } } +void CMap::setReverseMapVector(Guint startCode, CMapVectorEntry *vec, + Guint *rmap, Guint rmapSize, Guint ncand) { + int i; + + if (vec == 0) return; + for (i = 0;i < 256;i++) { + if (vec[i].isVector) { + setReverseMapVector((startCode+i) << 8, + vec[i].vector,rmap,rmapSize,ncand); + } else { + Guint cid = vec[i].cid; + + if (cid < rmapSize) { + int cand; + + for (cand = 0;cand < ncand;cand++) { + Guint code = startCode+i; + Guint idx = cid*ncand+cand; + if (rmap[idx] == 0) { + rmap[idx] = code; + break; + } else if (rmap[idx] == code) { + break; + } + } + } + } + } +} + +void CMap::setReverseMap(Guint *rmap, Guint rmapSize, Guint ncand) { + setReverseMapVector(0,vector,rmap,rmapSize,ncand); +} + //------------------------------------------------------------------------ CMapCache::CMapCache() { diff --git a/poppler/CMap.h b/poppler/CMap.h index 1652ccf..d65e434 100644 --- a/poppler/CMap.h +++ b/poppler/CMap.h @@ -55,6 +55,8 @@ public: // Return the writing mode (0=horizontal, 1=vertical). int getWMode() { return wMode; } + void setReverseMap(Guint *rmap, Guint rmapSize, Guint ncand); + private: CMap(GooString *collectionA, GooString *cMapNameA); @@ -65,6 +67,8 @@ private: Guint nBytes); void addCIDs(Guint start, Guint end, Guint nBytes, CID firstCID); void freeCMapVector(CMapVectorEntry *vec); + void setReverseMapVector(Guint startCode, CMapVectorEntry *vec, + Guint *rmap, Guint rmapSize, Guint ncand); GooString *collection; GooString *cMapName; diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc index cce097f..1055df1 100644 --- a/poppler/GfxFont.cc +++ b/poppler/GfxFont.cc @@ -1588,33 +1588,44 @@ Gushort GfxCIDFont::mapCodeToGID(FoFiTrueType *ff, int cmapi, Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { /* space characters */ +#define N_UCS_CANDIDATES 2 static unsigned long spaces[] = { 0x2000,0x2001,0x2002,0x2003,0x2004,0x2005,0x2006,0x2007, 0x2008,0x2009,0x200A,0x00A0,0x200B,0x2060,0x3000,0xFEFF, 0 }; static char *adobe_cns1_cmaps[] = { + "UniCNS-UTF32-V", "UniCNS-UCS2-V", + "UniCNS-UTF32-H", "UniCNS-UCS2-H", 0 }; static char *adobe_gb1_cmaps[] = { + "UniGB-UTF32-V", "UniGB-UCS2-V", + "UniGB-UTF32-H", "UniGB-UCS2-H", 0 }; static char *adobe_japan1_cmaps[] = { + "UniJIS-UTF32-V", "UniJIS-UCS2-V", + "UniJIS-UTF32-H", "UniJIS-UCS2-H", 0 }; static char *adobe_japan2_cmaps[] = { + "UniHojo-UTF32-V", "UniHojo-UCS2-V", + "UniHojo-UTF32-H", "UniHojo-UCS2-H", 0 }; static char *adobe_korea1_cmaps[] = { + "UniKS-UTF32-V", "UniKS-UCS2-V", + "UniKS-UTF32-H", "UniKS-UCS2-H", 0 }; @@ -1680,8 +1691,17 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { for (i = 0; i < ff->getNumCmaps(); ++i) { cmapPlatform = ff->getCmapPlatform(i); cmapEncoding = ff->getCmapEncoding(i); - if ((cmapPlatform == 3 && cmapEncoding == 1) || cmapPlatform == 0) - cmap = i; + if (cmapPlatform == 3 && cmapEncoding == 10) { + /* UCS-4 */ + cmap = i; + /* use UCS-4 cmap */ + break; + } else if (cmapPlatform == 3 && cmapEncoding == 1) { + /* Unicode */ + cmap = i; + } else if (cmapPlatform == 0 && cmap < 0) { + cmap = i; + } } if (cmap < 0) return NULL; @@ -1692,8 +1712,9 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { break; } } - n = ctu->getLength(); - humap = new Unicode[n]; + //n = ctu->getLength(); + n = 65536; + humap = new Unicode[n*N_UCS_CANDIDATES]; if (lp->collection != 0) { CharCodeToUnicode *tctu; GooString tname(lp->toUnicodeMap); @@ -1706,10 +1727,15 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { len = tctu->mapToUnicode(cid,ucodes,4); if (len == 1) { - humap[cid] = ucodes[0]; + humap[cid*N_UCS_CANDIDATES] = ucodes[0]; + for (i = 1;i < N_UCS_CANDIDATES;i++) { + humap[cid*N_UCS_CANDIDATES+i] = 0; + } } else { /* if not single character, ignore it */ - humap[cid] = 0; + for (i = 0;i < N_UCS_CANDIDATES;i++) { + humap[cid*N_UCS_CANDIDATES+i] = 0; + } } } delete tctu; @@ -1721,25 +1747,11 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { if ((cMap = globalParams->getCMap(getCollection(),&cname)) != 0) { - for (u = 0;u <= 65535;u++) { - CID cid; - char code[2]; - - code[0] = (u >> 8) & 0xff; - code[1] = u & 0xff; - cid = cMap->getCID(code,2,&nUsed); - if (cid != 0) { if (cMap->getWMode()) { - if (cid < n && vumap[cid] == 0) { - vumap[cid] = u; - } + cMap->setReverseMap(vumap,n,1); } else { - if (cid < n && humap[cid] == 0) { - humap[cid] = u; - } + cMap->setReverseMap(humap,n,N_UCS_CANDIDATES); } - } - } cMap->decRefCnt(); } } @@ -1754,7 +1766,10 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { Unicode ucode; len = ctu->mapToUnicode(cid,&ucode,1); - humap[cid] = ucode; + humap[cid*N_UCS_CANDIDATES] = ucode; + for (i = 1;i < N_UCS_CANDIDATES;i++) { + humap[cid*N_UCS_CANDIDATES+i] = 0; + } } ctu->decRefCnt(); } @@ -1771,13 +1786,17 @@ Gushort *GfxCIDFont::getCodeToGIDMap(FoFiTrueType *ff, int *mapsizep) { if (unicode != 0) { gid = mapCodeToGID(ff,cmap,unicode,gTrue); if (gid == 0 && humap != 0) { - if (humap != 0) unicode = humap[code]; - if (unicode != 0) gid = mapCodeToGID(ff,cmap,unicode,gTrue); + for (i = 0;i < N_UCS_CANDIDATES + && gid == 0 && (unicode = humap[code*N_UCS_CANDIDATES+i]) != 0;i++) { + gid = mapCodeToGID(ff,cmap,unicode,gTrue); + } } } - if (gid == 0) { - if (humap != 0) unicode = humap[code]; - if (unicode != 0) gid = mapCodeToGID(ff,cmap,unicode,wmode); + if (gid == 0 && humap != 0) { + for (i = 0;i < N_UCS_CANDIDATES + && gid == 0 && (unicode = humap[code*N_UCS_CANDIDATES+i]) != 0;i++) { + gid = mapCodeToGID(ff,cmap,unicode,wmode); + } } if (gid == 0) { /* special handling space characters */