From 31131d377df4f3a5aa49c10e34d5bd2b9b60e5de Mon Sep 17 00:00:00 2001
From: Adrian Johnson
Date: Sun, 19 Feb 2012 21:31:05 +1030
Subject: [PATCH] Expand glyph name ligatures such as "ff", "ffi" etc to normal form

Bug 7002
---
 poppler/GfxFont.cc          |   12 ++++++++++++
 poppler/UnicodeTypeTable.cc |    8 ++++++++
 poppler/UnicodeTypeTable.h  |    2 ++
 3 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc
index 4e29c36..3125093 100644
--- a/poppler/GfxFont.cc
+++ b/poppler/GfxFont.cc
@@ -55,6 +55,7 @@
 #include "CharCodeToUnicode.h"
 #include "FontEncodingTables.h"
 #include "BuiltinFontTables.h"
+#include "UnicodeTypeTable.h"
 #include
 #include
 #include
@@ -1228,6 +1229,17 @@ Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GooString *nameA
   // construct the char code -> Unicode mapping object
   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 
+  // pass 1a: Expand ligatures in the Alphabetic Presentation Form
+  // block (eg "fi", "ffi") to normal form
+  for (code = 0; code < 256; ++code) {
+    if (unicodeIsAlphabeticPresentationForm(toUnicode[code])) {
+      Unicode *normalized = unicodeNormalizeNFKC(&toUnicode[code], 1, &len, NULL);
+      if (len > 1)
+        ctu->setMapping((CharCode)code, normalized, len);
+      gfree(normalized);
+    }
+  }
+
   // pass 2: try to fill in the missing chars, looking for ligatures, numeric
   // references and variants
   if (missing) {
diff --git a/poppler/UnicodeTypeTable.cc b/poppler/UnicodeTypeTable.cc
index d620025..98e04a7 100644
--- a/poppler/UnicodeTypeTable.cc
+++ b/poppler/UnicodeTypeTable.cc
@@ -962,6 +962,14 @@ GBool unicodeTypeAlphaNum(Unicode c) {
   return t == 'L' || t == 'R' || t == '#';
 }
 
+#define UNICODE_ALPHABETIC_PRESENTATION_BLOCK_BEGIN 0xFB00
+#define UNICODE_ALPHABETIC_PRESENTATION_BLOCK_END 0xFB4F
+
+GBool unicodeIsAlphabeticPresentationForm(Unicode c) {
+  return c >= UNICODE_ALPHABETIC_PRESENTATION_BLOCK_BEGIN
+    && c <= UNICODE_ALPHABETIC_PRESENTATION_BLOCK_END;
+}
+
 Unicode unicodeToUpper(Unicode c) {
   int i;
 
diff --git a/poppler/UnicodeTypeTable.h b/poppler/UnicodeTypeTable.h
index 0dfbd98..c3906d8 100644
--- a/poppler/UnicodeTypeTable.h
+++ b/poppler/UnicodeTypeTable.h
@@ -33,6 +33,8 @@ extern GBool unicodeTypeNum(Unicode c);
 
 extern GBool unicodeTypeAlphaNum(Unicode c);
 
+extern GBool unicodeIsAlphabeticPresentationForm(Unicode c);
+
 extern Unicode unicodeToUpper(Unicode c);
 
 extern Unicode *unicodeNormalizeNFKC(Unicode *in, int len,
-- 
1.7.5.4