From 63e26b085d0152e00c9ae6a85dfbac90aa48c50b Mon Sep 17 00:00:00 2001 From: Jason Crain Date: Fri, 15 Jan 2016 01:16:05 -0600 Subject: [PATCH] Fall back to looking up glyph by Unicode code point Sometimes glyphs don't display in documents because either font substitution picks a font without the required glyphs, or the PDF document uses different names from the font. Provide fontconfig with the required characters so it can better choose a font. Add a dependency on ICU so we can use the Unicode property functions to only request the alphabetic characters, as requesting other characters can make fontconfig choose a font without the indicated italic/bold/etc but with better Unicode coverage. Fall back to looking up the glyph id from the Unicode code point if lookup by name or character code fails. Change buildFcPattern to make a copy of its argument instead of modifying in place. Bug #93299 --- CMakeLists.txt | 3 ++ cmake/modules/FindICU.cmake | 43 ++++++++++++++++++++++++ config.h.cmake | 3 ++ configure.ac | 3 ++ poppler/CairoFontEngine.cc | 7 ++++ poppler/GfxFont.cc | 4 +++ poppler/GlobalParams.cc | 82 +++++++++++++++++++++++++++++++++++++++------ poppler/Makefile.am | 5 +++ splash/SplashFTFontFile.cc | 8 +++++ 9 files changed, 148 insertions(+), 10 deletions(-) create mode 100644 cmake/modules/FindICU.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index e58a41d..f9350d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -201,6 +201,7 @@ if(ENABLE_LIBCURL) include_directories(${CURL_INCLUDE_DIR}) set(POPPLER_HAS_CURL_SUPPORT ON) endif(ENABLE_LIBCURL) +find_package(ICU REQUIRED) add_definitions(-DHAVE_CONFIG_H=1) if(MINGW) @@ -258,6 +259,7 @@ endif(LCMS_FOUND) if(LCMS2_FOUND) include_directories(${LCMS2_INCLUDE_DIR}) endif(LCMS2_FOUND) +include_directories(${ICU_INCLUDE_DIR}) # Recent versions of poppler-data install a .pc file. # Use it to determine the encoding data path, if available. 
@@ -469,6 +471,7 @@ endif(TIFF_FOUND) if(HAVE_PTHREAD) set(poppler_LIBS ${poppler_LIBS} ${CMAKE_THREAD_LIBS_INIT}) endif() +set(poppler_LIBS ${poppler_LIBS} ${ICU_LIBRARIES}) if(MSVC) add_definitions(-D_CRT_SECURE_NO_WARNINGS) diff --git a/cmake/modules/FindICU.cmake b/cmake/modules/FindICU.cmake new file mode 100644 index 0000000..5c680b5 --- /dev/null +++ b/cmake/modules/FindICU.cmake @@ -0,0 +1,43 @@ +# - Try to find icu +# Once done this will define +# ICU_FOUND - System has ICU +# ICU_INCLUDE_DIRS - The ICU include directories +# ICU_LIBRARIES - The libraries needed to use ICU +# ICU_DEFINITIONS - Compiler switches required for using ICU + +# Copyright (c) 2016 Jason Crain, +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +find_package(PkgConfig QUIET) +pkg_check_modules(PC_ICU QUIET icu-uc) +set(ICU_DEFINITIONS ${PC_ICU_CFLAGS_OTHER}) + +find_path(ICU_INCLUDE_DIR unicode/uchar.h + PATHS + ${PC_ICU_INCLUDEDIR} + ${PC_ICU_INCLUDE_DIRS} + ) +find_library(ICU_COMMON_LIBRARY icuuc + PATHS + ${PC_ICU_LIBDIR} + ${PC_ICU_LIBRARY_DIRS} + ) +find_library(ICU_DATA_LIBRARY icudata icudt + PATHS + ${PC_ICU_LIBDIR} + ${PC_ICU_LIBRARY_DIRS} + ) + +mark_as_advanced(ICU_INCLUDE_DIR ICU_COMMON_LIBRARY ICU_DATA_LIBRARY) +set(ICU_LIBRARIES ${ICU_COMMON_LIBRARY} ${ICU_DATA_LIBRARY}) +set(ICU_INCLUDE_DIRS ${ICU_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ICU DEFAULT_MSG + ICU_LIBRARIES + ICU_COMMON_LIBRARY + ICU_DATA_LIBRARY + ICU_INCLUDE_DIR + ) diff --git a/config.h.cmake b/config.h.cmake index 440a13d..dd42687 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -219,6 +219,9 @@ #define HAVE_BOOLEAN #endif +/* Do not import ICU's C++ namespace into the global namespace */ +#define U_USING_ICU_NAMESPACE 0 + /* MS defined snprintf as deprecated but then added it in Visual Studio 2015. 
*/ #if defined(_MSC_VER) && _MSC_VER < 1900 #define snprintf _snprintf diff --git a/configure.ac b/configure.ac index 1e34506..c3f5bac 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,9 @@ then AC_CHECK_LIB(posix4,nanosleep, X_EXTRA_LIBS="$X_EXTRA_LIBS -lposix4") fi +dnl Find libicu +PKG_CHECK_MODULES(ICU, icu-uc) +AC_DEFINE(U_USING_ICU_NAMESPACE, 0, [Use ICU's C++ namespace]) dnl ##### Test for libopenjpeg. Versions prior to 1.4 do not provide a pkgconfig file. openjpeg1="no" diff --git a/poppler/CairoFontEngine.cc b/poppler/CairoFontEngine.cc index 1d611b5..012b6ea 100644 --- a/poppler/CairoFontEngine.cc +++ b/poppler/CairoFontEngine.cc @@ -471,6 +471,13 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref, codeToGID[i] = FT_Get_Name_Index(face, (char*)name); } } + if (codeToGID[i] == 0) { + // lookup by name failed, try by Unicode + Unicode u = globalParams->mapNameToUnicodeAll(enc[i]); + if (u) { + codeToGID[i] = FT_Get_Char_Index(face, u); + } + } } } break; diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc index 81f5903..bbb9fbc 100644 --- a/poppler/GfxFont.cc +++ b/poppler/GfxFont.cc @@ -1761,6 +1761,10 @@ int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) { if ((charName = enc[i])) { if ((code = globalParams->getMacRomanCharCode(charName))) { map[i] = ff->mapCodeToGID(cmap, code); + } else if (unicodeCmap >= 0 && + (code = globalParams->mapNameToUnicodeAll(charName))) { + // not in MacRoman encoding, try Unicode lookup + map[i] = ff->mapCodeToGID(unicodeCmap, code); } } else { map[i] = -1; diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc index e543408..8ec7188 100644 --- a/poppler/GlobalParams.cc +++ b/poppler/GlobalParams.cc @@ -50,6 +50,7 @@ #include #include #include +#include #ifdef ENABLE_PLUGINS # ifndef _WIN32 # include @@ -992,12 +993,14 @@ static FcPattern *buildFcPattern(GfxFont *font, GooString *base14Name) width = -1, spacing = -1; bool deleteFamily = false; + GooString *nameStr; char *family, 
*name, *modifiers; const char *start; FcPattern *p; // this is all heuristics will be overwritten if font had proper info - name = (base14Name == NULL) ? font->getName()->getCString() : base14Name->getCString(); + nameStr = (base14Name == NULL) ? font->getName() : base14Name; + name = copyString(nameStr->getCString()); modifiers = strchr (name, ','); if (modifiers == NULL) @@ -1097,6 +1100,7 @@ static FcPattern *buildFcPattern(GfxFont *font, GooString *base14Name) if (width != -1) FcPatternAddInteger(p, FC_WIDTH, width); if (spacing != -1) FcPatternAddInteger(p, FC_SPACING, spacing); + gfree(name); if (deleteFamily) delete[] family; return p; @@ -1159,11 +1163,76 @@ GooString *GlobalParams::findBase14FontFile(GooString *base14Name, GfxFont *font return findSystemFontFile(font, &type, &fontNum, NULL, base14Name); } +// Compare font names while skipping punctuation and spaces. Return true if +// names match. +bool fontNamesMatch(const char *s1, const char *s2) { + while (*s1 != '\0' && *s2 != '\0') { + while (isprint(*s1) && !isalpha(*s1)) + s1++; + while (isprint(*s2) && !isalpha(*s2)) + s2++; + if (*s1 != *s2) + return false; + s1++; s2++; + } + return true; +} + +FcFontSet *getFcFontSet(GfxFont *font, GooString *base14Name) { + FcResult res; + char *fontFamily; + bool skipCharSearch = font->isCIDFont() || + (base14Name && (base14Name->cmp("Symbol") == 0 || base14Name->cmp("ZapfDingbats") == 0)); + + // If a specific font was requested or a character set search would not be + // productive, search for a font based just on the name and attributes. 
+ if (skipCharSearch || !base14Name) { + FcPattern *p = buildFcPattern(font, base14Name); + FcConfigSubstitute(NULL, p, FcMatchPattern); + FcDefaultSubstitute(p); + FcFontSet *set = FcFontSort(NULL, p, FcFalse, NULL, &res); + FcPatternDestroy(p); + + if (!set || set->nfont == 0 || skipCharSearch) + return set; + + res = FcPatternGetString(set->fonts[0], FC_FAMILY, 0, (FcChar8**)&fontFamily); + if (res == FcResultMatch && fontNamesMatch(font->getName()->getCString(), fontFamily)) + return set; // found specifically requested font + + FcFontSetDestroy(set); + } + + // No exact match found; include character set in search. + FcPattern *p = buildFcPattern(font, base14Name); + Gfx8BitFont *bFont = dynamic_cast(font); + FcCharSet *charSet = FcCharSetCreate(); + char **enc = bFont->getEncoding(); + + for (int i = 0; i < 256; ++i) { + if (enc[i]) { + Unicode u = globalParams->mapNameToUnicodeText(enc[i]); + // limited to alphabetic characters to prevent fontconfig from preferring + // better Unicode coverage over the requested weight/slant/width/spacing. 
+ if (u_isUAlphabetic((UChar32) u)) { + FcCharSetAddChar(charSet, u); + } + } + } + + FcPatternAddCharSet(p, FC_CHARSET, charSet); + FcCharSetDestroy(charSet); + FcConfigSubstitute(NULL, p, FcMatchPattern); + FcDefaultSubstitute(p); + FcFontSet *set = FcFontSort(NULL, p, FcFalse, NULL, &res); + FcPatternDestroy(p); + return set; +} + GooString *GlobalParams::findSystemFontFile(GfxFont *font, SysFontType *type, int *fontNum, GooString *substituteFontName, GooString *base14Name) { SysFontInfo *fi = NULL; - FcPattern *p=0; GooString *path = NULL; GooString *fontName = font->getName(); GooString substituteName; @@ -1182,13 +1251,8 @@ GooString *GlobalParams::findSystemFontFile(GfxFont *font, FcFontSet *set; int i; FcLangSet *lb = NULL; - p = buildFcPattern(font, base14Name); - if (!p) - goto fin; - FcConfigSubstitute(NULL, p, FcMatchPattern); - FcDefaultSubstitute(p); - set = FcFontSort(NULL, p, FcFalse, NULL, &res); + set = getFcFontSet(font, base14Name); if (!set) goto fin; @@ -1315,8 +1379,6 @@ GooString *GlobalParams::findSystemFontFile(GfxFont *font, substituteFontName->Set(substituteName.getCString()); } fin: - if (p) - FcPatternDestroy(p); unlockGlobalParams; return path; } diff --git a/poppler/Makefile.am b/poppler/Makefile.am index 79b6000..a1b3f9d 100644 --- a/poppler/Makefile.am +++ b/poppler/Makefile.am @@ -113,6 +113,9 @@ cms_includes = $(LCMS_CFLAGS) cms_libs = $(LCMS_LIBS) endif +icu_includes = $(ICU_CFLAGS) +icu_libs = $(ICU_LIBS) + if ENABLE_XPDF_HEADERS poppler_includedir = $(includedir)/poppler @@ -269,6 +272,7 @@ libpoppler_la_SOURCES = \ libpoppler_la_CPPFLAGS = \ $(cms_includes) \ + $(icu_includes) \ $(splash_includes) \ $(libjpeg_includes) \ $(libtiff_includes) \ @@ -283,6 +287,7 @@ libpoppler_la_LIBADD = \ $(top_builddir)/goo/libgoo.la \ $(top_builddir)/fofi/libfofi.la \ $(cms_libs) \ + $(icu_libs) \ $(splash_libs) \ $(libtiff_libs) \ $(libjpeg_libs) \ diff --git a/splash/SplashFTFontFile.cc b/splash/SplashFTFontFile.cc index f0dcf50..815796b 
100644 --- a/splash/SplashFTFontFile.cc +++ b/splash/SplashFTFontFile.cc @@ -30,6 +30,7 @@ #include "goo/gmem.h" #include "goo/GooString.h" #include "poppler/GfxFont.h" +#include "poppler/GlobalParams.h" #include "SplashFTFontEngine.h" #include "SplashFTFont.h" #include "SplashFTFontFile.h" @@ -65,6 +66,13 @@ SplashFontFile *SplashFTFontFile::loadType1Font(SplashFTFontEngine *engineA, codeToGIDA[i] = FT_Get_Name_Index(faceA, (char *)name); } } + if (codeToGIDA[i] == 0) { + // lookup by name failed, try by Unicode + Unicode u = globalParams->mapNameToUnicodeAll(encA[i]); + if (u) { + codeToGIDA[i] = FT_Get_Char_Index(faceA, u); + } + } } } -- 2.7.0