From 40dbf8ab2f756ac25bae41d23a1f789def004e3a Mon Sep 17 00:00:00 2001 From: Jason Crain Date: Fri, 15 Jan 2016 01:16:05 -0600 Subject: [PATCH] Fallback to looking up glyph by Unicode code point Sometimes glyphs don't display in documents because either font substitution picks a font without the required glyphs, or the PDF document uses different names from the font. Provide fontconfig the required characters so it can better choose a font. Add a dependency on ICU so we can use the Unicode property functions to only request the alphabetic characters, as requesting other characters can make fontconfig choose a font without the indicated italic/bold/etc but with better Unicode coverage. Fallback to looking up the glyph id from the Unicode code point if lookup by name or character code fails. Change buildFcPattern to make a copy of its argument instead of modifying in place. Bug #93299 --- CMakeLists.txt | 12 ++++++ cmake/modules/FindICU.cmake | 43 ++++++++++++++++++++ config.h.cmake | 8 ++++ configure.ac | 16 ++++++++ poppler/CairoFontEngine.cc | 7 ++++ poppler/GfxFont.cc | 4 ++ poppler/GlobalParams.cc | 99 ++++++++++++++++++++++++++++++++++++++++----- poppler/Makefile.am | 7 ++++ splash/SplashFTFontFile.cc | 8 ++++ 9 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 cmake/modules/FindICU.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index e58a41d..cfebaae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ set(ENABLE_LIBOPENJPEG "auto" CACHE STRING "Use libopenjpeg for JPX streams. Pos set(ENABLE_CMS "auto" CACHE STRING "Use color management system. Possible values: auto, lcms1, lcms2. 'auto' prefers lcms2 over lcms1 if both are available. Unset to disable color management system.") option(ENABLE_LIBCURL "Build libcurl based HTTP support." OFF) option(ENABLE_ZLIB "Build with zlib (not totally safe)." OFF) +option(ENABLE_ICU "Build with ICU" ON) option(SPLASH_CMYK "Include support for CMYK rasterization." 
OFF) option(USE_FIXEDPOINT "Use fixed point arithmetic in the Splash backend" OFF) option(USE_FLOAT "Use single precision arithmetic in the Splash backend" OFF) @@ -201,6 +202,10 @@ if(ENABLE_LIBCURL) include_directories(${CURL_INCLUDE_DIR}) set(POPPLER_HAS_CURL_SUPPORT ON) endif(ENABLE_LIBCURL) +if(ENABLE_ICU) + find_package(ICU) + set(USE_ICU ${ICU_FOUND}) +endif(ENABLE_ICU) add_definitions(-DHAVE_CONFIG_H=1) if(MINGW) @@ -258,6 +263,9 @@ endif(LCMS_FOUND) if(LCMS2_FOUND) include_directories(${LCMS2_INCLUDE_DIR}) endif(LCMS2_FOUND) +if(ICU_FOUND) + include_directories(${ICU_INCLUDE_DIR}) +endif(ICU_FOUND) # Recent versions of poppler-data install a .pc file. # Use it to determine the encoding data path, if available. @@ -469,6 +477,9 @@ endif(TIFF_FOUND) if(HAVE_PTHREAD) set(poppler_LIBS ${poppler_LIBS} ${CMAKE_THREAD_LIBS_INIT}) endif() +if(USE_ICU) + set(poppler_LIBS ${poppler_LIBS} ${ICU_LIBRARIES}) +endif(USE_ICU) if(MSVC) add_definitions(-D_CRT_SECURE_NO_WARNINGS) @@ -732,6 +743,7 @@ endif(LCMS_FOUND) if(LCMS2_FOUND) message(" with lcms2") endif(LCMS2_FOUND) +show_end_message_yesno("use icu" USE_ICU) show_end_message_yesno("command line utils" ENABLE_UTILS) show_end_message("test data dir" ${TESTDATADIR}) diff --git a/cmake/modules/FindICU.cmake b/cmake/modules/FindICU.cmake new file mode 100644 index 0000000..5c680b5 --- /dev/null +++ b/cmake/modules/FindICU.cmake @@ -0,0 +1,43 @@ +# - Try to find icu +# Once done this will define +# ICU_FOUND - System has ICU +# ICU_INCLUDE_DIRS - The ICU include directories +# ICU_LIBRARIES - The libraries needed to use ICU +# ICU_DEFINITIONS - Compiler switches required for using ICU + +# Copyright (c) 2016 Jason Crain, +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ +find_package(PkgConfig QUIET) +pkg_check_modules(PC_ICU QUIET icu-uc) +set(ICU_DEFINITIONS ${PC_ICU_CFLAGS_OTHER}) + +find_path(ICU_INCLUDE_DIR unicode/uchar.h + PATHS + ${PC_ICU_INCLUDEDIR} + ${PC_ICU_INCLUDE_DIRS} + ) +find_library(ICU_COMMON_LIBRARY icuuc + PATHS + ${PC_ICU_LIBDIR} + ${PC_ICU_LIBRARY_DIRS} + ) +find_library(ICU_DATA_LIBRARY icudata icudt + PATHS + ${PC_ICU_LIBDIR} + ${PC_ICU_LIBRARY_DIRS} + ) + +mark_as_advanced(ICU_INCLUDE_DIR ICU_COMMON_LIBRARY ICU_DATA_LIBRARY) +set(ICU_LIBRARIES ${ICU_COMMON_LIBRARY} ${ICU_DATA_LIBRARY}) +set(ICU_INCLUDE_DIRS ${ICU_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ICU DEFAULT_MSG + ICU_LIBRARIES + ICU_COMMON_LIBRARY + ICU_DATA_LIBRARY + ICU_INCLUDE_DIR + ) diff --git a/config.h.cmake b/config.h.cmake index 440a13d..9d2b6a8 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -219,6 +219,14 @@ #define HAVE_BOOLEAN #endif +/* Use ICU */ +#cmakedefine USE_ICU 1 + +/* Do not import ICU's C++ namespace into the global scope */ +#ifdef USE_ICU +# define U_USING_ICU_NAMESPACE 0 +#endif + /* MS defined snprintf as deprecated but then added it in Visual Studio 2015. */ #if defined(_MSC_VER) && _MSC_VER < 1900 #define snprintf _snprintf diff --git a/configure.ac b/configure.ac index 1e34506..5afb169 100644 --- a/configure.ac +++ b/configure.ac @@ -199,6 +199,21 @@ then AC_CHECK_LIB(posix4,nanosleep, X_EXTRA_LIBS="$X_EXTRA_LIBS -lposix4") fi +dnl Test for libicu +AC_ARG_ENABLE(icu, + AS_HELP_STRING([--disable-icu], + [Don't build against icu libraries.])) + +if test x$enable_icu != xno; then + PKG_CHECK_MODULES(ICU, icu-uc, [enable_icu=yes], [enable_icu=no]) +fi + +if test x$enable_icu = xyes; then + AC_DEFINE(USE_ICU, 1, [Build against icu libraries.]) + AC_DEFINE(U_USING_ICU_NAMESPACE, 0, [Use ICU's C++ namespace]) +fi + +AM_CONDITIONAL(USE_ICU, test x$enable_icu = xyes) dnl ##### Test for libopenjpeg. Versions prior to 1.4 do not provide a pkgconfig file. 
openjpeg1="no" @@ -1022,6 +1037,7 @@ if test x$enable_cms = xyes;then echo " with lcms2" fi fi +echo " use icu: $enable_icu" if test x$enable_build_type != xno;then echo " build type: $enable_build_type" fi diff --git a/poppler/CairoFontEngine.cc b/poppler/CairoFontEngine.cc index 1d611b5..012b6ea 100644 --- a/poppler/CairoFontEngine.cc +++ b/poppler/CairoFontEngine.cc @@ -471,6 +471,13 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref, codeToGID[i] = FT_Get_Name_Index(face, (char*)name); } } + if (codeToGID[i] == 0) { + // lookup by name failed, try by Unicode + Unicode u = globalParams->mapNameToUnicodeAll(enc[i]); + if (u) { + codeToGID[i] = FT_Get_Char_Index(face, u); + } + } } } break; diff --git a/poppler/GfxFont.cc b/poppler/GfxFont.cc index 81f5903..bbb9fbc 100644 --- a/poppler/GfxFont.cc +++ b/poppler/GfxFont.cc @@ -1761,6 +1761,10 @@ int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) { if ((charName = enc[i])) { if ((code = globalParams->getMacRomanCharCode(charName))) { map[i] = ff->mapCodeToGID(cmap, code); + } else if (unicodeCmap >= 0 && + (code = globalParams->mapNameToUnicodeAll(charName))) { + // not in MacRoman encoding, try Unicode lookup + map[i] = ff->mapCodeToGID(unicodeCmap, code); } } else { map[i] = -1; diff --git a/poppler/GlobalParams.cc b/poppler/GlobalParams.cc index e543408..1637d46 100644 --- a/poppler/GlobalParams.cc +++ b/poppler/GlobalParams.cc @@ -117,6 +117,10 @@ extern XpdfPluginVecTable xpdfPluginVecTable; # endif #endif +#ifdef USE_ICU +# include <unicode/uchar.h> +#endif + //------------------------------------------------------------------------ #define cidToUnicodeCacheSize 4 @@ -992,12 +996,14 @@ static FcPattern *buildFcPattern(GfxFont *font, GooString *base14Name) width = -1, spacing = -1; bool deleteFamily = false; + GooString *nameStr; char *family, *name, *modifiers; const char *start; FcPattern *p; // this is all heuristics will be overwritten if font had proper info - name = (base14Name == NULL) ? 
font->getName()->getCString() : base14Name->getCString(); + nameStr = (base14Name == NULL) ? font->getName() : base14Name; + name = copyString(nameStr->getCString()); modifiers = strchr (name, ','); if (modifiers == NULL) @@ -1097,6 +1103,7 @@ static FcPattern *buildFcPattern(GfxFont *font, GooString *base14Name) if (width != -1) FcPatternAddInteger(p, FC_WIDTH, width); if (spacing != -1) FcPatternAddInteger(p, FC_SPACING, spacing); + gfree(name); if (deleteFamily) delete[] family; return p; @@ -1159,11 +1166,90 @@ GooString *GlobalParams::findBase14FontFile(GooString *base14Name, GfxFont *font return findSystemFontFile(font, &type, &fontNum, NULL, base14Name); } +// Compare font names while skipping punctuation and spaces. Return true if +// names match. +bool fontNamesMatch(const char *s1, const char *s2) { + while (*s1 != '\0' && *s2 != '\0') { + while (isprint(*s1) && !isalpha(*s1)) + s1++; + while (isprint(*s2) && !isalpha(*s2)) + s2++; + if (*s1 != *s2) + return false; + s1++; s2++; + } + return true; +} + +FcFontSet *getFcFontSet(GfxFont *font, GooString *base14Name) { + FcResult res; + +#ifdef USE_ICU + + char *fontFamily; + bool skipCharSearch = font->isCIDFont() || + (base14Name && (base14Name->cmp("Symbol") == 0 || base14Name->cmp("ZapfDingbats") == 0)); + + // If a specific font was requested or a character set search would not be + // productive, search for a font based just on the name and attributes. 
+ if (skipCharSearch || !base14Name) { + FcPattern *p = buildFcPattern(font, base14Name); + FcConfigSubstitute(NULL, p, FcMatchPattern); + FcDefaultSubstitute(p); + FcFontSet *set = FcFontSort(NULL, p, FcFalse, NULL, &res); + FcPatternDestroy(p); + + if (!set || set->nfont == 0 || skipCharSearch) + return set; + + res = FcPatternGetString(set->fonts[0], FC_FAMILY, 0, (FcChar8**)&fontFamily); + if (res == FcResultMatch && fontNamesMatch(font->getName()->getCString(), fontFamily)) + return set; // found specifically requested font + + FcFontSetDestroy(set); + } + + // No exact match found; include character set in search. + FcPattern *p = buildFcPattern(font, base14Name); + Gfx8BitFont *bFont = dynamic_cast<Gfx8BitFont *>(font); + FcCharSet *charSet = FcCharSetCreate(); + char **enc = bFont->getEncoding(); + + for (int i = 0; i < 256; ++i) { + if (enc[i]) { + Unicode u = globalParams->mapNameToUnicodeText(enc[i]); + // limited to alphabetic characters to prevent fontconfig from preferring + // better Unicode coverage over the requested weight/slant/width/spacing. 
+ if (u_isUAlphabetic((UChar32) u)) { + FcCharSetAddChar(charSet, u); + } + } + } + + FcPatternAddCharSet(p, FC_CHARSET, charSet); + FcCharSetDestroy(charSet); + FcConfigSubstitute(NULL, p, FcMatchPattern); + FcDefaultSubstitute(p); + FcFontSet *set = FcFontSort(NULL, p, FcFalse, NULL, &res); + FcPatternDestroy(p); + return set; + +#else + + FcPattern *p = buildFcPattern(font, base14Name); + FcConfigSubstitute(NULL, p, FcMatchPattern); + FcDefaultSubstitute(p); + FcFontSet *set = FcFontSort(NULL, p, FcFalse, NULL, &res); + FcPatternDestroy(p); + return set; + +#endif +} + GooString *GlobalParams::findSystemFontFile(GfxFont *font, SysFontType *type, int *fontNum, GooString *substituteFontName, GooString *base14Name) { SysFontInfo *fi = NULL; - FcPattern *p=0; GooString *path = NULL; GooString *fontName = font->getName(); GooString substituteName; @@ -1182,13 +1268,8 @@ GooString *GlobalParams::findSystemFontFile(GfxFont *font, FcFontSet *set; int i; FcLangSet *lb = NULL; - p = buildFcPattern(font, base14Name); - if (!p) - goto fin; - FcConfigSubstitute(NULL, p, FcMatchPattern); - FcDefaultSubstitute(p); - set = FcFontSort(NULL, p, FcFalse, NULL, &res); + set = getFcFontSet(font, base14Name); if (!set) goto fin; @@ -1315,8 +1396,6 @@ GooString *GlobalParams::findSystemFontFile(GfxFont *font, substituteFontName->Set(substituteName.getCString()); } fin: - if (p) - FcPatternDestroy(p); unlockGlobalParams; return path; } diff --git a/poppler/Makefile.am b/poppler/Makefile.am index 79b6000..93394e5 100644 --- a/poppler/Makefile.am +++ b/poppler/Makefile.am @@ -113,6 +113,11 @@ cms_includes = $(LCMS_CFLAGS) cms_libs = $(LCMS_LIBS) endif +if USE_ICU +icu_includes = $(ICU_CFLAGS) +icu_libs = $(ICU_LIBS) +endif + if ENABLE_XPDF_HEADERS poppler_includedir = $(includedir)/poppler @@ -269,6 +274,7 @@ libpoppler_la_SOURCES = \ libpoppler_la_CPPFLAGS = \ $(cms_includes) \ + $(icu_includes) \ $(splash_includes) \ $(libjpeg_includes) \ $(libtiff_includes) \ @@ -283,6 +289,7 @@ 
libpoppler_la_LIBADD = \ $(top_builddir)/goo/libgoo.la \ $(top_builddir)/fofi/libfofi.la \ $(cms_libs) \ + $(icu_libs) \ $(splash_libs) \ $(libtiff_libs) \ $(libjpeg_libs) \ diff --git a/splash/SplashFTFontFile.cc b/splash/SplashFTFontFile.cc index f0dcf50..815796b 100644 --- a/splash/SplashFTFontFile.cc +++ b/splash/SplashFTFontFile.cc @@ -30,6 +30,7 @@ #include "goo/gmem.h" #include "goo/GooString.h" #include "poppler/GfxFont.h" +#include "poppler/GlobalParams.h" #include "SplashFTFontEngine.h" #include "SplashFTFont.h" #include "SplashFTFontFile.h" @@ -65,6 +66,13 @@ SplashFontFile *SplashFTFontFile::loadType1Font(SplashFTFontEngine *engineA, codeToGIDA[i] = FT_Get_Name_Index(faceA, (char *)name); } } + if (codeToGIDA[i] == 0) { + // lookup by name failed, try by Unicode + Unicode u = globalParams->mapNameToUnicodeAll(encA[i]); + if (u) { + codeToGIDA[i] = FT_Get_Char_Index(faceA, u); + } + } } } -- 2.6.4