--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,6 +30,7 @@
 option(ENABLE_LIBOPENJPEG "Use libopenjpeg for JPX streams." ON)
 option(ENABLE_LCMS "Use liblcms for color management." ON)
 option(ENABLE_LIBCURL "Build libcurl based HTTP support." OFF)
+option(ENABLE_ICU "Build with ICU." ON)
 option(ENABLE_ZLIB "Build with zlib (not totally safe)." OFF)
 option(USE_FIXEDPOINT "Use fixed point arithmetic in the Splash backend" OFF)
 option(USE_FLOAT "Use single precision arithmetic in the Splash backend" OFF)
@@ -124,6 +125,15 @@
   set(ENABLE_CPP ${ICONV_FOUND})
   set(HAVE_ICONV ${ICONV_FOUND})
 endif(ENABLE_CPP)
+
+# ICU is optional: look for it with the FindICU module and switch the
+# feature off again (ENABLE_ICU/HAVE_ICU) when it cannot be found.
+if(ENABLE_ICU)
+  macro_optional_find_package(ICU)
+  set(ENABLE_ICU ${ICU_FOUND})
+  set(HAVE_ICU ${ICU_FOUND})
+endif(ENABLE_ICU)
+
 if(ENABLE_ZLIB)
   find_package(ZLIB)
   if(ZLIB_FOUND)
--- a/Makefile.am
+++ b/Makefile.am
@@ -86,6 +86,7 @@
 	cmake/modules/PopplerDefaults.cmake \
 	cmake/modules/PopplerMacros.cmake \
 	cmake/modules/FindLIBOPENJPEG.cmake \
+	cmake/modules/FindICU.cmake \
 	config.h.cmake \
 	poppler-cairo.pc.cmake \
 	poppler/poppler-config.h.cmake \
--- a/configure.ac
+++ b/configure.ac
@@ -265,6 +265,33 @@
     AC_DEFINE(HAVE_FSEEK64)
 fi
 
+dnl Test for icu
+AC_ARG_ENABLE(icu,
+              AS_HELP_STRING([--enable-icu],
+                             [Build with icu based BIDI support. Enabled by default.]),
+              enable_icu=$enableval,
+              enable_icu="yes")
+if test "x$enable_icu" = "xyes"; then
+  PKG_CHECK_MODULES(ICU, icu-uc,
+                    [icu_pkgconfig=yes], [icu_pkgconfig=no])
+  if test "x$icu_pkgconfig" = "xyes"; then
+    AC_DEFINE(HAVE_ICU, 1, [Have ICU include files])
+  else
+    dnl No pkg-config data: fall back to the icu-config script.
+    ICU_LIBS=
+    ICU_CFLAGS=
+    AC_PATH_PROG(ICU_CONFIG, icu-config, no)
+    if test "x$ICU_CONFIG" != "xno" ; then
+      ICU_CFLAGS=`$ICU_CONFIG --cppflags-searchpath`
+      ICU_LIBS="`$ICU_CONFIG --ldflags-searchpath` `$ICU_CONFIG --ldflags-libsonly`"
+      AC_DEFINE(HAVE_ICU, 1, [Have ICU include files])
+    else
+      dnl Neither lookup worked: report the feature as off in the summary.
+      enable_icu=no
+    fi
+  fi
+fi
+
 dnl Test for zlib
 AC_ARG_ENABLE([zlib],
   [AS_HELP_STRING([--enable-zlib],[Build with zlib])],
@@ -816,6 +843,7 @@
 echo "  use libjpeg:        $enable_libjpeg"
 echo "  use libpng:         $enable_libpng"
 echo "  use libtiff:        $enable_libtiff"
+echo "  use icu:            $enable_icu"
 echo "  use zlib:           $enable_zlib"
 echo "  use libcurl:        $enable_libcurl"
 echo "  use libopenjpeg:    $enable_libopenjpeg"
--- /dev/null
+++ b/cmake/modules/FindICU.cmake
@@ -0,0 +1,64 @@
+# Locate the ICU libraries by querying the icu-config helper script.
+#
+# Variables set by this module:
+#   ICU_FOUND        - TRUE when icu-config works and every library it
+#                      names was located
+#   ICU_INCLUDE_DIRS - header search directories reported by icu-config
+#   ICU_LIBRARIES    - full paths of the ICU libraries to link against
+
+set(ICU_FOUND FALSE)
+set(ICU_INCLUDE_DIRS)
+set(ICU_LIBRARIES)
+
+find_program(ICU_CONFIG icu-config)
+
+if(ICU_CONFIG)
+  execute_process(COMMAND ${ICU_CONFIG} --exists
+                  RESULT_VARIABLE ICU_EXISTS_RESULT)
+  if(ICU_EXISTS_RESULT EQUAL 0)
+
+    # Turn "-I/a -I/b" into the list "/a;/b".  string() is used instead of
+    # piping through sed/tr so no external tools are needed.
+    execute_process(COMMAND ${ICU_CONFIG} --cppflags-searchpath
+                    OUTPUT_VARIABLE ICU_INCLUDE_DIRS
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+    string(REGEX REPLACE "(^|[ \t]+)-I" " "
+           ICU_INCLUDE_DIRS "${ICU_INCLUDE_DIRS}")
+    string(STRIP "${ICU_INCLUDE_DIRS}" ICU_INCLUDE_DIRS)
+    string(REGEX REPLACE "[ \t]+" ";"
+           ICU_INCLUDE_DIRS "${ICU_INCLUDE_DIRS}")
+
+    # Turn "-lfoo -lbar" into the list "foo;bar".
+    execute_process(COMMAND ${ICU_CONFIG} --ldflags-libsonly
+                    OUTPUT_VARIABLE ICU_LIBRARY_NAMES
+                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+    string(REGEX REPLACE "(^|[ \t\r\n]+)-l" " "
+           ICU_LIBRARY_NAMES "${ICU_LIBRARY_NAMES}")
+    string(STRIP "${ICU_LIBRARY_NAMES}" ICU_LIBRARY_NAMES)
+    string(REGEX REPLACE "[ \t\r\n]+" ";"
+           ICU_LIBRARY_NAMES "${ICU_LIBRARY_NAMES}")
+
+    set(ICU_FOUND TRUE)
+    foreach(ICU_LIB_NAME ${ICU_LIBRARY_NAMES})
+      # One result variable per library: find_library() does not search
+      # again once its result variable holds a path.
+      find_library(ICU_${ICU_LIB_NAME}_LIBRARY ${ICU_LIB_NAME})
+      if(ICU_${ICU_LIB_NAME}_LIBRARY)
+        list(APPEND ICU_LIBRARIES ${ICU_${ICU_LIB_NAME}_LIBRARY})
+      else(ICU_${ICU_LIB_NAME}_LIBRARY)
+        set(ICU_FOUND FALSE)
+      endif(ICU_${ICU_LIB_NAME}_LIBRARY)
+    endforeach(ICU_LIB_NAME)
+
+  endif(ICU_EXISTS_RESULT EQUAL 0)
+endif(ICU_CONFIG)
+
+if(ICU_FOUND)
+  if(NOT ICU_FIND_QUIETLY)
+    message(STATUS "Found ICU")
+  endif(NOT ICU_FIND_QUIETLY)
+else(ICU_FOUND)
+  if(ICU_FIND_REQUIRED)
+    message(FATAL_ERROR "Could not find ICU")
+  endif(ICU_FIND_REQUIRED)
+endif(ICU_FOUND)
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -33,6 +33,9 @@
 #endif
 
 #include "poppler-config.h"
+#if HAVE_ICU
+#include <unicode/ubidi.h>
+#endif
 #include <stdio.h>
 #include "goo/gtypes.h"
 #include "GfxFont.h"
@@ -482,8 +485,25 @@
 class TextPage {
 public:
 
+  enum ReorderingMode { // BiDi reordering; ICU builds reuse UBiDi inverse modes
+    ReorderingNotNeeded,
+#if HAVE_ICU
+    // treat all numbers as left-to-right
+    ReorderingNumbersAsL=UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+    // number handling as on Windows; this is ReorderingDefault
+    ReorderingNumbersSpecial=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+    // inverse of ICU's default (plain Unicode) algorithm
+    ReorderingLikeDirect=UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+    ReorderingDefault=ReorderingNumbersSpecial,
+#else
+    ReorderingDefault,
+#endif
+    ReorderingSentinel
+  };
+
   // Constructor.
-  TextPage(GBool rawOrderA);
+  TextPage(GBool rawOrderA,
+           ReorderingMode reordering_modeA=ReorderingDefault);
 
   void incRefCnt();
   void decRefCnt();
@@ -611,6 +631,7 @@
   int primaryRot;		// primary rotation
   GBool primaryLR;		// primary direction (true means L-to-R,
 				//   false means R-to-L)
+  ReorderingMode reordering_mode; // BiDi reordering for findText/dumpFragment
   TextWord *rawWords;		// list of words, in raw order (only if
 				//   rawOrder is set)
   TextWord *rawLastWord;	// last word on rawWords list
--- a/poppler/Makefile.am
+++ b/poppler/Makefile.am
@@ -138,10 +138,14 @@
 cms_libs = $(LCMS_LIBS)
 endif
 
+icu_includes = $(ICU_CFLAGS)
+icu_libs = $(ICU_LIBS)
+
 INCLUDES = \
 	-I$(top_srcdir) \
 	-I$(top_srcdir)/goo \
 	$(cms_includes) \
+	$(icu_includes) \
 	$(splash_includes) \
 	$(cairo_includes) \
 	$(arthur_includes) \
@@ -165,6 +169,7 @@
 	$(top_builddir)/goo/libgoo.la \
 	$(top_builddir)/fofi/libfofi.la \
 	$(cms_libs) \
+	$(icu_libs) \
 	$(splash_libs) \
 	$(libtiff_libs) \
 	$(libjpeg_libs) \
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -48,6 +48,10 @@
 #include <math.h>
 #include <float.h>
 #include <ctype.h>
+#if HAVE_ICU
+#include <unicode/ucnv.h>
+#include <unicode/uchriter.h>
+#endif
 #ifdef _WIN32
 #include <fcntl.h> // for O_BINARY
 #include <io.h>    // for setmode
@@ -2050,11 +2054,12 @@
 // TextPage
 //------------------------------------------------------------------------
 
-TextPage::TextPage(GBool rawOrderA) {
+TextPage::TextPage(GBool rawOrderA, ReorderingMode reordering_modeA) {
   int rot;
 
   refCnt = 1;
   rawOrder = rawOrderA;
+  reordering_mode = reordering_modeA;
   curWord = NULL;
   charPos = 0;
   curFont = NULL;
@@ -3556,7 +3561,71 @@
   double xMin1, yMin1, xMax1, yMax1;
   GBool found;
 
-  //~ needs to handle right-to-left text
+#if HAVE_ICU
+  // Pairs each TextPage (inverse) reordering mode with the direct ICU mode
+  // applied to the search string; the sentinel entry ends the table.
+  static struct {
+    ReorderingMode inverse_mode;
+    UBiDiReorderingMode direct_mode;
+  } reordering_inverse_to_direct[] =
+  {
+    // all numbers ltr
+    {ReorderingNumbersAsL, UBIDI_REORDER_GROUP_NUMBERS_WITH_R},
+    // windows
+    {ReorderingNumbersSpecial, UBIDI_REORDER_NUMBERS_SPECIAL},
+    // ICU's default (plain Unicode) algorithm
+    {ReorderingLikeDirect, UBIDI_REORDER_DEFAULT},
+    {ReorderingSentinel, UBIDI_REORDER_NUMBERS_SPECIAL}
+  };
+
+  // Skip reordering for ReorderingNotNeeded, as dumpFragment() does.
+  if (reordering_mode != ReorderingNotNeeded && len > 0) {
+    UBiDiReorderingMode ubidi_reordering_mode = UBIDI_REORDER_NUMBERS_SPECIAL;
+    UBiDiLevel level = (primaryLR) ? UBIDI_DEFAULT_LTR : UBIDI_DEFAULT_RTL;
+    UErrorCode err = U_ZERO_ERROR;
+    UConverter *conv32 = ucnv_open("UTF-32_PlatformEndian", &err);
+    UBiDi *bidi = ubidi_open();
+    UChar *input = NULL, *output = NULL;
+    int32_t ulen, olen, nBytes;
+
+    for (i = 0;
+         reordering_inverse_to_direct[i].inverse_mode != ReorderingSentinel;
+         ++i) {
+      if (reordering_mode == reordering_inverse_to_direct[i].inverse_mode) {
+        ubidi_reordering_mode = reordering_inverse_to_direct[i].direct_mode;
+        break;
+      }
+    }
+    if (bidi && conv32) {
+      // Preflight: only the required UTF-16 length is wanted, so the
+      // status this call leaves behind is reset.
+      ulen = ucnv_toUChars(conv32, NULL, 0,
+                           (const char *) s, len * sizeof(Unicode), &err);
+      err = U_ZERO_ERROR;
+      input = new UChar[ulen];
+      output = new UChar[ulen];
+      ulen = ucnv_toUChars(conv32, input, ulen,
+                           (const char *) s, len * sizeof(Unicode), &err);
+      ubidi_setReorderingMode(bidi, ubidi_reordering_mode);
+      ubidi_setReorderingOptions(bidi, UBIDI_OPTION_REMOVE_CONTROLS);
+      // ICU counts UTF-16 units (ulen), not code points (len).
+      ubidi_setPara(bidi, input, ulen, level, NULL, &err);
+      olen = ubidi_writeReordered(bidi, output, ulen,
+                                  UBIDI_DO_MIRRORING, &err);
+      nBytes = ucnv_fromUChars(conv32, (char *) s, len * sizeof(Unicode),
+                               output, olen, &err);
+      if (U_SUCCESS(err)) {
+        // Removing BiDi controls can shorten the string.  NOTE(review):
+        // assumes len is findText()'s by-value length argument - confirm.
+        len = nBytes / sizeof(Unicode);
+      }
+    }
+    delete[] input;
+    delete[] output;
+    ubidi_close(bidi);
+    ucnv_close(conv32);
+  }
+#endif
 
   if (rawOrder) {
     return gFalse;
@@ -5107,89 +5176,86 @@
 
 int TextPage::dumpFragment(Unicode *text, int len, UnicodeMap *uMap,
 			   GooString *s) {
-  char lre[8], rle[8], popdf[8], buf[8];
-  int lreLen, rleLen, popdfLen, n;
-  int nCols, i, j, k;
-
-  nCols = 0;
+  char buf[8];
+  int nCols = 0, i, n;
+
+#if HAVE_ICU
+  if (reordering_mode != ReorderingNotNeeded && uMap->isUnicode()) {
 
-  if (uMap->isUnicode()) {
+    char lre[8], rle[8], popdf[8];
+    int lreLen, rleLen, popdfLen;
+    UErrorCode err = U_ZERO_ERROR;
+    UBiDiLevel level = (primaryLR) ? UBIDI_DEFAULT_LTR : UBIDI_DEFAULT_RTL;
+    UConverter *conv32 = ucnv_open("UTF-32_PlatformEndian", &err);
+    UBiDi *bidi = ubidi_open();
+    UChar *output = NULL, *input = NULL;
+    int32_t ulen, olen = 0;
+    GBool reordered = gFalse;
 
     lreLen = uMap->mapUnicode(0x202a, lre, sizeof(lre));
     rleLen = uMap->mapUnicode(0x202b, rle, sizeof(rle));
     popdfLen = uMap->mapUnicode(0x202c, popdf, sizeof(popdf));
 
-    if (primaryLR) {
-
-      i = 0;
-      while (i < len) {
-	// output a left-to-right section
-	for (j = i; j < len && !unicodeTypeR(text[j]); ++j) ;
-	for (k = i; k < j; ++k) {
-	  n = uMap->mapUnicode(text[k], buf, sizeof(buf));
-	  s->append(buf, n);
-	  ++nCols;
-	}
-	i = j;
-	// output a right-to-left section
-	for (j = i;
-	     j < len && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j]));
-	     ++j) ;
-	if (j > i) {
-	  s->append(rle, rleLen);
-	  for (k = j - 1; k >= i; --k) {
-	    n = uMap->mapUnicode(text[k], buf, sizeof(buf));
-	    s->append(buf, n);
-	    ++nCols;
-	  }
-	  s->append(popdf, popdfLen);
-	  i = j;
-	}
-      }
-
-    } else {
-
-      // Note: This code treats numeric characters (European and
-      // Arabic/Indic) as left-to-right, which isn't strictly correct
-      // (incurs extra LRE/POPDF pairs), but does produce correct
-      // visual formatting.
-      s->append(rle, rleLen);
-      i = len - 1;
-      while (i >= 0) {
-	// output a right-to-left section
-	for (j = i;
-	     j >= 0 && !(unicodeTypeL(text[j]) || unicodeTypeNum(text[j]));
-	     --j) ;
-	for (k = i; k > j; --k) {
-	  n = uMap->mapUnicode(text[k], buf, sizeof(buf));
-	  s->append(buf, n);
-	  ++nCols;
-	}
-	i = j;
-	// output a left-to-right section
-	for (j = i; j >= 0 && !unicodeTypeR(text[j]); --j) ;
-	if (j < i) {
-	  s->append(lre, lreLen);
-	  for (k = j + 1; k <= i; ++k) {
-	    n = uMap->mapUnicode(text[k], buf, sizeof(buf));
-	    s->append(buf, n);
-	    ++nCols;
-	  }
-	  s->append(popdf, popdfLen);
-	  i = j;
-	}
-      }
-      s->append(popdf, popdfLen);
-
-    }
-
-  } else {
+    if (bidi && conv32) {
+      // Preflight: only the required UTF-16 length is wanted, so the
+      // status this call leaves behind is reset.
+      ulen = ucnv_toUChars(conv32, NULL, 0,
+                           (const char *) text, len * sizeof(Unicode), &err);
+      err = U_ZERO_ERROR;
+      input = new UChar[ulen];
+      output = new UChar[ulen];
+      ulen = ucnv_toUChars(conv32, input, ulen,
+                           (const char *) text, len * sizeof(Unicode), &err);
+      ubidi_setReorderingMode(bidi, (UBiDiReorderingMode) reordering_mode);
+      ubidi_setReorderingOptions(bidi, UBIDI_OPTION_REMOVE_CONTROLS);
+      // ICU counts UTF-16 units (ulen), not code points (len).
+      ubidi_setPara(bidi, input, ulen, level, NULL, &err);
+      olen = ubidi_writeReordered(bidi, output, ulen,
+                                  UBIDI_DO_MIRRORING, &err);
+      reordered = U_SUCCESS(err) ? gTrue : gFalse;
+    }
+    if (reordered) {
+      // The reordered text is only written to s; the caller's text buffer
+      // is left untouched.
+      if (primaryLR) {
+        s->append(lre, lreLen);
+      } else {
+        s->append(rle, rleLen);
+      }
+      // Walk by code point so surrogate pairs reach mapUnicode() whole.
+      UCharCharacterIterator iter(output, olen);
+      for (iter.setToStart(); iter.hasNext(); ) {
+        n = uMap->mapUnicode(iter.next32PostInc(), buf, sizeof(buf));
+        s->append(buf, n);
+        ++nCols;
+      }
+      s->append(popdf, popdfLen);
+    } else {
+      // ICU failed: emit the fragment unreordered rather than dropping it.
+      for (i = 0; i < len; ++i) {
+        n = uMap->mapUnicode(text[i], buf, sizeof(buf));
+        s->append(buf, n);
+        ++nCols;
+      }
+    }
+    delete[] input;
+    delete[] output;
+    ubidi_close(bidi);
+    ucnv_close(conv32);
+  }
+  else {
+#endif
     for (i = 0; i < len; ++i) {
       n = uMap->mapUnicode(text[i], buf, sizeof(buf));
       s->append(buf, n);
-      nCols += n;
+      // Unicode maps count one column per character, as the code this
+      // change replaces did; other maps keep adding n.
+      // NOTE(review): this path no longer reorders right-to-left runs.
+      nCols += uMap->isUnicode() ? 1 : n;
     }
+#if (HAVE_ICU)
   }
+#endif
 
   return nCols;
 }