REVIEW PURPOSES ONLY NOT INTENDED FOR SHIPPING
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index c9db1e7..b86ce96 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -469,6 +469,114 @@ void TextWord::merge(TextWord *word) {
   len += word->len;
 }
 
+// Returns the combining form of the spacing diacritic u (for example
+// U+00B4 ACUTE ACCENT -> U+0301 COMBINING ACUTE ACCENT), or 0 if u is not
+// one of the characters listed below.
+static Unicode getCombiningChar(Unicode u) {
+  // probably should be an array with binary search
+  // might also vary with which character it's being combined with, ex. copyrtcir
+  switch (u) {
+  case 0x002e: return 0x0323; // dot below - might be confused with period
+  case 0x0060: return 0x0300; // grave
+  case 0x00a8: return 0x0308; // dieresis
+  case 0x00af: return 0x0304; // macron above - same char is used for macron below
+  case 0x00b4: return 0x0301; // acute
+  case 0x00b8: return 0x0327; // cedilla
+  case 0x02c6: return 0x0302; // circumflex
+  case 0x02c7: return 0x030c; // caron
+  case 0x02d8: return 0x0306; // breve
+  case 0x02d9: return 0x0307; // dot above
+  case 0x02da: return 0x030a; // ring above
+  case 0x02dc: return 0x0303; // tilde
+  case 0x02dd: return 0x030b; // double acute accent
+  default: return 0;
+  }
+}
+
+// Tries to attach the character u (char code c), drawn at (x, y) with
+// advance dx, to the last character of this word as a base character
+// followed by a combining mark, instead of adding it as a separate
+// character.  This succeeds only if the new character overlaps the last
+// one and one of the two has a combining form (see getCombiningChar); the
+// base character is always stored before the mark, whichever of the two
+// was drawn first.  charPosA and charLen are the position and length of
+// the new character as tracked by the caller; state and dy are currently
+// unused.  Returns gTrue if the character was added to the word, gFalse
+// if the caller should handle it in the normal way.
+GBool TextWord::addCombining(GfxState *state, TextFontInfo *fontA, double x,
+                             double y, double dx, double dy, int charPosA,
+                             int charLen, CharCode c, Unicode u, Matrix textMatA) {
+  // check for rotation ?
+  // might be nice to handle multiple combining marks
+
+  // only combine overlapping characters
+  // might need to differentiate between combining marks near top and bottom, for example macron
+  if (len == 0 ||
+      x + dx < edge[len-1] || x > edge[len] - (edge[len] - edge[len-1]) / 3 ||
+      y < yMin || y > yMax + (yMax - yMin) / 2)
+    return gFalse;
+
+  // the mark may have been drawn before its base character (the previous
+  // character is the mark) or after it (the new character is the mark)
+  Unicode prevMark = getCombiningChar(text[len-1]);
+  Unicode newMark = prevMark != 0 ? 0 : getCombiningChar(u);
+  if (prevMark == 0 && newMark == 0)
+    return gFalse;
+
+  // make room for one more character; edge and charPos hold one entry
+  // more than the other arrays
+  if (len == size) {
+    size += 16;
+    text = (Unicode *)greallocn(text, size, sizeof(Unicode));
+    charcode = (CharCode *)greallocn(charcode, size, sizeof(CharCode));
+    edge = (double *)greallocn(edge, (size + 1), sizeof(double));
+    charPos = (int *)greallocn(charPos, size + 1, sizeof(int));
+    font = (TextFontInfo **)greallocn(font, size, sizeof(TextFontInfo *));
+    textMat = (Matrix *)greallocn(textMat, size, sizeof(Matrix));
+  }
+
+  if (prevMark != 0) {
+    // the previous character is the mark: move it, in combining form, to
+    // the new slot and put the base character in its old slot
+    text[len] = prevMark;
+    charcode[len] = charcode[len-1];
+    xMax = edge[len+1] = x + dx;
+    edge[len] = edge[len-1];
+    charPos[len] = charPos[len-1];
+    // the extra entry is the end of the last character, as in the other
+    // branch
+    charPos[len+1] = charPosA + charLen;
+    font[len] = font[len-1];
+    textMat[len] = textMat[len-1];
+
+    text[len-1] = u;
+    charcode[len-1] = c;
+    edge[len-1] = x;
+    font[len-1] = fontA;
+    textMat[len-1] = textMatA;
+
+    if (len == 1) {
+      // the word held only the mark so far; position it by the base
+      base = y;
+      xMin = x;
+    }
+  } else {
+    // the new character is the mark: append it in combining form
+    // simply use addChar ?
+    text[len] = newMark;
+    charcode[len] = c;
+    charPos[len] = charPosA;
+    charPos[len+1] = charPosA + charLen;
+    font[len] = fontA;
+    textMat[len] = textMatA;
+    xMax = edge[len+1] = edge[len]; // current char is probably wider
+    edge[len] = x;
+  }
+
+  ++len;
+  return gTrue;
+}
+
 inline int TextWord::primaryCmp(TextWord *word) {
   double cmp;
 
@@ -2316,6 +2424,20 @@ void TextPage::addChar(GfxState *state, double x, double y,
     return;
   }
 
+  state->getFontTransMat(&mat.m[0], &mat.m[1], &mat.m[2], &mat.m[3]);
+  mat.m[4] = x1;
+  mat.m[5] = y1;
+
+  // test for uLen > 1 ?
+  // might need to worry about dup text for bold/shadows
+  if (curWord && uLen == 1 &&
+      curWord->addCombining(state, curFont, x1, y1, w1, h1, charPos, nBytes, c,
+                            u[0], mat)) {
+    charPos += nBytes;
+    // set lastCharOverlap = gFalse ?
+    return;
+  }
+
   // start a new word if:
   // (1) this character doesn't fall in the right place relative to
   //     the end of the previous word (this places upper and lower
@@ -2372,10 +2494,6 @@ void TextPage::addChar(GfxState *state, double x, double y,
     beginWord(state);
   }
 
-  state->getFontTransMat(&mat.m[0], &mat.m[1], &mat.m[2], &mat.m[3]);
-  mat.m[4] = x1;
-  mat.m[5] = y1;
-
   // page rotation and/or transform matrices can cause text to be
   // drawn in reverse order -- in this case, swap the begin/end
   // coordinates and break text into individual chars
diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h
index 23fb3b7..827c9c0 100644
--- a/poppler/TextOutputDev.h
+++ b/poppler/TextOutputDev.h
@@ -128,6 +128,13 @@ public:
   // Merge <word> onto the end of <this>.
   void merge(TextWord *word);
 
+  // Try to combine the character u with the last character of this
+  // word as a base character plus combining mark.  Returns gTrue if the
+  // character was added to the word.
+  GBool addCombining(GfxState *state, TextFontInfo *fontA, double x,
+                     double y, double dx, double dy, int charPosA,
+                     int charLen, CharCode c, Unicode u, Matrix textMatA);
+
   // Compares <this> to <word>, returning -1 (<), 0 (=), or +1 (>),
   // based on a primary-axis comparison, e.g., x ordering if rot=0.
   int primaryCmp(TextWord *word);