From 9d2747c925676a05e4c683d28878f60cc59353bc Mon Sep 17 00:00:00 2001 From: Jason Crain Date: Sat, 3 Mar 2012 10:45:45 -0600 Subject: [PATCH 2/2] Fixes display for selected glyphs in ActualText span When text is selected in a pdf containing an ActualText span, glyphs are not visible because the CharCode is 0. This sets the correct charcodes. If the number of glyphs does not match the number of ActualText characters, glyphs may be added without included text, or with multiple text characters. --- poppler/TextOutputDev.cc | 93 +++++++++++++++++++++++++-------------------- poppler/TextOutputDev.h | 9 +++- 2 files changed, 58 insertions(+), 44 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 1d19fb0..63ae4fb 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -5250,7 +5250,7 @@ ActualText::ActualText(TextPage *out) { ActualText::~ActualText() { if (actualText) - delete actualText; + delete [] actualText; text->decRefCnt(); } @@ -5262,66 +5262,77 @@ void ActualText::addChar(GfxState *state, double x, double y, return; } - // Inside ActualText span. 
- if (!actualTextNBytes) { - actualTextX0 = x; - actualTextY0 = y; + if (actualTextNBytes) { + Unicode *uni = NULL; + int length = 0; + + if (actualTextPos < actualTextLength) { + uni = actualText + actualTextPos++; + length = 1; + } + + if (uni && uni[0] >= 0xD800 && uni[0] <= 0xDC00 && (actualTextPos < actualTextLength)) { + // surrogate pair + length = 2; + actualTextPos++; + } + + text->addChar(state, actualTextX0, actualTextY0, actualTextDX, actualTextDY, + charcode, actualTextNBytes, uni, length); } - actualTextX1 = x + dx; - actualTextY1 = y + dy; - actualTextNBytes += nBytes; + + actualTextX0 = x; + actualTextY0 = y; + actualTextDX = dx; + actualTextDY = dy; + actualTextNBytes = nBytes; + charcode = c; } void ActualText::begin(GfxState *state, GooString *text) { if (actualText) - delete actualText; - actualText = new GooString(text); + delete [] actualText; + actualText = NULL; + actualTextPos = 0; actualTextNBytes = 0; -} -void ActualText::end(GfxState *state) { - // ActualText span closed. 
Output the span text and the - // extents of all the glyphs inside the span - - if (actualTextNBytes) { + if (text) { char *uniString = NULL; - Unicode *uni; - int length, i; + int i; - if (!actualText->hasUnicodeMarker()) { - if (actualText->getLength() > 0) { + if (!text->hasUnicodeMarker()) { + if (text->getLength() > 0) { //non-unicode string -- assume pdfDocEncoding and //try to convert to UTF16BE - uniString = pdfDocEncodingToUTF16(actualText, &length); + uniString = pdfDocEncodingToUTF16(text, &actualTextLength); } else { - length = 0; + actualTextLength = 0; } } else { - uniString = actualText->getCString(); - length = actualText->getLength(); + uniString = text->getCString(); + actualTextLength = text->getLength(); } - if (length < 3) - length = 0; + if (actualTextLength < 3) + actualTextLength = 0; else - length = length/2 - 1; - uni = new Unicode[length]; - for (i = 0 ; i < length; i++) - uni[i] = ((uniString[2 + i*2] & 0xff)<<8)|(uniString[3 + i*2] & 0xff); - - // now that we have the position info for all of the text inside - // the marked content span, we feed the "ActualText" back through - // text->addChar() - text->addChar(state, actualTextX0, actualTextY0, - actualTextX1 - actualTextX0, actualTextY1 - actualTextY0, - 0, actualTextNBytes, uni, length); - - delete [] uni; - if (!actualText->hasUnicodeMarker()) + actualTextLength = actualTextLength/2 - 1; + actualText = new Unicode[actualTextLength]; + for (i = 0 ; i < actualTextLength; i++) + actualText[i] = ((uniString[2 + i*2] & 0xff)<<8)|(uniString[3 + i*2] & 0xff); + + if (!text->hasUnicodeMarker()) delete [] uniString; } +} - delete actualText; +void ActualText::end(GfxState *state) { + if (actualTextNBytes) { + text->addChar(state, actualTextX0, actualTextY0, actualTextDX, actualTextDY, + charcode, actualTextNBytes, actualText + actualTextPos, + actualTextLength - actualTextPos); + } + delete [] actualText; actualText = NULL; actualTextNBytes = 0; } diff --git a/poppler/TextOutputDev.h 
b/poppler/TextOutputDev.h index e017fbd..231a888 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -660,12 +660,15 @@ public: private: TextPage *text; - GooString *actualText; // replacement text for the span + Unicode *actualText; // replacement text for the span + int actualTextLength; + int actualTextPos; double actualTextX0; double actualTextY0; - double actualTextX1; - double actualTextY1; + double actualTextDX; + double actualTextDY; int actualTextNBytes; + CharCode charcode; }; -- 1.7.5.4