From 85ecd125ca340fadc9126faabd03542c32d96969 Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Thu, 8 Mar 2012 21:55:33 +1030 Subject: [PATCH 4/4] TextOutput: output all charcodes in an ActualText span Text within an ActualText span is not selectable unless all charcodes are output. The unicode values of the charcodes are replaced with the replacement text evenly distributed across the charcodes. --- poppler/TextOutputDev.cc | 73 +++++++++++++++++++++++++++++----------------- poppler/TextOutputDev.h | 18 +++++++---- 2 files changed, 58 insertions(+), 33 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index b7fd582..8e9e1cf 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -5224,12 +5224,16 @@ ActualText::ActualText(TextPage *out) { out->incRefCnt(); text = out; actualText = NULL; - actualTextNBytes = 0; + lenGlyphs = 0; + sizeGlyphs = 0; + glyphs = NULL; } ActualText::~ActualText() { if (actualText) delete actualText; + if (glyphs) + gfree(glyphs); text->decRefCnt(); } @@ -5242,43 +5246,58 @@ void ActualText::addChar(GfxState *state, double x, double y, } // Inside ActualText span. - if (!actualTextNBytes) { - actualTextX0 = x; - actualTextY0 = y; - } - actualTextX1 = x + dx; - actualTextY1 = y + dy; - actualTextNBytes += nBytes; + if (lenGlyphs == sizeGlyphs) { + sizeGlyphs += 16; + glyphs = (Glyph *)greallocn(glyphs, sizeGlyphs, sizeof(Glyph)); + } + glyphs[lenGlyphs].x = x; + glyphs[lenGlyphs].y = y; + glyphs[lenGlyphs].dx = dx; + glyphs[lenGlyphs].dy = dy; + glyphs[lenGlyphs].c = c; + glyphs[lenGlyphs].nBytes = nBytes; + lenGlyphs++; } void ActualText::begin(GfxState *state, GooString *text) { if (actualText) delete actualText; actualText = new GooString(text); - actualTextNBytes = 0; + lenGlyphs = 0; } void ActualText::end(GfxState *state) { - // ActualText span closed. Output the span text and the - // extents of all the glyphs inside the span - - if (actualTextNBytes) { - Unicode *uni; - int length; - - // now that we have the position info for all of the text inside - // the marked content span, we feed the "ActualText" back through - // text->addChar() - length = TextStringToUCS4(actualText, &uni); - text->addChar(state, actualTextX0, actualTextY0, - actualTextX1 - actualTextX0, actualTextY1 - actualTextY0, - 0, actualTextNBytes, uni, length); - gfree(uni); - } - + // ActualText span closed. Output the glyphs that were output + // inside the span with the span text distributed across the glyphs. + // Note: if there are no glyphs in the span the replacement text will + // not be output. This is the same as acroread behavior. + + Unicode *uni; + int length, i, first, count; + double pos; + + length = TextStringToUCS4(actualText, &uni); + + // Output each glyph replacing the unicode values of glyphs with the + // replacement text. The replacement text is evenly distributed + // across the glyphs. + first = 0; + pos = 0.0; + for (i = 0; i < lenGlyphs; i++) { + pos += (double)length/lenGlyphs; + count = (int)pos - first; + // If this is the last glyph ensure all remaining text is included + // as pos may be < length due to rounding errors. + if (i == lenGlyphs - 1) + count = length - first; + text->addChar(state, glyphs[i].x, glyphs[i].y, glyphs[i].dx, glyphs[i].dy, + glyphs[i].c, glyphs[i].nBytes, uni + first, count); + first += count; + } + + gfree(uni); delete actualText; actualText = NULL; - actualTextNBytes = 0; } //------------------------------------------------------------------------ diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index eec7b79..ec05332 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -645,6 +645,7 @@ private: // ActualText //------------------------------------------------------------------------ + class ActualText { public: // Create an ActualText @@ -661,13 +662,18 @@ private: TextPage *text; GooString *actualText; // replacement text for the span - double actualTextX0; - double actualTextY0; - double actualTextX1; - double actualTextY1; - int actualTextNBytes; + int lenGlyphs; + int sizeGlyphs; + struct Glyph { + double x; + double y; + double dx; + double dy; + CharCode c; + int nBytes; + } *glyphs; }; - + //------------------------------------------------------------------------ // TextOutputDev -- 1.7.5.4