From 5dc1cf5126f35286d83e7785f340132535aa7e30 Mon Sep 17 00:00:00 2001 From: Vladimir Ushakov Date: Mon, 16 May 2016 19:06:01 +0300 Subject: [PATCH] Get rid of ActualText class, move its functionality to TextPage. pdftotext -bbox-layout did not work after the first page because TextOutputDev::takeText() did not know about ActualText keeping an extra reference to the TextPage. The simplest fix would be not to use the broken takeText(), but on second thought I decided that the ActualText functionality is so tightly bound to the TextPage that it makes no sense to keep them apart. --- poppler/CairoOutputDev.cc | 25 +++------- poppler/CairoOutputDev.h | 1 - poppler/TextOutputDev.cc | 120 +++++++++++++++++++--------------------------- poppler/TextOutputDev.h | 40 ++++------------ 4 files changed, 66 insertions(+), 120 deletions(-) diff --git a/poppler/CairoOutputDev.cc b/poppler/CairoOutputDev.cc index b0987b3..8efed41 100644 --- a/poppler/CairoOutputDev.cc +++ b/poppler/CairoOutputDev.cc @@ -160,7 +160,6 @@ CairoOutputDev::CairoOutputDev() { knockoutCount = 0; text = NULL; - actualText = NULL; // the SA parameter supposedly defaults to false, but Acrobat // apparently hardwires it to true @@ -187,8 +186,6 @@ CairoOutputDev::~CairoOutputDev() { cairo_pattern_destroy (shape); if (text) text->decRefCnt(); - if (actualText) - delete actualText; } void CairoOutputDev::setCairo(cairo_t *cairo) @@ -216,16 +213,9 @@ void CairoOutputDev::setTextPage(TextPage *text) { if (this->text) this->text->decRefCnt(); - if (actualText) - delete actualText; - if (text) { - this->text = text; - this->text->incRefCnt(); - actualText = new ActualText(text); - } else { - this->text = NULL; - actualText = NULL; - } + this->text = text; + if (text) + text->incRefCnt(); } void CairoOutputDev::startDoc(PDFDoc *docA, @@ -1394,9 +1384,8 @@ void CairoOutputDev::drawChar(GfxState *state, double x, double y, } } - if (!text) - return; - actualText->addChar (state, x, y, dx, dy, code, nBytes, u, uLen); + 
if (text) + text->addChar (state, x, y, dx, dy, code, nBytes, u, uLen); } void CairoOutputDev::endString(GfxState *state) @@ -1553,13 +1542,13 @@ void CairoOutputDev::endTextObject(GfxState *state) { void CairoOutputDev::beginActualText(GfxState *state, GooString *text) { if (this->text) - actualText->begin(state, text); + this->text->beginActualText(state, text); } void CairoOutputDev::endActualText(GfxState *state) { if (text) - actualText->end(state); + text->endActualText(state); } static inline int splashRound(SplashCoord x) { diff --git a/poppler/CairoOutputDev.h b/poppler/CairoOutputDev.h index 103a326..1a61b1c 100644 --- a/poppler/CairoOutputDev.h +++ b/poppler/CairoOutputDev.h @@ -344,7 +344,6 @@ protected: GBool prescaleImages; TextPage *text; // text for the current page - ActualText *actualText; cairo_pattern_t *group; cairo_pattern_t *shape; diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index cf39bd8..dc46d3d 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -2363,6 +2363,7 @@ TextPage::TextPage(GBool rawOrderA) { underlines = new GooList(); links = new GooList(); mergeCombining = gTrue; + actualText.replacement = NULL; } TextPage::~TextPage() { @@ -2452,6 +2453,8 @@ void TextPage::clear() { fonts = new GooList(); underlines = new GooList(); links = new GooList(); + delete actualText.replacement; + actualText.replacement = NULL; } void TextPage::updateFont(GfxState *state) { @@ -2576,6 +2579,18 @@ void TextPage::addChar(GfxState *state, double x, double y, int wMode; Matrix mat; + if (actualText.replacement) { + // Inside actualText span. 
+ if (!actualText.nBytes) { + actualText.x0 = x; + actualText.y0 = y; + } + actualText.x1 = x + dx; + actualText.y1 = y + dy; + actualText.nBytes += nBytes; + return; + } + // subtract char and word spacing from the dx,dy values sp = state->getCharSpace(); if (c == (CharCode)0x20) { @@ -2752,6 +2767,37 @@ void TextPage::addLink(int xMin, int yMin, int xMax, int yMax, AnnotLink *link) links->append(new TextLink(xMin, yMin, xMax, yMax, link)); } +void TextPage::beginActualText(GfxState *state, GooString *text) +{ + delete actualText.replacement; + actualText.replacement = new GooString(text); + actualText.nBytes = 0; +} + +void TextPage::endActualText(GfxState *state) +{ + if (!actualText.replacement) + return; + + // ActualText span closed. Output the span text and the + // extents of all the glyphs inside the span + if (actualText.nBytes) { + Unicode *uni = NULL; + int length; + + // now that we have the position info for all of the text inside + // the marked content span, we feed the "ActualText" back through + // addChar() + length = TextStringToUCS4(actualText.replacement, &uni); + addChar(state, actualText.x0, actualText.y0, + actualText.x1 - actualText.x0, actualText.y1 - actualText.y0, + 0, actualText.nBytes, uni, length); + gfree(uni); + } + delete actualText.replacement; + actualText.replacement = NULL; +} + void TextPage::coalesce(GBool physLayout, double fixedPitch, GBool doHTML) { UnicodeMap *uMap; TextPool *pool; @@ -5475,70 +5521,6 @@ TextWordList *TextPage::makeWordList(GBool physLayout) { #endif //------------------------------------------------------------------------ -// ActualText -//------------------------------------------------------------------------ -ActualText::ActualText(TextPage *out) { - out->incRefCnt(); - text = out; - actualText = NULL; - actualTextNBytes = 0; -} - -ActualText::~ActualText() { - if (actualText) - delete actualText; - text->decRefCnt(); -} - -void ActualText::addChar(GfxState *state, double x, double y, - double dx, 
double dy, - CharCode c, int nBytes, Unicode *u, int uLen) { - if (!actualText) { - text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); - return; - } - - // Inside ActualText span. - if (!actualTextNBytes) { - actualTextX0 = x; - actualTextY0 = y; - } - actualTextX1 = x + dx; - actualTextY1 = y + dy; - actualTextNBytes += nBytes; -} - -void ActualText::begin(GfxState *state, GooString *text) { - if (actualText) - delete actualText; - actualText = new GooString(text); - actualTextNBytes = 0; -} - -void ActualText::end(GfxState *state) { - // ActualText span closed. Output the span text and the - // extents of all the glyphs inside the span - - if (actualTextNBytes) { - Unicode *uni = NULL; - int length; - - // now that we have the position info for all of the text inside - // the marked content span, we feed the "ActualText" back through - // text->addChar() - length = TextStringToUCS4(actualText, &uni); - text->addChar(state, actualTextX0, actualTextY0, - actualTextX1 - actualTextX0, actualTextY1 - actualTextY0, - 0, actualTextNBytes, uni, length); - gfree(uni); - } - - delete actualText; - actualText = NULL; - actualTextNBytes = 0; -} - -//------------------------------------------------------------------------ // TextOutputDev //------------------------------------------------------------------------ @@ -5570,7 +5552,6 @@ TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA, } else { error(errIO, -1, "Couldn't open text file '{0:s}'", fileName); ok = gFalse; - actualText = NULL; return; } outputFunc = &TextOutputDev_outputToFile; @@ -5580,7 +5561,6 @@ TextOutputDev::TextOutputDev(char *fileName, GBool physLayoutA, // set up text object text = new TextPage(rawOrderA); - actualText = new ActualText(text); } TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, @@ -5594,7 +5574,6 @@ TextOutputDev::TextOutputDev(TextOutputFunc func, void *stream, rawOrder = rawOrderA; doHTML = gFalse; text = new TextPage(rawOrderA); - actualText = new 
ActualText(text); ok = gTrue; } @@ -5608,7 +5587,6 @@ TextOutputDev::~TextOutputDev() { if (text) { text->decRefCnt(); } - delete actualText; } void TextOutputDev::startPage(int pageNum, GfxState *state, XRef *xref) { @@ -5641,7 +5619,7 @@ void TextOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode c, int nBytes, Unicode *u, int uLen) { - actualText->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); + text->addChar(state, x, y, dx, dy, c, nBytes, u, uLen); } void TextOutputDev::incCharCount(int nChars) { @@ -5650,12 +5628,12 @@ void TextOutputDev::incCharCount(int nChars) { void TextOutputDev::beginActualText(GfxState *state, GooString *text) { - actualText->begin(state, text); + this->text->beginActualText(state, text); } void TextOutputDev::endActualText(GfxState *state) { - actualText->end(state); + text->endActualText(state); } void TextOutputDev::stroke(GfxState *state) { diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 7da797c..477d91c 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -566,6 +566,10 @@ public: // Add a hyperlink. void addLink(int xMin, int yMin, int xMax, int yMax, AnnotLink *link); + // Mark the actual text replacement + void beginActualText(GfxState *state, GooString *text); + void endActualText(GfxState *state); + // Coalesce strings that look like parts of the same line. 
void coalesce(GBool physLayout, double fixedPitch, GBool doHTML); @@ -681,6 +685,12 @@ private: GooList *underlines; // [TextUnderline] GooList *links; // [TextLink] + struct { + GooString *replacement; // replacement text for the span + double x0, y0, x1, y1; + int nBytes; + } actualText; + int refCnt; friend class TextLine; @@ -693,34 +703,6 @@ private: }; //------------------------------------------------------------------------ -// ActualText -//------------------------------------------------------------------------ - -class ActualText { -public: - // Create an ActualText - ActualText(TextPage *out); - ~ActualText(); - - void addChar(GfxState *state, double x, double y, - double dx, double dy, - CharCode c, int nBytes, Unicode *u, int uLen); - void begin(GfxState *state, GooString *text); - void end(GfxState *state); - -private: - TextPage *text; - - GooString *actualText; // replacement text for the span - double actualTextX0; - double actualTextY0; - double actualTextX1; - double actualTextY1; - int actualTextNBytes; -}; - - -//------------------------------------------------------------------------ // TextOutputDev //------------------------------------------------------------------------ @@ -877,8 +859,6 @@ private: GBool rawOrder; // keep text in content stream order GBool doHTML; // extra processing for HTML conversion GBool ok; // set up ok? - - ActualText *actualText; }; #endif -- 2.5.5