From 532eb71339826e9f8b8baf4f1ad33d6cc20a6b83 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Campos Date: Sat, 2 Nov 2013 14:07:07 +0100 Subject: [PATCH] TextOutputDev: Honor the selection rectangle passed to TextPage::getSelectionWords() Make TextPage::getSelectionWords() return a list of TextWordSelection instead of a list of TextWord so that it's possible to know which characters of the word are inside the given selection rectangle. Adapt the glib frontend to the new API and use the selection bounds instead of the whole word to build the list of characters in poppler_page_get_text_layout() and poppler_page_get_text_attributes(), which ensures the number of glyphs returned is in sync with the number of characters returned by poppler_page_get_text(). --- glib/poppler-page.cc | 22 ++++++++++++++-------- poppler/TextOutputDev.cc | 40 ++++++++-------------------------------- poppler/TextOutputDev.h | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 40 deletions(-) diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index 9115b78..10d51c3 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -2006,8 +2006,8 @@ poppler_page_get_text_layout (PopplerPage *page, n_rects += line_words->getLength() - 1; for (j = 0; j < line_words->getLength(); j++) { - TextWord *word = (TextWord *)line_words->get(j); - n_rects += word->getLength(); + TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j); + n_rects += word_sel->getEnd() - word_sel->getBegin(); } } @@ -2019,8 +2019,11 @@ poppler_page_get_text_layout (PopplerPage *page, GooList *line_words = word_list[i]; for (j = 0; j < line_words->getLength(); j++) { - TextWord *word = (TextWord *)line_words->get(j); - for (k = 0; k < word->getLength(); k++) + TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j); + TextWord *word = word_sel->getWord(); + int end = word_sel->getEnd(); + + for (k = word_sel->getBegin(); k < end; k++) { rect = *rectangles + offset; word->getCharBBox (k, 
@@ -2036,9 +2039,9 @@ poppler_page_get_text_layout (PopplerPage *page, if (j < line_words->getLength() - 1) { - TextWord *next_word = (TextWord *)line_words->get(j + 1); + TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j + 1); - next_word->getBBox(&x3, &y3, &x4, &y4); + word_sel->getWord()->getBBox(&x3, &y3, &x4, &y4); // space is from one word to other and with the same height as // first word. rect->x1 = x2; @@ -2148,9 +2151,12 @@ poppler_page_get_text_attributes (PopplerPage *page) GooList *line_words = word_list[i]; for (j = 0; j < line_words->getLength(); j++) { - word = (TextWord *)line_words->get(j); + TextWordSelection *word_sel = (TextWordSelection *)line_words->get(j); + int end = word_sel->getEnd(); + + word = word_sel->getWord(); - for (word_i = 0; word_i < word->getLength (); word_i++) + for (word_i = word_sel->getBegin(); word_i < end; word_i++) { if (!prev_word || !word_text_attributes_equal (word, word_i, prev_word, prev_word_i)) { diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 4adb3c2..7c2ca78 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4000,21 +4000,6 @@ public: PDFRectangle *selection) = 0; protected: - - class TextWordSelection { - public: - TextWordSelection(TextWord *word, int begin, int end) - : word(word), - begin(begin), - end(end) - { - } - - TextWord *word; - int begin; - int end; - }; - TextPage *page; }; @@ -4044,7 +4029,7 @@ public: void endPage(); GooString *getText(void); - GooList **getWordList(int *nLines); + GooList **takeWordList(int *nLines); private: @@ -4179,27 +4164,18 @@ GooString *TextSelectionDumper::getText (void) return text; } -GooList **TextSelectionDumper::getWordList(int *nLinesOut) +GooList **TextSelectionDumper::takeWordList(int *nLinesOut) { - int i, j; + GooList **returnValue = lines; + *nLinesOut = nLines; if (nLines == 0) return NULL; - GooList **wordList = (GooList **)gmallocn(nLines, sizeof(GooList *)); - - for (i = 0; i < nLines; i++) { 
- GooList *lineWords = lines[i]; - wordList[i] = new GooList(); - for (j = 0; j < lineWords->getLength(); j++) { - TextWordSelection *sel = (TextWordSelection *)lineWords->get(j); - wordList[i]->append(sel->word); - } - } - - *nLinesOut = nLines; + nLines = 0; + lines = NULL; - return wordList; + return returnValue; } class TextSelectionSizer : public TextSelectionVisitor { @@ -4793,7 +4769,7 @@ GooList **TextPage::getSelectionWords(PDFRectangle *selection, visitSelection(&dumper, selection, style); dumper.endPage(); - return dumper.getWordList(nLines); + return dumper.takeWordList(nLines); } GBool TextPage::findCharRange(int pos, int length, diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 56736b3..23fb3b7 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -478,6 +478,26 @@ private: #endif // TEXTOUT_WORD_LIST +class TextWordSelection { +public: + TextWordSelection(TextWord *word, int begin, int end) + : word(word), begin(begin), end(end) + { + } + + TextWord * getWord() const { return word; } + int getBegin() const { return begin; } + int getEnd() const { return end; } + +private: + TextWord *word; + int begin; + int end; + + friend class TextSelectionPainter; + friend class TextSelectionDumper; +}; + //------------------------------------------------------------------------ // TextPage //------------------------------------------------------------------------ -- 1.8.4.rc3