From accb938021cf2bd0f7ae37546c601623f5dc1f1b Mon Sep 17 00:00:00 2001 From: danigm Date: Mon, 10 May 2010 10:14:57 +0200 Subject: [PATCH] TextPage::getText in rawOrder now counts chars Previously, getText with rawOrder only checked whether whole words were within the limits. This commit also adds the individual chars of a partially contained word that are within the limits. --- poppler/TextOutputDev.cc | 43 ++++++++++++++++++++++++++++++++----------- test/gettext-test.cc | 1 + 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 28b864b..4c42b30 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -3626,22 +3626,43 @@ GooString *TextPage::getText(double xMin, double yMin, TextWordList *wordlist; wordlist = makeWordList(gFalse); int word_length = wordlist->getLength (); - TextWord *word; + TextWord *word=NULL, *prev_word=NULL; + const Unicode *word_char; + char buf[8]; + bool outOfBound = false; double xMinA, yMinA, xMaxA, yMaxA; - for (int i=0; i < word_length; i++) - { + for (int i=0; i < word_length; i++) { word = wordlist->get (i); + + if (prev_word && word->primaryDelta (prev_word) <= 0) { + if (!outOfBound) + s->append(space, spaceLen); + } else { + s->append(eol, eolLen); + } + word->getBBox (&xMinA, &yMinA, &xMaxA, &yMaxA); - if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) + if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) { s->append (word->getText ()); - else - continue; - if (word->getNext() && word->getNext()->primaryDelta (word) <= 0) - { - s->append(space, spaceLen); - } else { - s->append(eol, eolLen); + prev_word = word; + outOfBound = false; + } + else if (xMinA < xMax && yMinA < yMax) { + for (int i=0; i < word->getLength(); i++) { + int n; + word->getCharBBox(i, &xMinA, &yMinA, &xMaxA, &yMaxA); + if (xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax) { + word_char = word->getChar(i); + n = uMap->mapUnicode(*word_char, buf, sizeof(buf)); + s->append(buf, n); + } + } + prev_word = 
word; + outOfBound = true; + } + else { + outOfBound = true; } } return s; diff --git a/test/gettext-test.cc b/test/gettext-test.cc index 0c32a9e..58f07a9 100644 --- a/test/gettext-test.cc +++ b/test/gettext-test.cc @@ -50,6 +50,7 @@ int main (int argc, char *argv[]) rect = page->getCropBox(); s = textOut->getText(rect->x1, rect->y1, rect->x2, rect->y2); + //s = textOut->getText(0, 0, 200, 1000); result = s->getCString (); printf ("%s\n", result); -- 1.7.1