From 50f6ae59d101fb60bd61e8b8063eac10121074da Mon Sep 17 00:00:00 2001
From: danigm
Date: Thu, 6 May 2010 23:52:04 +0200
Subject: [PATCH] TextData::getText return text in rawOrder

---
 poppler/TextOutputDev.cc |   30 +++++++++++++++++++---
 test/CMakeLists.txt      |    7 +++-
 test/gettext-test.cc     |   63 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 6 deletions(-)
 create mode 100644 test/gettext-test.cc

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index ef9c486..28b864b 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -3602,10 +3602,6 @@ GooString *TextPage::getText(double xMin, double yMin,
 
   s = new GooString();
 
-  if (rawOrder) {
-    return s;
-  }
-
   // get the output encoding
   if (!(uMap = globalParams->getTextEncoding())) {
     return s;
@@ -3626,6 +3622,32 @@ GooString *TextPage::getText(double xMin, double yMin,
     break;
   }
 
+  if (rawOrder) {
+    // Raw order: walk the words as produced by makeWordList() and keep
+    // those whose bounding box lies strictly inside the rectangle.
+    TextWordList *wordlist = makeWordList(gFalse);
+    int word_length = wordlist->getLength ();
+    double xMinA, yMinA, xMaxA, yMaxA;
+
+    for (int i = 0; i < word_length; i++) {
+      TextWord *word = wordlist->get (i);
+      word->getBBox (&xMinA, &yMinA, &xMaxA, &yMaxA);
+      if (!(xMinA > xMin && yMinA > yMin && xMaxA < xMax && yMaxA < yMax))
+        continue;
+      GooString *wordText = word->getText ();  // freshly allocated copy
+      s->append (wordText);
+      delete wordText;
+      if (word->getNext() && word->getNext()->primaryDelta (word) <= 0) {
+        s->append(space, spaceLen);
+      } else {
+        s->append(eol, eolLen);
+      }
+    }
+    delete wordlist;    // release the list built above
+    uMap->decRefCnt();  // drop the reference taken by getTextEncoding()
+    return s;
+  }
+
   //~ writing mode (horiz/vert)
 
   // collect the line fragments that are in the rectangle
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index acb867b..fadcd45 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,4 +1,3 @@
-
 if (ENABLE_SPLASH)
 
   if (HAVE_NANOSLEEP OR LIB_RT_HAS_NANOSLEEP)
@@ -58,4 +57,8 @@ set (pdf_fullrewrite_SRCS
 add_executable(pdf-fullrewrite ${pdf_fullrewrite_SRCS})
 target_link_libraries(pdf-fullrewrite poppler)
 
-
+set (gettext_SRCS
+  gettext-test.cc
+  )
+add_executable(gettext-test ${gettext_SRCS})
+target_link_libraries(gettext-test poppler)
diff --git a/test/gettext-test.cc b/test/gettext-test.cc
new file mode 100644
index 0000000..0c32a9e
--- /dev/null
+++ b/test/gettext-test.cc
@@ -0,0 +1,63 @@
+#include "config.h"
+#include "Page.h"
+#include <stdio.h>
+#include "GlobalParams.h"
+#include "Error.h"
+#include "PDFDoc.h"
+#include "goo/GooString.h"
+#include "TextOutputDev.h"
+
+// Print the text found inside the crop box of one page of a PDF file.
+// usage: gettext-test INPUT-FILE page
+int main (int argc, char *argv[])
+{
+  int page_index;
+
+  // parse args; sscanf must convert exactly one integer (it may return EOF)
+  if (argc < 3 || sscanf (argv[2], "%d", &page_index) != 1) {
+    fprintf(stderr, "usage: %s INPUT-FILE page\n", argv[0]);
+    return 1;
+  }
+
+  globalParams = new GlobalParams();
+
+  // NOTE(review): doc is presumed to take ownership of the file name
+  PDFDoc *doc = new PDFDoc(new GooString(argv[1]));
+  if (!doc->isOk()) {
+    fprintf(stderr, "Error loading document !\n");
+    delete doc;
+    delete globalParams;
+    return 1;
+  }
+
+  // take the crop box from the requested page, not always from page 1
+  if (page_index < 1 || page_index > doc->getNumPages()) {
+    fprintf(stderr, "Error: page %d out of range\n", page_index);
+    delete doc;
+    delete globalParams;
+    return 1;
+  }
+  Page *page = doc->getCatalog()->getPage(page_index);
+  if (!page) {
+    fprintf(stderr, "Error loading page %d !\n", page_index);
+    delete doc;
+    delete globalParams;
+    return 1;
+  }
+
+  // ctor arguments: (fileName, physLayout, rawOrder, append)
+  TextOutputDev *textOut = new TextOutputDev(0, gTrue, gTrue, gFalse);
+  doc->displayPageSlice(textOut, page_index, 72, 72,
+                        0, false, true, false, -1, -1, -1, -1);
+
+  PDFRectangle *rect = page->getCropBox();
+  GooString *s = textOut->getText(rect->x1, rect->y1, rect->x2, rect->y2);
+
+  printf ("%s\n", s->getCString ());
+
+  delete textOut;
+  delete s;
+  delete doc;
+  delete globalParams;
+  return 0;
+}
-- 
1.7.0.4.361.g8b5fe