From efe8f8a1914cc4e3c652ca43aa93488c46419b42 Mon Sep 17 00:00:00 2001 From: Brian Ewins Date: Sun, 22 Nov 2009 09:47:40 +0000 Subject: [PATCH 4/5] Make pdftotext newlines match copy and paste The output of pdftotext didn't insert line breaks, resulting in jumbled text. Change the rules to emit a newline at the end of each line unless a hyphenation is being suppressed, and an extra newline at the end of each flow. --- poppler/TextOutputDev.cc | 17 +++-------------- 1 files changed, 3 insertions(+), 14 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 077e55d..f4debfc 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -4400,24 +4400,13 @@ void TextPage::dump(void *outputStream, TextOutputFunc outputFunc, dumpFragment(line->text, n, uMap, s); (*outputFunc)(outputStream, s->getCString(), s->getLength()); delete s; - if (!line->hyphenated) { - if (line->next) { - (*outputFunc)(outputStream, space, spaceLen); - } else if (blk->next) { - //~ this is a bit of a kludge - we should really do a more - //~ intelligent determination of paragraphs - if (blk->next->lines->words->fontSize == - blk->lines->words->fontSize) { - (*outputFunc)(outputStream, space, spaceLen); - } else { - (*outputFunc)(outputStream, eol, eolLen); - } - } + // output a newline when a hyphen is not suppressed + if (n == line->len) { + (*outputFunc)(outputStream, eol, eolLen); } } } (*outputFunc)(outputStream, eol, eolLen); - (*outputFunc)(outputStream, eol, eolLen); } } -- 1.6.3.3