From 1eadfa584bb060c4325a130529c0e0e4d21525c2 Mon Sep 17 00:00:00 2001 From: Brian Ewins Date: Thu, 4 Nov 2010 23:36:55 +0000 Subject: [PATCH] Sort blocks into yx-order before topological sort This allows us to take advantage of the order of the blocks to make the topological sort more efficient. In and of itself, the yx-sort also speeds up reading order sort on docs with lots of text blocks, since they are less likely to be out of order in the most common case. --- poppler/TextOutputDev.cc | 165 +++++++++++++++++++++------------------------ poppler/TextOutputDev.h | 5 +- 2 files changed, 79 insertions(+), 91 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 576bcc9..7944fa3 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -1620,43 +1620,6 @@ GBool TextBlock::isBelow(TextBlock *blk) { return below; } -GBool TextBlock::isBeforeByRule1(TextBlock *blk1) { - GBool before = gFalse; - GBool overlap = gFalse; - - switch (this->page->primaryRot) { - case 0: - case 2: - overlap = ((this->ExMin <= blk1->ExMin) && - (blk1->ExMin <= this->ExMax)) || - ((blk1->ExMin <= this->ExMin) && - (this->ExMin <= blk1->ExMax)); - break; - case 1: - case 3: - overlap = ((this->EyMin <= blk1->EyMin) && - (blk1->EyMin <= this->EyMax)) || - ((blk1->EyMin <= this->EyMin) && - (this->EyMin <= blk1->EyMax)); - break; - } - switch (this->page->primaryRot) { - case 0: - before = overlap && this->EyMin < blk1->EyMin; - break; - case 1: - before = overlap && this->ExMax > blk1->ExMax; - break; - case 2: - before = overlap && this->EyMax > blk1->EyMax; - break; - case 3: - before = overlap && this->ExMin < blk1->ExMin; - break; - } - return before; -} - GBool TextBlock::isBeforeByRule2(TextBlock *blk1) { double cmp = 0; int rotLR = rot; @@ -1687,12 +1650,14 @@ GBool TextBlock::isBeforeByRule2(TextBlock *blk1) { // See http://pubs.iupr.org/#2003-breuel-sdiut // Topological sort is done by depth first search, see // http://en.wikipedia.org/wiki/Topological_sorting -int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, +int TextBlock::visitDepthFirst(TextBlock **yxordered, int nBlocks, + int pos1, TextBlock **sorted, int sortPos, GBool* visited) { int pos2; - TextBlock *blk1, *blk2, *blk3; + TextBlock *blk1, *blk2; GBool before; + double covermin, covermax; if (visited[pos1]) { return sortPos; @@ -1705,9 +1670,19 @@ int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, sortPos, blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax); #endif visited[pos1] = gTrue; - pos2 = -1; - for (blk2 = blkList; blk2; blk2 = blk2->next) { - pos2++; + + // track the 'cover' of blk1, ie the interval spanned + // by blocks below blk1 that overlap blk1, repeatedly + // applied as we move through the list of blocks. + if (this->page->primaryRot % 2 == 0) { + covermin = this->ExMin; + covermax = this->ExMax; + } else { + covermin = this->EyMin; + covermax = this->EyMax; + } + for (pos2 = 0; pos2 < nBlocks; pos2++) { + blk2 = yxordered[pos2]; if (visited[pos2]) { // skip visited nodes continue; @@ -1717,56 +1692,60 @@ int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, // is blk2 before blk1? (for table entries) if (blk1->tableId >= 0 && blk1->tableId == blk2->tableId) { if (page->primaryLR) { - if (blk2->xMax <= blk1->xMin && - blk2->yMin <= blk1->yMax && - blk2->yMax >= blk1->yMin) - before = gTrue; + if (blk2->xMax <= blk1->xMin && + blk2->yMin <= blk1->yMax && + blk2->yMax >= blk1->yMin) + before = gTrue; } else { - if (blk2->xMin >= blk1->xMax && - blk2->yMin <= blk1->yMax && - blk2->yMax >= blk1->yMin) - before = gTrue; + if (blk2->xMin >= blk1->xMax && + blk2->yMin <= blk1->yMax && + blk2->yMax >= blk1->yMin) + before = gTrue; } if (blk2->yMax <= blk1->yMin) - before = gTrue; + before = gTrue; } else { - if (blk2->isBeforeByRule1(blk1)) { - // Rule (1) blk1 and blk2 overlap, and blk2 is above blk1. - before = gTrue; -#if 0 // for debugging - printf("rule1: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n", - blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax, - blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax); -#endif - } else if (blk2->isBeforeByRule2(blk1)) { - // Rule (2) blk2 left of blk1, and no intervening blk3 - // such that blk1 is before blk3 by rule 1, - // and blk3 is before blk2 by rule 1. - before = gTrue; - for (blk3 = blkList; blk3; blk3 = blk3->next) { - if (blk3 == blk2 || blk3 == blk1) { - continue; + if (this->page->primaryRot % 2 == 0) { + if (((covermin <= blk2->ExMin) && + (blk2->ExMin <= covermax)) || + ((blk2->ExMin <= covermin) && + (covermin <= blk2->ExMax))) { + if (pos2 < pos1) { + // Rule (1) blk2 is above and overlapping blk1. + before = gTrue; + } else { + covermin = fmin(covermin, blk2->ExMin); + covermax = fmax(covermax, blk2->ExMax); } - if (blk1->isBeforeByRule1(blk3) && - blk3->isBeforeByRule1(blk2)) { - before = gFalse; - break; + } else { + // Rule (2) blk2 left of blk1, and no intervening blk3 + // such that blk1 is before blk3 by rule 1, + // and blk3 is before blk2 by rule 1. + // blk2 doesn't overlap cover of blk1, so no + // intervening block exists. + before = pos1 < pos2 && blk2->isBeforeByRule2(blk1); + } + } else { + if (((covermin <= blk2->EyMin) && + (blk2->EyMin <= covermax)) || + ((blk2->EyMin <= covermin) && + (covermin <= blk2->EyMax))) { + if (pos2 < pos1) { + before = gTrue; + } else { + covermin = fmin(covermin, blk2->EyMin); + covermax = fmax(covermax, blk2->EyMax); } - } -#if 0 // for debugging - if (before) { - printf("rule2: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n", - blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax, - blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax); - } -#endif + } else { + before = pos1 < pos2 && blk2->isBeforeByRule2(blk1); + } + } + if (before) { + // blk2 is before blk1, so it needs to be visited + // before we can add blk1 to the sorted list. + sortPos = blk2->visitDepthFirst(yxordered, nBlocks, pos2, sorted, sortPos, visited); } - } - if (before) { - // blk2 is before blk1, so it needs to be visited - // before we can add blk1 to the sorted list. - sortPos = blk2->visitDepthFirst(blkList, pos2, sorted, sortPos, visited); } } #if 0 // for debugging @@ -3281,11 +3260,21 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { } } - i = -1; - for (blk1 = blkList; blk1; blk1 = blk1->next) { - i++; - sortPos = blk1->visitDepthFirst(blkList, i, blocks, sortPos, visited); + // sort blocks into yx order (in preparation for reading order sort) + TextBlock **yxordered; + yxordered = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *)); + for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { + yxordered[i] = blk; } + qsort(yxordered, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot); + + for (i = 0; i < nBlocks; i++) { + sortPos = yxordered[i]->visitDepthFirst(yxordered, nBlocks, i, blocks, sortPos, visited); + } + if (yxordered) + gfree (yxordered); + + if (visited) { gfree(visited); } diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 438aee4..553c526 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -362,11 +362,10 @@ public: private: - GBool isBeforeByRule1(TextBlock *blk1); - GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1); GBool isBeforeByRule2(TextBlock *blk1); - int visitDepthFirst(TextBlock *blkList, int pos1, + int visitDepthFirst(TextBlock **yxordered, + int nBlocks, int pos1, TextBlock **sorted, int sortPos, GBool* visited); -- 1.7.1