From 87bafee6609b628e00fbceb35acefe7f0105bc60 Mon Sep 17 00:00:00 2001 From: Brian Ewins Date: Thu, 4 Nov 2010 23:36:55 +0000 Subject: [PATCH] Sort blocks into yx-order before topological sort This allows us to take advantage of the order of the blocks to make the topological sort more efficient. In and of itself, the yx-sort also speeds up reading order sort on docs with lots of text blocks, since they are less likely to be out of order in the most common case. --- poppler/TextOutputDev.cc | 175 +++++++++++++++++++++++++++------------------- poppler/TextOutputDev.h | 5 +- 2 files changed, 105 insertions(+), 75 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index 576bcc9..21bd414 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -1620,43 +1620,6 @@ GBool TextBlock::isBelow(TextBlock *blk) { return below; } -GBool TextBlock::isBeforeByRule1(TextBlock *blk1) { - GBool before = gFalse; - GBool overlap = gFalse; - - switch (this->page->primaryRot) { - case 0: - case 2: - overlap = ((this->ExMin <= blk1->ExMin) && - (blk1->ExMin <= this->ExMax)) || - ((blk1->ExMin <= this->ExMin) && - (this->ExMin <= blk1->ExMax)); - break; - case 1: - case 3: - overlap = ((this->EyMin <= blk1->EyMin) && - (blk1->EyMin <= this->EyMax)) || - ((blk1->EyMin <= this->EyMin) && - (this->EyMin <= blk1->EyMax)); - break; - } - switch (this->page->primaryRot) { - case 0: - before = overlap && this->EyMin < blk1->EyMin; - break; - case 1: - before = overlap && this->ExMax > blk1->ExMax; - break; - case 2: - before = overlap && this->EyMax > blk1->EyMax; - break; - case 3: - before = overlap && this->ExMin < blk1->ExMin; - break; - } - return before; -} - GBool TextBlock::isBeforeByRule2(TextBlock *blk1) { double cmp = 0; int rotLR = rot; @@ -1687,12 +1650,14 @@ GBool TextBlock::isBeforeByRule2(TextBlock *blk1) { // See http://pubs.iupr.org/#2003-breuel-sdiut // Topological sort is done by depth first search, see // http://en.wikipedia.org/wiki/Topological_sorting -int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, +int TextBlock::visitDepthFirst(TextBlock **yxordered, int nBlocks, + int pos1, TextBlock **sorted, int sortPos, GBool* visited) { int pos2; - TextBlock *blk1, *blk2, *blk3; + TextBlock *blk1, *blk2; GBool before; + double covermin, covermax; if (visited[pos1]) { return sortPos; @@ -1705,9 +1670,23 @@ int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, sortPos, blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax); #endif visited[pos1] = gTrue; - pos2 = -1; - for (blk2 = blkList; blk2; blk2 = blk2->next) { - pos2++; + + // track the 'cover' of blk1, ie the interval spanned + // by blocks below blk1 that overlap blk1, repeatedly + // applied as we move through the list of blocks. + switch (this->page->primaryRot) { + case 0: + case 2: + covermin = this->ExMin; + covermax = this->ExMax; + break; + default: + covermin = this->EyMin; + covermax = this->EyMax; + break; + } + for (pos2 = 0; pos2 < nBlocks; pos2++) { + blk2 = yxordered[pos2]; if (visited[pos2]) { // skip visited nodes continue; @@ -1731,42 +1710,84 @@ int TextBlock::visitDepthFirst(TextBlock *blkList, int pos1, if (blk2->yMax <= blk1->yMin) before = gTrue; } else { - if (blk2->isBeforeByRule1(blk1)) { - // Rule (1) blk1 and blk2 overlap, and blk2 is above blk1. - before = gTrue; -#if 0 // for debugging - printf("rule1: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n", - blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax, - blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax); -#endif - } else if (blk2->isBeforeByRule2(blk1)) { - // Rule (2) blk2 left of blk1, and no intervening blk3 - // such that blk1 is before blk3 by rule 1, - // and blk3 is before blk2 by rule 1. - before = gTrue; - for (blk3 = blkList; blk3; blk3 = blk3->next) { - if (blk3 == blk2 || blk3 == blk1) { - continue; - } - if (blk1->isBeforeByRule1(blk3) && - blk3->isBeforeByRule1(blk2)) { - before = gFalse; + if (pos2 < pos1) { + // Rule (1) blk2 is above blk1. + // Just test if blk1 and blk2 overlap. + switch (this->page->primaryRot) { + case 0: + case 2: + before = ((this->ExMin <= blk1->ExMin) && + (blk1->ExMin <= this->ExMax)) || + ((blk1->ExMin <= this->ExMin) && + (this->ExMin <= blk1->ExMax)); + break; + default: + before = ((this->EyMin <= blk1->EyMin) && + (blk1->EyMin <= this->EyMax)) || + ((blk1->EyMin <= this->EyMin) && + (this->EyMin <= blk1->EyMax)); + break; + } + } else { + if (blk2->isBeforeByRule2(blk1)) { + // Rule (2) blk2 left of blk1, and no intervening blk3 + // such that blk1 is before blk3 by rule 1, + // and blk3 is before blk2 by rule 1. + // Finding an individual blk3 is expensive, requiring + // an additional loop. Instead we track the cover of + // blk1, which will include any such blk3. + switch (this->page->primaryRot) { + case 0: + case 2: + before = !(((covermin <= blk2->ExMin) && + (blk2->ExMin <= covermax)) || + ((blk2->ExMin <= covermin) && + (covermin <= blk2->ExMax))); break; + default: + before = !(((covermin <= blk2->EyMin) && + (blk2->EyMin <= covermax)) || + ((blk2->EyMin <= covermin) && + (covermin <= blk2->EyMax))); } - } + } + // if blk2 overlaps the cover of blk1, + // extend the cover to include it. + switch (this->page->primaryRot) { + case 0: + case 2: + if (((covermin <= blk2->ExMin) && + (blk2->ExMin <= covermax)) || + ((blk2->ExMin <= covermin) && + (covermin <= blk2->ExMax))) { + covermin = fmin(covermin, blk2->ExMin); + covermax = fmax(covermax, blk2->ExMax); + } + break; + default: + if (((covermin <= blk2->EyMin) && + (blk2->EyMin <= covermax)) || + ((blk2->EyMin <= covermin) && + (covermin <= blk2->EyMax))) { + covermin = fmin(covermin, blk2->EyMin); + covermax = fmax(covermax, blk2->EyMax); + } + break; + } + #if 0 // for debugging - if (before) { + if (before) { printf("rule2: %.2f..%.2f %.2f..%.2f %.2f..%.2f %.2f..%.2f\n", - blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax, - blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax); - } + blk1->ExMin, blk1->ExMax, blk1->EyMin, blk1->EyMax, + blk2->ExMin, blk2->ExMax, blk2->EyMin, blk2->EyMax); + } #endif } } if (before) { // blk2 is before blk1, so it needs to be visited // before we can add blk1 to the sorted list. - sortPos = blk2->visitDepthFirst(blkList, pos2, sorted, sortPos, visited); + sortPos = blk2->visitDepthFirst(yxordered, nBlocks, pos2, sorted, sortPos, visited); } } #if 0 // for debugging @@ -3281,11 +3302,21 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { } } - i = -1; - for (blk1 = blkList; blk1; blk1 = blk1->next) { - i++; - sortPos = blk1->visitDepthFirst(blkList, i, blocks, sortPos, visited); + // sort blocks into yx order (in preparation for reading order sort) + TextBlock **yxordered; + yxordered = (TextBlock **)gmallocn(nBlocks, sizeof(TextBlock *)); + for (blk = blkList, i = 0; blk; blk = blk->next, ++i) { + yxordered[i] = blk; } + qsort(yxordered, nBlocks, sizeof(TextBlock *), &TextBlock::cmpYXPrimaryRot); + + for (i = 0; i < nBlocks; i++) { + sortPos = yxordered[i]->visitDepthFirst(yxordered, nBlocks, i, blocks, sortPos, visited); + } + if (yxordered) + gfree (yxordered); + + if (visited) { gfree(visited); } diff --git a/poppler/TextOutputDev.h b/poppler/TextOutputDev.h index 438aee4..553c526 100644 --- a/poppler/TextOutputDev.h +++ b/poppler/TextOutputDev.h @@ -362,11 +362,10 @@ public: private: - GBool isBeforeByRule1(TextBlock *blk1); - GBool isBeforeByRepeatedRule1(TextBlock *blkList, TextBlock *blk1); GBool isBeforeByRule2(TextBlock *blk1); - int visitDepthFirst(TextBlock *blkList, int pos1, + int visitDepthFirst(TextBlock **yxordered, + int nBlocks, int pos1, TextBlock **sorted, int sortPos, GBool* visited); -- 1.7.1