From 4d98cfe22253ebadd44df2acaed43341c0e76104 Mon Sep 17 00:00:00 2001 From: Marek Kasik Date: Fri, 7 May 2010 14:58:04 +0200 Subject: [PATCH] Don't detect overlapping cells as table when selecting text Check whether cells of assumed table overlap in an axis or not. They shouldn't. --- poppler/TextOutputDev.cc | 23 +++++++++++++++++++++-- 1 files changed, 21 insertions(+), 2 deletions(-) diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc index ef9c486..699a39e 100644 --- a/poppler/TextOutputDev.cc +++ b/poppler/TextOutputDev.cc @@ -3025,6 +3025,7 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { int numTables = 0; int tableId = -1; int correspondenceX, correspondenceY; + int overlapX, overlapY; double xCentre1, yCentre1, xCentre2, yCentre2; double xCentre3, yCentre3, xCentre4, yCentre4; double deltaX, deltaY; @@ -3099,6 +3100,8 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { tableId = -1; correspondenceX = 0; correspondenceY = 0; + overlapX = 0; + overlapY = 0; deltaX = 0.0; deltaY = 0.0; @@ -3128,6 +3131,20 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { xCentre4 = (fblk4->xMax + fblk4->xMin) / 2.0; yCentre4 = (fblk4->yMax + fblk4->yMin) / 2.0; + // do blocks overlap in x ? + if (blk1->xMax > fblk2->xMin || + blk1->xMax > fblk4->xMin || + fblk3->xMax > fblk2->xMin || + fblk3->xMax > fblk4->xMin) + overlapX++; + + // do blocks overlap in y ? + if (blk1->yMax > fblk3->yMin || + blk1->yMax > fblk4->yMin || + fblk2->yMax > fblk3->yMin || + fblk2->yMax > fblk4->yMin) + overlapY++; + // are blocks centrally aligned in x ? if (fabs (xCentre1 - xCentre3) <= deltaX && fabs (xCentre2 - xCentre4) <= deltaX) @@ -3158,9 +3175,11 @@ void TextPage::coalesce(GBool physLayout, GBool doHTML) { fabs (fblk3->yMax - fblk4->yMax) <= deltaY) correspondenceY++; - // are blocks aligned in x and y ? + // are blocks aligned in x and y and don't overlap ? 
if (correspondenceX > 0 && - correspondenceY > 0) { + correspondenceY > 0 && + overlapX == 0 && + overlapY == 0) { // find maximal tableId tableId = tableId < fblk4->tableId ? fblk4->tableId : tableId; -- 1.6.5.2