*** poppler-0.16.0/utils/HtmlOutputDev.cc_org 2010-12-16 23:51:27.000000000 +0100
--- poppler-0.16.0/utils/HtmlOutputDev.cc 2011-01-17 23:36:13.240003415 +0100
***************
*** 430,450 ****
GBool found;
while (str1)
{
! double size = str1->yMax - str1->yMin;
! double xLimit = str1->xMin + size * 0.2;
found = gFalse;
for (str2 = str1, str3 = str1->yxNext;
! str3 && str3->xMin < xLimit;
str2 = str3, str3 = str2->yxNext)
{
if (str3->len == str1->len &&
!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
! fabs(str3->yMin - str1->yMin) < size * 0.2 &&
! fabs(str3->yMax - str1->yMax) < size * 0.2 &&
! fabs(str3->xMax - str1->xMax) < size * 0.2)
{
! found = gTrue;
! //printf("found duplicate!\n");
break;
}
}
--- 430,459 ----
GBool found;
while (str1)
{
! // A string is considered a duplicate of str1 if it has the same text
! // and its origin (xMin, yMin) lies at or after str1's origin but before
! // the midpoint of str1's bounding box in both the X and Y dimensions.
! double yCutoff = str1->yMin + ((str1->yMax - str1->yMin) / 2);
! double xCutoff = str1->xMin + ((str1->xMax - str1->xMin) / 2);
found = gFalse;
for (str2 = str1, str3 = str1->yxNext;
! str3 && str3->xMin < xCutoff;
str2 = str3, str3 = str2->yxNext)
{
if (str3->len == str1->len &&
!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
! (str3->yMin < yCutoff) &&
! (str3->xMin >= str1->xMin) &&
! (str3->yMin >= str1->yMin))
{
! #if 0
! printf("\nfound duplicate: ");
! for (i = 0; i < str3->len; ++i) {
! fputc(str3->text[i] & 0xff, stdout);
! }
! fputc('\n', stdout);
! #endif
! found = gTrue;
break;
}
}