diff -crB poppler-0.13.3/utils/HtmlOutputDev.cc poppler-0.13.3-patch/utils/HtmlOutputDev.cc
*** poppler-0.13.3/utils/HtmlOutputDev.cc 2010-04-05 14:08:19.000000000 -0400
--- poppler-0.13.3-patch/utils/HtmlOutputDev.cc 2010-05-10 00:06:57.239846349 -0400
***************
*** 428,448 ****
GBool found;
while (str1)
{
! double size = str1->yMax - str1->yMin;
! double xLimit = str1->xMin + size * 0.2;
found = gFalse;
for (str2 = str1, str3 = str1->yxNext;
! str3 && str3->xMin < xLimit;
str2 = str3, str3 = str2->yxNext)
{
if (str3->len == str1->len &&
!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
! fabs(str3->yMin - str1->yMin) < size * 0.2 &&
! fabs(str3->yMax - str1->yMax) < size * 0.2 &&
! fabs(str3->xMax - str1->xMax) < size * 0.2)
{
! found = gTrue;
! //printf("found duplicate!\n");
break;
}
}
--- 428,455 ----
GBool found;
while (str1)
{
! // A string is considered a duplicate if it has the same text
! // and starts before the midpoint of the preceding string's
! // extent in both the X and Y dimensions.
! double yCutoff = str1->yMin + ((str1->yMax - str1->yMin) / 2);
! double xCutoff = str1->xMin + ((str1->xMax - str1->xMin) / 2);
found = gFalse;
for (str2 = str1, str3 = str1->yxNext;
! str3 && str3->xMin < xCutoff;
str2 = str3, str3 = str2->yxNext)
{
if (str3->len == str1->len &&
!memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
! str3->yMin < yCutoff)
{
! #if 0
! printf("\nfound duplicate: ");
! for (i = 0; i < str3->len; ++i) {
! fputc(str3->text[i] & 0xff, stdout);
! }
! fputc('\n', stdout);
! #endif
! found = gTrue;
break;
}
}