diff -crB poppler-0.13.3/utils/HtmlOutputDev.cc poppler-0.13.3-patch/utils/HtmlOutputDev.cc *** poppler-0.13.3/utils/HtmlOutputDev.cc 2010-04-05 14:08:19.000000000 -0400 --- poppler-0.13.3-patch/utils/HtmlOutputDev.cc 2010-05-10 00:06:57.239846349 -0400 *************** *** 428,448 **** GBool found; while (str1) { ! double size = str1->yMax - str1->yMin; ! double xLimit = str1->xMin + size * 0.2; found = gFalse; for (str2 = str1, str3 = str1->yxNext; ! str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) { if (str3->len == str1->len && !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && ! fabs(str3->yMin - str1->yMin) < size * 0.2 && ! fabs(str3->yMax - str1->yMax) < size * 0.2 && ! fabs(str3->xMax - str1->xMax) < size * 0.2) { ! found = gTrue; ! //printf("found duplicate!\n"); break; } } --- 428,455 ---- GBool found; while (str1) { ! // A string is considered a duplicate if it has the same text ! // and starts before the midpoint of the preceding string's ! // extent in both the X and Y dimensions. ! double yCutoff = str1->yMin + ((str1->yMax - str1->yMin) / 2); ! double xCutoff = str1->xMin + ((str1->xMax - str1->xMin) / 2); found = gFalse; for (str2 = str1, str3 = str1->yxNext; ! str3 && str3->xMin < xCutoff; str2 = str3, str3 = str2->yxNext) { if (str3->len == str1->len && !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && ! str3->yMin < yCutoff) { ! #if 0 ! printf("\nfound duplicate: "); ! for (i = 0; i < str3->len; ++i) { ! fputc(str3->text[i] & 0xff, stdout); ! } ! fputc('\n', stdout); ! #endif ! found = gTrue; break; } }