diff -urN poppler-0.4.3.orig/configure.ac poppler-0.4.3/configure.ac --- poppler-0.4.3.orig/configure.ac 2005-12-12 23:46:00.000000000 +0100 +++ poppler-0.4.3/configure.ac 2005-12-30 11:18:50.000000000 +0100 @@ -437,6 +437,7 @@ fofi/Makefile splash/Makefile poppler/Makefile +utils/Makefile glib/Makefile test/Makefile qt/Makefile diff -urN poppler-0.4.3.orig/Makefile.am poppler-0.4.3/Makefile.am --- poppler-0.4.3.orig/Makefile.am 2005-06-28 12:00:09.000000000 +0200 +++ poppler-0.4.3/Makefile.am 2005-12-30 11:18:50.000000000 +0100 @@ -22,7 +22,7 @@ qt4_pc_file = poppler-qt4.pc endif -SUBDIRS = goo fofi $(splash_subdir) poppler $(glib_subdir) $(qt_subdir) test $(qt4_subdir) +SUBDIRS = goo fofi $(splash_subdir) poppler utils $(glib_subdir) $(qt_subdir) test $(qt4_subdir) EXTRA_DIST = \ README-XPDF \ diff -urN poppler-0.4.3.orig/utils/GVector.h poppler-0.4.3/utils/GVector.h --- poppler-0.4.3.orig/utils/GVector.h 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/GVector.h 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,101 @@ +#ifndef _VECTOR_H +#define _VECTOR_H +#include "goo/gtypes.h" + + +template +class GVector{ +private: + + int _size; + T* last; + T* storage; + + void resize(){ + if (_size==0) _size=2;else _size=2*_size; + T *tmp=new T[_size]; + if (storage){ + last=copy(storage,last,tmp); + delete [] storage; + } + else last=tmp; + storage=tmp; + } + + T* copy(T* src1,T* scr2,T* dest){ + T* tmp=src1; + T* d=dest; + while(tmp!=scr2){ + *d=*tmp; + d++;tmp++; + } + return d; + } + +public: + typedef T* iterator; + + GVector(){ + _size=0; + last=0; + storage=0; +} + + + +virtual ~GVector(){ + delete[] storage ; +} + +void reset(){ + last=storage; +} + +int size(){ + return (last-storage); +} +void push_back(const T& elem){ + if (!storage||(size() >=_size)) resize(); + *last=elem; + last++; + + +} + + +T pop_back() { + if (last!=storage) last--; + + return *last; +} + + +T operator[](unsigned int i){ + return *(storage+i); +} + + +GBool isEmpty() const{ + 
return !_size || (last==storage) ; +} + + + +iterator begin() const{ + return storage; +} + +iterator end() const { + return last; +} +}; +#endif + + + + + + + + + diff -urN poppler-0.4.3.orig/utils/HtmlFonts.cc poppler-0.4.3/utils/HtmlFonts.cc --- poppler-0.4.3.orig/utils/HtmlFonts.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/HtmlFonts.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,326 @@ +#include "HtmlFonts.h" +#include "GlobalParams.h" +#include "UnicodeMap.h" +#include + + struct Fonts{ + char *Fontname; + char *name; + }; + +const int font_num=13; + +static Fonts fonts[font_num+1]={ + {"Courier", "Courier" }, + {"Courier-Bold", "Courier"}, + {"Courier-BoldOblique", "Courier"}, + {"Courier-Oblique", "Courier"}, + {"Helvetica", "Helvetica"}, + {"Helvetica-Bold", "Helvetica"}, + {"Helvetica-BoldOblique", "Helvetica"}, + {"Helvetica-Oblique", "Helvetica"}, + {"Symbol", "Symbol" }, + {"Times-Bold", "Times" }, + {"Times-BoldItalic", "Times" }, + {"Times-Italic", "Times" }, + {"Times-Roman", "Times" }, + {" " , "Times" }, +}; + +#define xoutRound(x) ((int)(x + 0.5)) +extern GBool xml; + +GooString* HtmlFont::DefaultFont=new GooString("Times"); // Arial,Helvetica,sans-serif + +HtmlFontColor::HtmlFontColor(GfxRGB rgb){ + r=static_cast(255*rgb.r); + g=static_cast(255*rgb.g); + b=static_cast(255*rgb.b); + if (!(Ok(r)&&Ok(b)&&Ok(g))) {printf("Error : Bad color \n");r=0;g=0;b=0;} +} + +GooString *HtmlFontColor::convtoX(unsigned int xcol) const{ + GooString *xret=new GooString(); + char tmp; + unsigned int k; + k = (xcol/16); + if ((k>=0)&&(k<10)) tmp=(char) ('0'+k); else tmp=(char)('a'+k-10); + xret->append(tmp); + k = (xcol%16); + if ((k>=0)&&(k<10)) tmp=(char) ('0'+k); else tmp=(char)('a'+k-10); + xret->append(tmp); + return xret; +} + +GooString *HtmlFontColor::toString() const{ + GooString *tmp=new GooString("#"); + GooString *tmpr=convtoX(r); + GooString *tmpg=convtoX(g); + GooString *tmpb=convtoX(b); + tmp->append(tmpr); + tmp->append(tmpg); + 
tmp->append(tmpb); + delete tmpr; + delete tmpg; + delete tmpb; + return tmp; +} + +HtmlFont::HtmlFont(GooString* ftname,int _size, GfxRGB rgb){ + //if (col) color=HtmlFontColor(col); + //else color=HtmlFontColor(); + color=HtmlFontColor(rgb); + + GooString *fontname = NULL; + + if( ftname ){ + fontname = new GooString(ftname); + FontName=new GooString(ftname); + } + else { + fontname = NULL; + FontName = NULL; + } + + lineSize = -1; + + size=(_size-1); + italic = gFalse; + bold = gFalse; + + if (fontname){ + if (strstr(fontname->lowerCase()->getCString(),"bold")) bold=gTrue; + + if (strstr(fontname->lowerCase()->getCString(),"italic")|| + strstr(fontname->lowerCase()->getCString(),"oblique")) italic=gTrue; + + int i=0; + while (strcmp(ftname->getCString(),fonts[i].Fontname)&&(igetTextEncoding())) { + return tmp; + } + + for (int i = 0; i < uLen; ++i) { + switch (u[i]) + { + case '"': tmp->append("""); break; + case '&': tmp->append("&"); break; + case '<': tmp->append("<"); break; + case '>': tmp->append(">"); break; + default: + { + // convert unicode to string + if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) { + tmp->append(buf, n); + } + } + } + } + + uMap->decRefCnt(); + return tmp; +} + +GooString* HtmlFont::simple(HtmlFont* font, Unicode* content, int uLen){ + GooString *cont=HtmlFilter (content, uLen); + + /*if (font.isBold()) { + cont->insert(0,"",3); + cont->append("",4); + } + if (font.isItalic()) { + cont->insert(0,"",3); + cont->append("",4); + } */ + + return cont; +} + +HtmlFontAccu::HtmlFontAccu(){ + accu=new GVector(); +} + +HtmlFontAccu::~HtmlFontAccu(){ + if (accu) delete accu; +} + +int HtmlFontAccu::AddFont(const HtmlFont& font){ + GVector::iterator i; + for (i=accu->begin();i!=accu->end();i++) + { + if (font.isEqual(*i)) + { + return (int)(i-(accu->begin())); + } + } + + accu->push_back(font); + return (accu->size()-1); +} + +// get CSS font name for font #i +GooString* HtmlFontAccu::getCSStyle(int i, GooString* content){ + GooString 
*tmp; + GooString *iStr=GooString::fromInt(i); + + if (!xml) { + tmp = new GooString("append(iStr); + tmp->append("\">"); + tmp->append(content); + tmp->append(""); + } else { + tmp = new GooString(""); + tmp->append(content); + } + + delete iStr; + return tmp; +} + +// get CSS font definition for font #i +GooString* HtmlFontAccu::CSStyle(int i){ + GooString *tmp=new GooString(); + GooString *iStr=GooString::fromInt(i); + + GVector::iterator g=accu->begin(); + g+=i; + HtmlFont font=*g; + GooString *Size=GooString::fromInt(font.getSize()); + GooString *colorStr=font.getColor().toString(); + GooString *fontName=font.getFontName(); + GooString *lSize; + + if(!xml){ + tmp->append(".ft"); + tmp->append(iStr); + tmp->append("{font-size:"); + tmp->append(Size); + if( font.getLineSize() != -1 ) + { + lSize = GooString::fromInt(font.getLineSize()); + tmp->append("px;line-height:"); + tmp->append(lSize); + delete lSize; + } + tmp->append("px;font-family:"); + tmp->append(fontName); //font.getFontName()); + tmp->append(";color:"); + tmp->append(colorStr); + tmp->append(";}"); + } + if (xml) { + tmp->append("append(iStr); + tmp->append("\" size=\""); + tmp->append(Size); + tmp->append("\" family=\""); + tmp->append(fontName); //font.getFontName()); + tmp->append("\" color=\""); + tmp->append(colorStr); + tmp->append("\"/>"); + } + + delete fontName; + delete colorStr; + delete iStr; + delete Size; + return tmp; +} + + diff -urN poppler-0.4.3.orig/utils/HtmlFonts.h poppler-0.4.3/utils/HtmlFonts.h --- poppler-0.4.3.orig/utils/HtmlFonts.h 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/HtmlFonts.h 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,85 @@ +#ifndef _HTML_FONTS_H +#define _HTML_FONTS_H +#include "GVector.h" +#include "goo/GooString.h" +#include "GfxState.h" +#include "CharTypes.h" + + +class HtmlFontColor{ + private: + unsigned int r; + unsigned int g; + unsigned int b; + GBool Ok(unsigned int xcol){ return ((xcol<=255)&&(xcol>=0));} + GooString 
*convtoX(unsigned int xcol) const; + public: + HtmlFontColor():r(0),g(0),b(0){} + HtmlFontColor(GfxRGB rgb); + HtmlFontColor(const HtmlFontColor& x){r=x.r;g=x.g;b=x.b;} + HtmlFontColor& operator=(const HtmlFontColor &x){ + r=x.r;g=x.g;b=x.b; + return *this; + } + ~HtmlFontColor(){}; + GooString* toString() const; + GBool isEqual(const HtmlFontColor& col) const{ + return ((r==col.r)&&(g==col.g)&&(b==col.b)); + } +} ; + + +class HtmlFont{ + private: + unsigned int size; + int lineSize; + GBool italic; + GBool bold; + int pos; // position of the font name in the fonts array + static GooString *DefaultFont; + GooString *FontName; + HtmlFontColor color; + static GooString* HtmlFilter(Unicode* u, int uLen); //char* s); +public: + + HtmlFont(){FontName=NULL;}; + HtmlFont(GooString* fontname,int _size, GfxRGB rgb); + HtmlFont(const HtmlFont& x); + HtmlFont& operator=(const HtmlFont& x); + HtmlFontColor getColor() const {return color;} + ~HtmlFont(); + static void clear(); + GooString* getFullName(); + GBool isItalic() const {return italic;} + GBool isBold() const {return bold;} + unsigned int getSize() const {return size;} + int getLineSize() const {return lineSize;} + void setLineSize(int _lineSize) { lineSize = _lineSize; } + GooString* getFontName(); + static GooString* getDefaultFont(); + static void setDefaultFont(GooString* defaultFont); + GBool isEqual(const HtmlFont& x) const; + GBool isEqualIgnoreBold(const HtmlFont& x) const; + static GooString* simple(HtmlFont *font, Unicode *content, int uLen); + void print() const {printf("font: %s %d %s%spos: %d\n", FontName->getCString(), size, bold ? "bold " : "", italic ? 
"italic " : "", pos);}; +}; + +class HtmlFontAccu{ +private: + GVector *accu; + +public: + HtmlFontAccu(); + ~HtmlFontAccu(); + int AddFont(const HtmlFont& font); + HtmlFont* Get(int i){ + GVector::iterator g=accu->begin(); + g+=i; + return g; + } + GooString* getCSStyle (int i, GooString* content); + GooString* CSStyle(int i); + int size() const {return accu->size();} + +}; +#endif diff -urN poppler-0.4.3.orig/utils/HtmlLinks.cc poppler-0.4.3/utils/HtmlLinks.cc --- poppler-0.4.3.orig/utils/HtmlLinks.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/HtmlLinks.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,101 @@ +#include "HtmlLinks.h" + +HtmlLink::HtmlLink(const HtmlLink& x){ + Xmin=x.Xmin; + Ymin=x.Ymin; + Xmax=x.Xmax; + Ymax=x.Ymax; + dest=new GooString(x.dest); +} + +HtmlLink::HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString * _dest) +{ + if (xmin < xmax) { + Xmin=xmin; + Xmax=xmax; + } else { + Xmin=xmax; + Xmax=xmin; + } + if (ymin < ymax) { + Ymin=ymin; + Ymax=ymax; + } else { + Ymin=ymax; + Ymax=ymin; + } + dest=new GooString(_dest); +} + +HtmlLink::~HtmlLink(){ + if (dest) delete dest; +} + +GBool HtmlLink::isEqualDest(const HtmlLink& x) const{ + return (!strcmp(dest->getCString(), x.dest->getCString())); +} + +GBool HtmlLink::inLink(double xmin,double ymin,double xmax,double ymax) const { + double y=(ymin+ymax)/2; + if (y>Ymax) return gFalse; + return (y>Ymin)&&(xminXmin); + } + + +HtmlLink& HtmlLink::operator=(const HtmlLink& x){ + if (this==&x) return *this; + if (dest) {delete dest;dest=NULL;} + Xmin=x.Xmin; + Ymin=x.Ymin; + Xmax=x.Xmax; + Ymax=x.Ymax; + dest=new GooString(x.dest); + return *this; +} + +GooString* HtmlLink::getLinkStart() { + GooString *res = new GooString("append(dest); + res->append("\">"); + return res; +} + +/*GooString* HtmlLink::Link(GooString* content){ + //GooString* _dest=new GooString(dest); + GooString *tmp=new GooString("append(dest); + tmp->append("\">"); + tmp->append(content); + 
tmp->append(""); + //delete _dest; + return tmp; + }*/ + + + +HtmlLinks::HtmlLinks(){ + accu=new GVector(); +} + +HtmlLinks::~HtmlLinks(){ + delete accu; + accu=NULL; +} + +GBool HtmlLinks::inLink(double xmin,double ymin,double xmax,double ymax,int& p)const { + + for(GVector::iterator i=accu->begin();i!=accu->end();i++){ + if (i->inLink(xmin,ymin,xmax,ymax)) { + p=(i - accu->begin()); + return 1; + } + } + return 0; +} + +HtmlLink* HtmlLinks::getLink(int i) const{ + GVector::iterator g=accu->begin(); + g+=i; + return g; +} + diff -urN poppler-0.4.3.orig/utils/HtmlLinks.h poppler-0.4.3/utils/HtmlLinks.h --- poppler-0.4.3.orig/utils/HtmlLinks.h 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/HtmlLinks.h 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,47 @@ +#ifndef _HTML_LINKS +#define _HTML_LINKS + +#include "GVector.h" +#include "goo/GooString.h" + +class HtmlLink{ + +private: + double Xmin; + double Ymin; + double Xmax; + double Ymax; + GooString* dest; + +public: + HtmlLink(){dest=NULL;} + HtmlLink(const HtmlLink& x); + HtmlLink& operator=(const HtmlLink& x); + HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString *_dest); + ~HtmlLink(); + GBool HtmlLink::isEqualDest(const HtmlLink& x) const; + GooString *getDest(){return new GooString(dest);} + double getX1() const {return Xmin;} + double getX2() const {return Xmax;} + double getY1() const {return Ymin;} + double getY2() const {return Ymax;} + GBool inLink(double xmin,double ymin,double xmax,double ymax) const ; + //GooString *Link(GooString *content); + GooString* getLinkStart(); + +}; + +class HtmlLinks{ +private: + GVector *accu; +public: + HtmlLinks(); + ~HtmlLinks(); + void AddLink(const HtmlLink& x) {accu->push_back(x);} + GBool inLink(double xmin,double ymin,double xmax,double ymax,int& p) const; + HtmlLink* getLink(int i) const; + +}; + +#endif + diff -urN poppler-0.4.3.orig/utils/HtmlOutputDev.cc poppler-0.4.3/utils/HtmlOutputDev.cc --- poppler-0.4.3.orig/utils/HtmlOutputDev.cc 
1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/HtmlOutputDev.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,1569 @@ +//======================================================================== +// +// HtmlOutputDev.cc +// +// Copyright 1997-2002 Glyph & Cog, LLC +// +// Changed 1999-2000 by G.Ovtcharov +// +// Changed 2002 by Mikhail Kruk +// +//======================================================================== + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include +#include +#include +#include +#include +#include +#include "goo/GooString.h" +#include "goo/GooList.h" +#include "UnicodeMap.h" +#include "goo/gmem.h" +#include "config.h" +#include "Error.h" +#include "GfxState.h" +#include "DCTStream.h" +#include "GlobalParams.h" +#include "HtmlOutputDev.h" +#include "HtmlFonts.h" + +int HtmlPage::pgNum=0; +int HtmlOutputDev::imgNum=1; + +extern double scale; +extern GBool complexMode; +extern GBool ignore; +extern GBool printCommands; +extern GBool printHtml; +extern GBool noframes; +extern GBool stout; +extern GBool xml; +extern GBool showHidden; +extern GBool noMerge; + +static GooString* basename(GooString* str){ + + char *p=str->getCString(); + int len=str->getLength(); + for (int i=len-1;i>=0;i--) + if (*(p+i)==SLASH) + return new GooString((p+i+1),len-i-1); + return new GooString(str); +} + +static GooString* Dirname(GooString* str){ + + char *p=str->getCString(); + int len=str->getLength(); + for (int i=len-1;i>=0;i--) + if (*(p+i)==SLASH) + return new GooString(p,i+1); + return new GooString(); +} + +//------------------------------------------------------------------------ +// HtmlString +//------------------------------------------------------------------------ + +HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts) { + GfxFont *font; + double x, y; + + state->transform(state->getCurX(), state->getCurY(), &x, &y); + if ((font = state->getFont())) { + yMin = y - font->getAscent() * fontSize; + yMax = y - 
font->getDescent() * fontSize; + GfxRGB rgb; + state->getFillRGB(&rgb); + GooString *name = state->getFont()->getName(); + if (!name) name = HtmlFont::getDefaultFont(); //new GooString("default"); + HtmlFont hfont=HtmlFont(name, static_cast(fontSize-1), rgb); + fontpos = fonts->AddFont(hfont); + } else { + // this means that the PDF file draws text without a current font, + // which should never happen + yMin = y - 0.95 * fontSize; + yMax = y + 0.35 * fontSize; + fontpos=0; + } + if (yMin == yMax) { + // this is a sanity check for a case that shouldn't happen -- but + // if it does happen, we want to avoid dividing by zero later + yMin = y; + yMax = y + 1; + } + col = 0; + text = NULL; + xRight = NULL; + link = NULL; + len = size = 0; + yxNext = NULL; + xyNext = NULL; + htext=new GooString(); + dir = textDirUnknown; +} + + +HtmlString::~HtmlString() { + delete text; + delete htext; + gfree(xRight); +} + +void HtmlString::addChar(GfxState *state, double x, double y, + double dx, double dy, Unicode u) { + if (dir == textDirUnknown) { + //dir = UnicodeMap::getDirection(u); + dir = textDirLeftRight; + } + + if (len == size) { + size += 16; + text = (Unicode *)grealloc(text, size * sizeof(Unicode)); + xRight = (double *)grealloc(xRight, size * sizeof(double)); + } + text[len] = u; + if (len == 0) { + xMin = x; + } + xMax = xRight[len] = x + dx; +//printf("added char: %f %f xright = %f\n", x, dx, x+dx); + ++len; +} + +void HtmlString::endString() +{ + if( dir == textDirRightLeft && len > 1 ) + { + //printf("will reverse!\n"); + for (int i = 0; i < len / 2; i++) + { + Unicode ch = text[i]; + text[i] = text[len - i - 1]; + text[len - i - 1] = ch; + } + } +} + +//------------------------------------------------------------------------ +// HtmlPage +//------------------------------------------------------------------------ + +HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) { + this->rawOrder = rawOrder; + curStr = NULL; + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = 
yxCur2 = NULL; + fonts=new HtmlFontAccu(); + links=new HtmlLinks(); + pageWidth=0; + pageHeight=0; + fontsPageMarker = 0; + DocName=NULL; + firstPage = -1; + imgExt = new GooString(imgExtVal); +} + +HtmlPage::~HtmlPage() { + clear(); + if (DocName) delete DocName; + if (fonts) delete fonts; + if (links) delete links; + if (imgExt) delete imgExt; +} + +void HtmlPage::updateFont(GfxState *state) { + GfxFont *font; + double *fm; + char *name; + int code; + double w; + + // adjust the font size + fontSize = state->getTransformedFontSize(); + if ((font = state->getFont()) && font->getType() == fontType3) { + // This is a hack which makes it possible to deal with some Type 3 + // fonts. The problem is that it's impossible to know what the + // base coordinate system used in the font is without actually + // rendering the font. This code tries to guess by looking at the + // width of the character 'm' (which breaks if the font is a + // subset that doesn't contain 'm'). + for (code = 0; code < 256; ++code) { + if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && + name[0] == 'm' && name[1] == '\0') { + break; + } + } + if (code < 256) { + w = ((Gfx8BitFont *)font)->getWidth(code); + if (w != 0) { + // 600 is a generic average 'm' width -- yes, this is a hack + fontSize *= w / 0.6; + } + } + fm = font->getFontMatrix(); + if (fm[0] != 0) { + fontSize *= fabs(fm[3] / fm[0]); + } + } +} + +void HtmlPage::beginString(GfxState *state, GooString *s) { + curStr = new HtmlString(state, fontSize, fonts); +} + + +void HtmlPage::conv(){ + HtmlString *tmp; + + int linkIndex = 0; + HtmlFont* h; + for(tmp=yxStrings;tmp;tmp=tmp->yxNext){ + int pos=tmp->fontpos; + // printf("%d\n",pos); + h=fonts->Get(pos); + + if (tmp->htext) delete tmp->htext; + tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len); + + if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){ + tmp->link = links->getLink(linkIndex); + /*GooString *t=tmp->htext; + 
tmp->htext=links->getLink(k)->Link(tmp->htext); + delete t;*/ + } + } + +} + + +void HtmlPage::addChar(GfxState *state, double x, double y, + double dx, double dy, + double ox, double oy, Unicode *u, int uLen) { + double x1, y1, w1, h1, dx2, dy2; + int n, i; + state->transform(x, y, &x1, &y1); + n = curStr->len; + + // check that new character is in the same direction as current string + // and is not too far away from it before adding + //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || + // XXX + if ( + (n > 0 && + fabs(x1 - curStr->xRight[n-1]) > 0.1 * (curStr->yMax - curStr->yMin))) { + endString(); + beginString(state, NULL); + } + state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), + 0, &dx2, &dy2); + dx -= dx2; + dy -= dy2; + state->transformDelta(dx, dy, &w1, &h1); + if (uLen != 0) { + w1 /= uLen; + h1 /= uLen; + } + for (i = 0; i < uLen; ++i) { + curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u[i]); + } +} + +void HtmlPage::endString() { + HtmlString *p1, *p2; + double h, y1, y2; + + // throw away zero-length strings -- they don't have valid xMin/xMax + // values, and they're useless anyway + if (curStr->len == 0) { + delete curStr; + curStr = NULL; + return; + } + + curStr->endString(); + +#if 0 //~tmp + if (curStr->yMax - curStr->yMin > 20) { + delete curStr; + curStr = NULL; + return; + } +#endif + + // insert string in y-major list + h = curStr->yMax - curStr->yMin; + y1 = curStr->yMin + 0.5 * h; + y2 = curStr->yMin + 0.8 * h; + if (rawOrder) { + p1 = yxCur1; + p2 = NULL; + } else if ((!yxCur1 || + (y1 >= yxCur1->yMin && + (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && + (!yxCur2 || + (y1 < yxCur2->yMin || + (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { + p1 = yxCur1; + p2 = yxCur2; + } else { + for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) { + if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) + break; + } + yxCur2 = p2; + } + yxCur1 = curStr; + if (p1) + p1->yxNext 
= curStr; + else + yxStrings = curStr; + curStr->yxNext = p2; + curStr = NULL; +} + +void HtmlPage::coalesce() { + HtmlString *str1, *str2; + HtmlFont *hfont1, *hfont2; + double space, horSpace, vertSpace, vertOverlap; + GBool addSpace, addLineBreak; + int n, i; + double curX, curY; + +#if 0 //~ for debugging + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + printf("x=%f..%f y=%f..%f size=%2d '", + str1->xMin, str1->xMax, str1->yMin, str1->yMax, + (int)(str1->yMax - str1->yMin)); + for (i = 0; i < str1->len; ++i) { + fputc(str1->text[i] & 0xff, stdout); + } + printf("'\n"); + } + printf("\n------------------------------------------------------------\n\n"); +#endif + str1 = yxStrings; + + if( !str1 ) return; + + //----- discard duplicated text (fake boldface, drop shadows) + if( !complexMode ) + { /* if not in complex mode get rid of duplicate strings */ + HtmlString *str3; + GBool found; + while (str1) + { + double size = str1->yMax - str1->yMin; + double xLimit = str1->xMin + size * 0.2; + found = gFalse; + for (str2 = str1, str3 = str1->yxNext; + str3 && str3->xMin < xLimit; + str2 = str3, str3 = str2->yxNext) + { + if (str3->len == str1->len && + !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && + fabs(str3->yMin - str1->yMin) < size * 0.2 && + fabs(str3->yMax - str1->yMax) < size * 0.2 && + fabs(str3->xMax - str1->xMax) < size * 0.2) + { + found = gTrue; + //printf("found duplicate!\n"); + break; + } + } + if (found) + { + str2->xyNext = str3->xyNext; + str2->yxNext = str3->yxNext; + delete str3; + } + else + { + str1 = str1->yxNext; + } + } + } /*- !complexMode */ + + str1 = yxStrings; + + hfont1 = getFont(str1); + if( hfont1->isBold() ) + str1->htext->insert(0,"",3); + if( hfont1->isItalic() ) + str1->htext->insert(0,"",3); + if( str1->getLink() != NULL ) { + GooString *ls = str1->getLink()->getLinkStart(); + str1->htext->insert(0, ls); + delete ls; + } + curX = str1->xMin; curY = str1->yMin; + + while (str1 && (str2 = str1->yxNext)) { + 
hfont2 = getFont(str2); + space = str1->yMax - str1->yMin; + horSpace = str2->xMin - str1->xMax; + addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4); + vertSpace = str2->yMin - str1->yMax; + +//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d); + + if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) + { + vertOverlap = str1->yMax - str2->yMin; + } else + if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) + { + vertOverlap = str2->yMax - str1->yMin; + } else + { + vertOverlap = 0; + } + + if ( + ( + ( + ( + (rawOrder && vertOverlap > 0.5 * space) + || + (!rawOrder && str2->yMin < str1->yMax) + ) && + (horSpace > -0.5 * space && horSpace < space) + ) || + (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak) + ) && + (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter + str1->dir == str2->dir // text direction the same + ) + { +// printf("yes\n"); + n = str1->len + str2->len; + if ((addSpace = horSpace > 0.1 * space)) { + ++n; + } + if (addLineBreak) { + ++n; + } + + str1->size = (n + 15) & ~15; + str1->text = (Unicode *)grealloc(str1->text, + str1->size * sizeof(Unicode)); + str1->xRight = (double *)grealloc(str1->xRight, + str1->size * sizeof(double)); + if (addSpace) { + str1->text[str1->len] = 0x20; + str1->htext->append(" "); + str1->xRight[str1->len] = str2->xMin; + ++str1->len; + } + if (addLineBreak) { + str1->text[str1->len] = '\n'; + str1->htext->append("
"); + str1->xRight[str1->len] = str2->xMin; + ++str1->len; + str1->yMin = str2->yMin; + str1->yMax = str2->yMax; + str1->xMax = str2->xMax; + int fontLineSize = hfont1->getLineSize(); + int curLineSize = (int)(vertSpace + space); + if( curLineSize != fontLineSize ) + { + HtmlFont *newfnt = new HtmlFont(*hfont1); + newfnt->setLineSize(curLineSize); + str1->fontpos = fonts->AddFont(*newfnt); + delete newfnt; + hfont1 = getFont(str1); + // we have to reget hfont2 because it's location could have + // changed on resize + hfont2 = getFont(str2); + } + } + for (i = 0; i < str2->len; ++i) { + str1->text[str1->len] = str2->text[i]; + str1->xRight[str1->len] = str2->xRight[i]; + ++str1->len; + } + + /* fix and if str1 and str2 differ */ + if( hfont1->isBold() && !hfont2->isBold() ) + str1->htext->append("", 4); + if( hfont1->isItalic() && !hfont2->isItalic() ) + str1->htext->append("", 4); + if( !hfont1->isBold() && hfont2->isBold() ) + str1->htext->append("", 3); + if( !hfont1->isItalic() && hfont2->isItalic() ) + str1->htext->append("", 3); + + /* now handle switch of links */ + HtmlLink *hlink1 = str1->getLink(); + HtmlLink *hlink2 = str2->getLink(); + if( !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2) ) { + if(hlink1 != NULL ) + str1->htext->append(""); + if(hlink2 != NULL ) { + GooString *ls = hlink2->getLinkStart(); + str1->htext->append(ls); + delete ls; + } + } + + str1->htext->append(str2->htext); + // str1 now contains href for link of str2 (if it is defined) + str1->link = str2->link; + hfont1 = hfont2; + if (str2->xMax > str1->xMax) { + str1->xMax = str2->xMax; + } + if (str2->yMax > str1->yMax) { + str1->yMax = str2->yMax; + } + str1->yxNext = str2->yxNext; + delete str2; + } else { // keep strings separate +// printf("no\n"); + if( hfont1->isBold() ) + str1->htext->append("",4); + if( hfont1->isItalic() ) + str1->htext->append("",4); + if(str1->getLink() != NULL ) + str1->htext->append(""); + + str1->xMin = curX; str1->yMin = curY; + str1 = str2; + curX = 
str1->xMin; curY = str1->yMin; + hfont1 = hfont2; + if( hfont1->isBold() ) + str1->htext->insert(0,"",3); + if( hfont1->isItalic() ) + str1->htext->insert(0,"",3); + if( str1->getLink() != NULL ) { + GooString *ls = str1->getLink()->getLinkStart(); + str1->htext->insert(0, ls); + delete ls; + } + } + } + str1->xMin = curX; str1->yMin = curY; + if( hfont1->isBold() ) + str1->htext->append("",4); + if( hfont1->isItalic() ) + str1->htext->append("",4); + if(str1->getLink() != NULL ) + str1->htext->append(""); + +#if 0 //~ for debugging + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + printf("x=%3d..%3d y=%3d..%3d size=%2d ", + (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax, + (int)(str1->yMax - str1->yMin)); + printf("'%s'\n", str1->htext->getCString()); + } + printf("\n------------------------------------------------------------\n\n"); +#endif + +} + +void HtmlPage::dumpAsXML(FILE* f,int page){ + fprintf(f, "\n", pageHeight,pageWidth); + + for(int i=fontsPageMarker;i < fonts->size();i++) { + GooString *fontCSStyle = fonts->CSStyle(i); + fprintf(f,"\t%s\n",fontCSStyle->getCString()); + delete fontCSStyle; + } + + GooString *str, *str1; + for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ + if (tmp->htext){ + str=new GooString(tmp->htext); + fprintf(f,"yMin),xoutRound(tmp->xMin)); + fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin)); + fprintf(f,"font=\"%d\">", tmp->fontpos); + if (tmp->fontpos!=-1){ + str1=fonts->getCSStyle(tmp->fontpos, str); + } + fputs(str1->getCString(),f); + delete str; + delete str1; + fputs("\n",f); + } + } + fputs("\n",f); +} + + +void HtmlPage::dumpComplex(FILE *file, int page){ + FILE* pageFile; + GooString* tmp; + char* htmlEncoding; + + if( firstPage == -1 ) firstPage = page; + + if( !noframes ) + { + GooString* pgNum=GooString::fromInt(page); + tmp = new GooString(DocName); + tmp->append('-')->append(pgNum)->append(".html"); + delete pgNum; + + if (!(pageFile 
= fopen(getFileNameFromPath(tmp->getCString(),tmp->getLength()), "w"))) { + error(-1, "Couldn't open html file '%s'", tmp->getCString()); + delete tmp; + return; + } + delete tmp; + + fprintf(pageFile,"%s\n\n\nPage %d\n\n", + DOCTYPE, page); + + htmlEncoding = HtmlOutputDev::mapEncodingToHtml + (globalParams->getTextEncodingName()); + fprintf(pageFile, "\n", htmlEncoding); + } + else + { + pageFile = file; + fprintf(pageFile,"\n", page); + fprintf(pageFile,"\n", page); + } + + fprintf(pageFile,"
\n", + pageWidth, pageHeight); + + tmp=basename(DocName); + + fputs("\n",pageFile); + + if( !noframes ) + { + fputs("\n\n",pageFile); + } + + if( !ignore ) + { + fprintf(pageFile, + "\"background\n", + pageWidth, pageHeight, tmp->getCString(), + (page-firstPage+1), imgExt->getCString()); + } + + delete tmp; + + GooString *str, *str1; + for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){ + if (tmp1->htext){ + str=new GooString(tmp1->htext); + fprintf(pageFile, + "
", + xoutRound(tmp1->yMin), + xoutRound(tmp1->xMin)); + fputs("",pageFile); + if (tmp1->fontpos!=-1){ + str1=fonts->getCSStyle(tmp1->fontpos, str); + } + //printf("%s\n", str1->getCString()); + fputs(str1->getCString(),pageFile); + + delete str; + delete str1; + fputs("
\n",pageFile); + } + } + + fputs("
\n", pageFile); + + if( !noframes ) + { + fputs("\n\n",pageFile); + fclose(pageFile); + } +} + + +void HtmlPage::dump(FILE *f, int pageNum) +{ + if (complexMode) + { + if (xml) dumpAsXML(f, pageNum); + if (!xml) dumpComplex(f, pageNum); + } + else + { + fprintf(f,"",pageNum); + GooString* fName=basename(DocName); + for (int i=1;i
\n",fName->getCString(),pageNum,i); + HtmlOutputDev::imgNum=1; + delete fName; + + GooString* str; + for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ + if (tmp->htext){ + str=new GooString(tmp->htext); + fputs(str->getCString(),f); + delete str; + fputs("
\n",f); + } + } + fputs("
\n",f); + } +} + + + +void HtmlPage::clear() { + HtmlString *p1, *p2; + + if (curStr) { + delete curStr; + curStr = NULL; + } + for (p1 = yxStrings; p1; p1 = p2) { + p2 = p1->yxNext; + delete p1; + } + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = yxCur2 = NULL; + + if( !noframes ) + { + delete fonts; + fonts=new HtmlFontAccu(); + fontsPageMarker = 0; + } + else + { + fontsPageMarker = fonts->size(); + } + + delete links; + links=new HtmlLinks(); + + +} + +void HtmlPage::setDocName(char *fname){ + DocName=new GooString(fname); +} + +//------------------------------------------------------------------------ +// HtmlMetaVar +//------------------------------------------------------------------------ + +HtmlMetaVar::HtmlMetaVar(char *_name, char *_content) +{ + name = new GooString(_name); + content = new GooString(_content); +} + +HtmlMetaVar::~HtmlMetaVar() +{ + delete name; + delete content; +} + +GooString* HtmlMetaVar::toString() +{ + GooString *result = new GooString("append(name); + result->append("\" content=\""); + result->append(content); + result->append("\">"); + return result; +} + +//------------------------------------------------------------------------ +// HtmlOutputDev +//------------------------------------------------------------------------ + +static char* HtmlEncodings[][2] = { + {"Latin1", "ISO-8859-1"}, + {NULL, NULL} +}; + + +char* HtmlOutputDev::mapEncodingToHtml(GooString* encoding) +{ + char* enc = encoding->getCString(); + for(int i = 0; HtmlEncodings[i][0] != NULL; i++) + { + if( strcmp(enc, HtmlEncodings[i][0]) == 0 ) + { + return HtmlEncodings[i][1]; + } + } + return enc; +} + +void HtmlOutputDev::doFrame(int firstPage){ + GooString* fName=new GooString(Docname); + char* htmlEncoding; + fName->append(".html"); + + if (!(fContentsFrame = fopen(getFileNameFromPath(fName->getCString(),fName->getLength()), "w"))){ + delete fName; + error(-1, "Couldn't open html file '%s'", fName->getCString()); + return; + } + + delete fName; + + 
fName=basename(Docname); + fputs(DOCTYPE_FRAMES, fContentsFrame); + fputs("\n",fContentsFrame); + fputs("\n",fContentsFrame); + fprintf(fContentsFrame,"\n%s",docTitle->getCString()); + htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); + fprintf(fContentsFrame, "\n\n", htmlEncoding); + dumpMetaVars(fContentsFrame); + fprintf(fContentsFrame, "\n"); + fputs("\n",fContentsFrame); + fprintf(fContentsFrame,"\n",fName->getCString()); + fputs("getCString(), firstPage); + else + fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString()); + + fputs(">\n\n\n",fContentsFrame); + + delete fName; + fclose(fContentsFrame); +} + +HtmlOutputDev::HtmlOutputDev(char *fileName, char *title, + char *author, char *keywords, char *subject, char *date, + char *extension, + GBool rawOrder, int firstPage, GBool outline) +{ + char *htmlEncoding; + + fContentsFrame = NULL; + docTitle = new GooString(title); + pages = NULL; + dumpJPEG=gTrue; + //write = gTrue; + this->rawOrder = rawOrder; + this->doOutline = outline; + ok = gFalse; + imgNum=1; + //this->firstPage = firstPage; + //pageNum=firstPage; + // open file + needClose = gFalse; + pages = new HtmlPage(rawOrder, extension); + + glMetaVars = new GooList(); + glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36")); + if( author ) glMetaVars->append(new HtmlMetaVar("author", author)); + if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords)); + if( date ) glMetaVars->append(new HtmlMetaVar("date", date)); + if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject)); + + maxPageWidth = 0; + maxPageHeight = 0; + + pages->setDocName(fileName); + Docname=new GooString (fileName); + + // for non-xml output (complex or simple) with frames generate the left frame + if(!xml && !noframes) + { + GooString* left=new GooString(fileName); + left->append("_ind.html"); + + doFrame(firstPage); + + if (!(fContentsFrame = fopen(getFileNameFromPath(left->getCString(),left->getLength()), "w"))) + { + 
error(-1, "Couldn't open html file '%s'", left->getCString()); + delete left; + return; + } + delete left; + fputs(DOCTYPE, fContentsFrame); + fputs("\n\n\n\n\n",fContentsFrame); + + if (doOutline) + { + GooString *str = basename(Docname); + fprintf(fContentsFrame, "Outline
", str->getCString(), complexMode ? "-outline.html" : "s.html#outline"); + delete str; + } + + if (!complexMode) + { /* not in complex mode */ + + GooString* right=new GooString(fileName); + right->append("s.html"); + + if (!(page=fopen(getFileNameFromPath(right->getCString(),right->getLength()),"w"))){ + error(-1, "Couldn't open html file '%s'", right->getCString()); + delete right; + return; + } + delete right; + fputs(DOCTYPE, page); + fputs("\n\n\n\n\n",page); + } + } + + if (noframes) { + if (stout) page=stdout; + else { + GooString* right=new GooString(fileName); + if (!xml) right->append(".html"); + if (xml) right->append(".xml"); + if (!(page=fopen(getFileNameFromPath(right->getCString(),right->getLength()),"w"))){ + delete right; + error(-1, "Couldn't open html file '%s'", right->getCString()); + return; + } + delete right; + } + + htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); + if (xml) + { + fprintf(page, "\n", htmlEncoding); + fputs("\n\n", page); + fputs("\n",page); + } + else + { + fprintf(page,"%s\n\n\n%s\n", + DOCTYPE, docTitle->getCString()); + + fprintf(page, "\n", htmlEncoding); + + dumpMetaVars(page); + fprintf(page,"\n"); + fprintf(page,"\n"); + } + } + ok = gTrue; +} + +HtmlOutputDev::~HtmlOutputDev() { + /*if (mode&&!xml){ + int h=xoutRound(pages->pageHeight/scale); + int w=xoutRound(pages->pageWidth/scale); + fprintf(tin,"%s=%03d\n","PAPER_WIDTH",w); + fprintf(tin,"%s=%03d\n","PAPER_HEIGHT",h); + fclose(tin); + }*/ + + HtmlFont::clear(); + + delete Docname; + delete docTitle; + + deleteGooList(glMetaVars, HtmlMetaVar); + + if (fContentsFrame){ + fputs("\n\n",fContentsFrame); + fclose(fContentsFrame); + } + if (xml) { + fputs("\n",page); + fclose(page); + } else + if ( !complexMode || xml || noframes ) + { + fputs("\n\n",page); + fclose(page); + } + if (pages) + delete pages; +} + + + +void HtmlOutputDev::startPage(int pageNum, GfxState *state) { + /*if (mode&&!xml){ + if (write){ + write=gFalse; + GooString* 
fname=Dirname(Docname); + fname->append("image.log"); + if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){ + printf("Error : can not open %s",fname); + exit(1); + } + delete fname; + // if(state->getRotation()!=0) + // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1()); + // else + fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1()); + } + }*/ + + this->pageNum = pageNum; + GooString *str=basename(Docname); + pages->clear(); + if(!noframes) + { + if (fContentsFrame) + { + if (complexMode) + fprintf(fContentsFrame,"getCString(),pageNum); + else + fprintf(fContentsFrame,"getCString(),pageNum); + fprintf(fContentsFrame," target=\"contents\" >Page %d
\n",pageNum); + } + } + + pages->pageWidth=static_cast(state->getPageWidth()); + pages->pageHeight=static_cast(state->getPageHeight()); + + delete str; +} + + +void HtmlOutputDev::endPage() { + pages->conv(); + pages->coalesce(); + pages->dump(page, pageNum); + + // I don't yet know what to do in the case when there are pages of different + // sizes and we want complex output: running ghostscript many times + // seems very inefficient. So for now I'll just use last page's size + maxPageWidth = pages->pageWidth; + maxPageHeight = pages->pageHeight; + + //if(!noframes&&!xml) fputs("
\n", fContentsFrame); + if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum)); +} + +void HtmlOutputDev::updateFont(GfxState *state) { + pages->updateFont(state); +} + +void HtmlOutputDev::beginString(GfxState *state, GooString *s) { + pages->beginString(state, s); +} + +void HtmlOutputDev::endString(GfxState *state) { + pages->endString(); +} + +void HtmlOutputDev::drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, Unicode *u, int uLen) +{ + if ( !showHidden && (state->getRender() & 3) == 3) { + return; + } + pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen); +} + +void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool inlineImg) { + + int i, j; + + if (ignore||complexMode) { + OutputDev::drawImageMask(state, ref, str, width, height, invert, inlineImg); + return; + } + + FILE *f1; + int c; + + int x0, y0; // top left corner of image + int w0, h0, w1, h1; // size of image + double xt, yt, wt, ht; + GBool rotate, xFlip, yFlip; + GBool dither; + int x, y; + int ix, iy; + int px1, px2, qx, dx; + int py1, py2, qy, dy; + Gulong pixel; + int nComps, nVals, nBits; + double r1, g1, b1; + + // get image position and size + state->transform(0, 0, &xt, &yt); + state->transformDelta(1, 1, &wt, &ht); + if (wt > 0) { + x0 = xoutRound(xt); + w0 = xoutRound(wt); + } else { + x0 = xoutRound(xt + wt); + w0 = xoutRound(-wt); + } + if (ht > 0) { + y0 = xoutRound(yt); + h0 = xoutRound(ht); + } else { + y0 = xoutRound(yt + ht); + h0 = xoutRound(-ht); + } + state->transformDelta(1, 0, &xt, &yt); + rotate = fabs(xt) < fabs(yt); + if (rotate) { + w1 = h0; + h1 = w0; + xFlip = ht < 0; + yFlip = wt > 0; + } else { + w1 = w0; + h1 = h0; + xFlip = wt < 0; + yFlip = ht > 0; + } + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT) { + GooString *fName=new GooString(Docname); + fName->append("-"); + GooString 
*pgNum=GooString::fromInt(pageNum); + GooString *imgnum=GooString::fromInt(imgNum); + // open the image file + fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg"); + ++imgNum; + if (!(f1 = fopen(getFileNameFromPath(fName->getCString(),fName->getLength()), "wb"))) { + error(-1, "Couldn't open image file '%s'", fName->getCString()); + return; + } + + // initialize stream + str = ((DCTStream *)str)->getRawStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f1); + + fclose(f1); + + if (pgNum) delete pgNum; + if (imgnum) delete imgnum; + if (fName) delete fName; + } + else { + OutputDev::drawImageMask(state, ref, str, width, height, invert, inlineImg); + } +} + +void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + int *maskColors, GBool inlineImg) { + + int i, j; + + if (ignore||complexMode) { + OutputDev::drawImage(state, ref, str, width, height, colorMap, + maskColors, inlineImg); + return; + } + + FILE *f1; + ImageStream *imgStr; + Guchar pixBuf[4]; + GfxColor color; + int c; + + int x0, y0; // top left corner of image + int w0, h0, w1, h1; // size of image + double xt, yt, wt, ht; + GBool rotate, xFlip, yFlip; + GBool dither; + int x, y; + int ix, iy; + int px1, px2, qx, dx; + int py1, py2, qy, dy; + Gulong pixel; + int nComps, nVals, nBits; + double r1, g1, b1; + + // get image position and size + state->transform(0, 0, &xt, &yt); + state->transformDelta(1, 1, &wt, &ht); + if (wt > 0) { + x0 = xoutRound(xt); + w0 = xoutRound(wt); + } else { + x0 = xoutRound(xt + wt); + w0 = xoutRound(-wt); + } + if (ht > 0) { + y0 = xoutRound(yt); + h0 = xoutRound(ht); + } else { + y0 = xoutRound(yt + ht); + h0 = xoutRound(-ht); + } + state->transformDelta(1, 0, &xt, &yt); + rotate = fabs(xt) < fabs(yt); + if (rotate) { + w1 = h0; + h1 = w0; + xFlip = ht < 0; + yFlip = wt > 0; + } else { + w1 = w0; + h1 = h0; + xFlip = wt < 0; + yFlip = ht > 0; + } 
+ + + /*if( !globalParams->getErrQuiet() ) + printf("image stream of kind %d\n", str->getKind());*/ + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT) { + GooString *fName=new GooString(Docname); + fName->append("-"); + GooString *pgNum= GooString::fromInt(pageNum); + GooString *imgnum= GooString::fromInt(imgNum); + + // open the image file + fName->append(pgNum)->append("_")->append(imgnum)->append(".jpg"); + ++imgNum; + + if (!(f1 = fopen(getFileNameFromPath(fName->getCString(),fName->getLength()), "wb"))) { + error(-1, "Couldn't open image file '%s'", fName->getCString()); + return; + } + + // initialize stream + str = ((DCTStream *)str)->getRawStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f1); + + fclose(f1); + + delete fName; + delete pgNum; + delete imgnum; + } + else { + OutputDev::drawImage(state, ref, str, width, height, colorMap, + maskColors, inlineImg); + } +} + + + +void HtmlOutputDev::drawLink(Link* link,Catalog *cat){ + double _x1,_y1,_x2,_y2,w; + int x1,y1,x2,y2; + + link->getRect(&_x1,&_y1,&_x2,&_y2); + w = link->getBorderStyle()->getWidth(); + cvtUserToDev(_x1,_y1,&x1,&y1); + + cvtUserToDev(_x2,_y2,&x2,&y2); + + + GooString* _dest=getLinkDest(link,cat); + HtmlLink t((double) x1,(double) y2,(double) x2,(double) y1,_dest); + pages->AddLink(t); + delete _dest; +} + +GooString* HtmlOutputDev::getLinkDest(Link *link,Catalog* catalog){ + char *p; + switch(link->getAction()->getKind()) + { + case actionGoTo: + { + GooString* file=basename(Docname); + int page=1; + LinkGoTo *ha=(LinkGoTo *)link->getAction(); + LinkDest *dest=NULL; + if (ha->getDest()==NULL) + dest=catalog->findDest(ha->getNamedDest()); + else + dest=ha->getDest()->copy(); + if (dest){ + if (dest->isPageRef()){ + Ref pageref=dest->getPageRef(); + page=catalog->findPage(pageref.num,pageref.gen); + } + else { + page=dest->getPageNum(); + } + + delete dest; + + GooString *str=GooString::fromInt(page); + /* complex simple + 
frames file-4.html files.html#4 + noframes file.html#4 file.html#4 + */ + if (noframes) + { + file->append(".html#"); + file->append(str); + } + else + { + if( complexMode ) + { + file->append("-"); + file->append(str); + file->append(".html"); + } + else + { + file->append("s.html#"); + file->append(str); + } + } + + if (printCommands) printf(" link to page %d ",page); + delete str; + return file; + } + else + { + return new GooString(); + } + } + case actionGoToR: + { + LinkGoToR *ha=(LinkGoToR *) link->getAction(); + LinkDest *dest=NULL; + int page=1; + GooString *file=new GooString(); + if (ha->getFileName()){ + delete file; + file=new GooString(ha->getFileName()->getCString()); + } + if (ha->getDest()!=NULL) dest=ha->getDest()->copy(); + if (dest&&file){ + if (!(dest->isPageRef())) page=dest->getPageNum(); + delete dest; + + if (printCommands) printf(" link to page %d ",page); + if (printHtml){ + p=file->getCString()+file->getLength()-4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){ + file->del(file->getLength()-4,4); + file->append(".html"); + } + file->append('#'); + file->append(GooString::fromInt(page)); + } + } + if (printCommands) printf("filename %s\n",file->getCString()); + return file; + } + case actionURI: + { + LinkURI *ha=(LinkURI *) link->getAction(); + GooString* file=new GooString(ha->getURI()->getCString()); + // printf("uri : %s\n",file->getCString()); + return file; + } + case actionLaunch: + { + LinkLaunch *ha=(LinkLaunch *) link->getAction(); + GooString* file=new GooString(ha->getFileName()->getCString()); + if (printHtml) { + p=file->getCString()+file->getLength()-4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){ + file->del(file->getLength()-4,4); + file->append(".html"); + } + if (printCommands) printf("filename %s",file->getCString()); + + return file; + + } + } + default: + return new GooString(); + } +} + +void HtmlOutputDev::dumpMetaVars(FILE *file) +{ + GooString *var; + + for(int i = 0; i < glMetaVars->getLength(); i++) + { + 
HtmlMetaVar *t = (HtmlMetaVar*)glMetaVars->get(i); + var = t->toString(); + fprintf(file, "%s\n", var->getCString()); + delete var; + } +} + +GBool HtmlOutputDev::dumpDocOutline(Catalog* catalog) +{ + FILE * output; + GBool bClose = gFalse; + + if (!ok || xml) + return gFalse; + + Object *outlines = catalog->getOutline(); + if (!outlines->isDict()) + return gFalse; + + if (!complexMode && !xml) + { + output = page; + } + else if (complexMode && !xml) + { + if (noframes) + { + output = page; + fputs("
\n", output); + } + else + { + GooString *str = basename(Docname); + str->append("-outline.html"); + output = fopen(getFileNameFromPath(str->getCString(),str->getLength()), "w"); + if (output == NULL) + return gFalse; + delete str; + bClose = gTrue; + fputs("\n\nDocument Outline\n\n\n", output); + } + } + + GBool done = newOutlineLevel(output, outlines, catalog); + if (done && !complexMode) + fputs("
\n", output); + + if (bClose) + { + fputs("\n\n", output); + fclose(output); + } + return done; +} + +GBool HtmlOutputDev::newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level) +{ + Object curr, next; + GBool atLeastOne = gFalse; + + if (node->dictLookup("First", &curr)->isDict()) { + if (level == 1) + { + fputs("", output); + fputs("

Document Outline

\n", output); + } + fputs("
    ",output); + do { + // get title, give up if not found + Object title; + if (curr.dictLookup("Title", &title)->isNull()) { + title.free(); + break; + } + GooString *titleStr = new GooString(title.getString()); + title.free(); + + // get corresponding link + // Note: some code duplicated from HtmlOutputDev::getLinkDest(). + GooString *linkName = NULL;; + Object dest; + if (!curr.dictLookup("Dest", &dest)->isNull()) { + LinkGoTo *link = new LinkGoTo(&dest); + LinkDest *linkdest=NULL; + if (link->getDest()==NULL) + linkdest=catalog->findDest(link->getNamedDest()); + else + linkdest=link->getDest()->copy(); + delete link; + if (linkdest) { + int page; + if (linkdest->isPageRef()) { + Ref pageref=linkdest->getPageRef(); + page=catalog->findPage(pageref.num,pageref.gen); + } else { + page=linkdest->getPageNum(); + } + delete linkdest; + + /* complex simple + frames file-4.html files.html#4 + noframes file.html#4 file.html#4 + */ + linkName=basename(Docname); + GooString *str=GooString::fromInt(page); + if (noframes) { + linkName->append(".html#"); + linkName->append(str); + } else { + if( complexMode ) { + linkName->append("-"); + linkName->append(str); + linkName->append(".html"); + } else { + linkName->append("s.html#"); + linkName->append(str); + } + } + delete str; + } + } + dest.free(); + + fputs("
  • ",output); + if (linkName) + fprintf(output,"", linkName->getCString()); + fputs(titleStr->getCString(),output); + if (linkName) { + fputs("",output); + delete linkName; + } + fputs("\n",output); + delete titleStr; + atLeastOne = gTrue; + + newOutlineLevel(output, &curr, catalog, level+1); + curr.dictLookup("Next", &next); + curr.free(); + curr = next; + } while(curr.isDict()); + fputs("
",output); + } + curr.free(); + + return atLeastOne; +} + +char* getFileNameFromPath(char* c, int strlen) { + int last_slash_index = 0; + int i = 0; + char* res; + + for (i=0;i +#include "goo/gtypes.h" +#include "goo/GooList.h" +#include "GfxFont.h" +#include "OutputDev.h" +#include "HtmlLinks.h" +#include "HtmlFonts.h" +#include "Link.h" +#include "Catalog.h" +#include "UnicodeMap.h" + + +#ifdef WIN32 +# define SLASH '\\' +#else +# define SLASH '/' +#endif + +#define xoutRound(x) ((int)(x + 0.5)) + +#define DOCTYPE "" +#define DOCTYPE_FRAMES "" + +class GfxState; +class GooString; +//------------------------------------------------------------------------ +// HtmlString +//------------------------------------------------------------------------ + +enum UnicodeTextDirection { + textDirUnknown, + textDirLeftRight, + textDirRightLeft, + textDirTopBottom +}; + + +class HtmlString { +public: + + // Constructor. + HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts); + + // Destructor. + ~HtmlString(); + + // Add a character to the string. 
+ void addChar(GfxState *state, double x, double y, + double dx, double dy, + Unicode u); + HtmlLink* getLink() { return link; } + void endString(); // postprocessing + +private: +// aender die text variable + HtmlLink *link; + double xMin, xMax; // bounding box x coordinates + double yMin, yMax; // bounding box y coordinates + int col; // starting column + Unicode *text; // the text + double *xRight; // right-hand x coord of each char + HtmlString *yxNext; // next string in y-major order + HtmlString *xyNext; // next string in x-major order + int fontpos; + GooString* htext; + int len; // length of text and xRight + int size; // size of text and xRight arrays + UnicodeTextDirection dir; // direction (left to right/right to left) + + friend class HtmlPage; + +}; + + +//------------------------------------------------------------------------ +// HtmlPage +//------------------------------------------------------------------------ + + + +class HtmlPage { +public: + + // Constructor. + HtmlPage(GBool rawOrder, char *imgExtVal); + + // Destructor. + ~HtmlPage(); + + // Begin a new string. + void beginString(GfxState *state, GooString *s); + + // Add a character to the current string. + void addChar(GfxState *state, double x, double y, + double dx, double dy, + double ox, double oy, + Unicode *u, int uLen); //Guchar c); + + void updateFont(GfxState *state); + + // End the current string, sorting it into the list of strings. + void endString(); + + // Coalesce strings that look like parts of the same line. + void coalesce(); + + // Find a string. If is true, starts looking at top of page; + // otherwise starts looking at ,. If is true, + // stops looking at bottom of page; otherwise stops looking at + // ,. If found, sets the text bounding rectange and + // returns true; otherwise returns false. + + + // new functions + void AddLink(const HtmlLink& x){ + links->AddLink(x); + } + + void dump(FILE *f, int pageNum); + + // Clear the page. 
+ void clear(); + + void conv(); +private: + HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); } + + double fontSize; // current font size + GBool rawOrder; // keep strings in content stream order + + HtmlString *curStr; // currently active string + + HtmlString *yxStrings; // strings in y-major order + HtmlString *xyStrings; // strings in x-major order + HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list + + void setDocName(char* fname); + void dumpAsXML(FILE* f,int page); + void dumpComplex(FILE* f, int page); + + // marks the position of the fonts that belong to current page (for noframes) + int fontsPageMarker; + HtmlFontAccu *fonts; + HtmlLinks *links; + + GooString *DocName; + GooString *imgExt; + int pageWidth; + int pageHeight; + static int pgNum; + int firstPage; // used to begin the numeration of pages + + friend class HtmlOutputDev; +}; + +//------------------------------------------------------------------------ +// HtmlMetaVar +//------------------------------------------------------------------------ +class HtmlMetaVar { +public: + HtmlMetaVar(char *_name, char *_content); + ~HtmlMetaVar(); + + GooString* toString(); + +private: + + GooString *name; + GooString *content; +}; + +//------------------------------------------------------------------------ +// HtmlOutputDev +//------------------------------------------------------------------------ + +class HtmlOutputDev: public OutputDev { +public: + + // Open a text output file. If is NULL, no file is written + // (this is useful, e.g., for searching text). If is true, + // text is converted to 7-bit ASCII; otherwise, text is converted to + // 8-bit ISO Latin-1. should also be set for Japanese + // (EUC-JP) text. If is true, the text is kept in content + // stream order. 
+ HtmlOutputDev(char *fileName, char *title, + char *author, + char *keywords, + char *subject, + char *date, + char *extension, + GBool rawOrder, + int firstPage = 1, + GBool outline = 0); + + // Destructor. + virtual ~HtmlOutputDev(); + + // Check if file was successfully created. + virtual GBool isOk() { return ok; } + + //---- get info about output device + + // Does this device use upside-down coordinates? + // (Upside-down means (0,0) is the top left corner of the page.) + virtual GBool upsideDown() { return gTrue; } + + // Does this device use drawChar() or drawString()? + virtual GBool useDrawChar() { return gTrue; } + + // Does this device use beginType3Char/endType3Char? Otherwise, + // text in Type 3 fonts will be drawn with drawChar/drawString. + virtual GBool interpretType3Chars() { return gFalse; } + + // Does this device need non-text content? + virtual GBool needNonText() { return gFalse; } + + //----- initialization and control + + // Start a page. + virtual void startPage(int pageNum, GfxState *state); + + // End a page. 
+ virtual void endPage(); + + //----- update text state + virtual void updateFont(GfxState *state); + + //----- text drawing + virtual void beginString(GfxState *state, GooString *s); + virtual void endString(GfxState *state); + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, Unicode *u, int uLen); + + virtual void drawImageMask(GfxState *state, Object *ref, + Stream *str, + int width, int height, GBool invert, + GBool inlineImg); + virtual void drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + int *maskColors, GBool inlineImg); + + //new feature + virtual int DevType() {return 1234;} + virtual void drawLink(Link *link,Catalog *cat); + + int getPageWidth() { return maxPageWidth; } + int getPageHeight() { return maxPageHeight; } + + GBool dumpDocOutline(Catalog* catalog); + + /* char* getFileNameFromPath(char* c, int strlen); */ + +private: + // convert encoding into a HTML standard, or encoding->getCString if not + // recognized + static char* mapEncodingToHtml(GooString* encoding); + GooString* getLinkDest(Link *link,Catalog *catalog); + void dumpMetaVars(FILE *); + void doFrame(int firstPage); + GBool newOutlineLevel(FILE *output, Object *node, Catalog* catalog, int level = 1); + + FILE *fContentsFrame; + FILE *page; // html file + //FILE *tin; // image log file + //GBool write; + GBool needClose; // need to close the file? + HtmlPage *pages; // text for the current page + GBool rawOrder; // keep text in content stream order + GBool doOutline; // output document outline + GBool ok; // set up ok? 
+ GBool dumpJPEG; + int pageNum; + int maxPageWidth; + int maxPageHeight; + static int imgNum; + GooString *Docname; + GooString *docTitle; + GooList *glMetaVars; + friend class HtmlPage; +}; + +char* getFileNameFromPath(char* c, int strlen); + +#endif diff -urN poppler-0.4.3.orig/utils/ImageOutputDev.cc poppler-0.4.3/utils/ImageOutputDev.cc --- poppler-0.4.3.orig/utils/ImageOutputDev.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/ImageOutputDev.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,195 @@ +//======================================================================== +// +// ImageOutputDev.cc +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include +#include +#include +#include +#include "goo/gmem.h" +#include "config.h" +#include "Error.h" +#include "GfxState.h" +#include "Object.h" +#include "Stream.h" +#include "DCTStream.h" +#include "ImageOutputDev.h" + +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool dumpJPEGA) { + fileRoot = copyString(fileRootA); + fileName = (char *)gmalloc(strlen(fileRoot) + 20); + dumpJPEG = dumpJPEGA; + imgNum = 0; + ok = gTrue; +} + +ImageOutputDev::~ImageOutputDev() { + gfree(fileName); + gfree(fileRoot); +} + +void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool inlineImg) { + FILE *f; + int c; + int size, i; + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT && !inlineImg) { + + // open the image file + sprintf(fileName, "%s-%03d.jpg", fileRoot, imgNum); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Couldn't open image file '%s'", fileName); + return; + } + + // initialize stream + str = ((DCTStream *)str)->getRawStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f); + + str->close(); + fclose(f); + + // 
dump PBM file + } else { + + // open the image file and write the PBM header + sprintf(fileName, "%s-%03d.pbm", fileRoot, imgNum); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Couldn't open image file '%s'", fileName); + return; + } + fprintf(f, "P4\n"); + fprintf(f, "%d %d\n", width, height); + + // initialize stream + str->reset(); + + // copy the stream + size = height * ((width + 7) / 8); + for (i = 0; i < size; ++i) { + fputc(str->getChar(), f); + } + + str->close(); + fclose(f); + } +} + +void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + int *maskColors, GBool inlineImg) { + FILE *f; + ImageStream *imgStr; + Guchar *p; + GfxRGB rgb; + int x, y; + int c; + int size, i; + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT && + colorMap->getNumPixelComps() == 3 && + !inlineImg) { + + // open the image file + sprintf(fileName, "%s-%03d.jpg", fileRoot, imgNum); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Couldn't open image file '%s'", fileName); + return; + } + + // initialize stream + str = ((DCTStream *)str)->getRawStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f); + + str->close(); + fclose(f); + + // dump PBM file + } else if (colorMap->getNumPixelComps() == 1 && + colorMap->getBits() == 1) { + + // open the image file and write the PBM header + sprintf(fileName, "%s-%03d.pbm", fileRoot, imgNum); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Couldn't open image file '%s'", fileName); + return; + } + fprintf(f, "P4\n"); + fprintf(f, "%d %d\n", width, height); + + // initialize stream + str->reset(); + + // copy the stream + size = height * ((width + 7) / 8); + for (i = 0; i < size; ++i) { + fputc(str->getChar() ^ 0xff, f); + } + + str->close(); + fclose(f); + + // dump PPM file + } else { + + // open the image file and write the PPM header + sprintf(fileName, 
"%s-%03d.ppm", fileRoot, imgNum); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(-1, "Couldn't open image file '%s'", fileName); + return; + } + fprintf(f, "P6\n"); + fprintf(f, "%d %d\n", width, height); + fprintf(f, "255\n"); + + // initialize stream + imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), + colorMap->getBits()); + imgStr->reset(); + + // for each line... + for (y = 0; y < height; ++y) { + + // write the line + p = imgStr->getLine(); + for (x = 0; x < width; ++x) { + colorMap->getRGB(p, &rgb); + fputc((int)(rgb.r * 255 + 0.5), f); + fputc((int)(rgb.g * 255 + 0.5), f); + fputc((int)(rgb.b * 255 + 0.5), f); + p += colorMap->getNumPixelComps(); + } + } + delete imgStr; + + fclose(f); + } +} diff -urN poppler-0.4.3.orig/utils/ImageOutputDev.h poppler-0.4.3/utils/ImageOutputDev.h --- poppler-0.4.3.orig/utils/ImageOutputDev.h 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/ImageOutputDev.h 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,76 @@ +//======================================================================== +// +// ImageOutputDev.h +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== + +#ifndef IMAGEOUTPUTDEV_H +#define IMAGEOUTPUTDEV_H + +#include + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include +#include "goo/gtypes.h" +#include "OutputDev.h" + +class GfxState; + +//------------------------------------------------------------------------ +// ImageOutputDev +//------------------------------------------------------------------------ + +class ImageOutputDev: public OutputDev { +public: + + // Create an OutputDev which will write images to files named + // -NNN.. Normally, all images are written as PBM + // (.pbm) or PPM (.ppm) files. If is set, JPEG images are + // written as JPEG (.jpg) files. + ImageOutputDev(char *fileRootA, GBool dumpJPEGA); + + // Destructor. 
+ virtual ~ImageOutputDev(); + + // Check if file was successfully created. + virtual GBool isOk() { return ok; } + + // Does this device use beginType3Char/endType3Char? Otherwise, + // text in Type 3 fonts will be drawn with drawChar/drawString. + virtual GBool interpretType3Chars() { return gFalse; } + + // Does this device need non-text content? + virtual GBool needNonText() { return gFalse; } + + //---- get info about output device + + // Does this device use upside-down coordinates? + // (Upside-down means (0,0) is the top left corner of the page.) + virtual GBool upsideDown() { return gTrue; } + + // Does this device use drawChar() or drawString()? + virtual GBool useDrawChar() { return gFalse; } + + //----- image drawing + virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool inlineImg); + virtual void drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + int *maskColors, GBool inlineImg); + +private: + + char *fileRoot; // root of output file names + char *fileName; // buffer for output file names + GBool dumpJPEG; // set to dump native JPEG files + int imgNum; // current image number + GBool ok; // set up ok? 
+}; + +#endif diff -urN poppler-0.4.3.orig/utils/Makefile.am poppler-0.4.3/utils/Makefile.am --- poppler-0.4.3.orig/utils/Makefile.am 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/Makefile.am 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,26 @@ + +INCLUDES = \ + -I$(top_srcdir) + +LDADD = \ + $(top_builddir)/poppler/libpoppler.la + +#pdftoppm_LDADD = \ +# $(top_builddir)/splash/libsplash.la + +poppler_includedir = $(includedir)/poppler + +bin_PROGRAMS = pdffonts pdfimages pdfinfo pdftops pdftotext pdftohtml + +man1_MANS = pdffonts.1 pdfimages.1 pdfinfo.1 pdftops.1 pdftotext.1 pdftohtml.1 + +pdffonts_SOURCES = pdffonts.cc parseargs.c +pdfimages_SOURCES = pdfimages.cc ImageOutputDev.cc parseargs.c +pdfinfo_SOURCES = pdfinfo.cc parseargs.c +pdftops_SOURCES = pdftops.cc parseargs.c +pdftotext_SOURCES = pdftotext.cc parseargs.c +pdftohtml_SOURCES = pdftohtml.cc parseargs.c \ + HtmlFonts.cc HtmlLinks.cc HtmlOutputDev.cc + +#pdftoppm_SOURCES = pdftoppm.cc SplashOutputDev.cc parseargs.c +#bin_PROGRAMS += pdftoppm diff -urN poppler-0.4.3.orig/utils/Makefile.in poppler-0.4.3/utils/Makefile.in --- poppler-0.4.3.orig/utils/Makefile.in 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/Makefile.in 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,638 @@ +# Makefile.in generated by automake 1.9.6 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +top_builddir = .. +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +INSTALL = @INSTALL@ +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +bin_PROGRAMS = pdffonts$(EXEEXT) pdfimages$(EXEEXT) pdfinfo$(EXEEXT) \ + pdftops$(EXEEXT) pdftotext$(EXEEXT) pdftohtml$(EXEEXT) +subdir = utils +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h \ + $(top_builddir)/poppler/poppler-config.h +CONFIG_CLEAN_FILES = +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" +binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) +PROGRAMS = $(bin_PROGRAMS) +am_pdffonts_OBJECTS = pdffonts.$(OBJEXT) parseargs.$(OBJEXT) +pdffonts_OBJECTS = $(am_pdffonts_OBJECTS) +pdffonts_LDADD = $(LDADD) +pdffonts_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +am_pdfimages_OBJECTS = pdfimages.$(OBJEXT) ImageOutputDev.$(OBJEXT) \ + parseargs.$(OBJEXT) +pdfimages_OBJECTS = $(am_pdfimages_OBJECTS) +pdfimages_LDADD = $(LDADD) +pdfimages_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +am_pdfinfo_OBJECTS = pdfinfo.$(OBJEXT) parseargs.$(OBJEXT) +pdfinfo_OBJECTS = $(am_pdfinfo_OBJECTS) +pdfinfo_LDADD = $(LDADD) +pdfinfo_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +am_pdftohtml_OBJECTS = pdftohtml.$(OBJEXT) parseargs.$(OBJEXT) \ + 
HtmlFonts.$(OBJEXT) HtmlLinks.$(OBJEXT) \ + HtmlOutputDev.$(OBJEXT) +pdftohtml_OBJECTS = $(am_pdftohtml_OBJECTS) +pdftohtml_LDADD = $(LDADD) +pdftohtml_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +am_pdftops_OBJECTS = pdftops.$(OBJEXT) parseargs.$(OBJEXT) +pdftops_OBJECTS = $(am_pdftops_OBJECTS) +pdftops_LDADD = $(LDADD) +pdftops_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +am_pdftotext_OBJECTS = pdftotext.$(OBJEXT) parseargs.$(OBJEXT) +pdftotext_OBJECTS = $(am_pdftotext_OBJECTS) +pdftotext_LDADD = $(LDADD) +pdftotext_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la +DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) -I$(top_builddir)/poppler +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +SOURCES = $(pdffonts_SOURCES) $(pdfimages_SOURCES) $(pdfinfo_SOURCES) \ + $(pdftohtml_SOURCES) $(pdftops_SOURCES) $(pdftotext_SOURCES) +DIST_SOURCES = $(pdffonts_SOURCES) $(pdfimages_SOURCES) \ + $(pdfinfo_SOURCES) $(pdftohtml_SOURCES) $(pdftops_SOURCES) \ + $(pdftotext_SOURCES) +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(man1_MANS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = 
@ACLOCAL@ +AMDEP_FALSE = @AMDEP_FALSE@ +AMDEP_TRUE = @AMDEP_TRUE@ +AMTAR = @AMTAR@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_CAIRO_OUTPUT_FALSE = @BUILD_CAIRO_OUTPUT_FALSE@ +BUILD_CAIRO_OUTPUT_TRUE = @BUILD_CAIRO_OUTPUT_TRUE@ +BUILD_GTK_TEST_FALSE = @BUILD_GTK_TEST_FALSE@ +BUILD_GTK_TEST_TRUE = @BUILD_GTK_TEST_TRUE@ +BUILD_LIBJPEG_FALSE = @BUILD_LIBJPEG_FALSE@ +BUILD_LIBJPEG_TRUE = @BUILD_LIBJPEG_TRUE@ +BUILD_POPPLER_GLIB_FALSE = @BUILD_POPPLER_GLIB_FALSE@ +BUILD_POPPLER_GLIB_TRUE = @BUILD_POPPLER_GLIB_TRUE@ +BUILD_POPPLER_QT4_FALSE = @BUILD_POPPLER_QT4_FALSE@ +BUILD_POPPLER_QT4_TRUE = @BUILD_POPPLER_QT4_TRUE@ +BUILD_POPPLER_QT_FALSE = @BUILD_POPPLER_QT_FALSE@ +BUILD_POPPLER_QT_TRUE = @BUILD_POPPLER_QT_TRUE@ +BUILD_SPLASH_OUTPUT_FALSE = @BUILD_SPLASH_OUTPUT_FALSE@ +BUILD_SPLASH_OUTPUT_TRUE = @BUILD_SPLASH_OUTPUT_TRUE@ +BUILD_ZLIB_FALSE = @BUILD_ZLIB_FALSE@ +BUILD_ZLIB_TRUE = @BUILD_ZLIB_TRUE@ +CAIRO_CFLAGS = @CAIRO_CFLAGS@ +CAIRO_LIBS = @CAIRO_LIBS@ +CAIRO_VERSION = @CAIRO_VERSION@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +ECHO = @ECHO@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +F77 = @F77@ +FFLAGS = @FFLAGS@ +FREETYPE_CFLAGS = @FREETYPE_CFLAGS@ +FREETYPE_CONFIG = @FREETYPE_CONFIG@ +FREETYPE_LIBS = @FREETYPE_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GTK_TEST_CFLAGS = @GTK_TEST_CFLAGS@ +GTK_TEST_LIBS = @GTK_TEST_LIBS@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LDFLAGS = @LDFLAGS@ +LIBJPEG_LIBS = @LIBJPEG_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +OBJEXT = @OBJEXT@ +PACKAGE = 
@PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PKG_CONFIG = @PKG_CONFIG@ +POPPLER_GLIB_CFLAGS = @POPPLER_GLIB_CFLAGS@ +POPPLER_GLIB_LIBS = @POPPLER_GLIB_LIBS@ +POPPLER_QT4_CXXFLAGS = @POPPLER_QT4_CXXFLAGS@ +POPPLER_QT4_LIBS = @POPPLER_QT4_LIBS@ +POPPLER_QT_CXXFLAGS = @POPPLER_QT_CXXFLAGS@ +POPPLER_QT_LIBS = @POPPLER_QT_LIBS@ +RANLIB = @RANLIB@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SPLASH_CFLAGS = @SPLASH_CFLAGS@ +SPLASH_LIBS = @SPLASH_LIBS@ +STRIP = @STRIP@ +VERSION = @VERSION@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +ZLIB_LIBS = @ZLIB_LIBS@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_F77 = @ac_ct_F77@ +ac_ct_RANLIB = @ac_ct_RANLIB@ +ac_ct_STRIP = @ac_ct_STRIP@ +ac_pt_PKG_CONFIG = @ac_pt_PKG_CONFIG@ +am__fastdepCC_FALSE = @am__fastdepCC_FALSE@ +am__fastdepCC_TRUE = @am__fastdepCC_TRUE@ +am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@ +am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +datadir = @datadir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +sysconfdir = @sysconfdir@ 
+target_alias = @target_alias@ +INCLUDES = \ + -I$(top_srcdir) + +LDADD = \ + $(top_builddir)/poppler/libpoppler.la + + +#pdftoppm_LDADD = \ +# $(top_builddir)/splash/libsplash.la +poppler_includedir = $(includedir)/poppler +man1_MANS = pdffonts.1 pdfimages.1 pdfinfo.1 pdftops.1 pdftotext.1 pdftohtml.1 +pdffonts_SOURCES = pdffonts.cc parseargs.c +pdfimages_SOURCES = pdfimages.cc ImageOutputDev.cc parseargs.c +pdfinfo_SOURCES = pdfinfo.cc parseargs.c +pdftops_SOURCES = pdftops.cc parseargs.c +pdftotext_SOURCES = pdftotext.cc parseargs.c +pdftohtml_SOURCES = pdftohtml.cc parseargs.c \ + HtmlFonts.cc HtmlLinks.cc HtmlOutputDev.cc + +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .cc .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \ + && exit 0; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu utils/Makefile'; \ + cd $(top_srcdir) && \ + $(AUTOMAKE) --gnu utils/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + test -z "$(bindir)" || $(mkdir_p) "$(DESTDIR)$(bindir)" + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + p1=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + if test -f $$p \ + || test -f $$p1 \ + ; then \ + f=`echo "$$p1" | sed 's,^.*/,,;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) '$$p' '$(DESTDIR)$(bindir)/$$f'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(binPROGRAMS_INSTALL) "$$p" "$(DESTDIR)$(bindir)/$$f" || exit 1; \ + else :; fi; \ + done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo "$$p" | sed 's,^.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/'`; \ + echo " rm -f '$(DESTDIR)$(bindir)/$$f'"; \ + rm -f "$(DESTDIR)$(bindir)/$$f"; \ + done + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; for p in $$list; do \ + f=`echo $$p|sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f $$p $$f"; \ + rm -f $$p $$f ; \ + done +pdffonts$(EXEEXT): $(pdffonts_OBJECTS) $(pdffonts_DEPENDENCIES) + @rm -f pdffonts$(EXEEXT) + $(CXXLINK) $(pdffonts_LDFLAGS) $(pdffonts_OBJECTS) $(pdffonts_LDADD) $(LIBS) +pdfimages$(EXEEXT): $(pdfimages_OBJECTS) $(pdfimages_DEPENDENCIES) + @rm -f pdfimages$(EXEEXT) + $(CXXLINK) $(pdfimages_LDFLAGS) $(pdfimages_OBJECTS) 
$(pdfimages_LDADD) $(LIBS) +pdfinfo$(EXEEXT): $(pdfinfo_OBJECTS) $(pdfinfo_DEPENDENCIES) + @rm -f pdfinfo$(EXEEXT) + $(CXXLINK) $(pdfinfo_LDFLAGS) $(pdfinfo_OBJECTS) $(pdfinfo_LDADD) $(LIBS) +pdftohtml$(EXEEXT): $(pdftohtml_OBJECTS) $(pdftohtml_DEPENDENCIES) + @rm -f pdftohtml$(EXEEXT) + $(CXXLINK) $(pdftohtml_LDFLAGS) $(pdftohtml_OBJECTS) $(pdftohtml_LDADD) $(LIBS) +pdftops$(EXEEXT): $(pdftops_OBJECTS) $(pdftops_DEPENDENCIES) + @rm -f pdftops$(EXEEXT) + $(CXXLINK) $(pdftops_LDFLAGS) $(pdftops_OBJECTS) $(pdftops_LDADD) $(LIBS) +pdftotext$(EXEEXT): $(pdftotext_OBJECTS) $(pdftotext_DEPENDENCIES) + @rm -f pdftotext$(EXEEXT) + $(CXXLINK) $(pdftotext_LDFLAGS) $(pdftotext_OBJECTS) $(pdftotext_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/HtmlFonts.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/HtmlLinks.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/HtmlOutputDev.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ImageOutputDev.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parseargs.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdffonts.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfimages.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfinfo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftops.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftotext.Po@am__quote@ + +.c.o: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c $< + +.c.obj: +@am__fastdepCC_TRUE@ if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ if $(LTCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCC_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< + +.cc.o: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ if $(CXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ if $(LTCXXCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c 
-o $@ $<; \ +@am__fastdepCXX_TRUE@ then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +distclean-libtool: + -rm -f libtool +uninstall-info-am: +install-man1: $(man1_MANS) $(man_MANS) + @$(NORMAL_INSTALL) + test -z "$(man1dir)" || $(mkdir_p) "$(DESTDIR)$(man1dir)" + @list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \ + l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \ + for i in $$l2; do \ + case "$$i" in \ + *.1*) list="$$list $$i" ;; \ + esac; \ + done; \ + for i in $$list; do \ + if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \ + else file=$$i; fi; \ + ext=`echo $$i | sed -e 's/^.*\\.//'`; \ + case "$$ext" in \ + 1*) ;; \ + *) ext='1' ;; \ + esac; \ + inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \ + inst=`echo $$inst | sed -e 's/^.*\///'`; \ + inst=`echo $$inst | sed '$(transform)'`.$$ext; \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst"; \ + done +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list='$(man1_MANS) $(dist_man1_MANS) $(nodist_man1_MANS)'; \ + l2='$(man_MANS) $(dist_man_MANS) $(nodist_man_MANS)'; \ + for i in $$l2; do \ + case "$$i" in \ + *.1*) list="$$list $$i" ;; \ + esac; \ + done; \ + for i in $$list; do \ + ext=`echo $$i | sed -e 's/^.*\\.//'`; \ + case "$$ext" in \ + 1*) ;; \ + *) ext='1' ;; \ + esac; \ + inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \ + inst=`echo $$inst | sed -e 's/^.*\///'`; \ + inst=`echo $$inst | sed '$(transform)'`.$$ext; \ + echo " rm -f '$(DESTDIR)$(man1dir)/$$inst'"; \ + rm -f "$(DESTDIR)$(man1dir)/$$inst"; \ + done + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + 
list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$tags $$unique; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + tags=; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) ' { files[$$0] = 1; } \ + END { for (i in files) print i; }'`; \ + test -z "$(CTAGS_ARGS)$$tags$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$tags $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && cd $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) $$here + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \ + list='$(DISTFILES)'; for file in $$list; do \ + case $$file in \ + $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \ + $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \ + esac; \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test "$$dir" != "$$file" && test 
"$$dir" != "."; then \ + dir="/$$dir"; \ + $(mkdir_p) "$(distdir)$$dir"; \ + else \ + dir=''; \ + fi; \ + if test -d $$d/$$file; then \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \ + fi; \ + cp -pR $$d/$$file $(distdir)$$dir || exit 1; \ + else \ + test -f $(distdir)/$$file \ + || cp -p $$d/$$file $(distdir)/$$file \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(MANS) +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(mkdir_p) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-libtool distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +info: info-am + +info-am: + +install-data-am: install-man + +install-exec-am: install-binPROGRAMS + +install-info: install-info-am + +install-man: install-man1 + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-info-am uninstall-man + +uninstall-man: uninstall-man1 + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-exec \ + install-exec-am install-info install-info-am install-man \ + install-man1 install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-binPROGRAMS uninstall-info-am \ + uninstall-man uninstall-man1 + + +#pdftoppm_SOURCES = pdftoppm.cc SplashOutputDev.cc parseargs.c +#bin_PROGRAMS += pdftoppm +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. 
+.NOEXPORT:
diff -urN poppler-0.4.3.orig/utils/parseargs.c poppler-0.4.3/utils/parseargs.c
--- poppler-0.4.3.orig/utils/parseargs.c 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/parseargs.c 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,219 @@
+/*
+ * parseargs.c
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "parseargs.h"
+
+static ArgDesc *findArg(ArgDesc *args, char *arg);
+static GBool grabArg(ArgDesc *arg, int i, int *argc, char *argv[]);
+
+/*
+ * Scan argv[1..] for switches listed in <args>.  Each recognized switch
+ * (and its value, if any) is removed from argv and *argc is decremented;
+ * unrecognized words are left in place.  A literal "--" is removed and
+ * ends the scan.  Returns gFalse if any switch had a bad/missing value.
+ */
+GBool parseArgs(ArgDesc *args, int *argc, char *argv[]) {
+  ArgDesc *arg;
+  int i, j;
+  GBool ok;
+
+  ok = gTrue;
+  i = 1;
+  while (i < *argc) {
+    if (!strcmp(argv[i], "--")) {
+      --*argc;
+      for (j = i; j < *argc; ++j)
+        argv[j] = argv[j+1];
+      break;
+    } else if ((arg = findArg(args, argv[i]))) {
+      if (!grabArg(arg, i, argc, argv))
+        ok = gFalse;
+    } else {
+      ++i;
+    }
+  }
+  return ok;
+}
+
+/*
+ * Write a usage summary for <args> to stderr: one line per table entry
+ * with its switch, a value placeholder chosen by kind, and its usage text.
+ */
+void printUsage(char *program, char *otherArgs, ArgDesc *args) {
+  ArgDesc *arg;
+  char *typ;
+  int w, w1;
+
+  w = 0;
+  for (arg = args; arg->arg; ++arg) {
+    if ((w1 = strlen(arg->arg)) > w)
+      w = w1;
+  }
+
+  fprintf(stderr, "Usage: %s [options]", program);
+  if (otherArgs)
+    fprintf(stderr, " %s", otherArgs);
+  fprintf(stderr, "\n");
+
+  for (arg = args; arg->arg; ++arg) {
+    fprintf(stderr, " %s", arg->arg);
+    /* 9 == strlen(" <string>"), the widest placeholder below */
+    w1 = 9 + w - strlen(arg->arg);
+    switch (arg->kind) {
+    case argInt:
+    case argIntDummy:
+      typ = " <int>";
+      break;
+    case argFP:
+    case argFPDummy:
+      typ = " <fp>";
+      break;
+    case argString:
+    case argStringDummy:
+      typ = " <string>";
+      break;
+    case argFlag:
+    case argFlagDummy:
+    default:
+      typ = "";
+      break;
+    }
+    fprintf(stderr, "%-*s", w1, typ);
+    if (arg->usage)
+      fprintf(stderr, ": %s", arg->usage);
+    fprintf(stderr, "\n");
+  }
+}
+
+/*
+ * Return the non-dummy entry of <args> whose switch equals <arg>, or NULL.
+ */
+static ArgDesc *findArg(ArgDesc *args, char *arg) {
+  ArgDesc *p;
+
+  for (p = args; p->arg; ++p) {
+    if (p->kind < argFlagDummy && !strcmp(p->arg, arg))
+      return p;
+  }
+  return NULL;
+}
+
+/*
+ * Store the value for switch argv[i] as described by <arg>, then delete the
+ * switch (and a consumed value word) from argv.  Returns gFalse if the value
+ * is missing or malformed; the switch itself is still removed in that case.
+ */
+static GBool grabArg(ArgDesc *arg, int i, int *argc, char *argv[]) {
+  int n;
+  int j;
+  GBool ok;
+
+  ok = gTrue;
+  n = 0;
+  switch (arg->kind) {
+  case argFlag:
+    *(GBool *)arg->val = gTrue;
+    n = 1;
+    break;
+  case argInt:
+    if (i + 1 < *argc && isInt(argv[i+1])) {
+      *(int *)arg->val = atoi(argv[i+1]);
+      n = 2;
+    } else {
+      ok = gFalse;
+      n = 1;
+    }
+    break;
+  case argFP:
+    if (i + 1 < *argc && isFP(argv[i+1])) {
+      *(double *)arg->val = atof(argv[i+1]);
+      n = 2;
+    } else {
+      ok = gFalse;
+      n = 1;
+    }
+    break;
+  case argString:
+    if (i + 1 < *argc) {
+      strncpy((char *)arg->val, argv[i+1], arg->size - 1);
+      ((char *)arg->val)[arg->size - 1] = '\0';
+      n = 2;
+    } else {
+      ok = gFalse;
+      n = 1;
+    }
+    break;
+  default:
+    fprintf(stderr, "Internal error in arg table\n");
+    n = 1;
+    break;
+  }
+  if (n > 0) {
+    *argc -= n;
+    for (j = i; j < *argc; ++j)
+      argv[j] = argv[j+n];
+  }
+  return ok;
+}
+
+/*
+ * gTrue if <s> is an optional sign followed only by decimal digits.
+ * Note that an empty string or a bare sign is also accepted.  Chars are
+ * cast to unsigned char because isdigit() is undefined for negative values.
+ */
+GBool isInt(char *s) {
+  if (*s == '-' || *s == '+')
+    ++s;
+  while (isdigit((unsigned char)*s))
+    ++s;
+  if (*s)
+    return gFalse;
+  return gTrue;
+}
+
+/*
+ * gTrue if <s> has the form [sign][digits][.][digits][(e|E)[sign]digits].
+ * Every mantissa part is optional; an exponent is only recognized after at
+ * least one mantissa digit and must itself contain at least one digit.
+ */
+GBool isFP(char *s) {
+  int n;
+
+  if (*s == '-' || *s == '+')
+    ++s;
+  n = 0;
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++n;
+  }
+  if (*s == '.')
+    ++s;
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++n;
+  }
+  if (n > 0 && (*s == 'e' || *s == 'E')) {
+    ++s;
+    if (*s == '-' || *s == '+')
+      ++s;
+    n = 0;
+    if (!isdigit((unsigned char)*s))
+      return gFalse;
+    do {
+      ++s;
+    } while (isdigit((unsigned char)*s));
+  }
+  if (*s)
+    return gFalse;
+  return gTrue;
+}
diff -urN poppler-0.4.3.orig/utils/parseargs.h poppler-0.4.3/utils/parseargs.h
--- poppler-0.4.3.orig/utils/parseargs.h 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/parseargs.h 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,71 @@
+/*
+ * parseargs.h
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+#ifndef PARSEARGS_H
+#define PARSEARGS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "goo/gtypes.h"
+
+/*
+ * Argument kinds.
+ */
+typedef enum {
+  argFlag,                      /* flag (present / not-present) */
+                                /* [val: GBool *] */
+  argInt,                       /* integer arg */
+                                /* [val: int *] */
+  argFP,                        /* floating point arg */
+                                /* [val: double *] */
+  argString,                    /* string arg */
+                                /* [val: char *] */
+  /* dummy entries -- these show up in the usage listing only; */
+  /* useful for X args, for example */
+  argFlagDummy,
+  argIntDummy,
+  argFPDummy,
+  argStringDummy
+} ArgKind;
+
+/*
+ * Argument descriptor.
+ */
+typedef struct {
+  char *arg;                    /* the command line switch */
+  ArgKind kind;                 /* kind of arg */
+  void *val;                    /* place to store value */
+  int size;                     /* for argString: size of string */
+  char *usage;                  /* usage string */
+} ArgDesc;
+
+/*
+ * Parse command line. Removes all args which are found in the arg
+ * descriptor list <args>. Stops parsing if "--" is found (and removes
+ * it). Returns gFalse if there was an error.
+ */
+extern GBool parseArgs(ArgDesc *args, int *argc, char *argv[]);
+
+/*
+ * Print usage message, based on arg descriptor list.
+ */
+extern void printUsage(char *program, char *otherArgs, ArgDesc *args);
+
+/*
+ * Check if a string is a valid integer or floating point number.
+ */
+extern GBool isInt(char *s);
+extern GBool isFP(char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff -urN poppler-0.4.3.orig/utils/pdffonts.1 poppler-0.4.3/utils/pdffonts.1
--- poppler-0.4.3.orig/utils/pdffonts.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdffonts.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,128 @@
+.\" Copyright 1999-2004 Glyph & Cog, LLC
+.TH pdffonts 1 "22 January 2004"
+.SH NAME
+pdffonts \- Portable Document Format (PDF) font analyzer (version
+3.00)
+.SH SYNOPSIS
+.B pdffonts
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdffonts
+lists the fonts used in a Portable Document Format (PDF) file along
+with various information for each font.
+.PP +The following information is listed for each font: +.TP +.B name +the font name, exactly as given in the PDF file (potentially including +a subset prefix) +.TP +.B type +the font type -- see below for details +.TP +.B emb +"yes" if the font is embedded in the PDF file +.TP +.B sub +"yes" if the font is a subset +.TP +.B uni +"yes" if there is an explicit "ToUnicode" map in the PDF file (the +absence of a ToUnicode map doesn't necessarily mean that the text +can't be converted to Unicode) +.TP +.B object ID +the font dictionary object ID (number and generation) +.PP +PDF files can contain the following types of fonts: +.PP +.RS +Type 1 +.RE +.RS +Type 1C -- aka Compact Font Format (CFF) +.RE +.RS +Type 3 +.RE +.RS +TrueType +.RE +.RS +CID Type 0 -- 16-bit font with no specified type +.RE +.RS +CID Type 0C -- 16-bit PostScript CFF font +.RE +.RS +CID TrueType -- 16-bit TrueType font +.RE +.SH CONFIGURATION FILE +Pdffonts reads a configuration file at startup. It first tries to +find the user's private config file, ~/.xpdfrc. If that doesn't +exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc. See the +.BR xpdfrc (5) +man page for details. +.SH OPTIONS +Many of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.BI \-f " number" +Specifies the first page to analyze. +.TP +.BI \-l " number" +Specifies the last page to analyze. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.BI \-cfg " config-file" +Read +.I config-file +in place of ~/.xpdfrc or the system-wide config file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) 
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdffonts software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff -urN poppler-0.4.3.orig/utils/pdffonts.cc poppler-0.4.3/utils/pdffonts.cc
--- poppler-0.4.3.orig/utils/pdffonts.cc 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdffonts.cc 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,306 @@
+//========================================================================
+//
+// pdffonts.cc
+//
+// Copyright 2001-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Error.h"
+#include "Object.h"
+#include "Dict.h"
+#include "GfxFont.h"
+#include "Annot.h"
+#include "PDFDoc.h"
+#include "config.h"
+
+// Printable names for the font types; scanFont() indexes this table
+// with the value returned by GfxFont::getType().
+static const char *fontTypeNames[] = {
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 3",
+  "TrueType",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID TrueType"
+};
+
+static void scanFonts(Dict *resDict, PDFDoc *doc);
+static void scanFont(GfxFont *font, PDFDoc *doc);
+
+static int firstPage = 1;
+static int lastPage = 0;
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to examine"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to examine"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// IDs of the fonts printed so far: fontsLen entries are in use and
+// fontsSize entries are allocated (grown by scanFont()).
+static Ref *fonts;
+static int fontsLen;
+static int fontsSize;
+
+// Lists the fonts used on pages firstPage..lastPage of the PDF named
+// on the command line.  Returns 0 on success, 1 if the PDF cannot be
+// opened, and 99 when only usage/version text is printed.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *ownerPW, *userPW;
+  GBool ok;
+  Page *page;
+  Dict *resDict;
+  Annots *annots;
+  Object obj1, obj2;
+  int pg, i;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdffonts version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdffonts", "", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+
+  // open PDF file
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // scan the fonts
+  printf("name                                 type         emb sub uni object ID\n");
+  printf("------------------------------------ ------------ --- --- --- ---------\n");
+  fonts = NULL;
+  fontsLen = fontsSize = 0;
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    page = doc->getCatalog()->getPage(pg);
+    if ((resDict = page->getResourceDict())) {
+      scanFonts(resDict, doc);
+    }
+    annots = new Annots(doc->getXRef(), page->getAnnots(&obj1));
+    obj1.free();
+    for (i = 0; i < annots->getNumAnnots(); ++i) {
+      if (annots->getAnnot(i)->getAppearance(&obj1)->isStream()) {
+        obj1.streamGetDict()->lookup("Resources", &obj2);
+        if (obj2.isDict()) {
+          scanFonts(obj2.getDict(), doc);
+        }
+        obj2.free();
+      }
+      obj1.free();
+    }
+    delete annots;
+  }
+
+  exitCode = 0;
+
+  // clean up
+  gfree(fonts);
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+// Reports each font in resDict's "Font" entry, then recurses into the
+// "Resources" dictionary of every stream found under "XObject".
+// NOTE(review): nothing here detects XObjects whose resources refer
+// back to themselves; such a file would presumably recurse without
+// end -- confirm before using this on untrusted input.
+static void scanFonts(Dict *resDict, PDFDoc *doc) {
+  Object obj1, obj2, xObjDict, xObj, resObj;
+  Ref r;
+  GfxFontDict *gfxFontDict;
+  GfxFont *font;
+  int i;
+
+  // scan the fonts in this resource dictionary
+  gfxFontDict = NULL;
+  resDict->lookupNF("Font", &obj1);
+  if (obj1.isRef()) {
+    obj1.fetch(doc->getXRef(), &obj2);
+    if (obj2.isDict()) {
+      r = obj1.getRef();
+      gfxFontDict = new GfxFontDict(doc->getXRef(), &r, obj2.getDict());
+    }
+    obj2.free();
+  } else if (obj1.isDict()) {
+    gfxFontDict = new GfxFontDict(doc->getXRef(), NULL, obj1.getDict());
+  }
+  if (gfxFontDict) {
+    for (i = 0; i < gfxFontDict->getNumFonts(); ++i) {
+      if ((font = gfxFontDict->getFont(i))) {
+        scanFont(font, doc);
+      }
+    }
+    delete gfxFontDict;
+  }
+  obj1.free();
+
+  // recursively scan any resource dictionaries in objects in this
+  // resource dictionary
+  resDict->lookup("XObject", &xObjDict);
+  if (xObjDict.isDict()) {
+    for (i = 0; i < xObjDict.dictGetLength(); ++i) {
+      xObjDict.dictGetVal(i, &xObj);
+      if (xObj.isStream()) {
+        xObj.streamGetDict()->lookup("Resources", &resObj);
+        if (resObj.isDict()) {
+          scanFonts(resObj.getDict(), doc);
+        }
+        resObj.free();
+      }
+      xObj.free();
+    }
+  }
+  xObjDict.free();
+}
+
+// Prints one table row for font unless its ID is already recorded in
+// fonts[], then appends the ID to fonts[].
+static void scanFont(GfxFont *font, PDFDoc *doc) {
+  Ref fontRef, embRef;
+  Object fontObj, toUnicodeObj;
+  GooString *name;
+  GBool emb, subset, hasToUnicode;
+  int i;
+
+  fontRef = *font->getID();
+
+  // check for an already-seen font
+  for (i = 0; i < fontsLen; ++i) {
+    if (fontRef.num == fonts[i].num && fontRef.gen == fonts[i].gen) {
+      return;
+    }
+  }
+
+  // font name
+  name = font->getOrigName();
+
+  // check for an embedded font
+  if (font->getType() == fontType3) {
+    emb = gTrue;
+  } else {
+    emb = font->getEmbeddedFontID(&embRef);
+  }
+
+  // look for a ToUnicode map
+  hasToUnicode = gFalse;
+  if (doc->getXRef()->fetch(fontRef.num, fontRef.gen, &fontObj)->isDict()) {
+    hasToUnicode = fontObj.dictLookup("ToUnicode", &toUnicodeObj)->isStream();
+    toUnicodeObj.free();
+  }
+  fontObj.free();
+
+  // check for a font subset name: capital letters followed by a '+'
+  // sign
+  subset = gFalse;
+  if (name) {
+    for (i = 0; i < name->getLength(); ++i) {
+      if (name->getChar(i) < 'A' || name->getChar(i) > 'Z') {
+        break;
+      }
+    }
+    subset = i > 0 && i < name->getLength() && name->getChar(i) == '+';
+  }
+
+  // print the font info
+  printf("%-36s %-12s %-3s %-3s %-3s",
+         name ? name->getCString() : "[none]",
+         fontTypeNames[font->getType()],
+         emb ? "yes" : "no",
+         subset ? "yes" : "no",
+         hasToUnicode ? "yes" : "no");
+  if (fontRef.gen >= 100000) {
+    printf(" [none]\n");
+  } else {
+    printf(" %6d %2d\n", fontRef.num, fontRef.gen);
+  }
+
+  // add this font to the list
+  if (fontsLen == fontsSize) {
+    fontsSize += 32;
+    fonts = (Ref *)grealloc(fonts, fontsSize * sizeof(Ref));
+  }
+  fonts[fontsLen++] = *font->getID();
+}
diff -urN poppler-0.4.3.orig/utils/pdfimages.1 poppler-0.4.3/utils/pdfimages.1
--- poppler-0.4.3.orig/utils/pdfimages.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdfimages.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,96 @@
+.\" Copyright 1998-2004 Glyph & Cog, LLC
+.TH pdfimages 1 "22 January 2004"
+.SH NAME
+pdfimages \- Portable Document Format (PDF) image extractor
+(version 3.00)
+.SH SYNOPSIS
+.B pdfimages
+[options]
+.I PDF-file image-root
+.SH DESCRIPTION
+.B Pdfimages
+saves images from a Portable Document Format (PDF) file as Portable
+Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
+.PP
+Pdfimages reads the PDF file
+.IR PDF-file ,
+scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
+.IR image-root - nnn . xxx ,
+where
+.I nnn
+is the image number and
+.I xxx
+is the image type (.ppm, .pbm, .jpg).
+.SH CONFIGURATION FILE
+Pdfimages reads a configuration file at startup. It first tries to
+find the user's private config file, ~/.xpdfrc. If that doesn't
+exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc. See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands. These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to scan.
+.TP
+.BI \-l " number"
+Specifies the last page to scan.
+.TP
+.B \-j
+Normally, all images are written as PBM (for monochrome images) or PPM
+(for non-monochrome images) files. With this option, images in DCT
+format are saved as JPEG files.
All non-DCT images are saved in
+PBM/PPM format as usual.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.RB "[config file: " errQuiet ]
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfimages software and documentation are copyright 1998-2004 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdfinfo (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff -urN poppler-0.4.3.orig/utils/pdfimages.cc poppler-0.4.3/utils/pdfimages.cc
--- poppler-0.4.3.orig/utils/pdfimages.cc 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdfimages.cc 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,164 @@
+//========================================================================
+//
+// pdfimages.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+#include
+#include
+#include
+#include
+#include
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "ImageOutputDev.h"
+#include "Error.h"
+#include "config.h"
+
+static int firstPage = 1;
+static int lastPage = 0;
+static GBool dumpJPEG = gFalse;
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool quiet = gFalse;
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to convert"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to convert"},
+  {"-j", argFlag, &dumpJPEG, 0,
+   "write JPEG images as JPEG files"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q", argFlag, &quiet, 0,
+   "don't print any messages or errors"},
+  {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// Sends pages firstPage..lastPage of the PDF named in argv[1] through
+// an ImageOutputDev created with the image root argv[2].  The exit
+// codes follow the EXIT CODES table in pdfimages.1.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  char *imgRoot;
+  GooString *ownerPW, *userPW;
+  ImageOutputDev *imgOut;
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfimages version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfimages", " ", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+  imgRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // check for copy permission
+#ifdef ENFORCE_PERMISSIONS
+  if (!doc->okToCopy()) {
+    error(-1, "Copying of images from this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // write image files
+  imgOut = new ImageOutputDev(imgRoot, dumpJPEG);
+  if (imgOut->isOk()) {
+    doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse);
+    exitCode = 0;
+  } else {
+    // the output device could not be set up: report it with the
+    // documented "error opening an output file" code, not success
+    exitCode = 2;
+  }
+  delete imgOut;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff -urN poppler-0.4.3.orig/utils/pdfinfo.1 poppler-0.4.3/utils/pdfinfo.1
--- poppler-0.4.3.orig/utils/pdfinfo.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdfinfo.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,157 @@
+.\" Copyright 1999-2004 Glyph & Cog, LLC
+.TH pdfinfo 1 "22 January 2004"
+.SH NAME
+pdfinfo \- Portable Document Format (PDF) document information
+extractor (version 3.00)
+.SH SYNOPSIS
+.B pdfinfo
+[options]
+.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdfinfo +prints the contents of the \'Info' dictionary (plus some other useful +information) from a Portable Document Format (PDF) file. +.PP +The \'Info' dictionary contains the following values: +.PP +.RS +title +.RE +.RS +subject +.RE +.RS +keywords +.RE +.RS +author +.RE +.RS +creator +.RE +.RS +producer +.RE +.RS +creation date +.RE +.RS +modification date +.RE +.PP +In addition, the following information is printed: +.PP +.RS +tagged (yes/no) +.RE +.RS +page count +.RE +.RS +encrypted flag (yes/no) +.RE +.RS +print and copy permissions (if encrypted) +.RE +.RS +page size +.RE +.RS +file size +.RE +.RS +linearized (yes/no) +.RE +.RS +PDF version +.RE +.RS +metadata (only if requested) +.RE +.SH CONFIGURATION FILE +Pdfinfo reads a configuration file at startup. It first tries to find +the user's private config file, ~/.xpdfrc. If that doesn't exist, it +looks for a system-wide config file, /etc/xpdf/xpdfrc. See the +.BR xpdfrc (5) +man page for details. +.SH OPTIONS +Many of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.BI \-f " number" +Specifies the first page to examine. If multiple pages are requested +using the "-f" and "-l" options, the size of each requested page (and, +optionally, the bounding boxes for each requested page) are printed. +Otherwise, only page one is examined. +.TP +.BI \-l " number" +Specifies the last page to examine. +.TP +.B \-box +Prints the page box bounding boxes: MediaBox, CropBox, BleedBox, +TrimBox, and ArtBox. +.TP +.B \-meta +Prints document-level metadata. (This is the "Metadata" stream from +the PDF file's Catalog object.) +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. The +.I encoding\-name +must be defined with the unicodeMap command (see +.BR xpdfrc (5)). +This defaults to "Latin1" (which is a built-in encoding). 
+.RB "[config file: " textEncoding ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfinfo software and documentation are copyright 1996-2004 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR xpdf (1),
+.BR pdftops (1),
+.BR pdftotext (1),
+.BR pdffonts (1),
+.BR pdftoppm (1),
+.BR pdfimages (1),
+.BR xpdfrc (5)
+.br
+.B http://www.foolabs.com/xpdf/
diff -urN poppler-0.4.3.orig/utils/pdfinfo.cc poppler-0.4.3/utils/pdfinfo.cc
--- poppler-0.4.3.orig/utils/pdfinfo.cc 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdfinfo.cc 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,394 @@
+//========================================================================
+//
+// pdfinfo.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "Error.h"
+#include "config.h"
+
+static void printInfoString(Dict *infoDict, char *key, char *text,
+                            UnicodeMap *uMap);
+static void printInfoDate(Dict *infoDict, char *key, char *text);
+static void printBox(char *text, PDFRectangle *box);
+
+static int firstPage = 1;
+static int lastPage = 0;
+static GBool printBoxes = gFalse;
+static GBool printMetadata = gFalse;
+static char textEncName[128] = "";
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to convert"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to convert"},
+  {"-box", argFlag, &printBoxes, 0,
+   "print the page bounding boxes"},
+  {"-meta", argFlag, &printMetadata, 0,
+   "print the document metadata (XML)"},
+  {"-enc", argString, textEncName, sizeof(textEncName),
+   "output text encoding name"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// Prints the Info dictionary, page sizes and other summary data for
+// the PDF named on the command line.  Returns 0 on success, 1 if the
+// PDF cannot be opened, and 99 otherwise.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *ownerPW, *userPW;
+  UnicodeMap *uMap;
+  Page *page;
+  Object info;
+  char buf[256];
+  double w, h, wISO, hISO;
+  FILE *f;
+  GooString *metadata;
+  GBool ok;
+  int exitCode;
+  int pg, i;
+  GBool multiPage;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdfinfo version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfinfo", "", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(-1, "Couldn't get text encoding");
+    delete fileName;
+    goto err1;
+  }
+
+  // open PDF file
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage == 0) {
+    multiPage = gFalse;
+    lastPage = 1;
+  } else {
+    multiPage = gTrue;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // print doc info
+  doc->getDocInfo(&info);
+  if (info.isDict()) {
+    printInfoString(info.getDict(), "Title", "Title: ", uMap);
+    printInfoString(info.getDict(), "Subject", "Subject: ", uMap);
+    printInfoString(info.getDict(), "Keywords", "Keywords: ", uMap);
+    printInfoString(info.getDict(), "Author", "Author: ", uMap);
+    printInfoString(info.getDict(), "Creator", "Creator: ", uMap);
+    printInfoString(info.getDict(), "Producer", "Producer: ", uMap);
+    printInfoDate(info.getDict(), "CreationDate", "CreationDate: ");
+    printInfoDate(info.getDict(), "ModDate", "ModDate: ");
+  }
+  info.free();
+
+  // print tagging info
+  printf("Tagged: %s\n",
+         doc->getStructTreeRoot()->isDict() ? "yes" : "no");
+
+  // print page count
+  printf("Pages: %d\n", doc->getNumPages());
+
+  // print encryption info
+  printf("Encrypted: ");
+  if (doc->isEncrypted()) {
+    printf("yes (print:%s copy:%s change:%s addNotes:%s)\n",
+           doc->okToPrint(gTrue) ? "yes" : "no",
+           doc->okToCopy(gTrue) ? "yes" : "no",
+           doc->okToChange(gTrue) ? "yes" : "no",
+           doc->okToAddNotes(gTrue) ? "yes" : "no");
+  } else {
+    printf("no\n");
+  }
+
+  // print page size
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    w = doc->getPageWidth(pg);
+    h = doc->getPageHeight(pg);
+    if (multiPage) {
+      printf("Page %4d size: %g x %g pts", pg, w, h);
+    } else {
+      printf("Page size: %g x %g pts", w, h);
+    }
+    if ((fabs(w - 612) < 0.1 && fabs(h - 792) < 0.1) ||
+        (fabs(w - 792) < 0.1 && fabs(h - 612) < 0.1)) {
+      printf(" (letter)");
+    } else {
+      hISO = sqrt(sqrt(2.0)) * 7200 / 2.54;
+      wISO = hISO / sqrt(2.0);
+      for (i = 0; i <= 6; ++i) {
+        if ((fabs(w - wISO) < 1 && fabs(h - hISO) < 1) ||
+            (fabs(w - hISO) < 1 && fabs(h - wISO) < 1)) {
+          printf(" (A%d)", i);
+          break;
+        }
+        hISO = wISO;
+        wISO /= sqrt(2.0);
+      }
+    }
+    printf("\n");
+  }
+
+  // print the boxes
+  if (printBoxes) {
+    if (multiPage) {
+      for (pg = firstPage; pg <= lastPage; ++pg) {
+        page = doc->getCatalog()->getPage(pg);
+        sprintf(buf, "Page %4d MediaBox: ", pg);
+        printBox(buf, page->getMediaBox());
+        sprintf(buf, "Page %4d CropBox: ", pg);
+        printBox(buf, page->getCropBox());
+        sprintf(buf, "Page %4d BleedBox: ", pg);
+        printBox(buf, page->getBleedBox());
+        sprintf(buf, "Page %4d TrimBox: ", pg);
+        printBox(buf, page->getTrimBox());
+        sprintf(buf, "Page %4d ArtBox: ", pg);
+        printBox(buf, page->getArtBox());
+      }
+    } else if (firstPage <= doc->getNumPages()) {
+      // -f may name a page past the end of the document (or the
+      // document may have no pages); only look the page up if it exists
+      page = doc->getCatalog()->getPage(firstPage);
+      printBox("MediaBox: ", page->getMediaBox());
+      printBox("CropBox: ", page->getCropBox());
+      printBox("BleedBox: ", page->getBleedBox());
+      printBox("TrimBox: ", page->getTrimBox());
+      printBox("ArtBox: ", page->getArtBox());
+    }
+  }
+
+  // print file size
+#ifdef VMS
+  f = fopen(fileName->getCString(), "rb", "ctx=stm");
+#else
+  f = fopen(fileName->getCString(), "rb");
+#endif
+  if (f) {
+#if HAVE_FSEEKO
+    fseeko(f, 0, SEEK_END);
+    printf("File size: %u bytes\n", (Guint)ftello(f));
+#elif HAVE_FSEEK64
+    fseek64(f, 0, SEEK_END);
+    printf("File size: %u bytes\n", (Guint)ftell64(f));
+#else
+    fseek(f, 0, SEEK_END);
+    printf("File size: %d bytes\n", (int)ftell(f));
+#endif
+    fclose(f);
+  }
+
+  // print linearization info
+  printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no");
+
+  // print PDF version
+  printf("PDF version: %.1f\n", doc->getPDFVersion());
+
+  // print the metadata
+  if (printMetadata && (metadata = doc->readMetadata())) {
+    fputs("Metadata:\n", stdout);
+    fputs(metadata->getCString(), stdout);
+    fputc('\n', stdout);
+    delete metadata;
+  }
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  uMap->decRefCnt();
+  delete doc;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+// Prints text followed by the string stored under key in infoDict,
+// mapped through uMap.  Strings starting with the bytes FE FF are
+// decoded as 16-bit big-endian code units, all others byte by byte.
+static void printInfoString(Dict *infoDict, char *key, char *text,
+                            UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  GBool isUnicode;
+  Unicode u;
+  char buf[8];
+  int i, n, len;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s1 = obj.getString();
+    len = s1->getLength();
+    if (len >= 2 &&
+        (s1->getChar(0) & 0xff) == 0xfe &&
+        (s1->getChar(1) & 0xff) == 0xff) {
+      isUnicode = gTrue;
+      i = 2;
+    } else {
+      isUnicode = gFalse;
+      i = 0;
+    }
+    while (i < len) {
+      if (isUnicode) {
+        if (i + 1 >= len) {
+          // odd trailing byte: no complete code unit is left
+          break;
+        }
+        u = ((s1->getChar(i) & 0xff) << 8) |
+            (s1->getChar(i+1) & 0xff);
+        i += 2;
+      } else {
+        u = s1->getChar(i) & 0xff;
+        ++i;
+      }
+      n = uMap->mapUnicode(u, buf, sizeof(buf));
+      fwrite(buf, 1, n, stdout);
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
+// Prints text followed by the date stored under key in infoDict.  A
+// value of the form [D:]YYYYMMDDHHMMSS is reformatted with
+// strftime("%c"); otherwise the stored text (minus "D:") is printed.
+static void printInfoDate(Dict *infoDict, char *key, char *text) {
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec;
+  struct tm tmStruct;
+  char buf[256];
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s = obj.getString()->getCString();
+    if (s[0] == 'D' && s[1] == ':') {
+      s += 2;
+    }
+    if (sscanf(s, "%4d%2d%2d%2d%2d%2d",
+               &year, &mon, &day, &hour, &min, &sec) == 6) {
+      tmStruct.tm_year = year - 1900;
+      tmStruct.tm_mon = mon - 1;
+      tmStruct.tm_mday = day;
+      tmStruct.tm_hour = hour;
+      tmStruct.tm_min = min;
+      tmStruct.tm_sec = sec;
+      tmStruct.tm_wday = -1;
+      tmStruct.tm_yday = -1;
+      tmStruct.tm_isdst = -1;
+      // compute the tm_wday and tm_yday fields
+      if (mktime(&tmStruct) != (time_t)-1 &&
+          strftime(buf, sizeof(buf), "%c", &tmStruct)) {
+        fputs(buf, stdout);
+      } else {
+        fputs(s, stdout);
+      }
+    } else {
+      fputs(s, stdout);
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
+// Prints text followed by the four corner coordinates of box.
+static void printBox(char *text, PDFRectangle *box) {
+  printf("%s%8.2f %8.2f %8.2f %8.2f\n",
+         text, box->x1, box->y1, box->x2, box->y2);
+}
diff -urN poppler-0.4.3.orig/utils/pdftohtml.1 poppler-0.4.3/utils/pdftohtml.1
--- poppler-0.4.3.orig/utils/pdftohtml.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdftohtml.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,85 @@
+.TH PDFTOHTML 1
+.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection
+.\" other parms are allowed: see man(7), man(1)
+.SH NAME
+pdftohtml \- program to convert pdf files into html, xml and png images
+.SH SYNOPSIS
+.B pdftohtml
+.I "[options] [ ]"
+.SH "DESCRIPTION"
+This manual page documents briefly the
+.BR pdftohtml
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+.PP
+.B pdftohtml
+is a program that converts pdf documents into html. It generates its output in
+the current working directory.
+.SH OPTIONS
+A summary of options is included below.
+.TP
+.B \-h, \-help
+Show summary of options.
+.TP +.B \-f +first page to print +.TP +.B \-l +last page to print +.TP +.B \-q +don't print any messages or errors +.TP +.B \-v +print copyright and version info +.TP +.B \-p +exchange .pdf links with .html +.TP +.B \-c +generate complex output +.TP +.B \-i +ignore images +.TP +.B \-noframes +generate no frames. Not supported in complex output mode. +.TP +.B \-stdout +use standard output +.TP +.B \-zoom +zoom the pdf document (default 1.5) +.TP +.B \-xml +output for XML post-processing +.TP +.B \-enc +output text encoding name +.TP +.B \-opw +owner password (for encrypted files) +.TP +.B \-upw +user password (for encrypted files) +.TP +.B \-hidden +force hidden text extraction +.TP +.B \-dev +output device name for Ghostscript (png16m, jpeg etc) +.TP +.B \-nomerge +do not merge paragraphs +.TP +.B \-nodrm +override document DRM settings + +.SH AUTHOR + +Pdftohtml was developed by Gueorgui Ovtcharov and Rainer Dorsch. It is +based on and benefits a lot from Derek Noonburg's xpdf package. + +This manual page was written by Søren Boll Overgaard , +for the Debian GNU/Linux system (but may be used by others). diff -urN poppler-0.4.3.orig/utils/pdftohtml.cc poppler-0.4.3/utils/pdftohtml.cc --- poppler-0.4.3.orig/utils/pdftohtml.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/pdftohtml.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,428 @@ +//======================================================================== +// +// pdftohtml.cc +// +// +// Copyright 1999-2000 G. 
Ovtcharov +//======================================================================== + +#include +#include +#include +#include +#include +#include +#include +#include "parseargs.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "HtmlOutputDev.h" +#include "PSOutputDev.h" +#include "GlobalParams.h" +#include "Error.h" +#include "config.h" +#include "goo/gfile.h" + +#ifndef GHOSTSCRIPT +# define GHOSTSCRIPT "gs" +#endif + +static int firstPage = 1; +static int lastPage = 0; +static GBool rawOrder = gTrue; +GBool printCommands = gTrue; +static GBool printHelp = gFalse; +GBool printHtml = gFalse; +GBool complexMode=gFalse; +GBool ignore=gFalse; +//char extension[5]=".png"; +double scale=1.5; +GBool noframes=gFalse; +GBool stout=gFalse; +GBool xml=gFalse; +GBool errQuiet=gFalse; +GBool noDrm=gFalse; + +GBool showHidden = gFalse; +GBool noMerge = gFalse; +static char ownerPassword[33] = ""; +static char userPassword[33] = ""; +static char gsDevice[33] = "png16m"; +static GBool printVersion = gFalse; + +static GooString* getInfoString(Dict *infoDict, char *key); +static GooString* getInfoDate(Dict *infoDict, char *key); + +static char textEncName[128] = ""; + +static ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + /*{"-raw", argFlag, &rawOrder, 0, + "keep strings in content stream order"},*/ + {"-q", argFlag, &errQuiet, 0, + "don't print any messages or errors"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"-p", argFlag, &printHtml, 0, + "exchange .pdf links by .html"}, + {"-c", argFlag, &complexMode, 0, + "generate complex document"}, + {"-i", argFlag, &ignore, 0, + "ignore images"}, + {"-noframes", argFlag, &noframes, 
0,
+   "generate no frames"},
+  {"-stdout" ,argFlag, &stout, 0,
+   "use standard output"},
+  {"-zoom", argFP, &scale, 0,
+   "zoom the pdf document (default 1.5)"},
+  {"-xml", argFlag, &xml, 0,
+   "output for XML post-processing"},
+  {"-hidden", argFlag, &showHidden, 0,
+   "output hidden text"},
+  {"-nomerge", argFlag, &noMerge, 0,
+   "do not merge paragraphs"},
+  {"-enc", argString, textEncName, sizeof(textEncName),
+   "output text encoding name"},
+  {"-dev", argString, gsDevice, sizeof(gsDevice),
+   "output device name for Ghostscript (png16m, jpeg etc)"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-nodrm", argFlag, &noDrm, 0,
+   "override document DRM settings"},
+  {NULL}
+};
+// pdftohtml entry point: parse options, open the PDF, write HTML/XML via HtmlOutputDev and, in complex (non-XML) mode, render page images through Ghostscript.
+int main(int argc, char *argv[]) {  // returns 1 when a "goto error" path was taken, 0 otherwise
+  PDFDoc *doc = NULL;
+  GooString *fileName = NULL;
+  GooString *docTitle = NULL;
+  GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
+  GooString *htmlFileName = NULL;
+  GooString *psFileName = NULL;
+  HtmlOutputDev *htmlOut = NULL;
+  PSOutputDev *psOut = NULL;
+  GBool ok;  // parseArgs() result; cleared before every "goto error" so the exit status reflects the failure
+  char *p;
+  char extension[16] = "png";
+  GooString *ownerPW, *userPW;
+  Object info;
+  char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) {
+    fprintf(stderr, "pdftohtml version %s http://pdftohtml.sourceforge.net/, based on Xpdf version %s\n", "0.36", xpdfVersion);
+    fprintf(stderr, "%s\n", "Copyright 1999-2003 Gueorgui Ovtcharov and Rainer Dorsch");
+    fprintf(stderr, "%s\n\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftohtml", "<PDF-file> [<html-file> <xml-file>]", argDesc);
+    }
+    exit(1);
+  }
+
+  // init error file
+  //errorInit();
+
+  // read config file
+  globalParams = new GlobalParams("");
+
+  if (errQuiet) {
+    globalParams->setErrQuiet(errQuiet);
+    printCommands = gFalse; // I'm not 100% sure what the difference between them is
+  }
+
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+    if( !globalParams->getTextEncoding() ) {
+      ok = gFalse; goto error;
+    }
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  fileName = new GooString(argv[1]);
+
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    ok = gFalse; goto error;
+  }
+
+  // check for copy permission
+  if (!doc->okToCopy()) {
+    if (!noDrm) {
+      error(-1, "Copying of text from this document is not allowed.");
+      ok = gFalse; goto error;
+    }
+    fprintf(stderr, "Document has copy-protection bit set.\n");
+  }
+
+  // construct text file name
+  if (argc == 3) {
+    GooString* tmp = new GooString(argv[2]);
+    // drop a trailing ".xml" (XML mode) or ".html" (HTML mode) extension
+    const char *extLower = xml ? ".xml" : ".html";
+    const char *extUpper = xml ? ".XML" : ".HTML";
+    int baseLen = tmp->getLength() - (xml ? 4 : 5);
+    // names shorter than the extension cannot carry it; never point p
+    // in front of the buffer
+    p = tmp->getCString() + (baseLen > 0 ? baseLen : 0);
+    if (baseLen >= 0 && (!strcmp(p, extLower) || !strcmp(p, extUpper)))
+      htmlFileName = new GooString(tmp->getCString(), baseLen);
+    else
+      htmlFileName = new GooString(tmp);
+
+    delete tmp;
+  } else {
+    int baseLen = fileName->getLength() - 4;  // length without a ".pdf"/".PDF" suffix
+    p = fileName->getCString() + (baseLen > 0 ? baseLen : 0);
+    if (baseLen >= 0 && (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")))
+      htmlFileName = new GooString(fileName->getCString(), baseLen);
+    else
+      htmlFileName = fileName->copy();
+    // htmlFileName->append(".html");
+  }
+
+  if (scale>3.0) scale=3.0;
+  if (scale<0.5) scale=0.5;
+
+  if (complexMode) {
+    //noframes=gFalse;
+    stout=gFalse;
+  }
+
+  if (stout) {
+    noframes=gTrue;
+    complexMode=gFalse;
+  }
+
+  if (xml)
+  {
+    complexMode = gTrue;
+    noframes = gTrue;
+    noMerge = gTrue;
+  }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  doc->getDocInfo(&info);
+  if (info.isDict()) {
+    docTitle = getInfoString(info.getDict(), "Title");
+    author = getInfoString(info.getDict(), "Author");
+    keywords = getInfoString(info.getDict(), "Keywords");
+    subject = getInfoString(info.getDict(), "Subject");
+    date = getInfoDate(info.getDict(), "ModDate");
+    if( !date )
+      date = getInfoDate(info.getDict(), "CreationDate");
+  }
+  info.free();
+  if( !docTitle ) docTitle = new GooString(htmlFileName);
+
+  /* determine extensions of output background images */
+  {int i;
+  for(i = 0; extsList[i]; i++)
+  {
+    if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+    {
+      strncpy(extension, extsList[i], sizeof(extension));
+      break;
+    }
+  }}
+
+  rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+
+  // write text file
+  htmlOut = new HtmlOutputDev(htmlFileName->getCString(),
+    docTitle->getCString(),
+    author ? author->getCString() : NULL,
+    keywords ? keywords->getCString() : NULL,
+    subject ? subject->getCString() : NULL,
+    date ? date->getCString() : NULL,
+    extension,
+    rawOrder,
+    firstPage,
+    doc->getCatalog()->getOutline()->isDict());
+  delete docTitle;
+  if( author )
+  {
+    delete author;
+  }
+  if( keywords )
+  {
+    delete keywords;
+  }
+  if( subject )
+  {
+    delete subject;
+  }
+  if( date )
+  {
+    delete date;
+  }
+
+  if (htmlOut->isOk())
+  {
+    doc->displayPages(htmlOut, firstPage, lastPage, 72, 72, static_cast<int>(72*scale), 0, gTrue);
+    if (!xml)
+    {
+      htmlOut->dumpDocOutline(doc->getCatalog());
+    }
+  }
+
+  if( complexMode && !xml && !ignore ) {
+    int h=xoutRound(htmlOut->getPageHeight()/scale);
+    int w=xoutRound(htmlOut->getPageWidth()/scale);
+    //int h=xoutRound(doc->getPageHeight(1)/scale);
+    //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+    psFileName = new GooString(htmlFileName->getCString());
+    psFileName->append(".ps");
+
+    globalParams->setPSPaperWidth(w);
+    globalParams->setPSPaperHeight(h);
+    // XXX
+    // globalParams->setPSNoText(gTrue);
+    psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+                            doc->getCatalog(), firstPage, lastPage, psModePS);
+    doc->displayPages(psOut, firstPage, lastPage, 72, 72,
+                      static_cast<int>(72*scale), 0, gFalse);
+    delete psOut;
+
+    /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r72 -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, htmlFileName->getCString(), w, h,
+      psFileName->getCString());*/
+    // NOTE(review): file names are only wrapped in double quotes -- if executeCommand() goes through a shell, a name containing '"' or '$' alters the command; confirm and escape.
+    GooString *gsCmd = new GooString(GHOSTSCRIPT);
+    GooString *tw, *th, *sc;
+    gsCmd->append(" -sDEVICE=");
+    gsCmd->append(gsDevice);
+    gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+    sc = GooString::fromInt(static_cast<int>(72*scale));
+    gsCmd->append(sc);
+    gsCmd->append(" -sOutputFile=");
+    gsCmd->append("\"");
+    gsCmd->append(htmlFileName);
+    gsCmd->append("%03d.");
+    gsCmd->append(extension);
+    gsCmd->append("\" -g");
+    tw = GooString::fromInt(static_cast<int>(scale*w));
+    gsCmd->append(tw);
+    gsCmd->append("x");
+    th = GooString::fromInt(static_cast<int>(scale*h));
+    gsCmd->append(th);
+    gsCmd->append(" -q \"");
+    gsCmd->append(psFileName);
+    gsCmd->append("\"");
+//    printf("running: %s\n", gsCmd->getCString());
+    if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+      error(-1, "Failed to launch Ghostscript!\n");
+    }
+    unlink(psFileName->getCString());
+    delete tw;
+    delete th;
+    delete sc;
+    delete gsCmd;
+    delete psFileName;
+  }
+
+  delete htmlOut;
+
+  // clean up
+ error:
+  if(doc) delete doc;
+  if(globalParams) delete globalParams;
+
+  if(htmlFileName) delete htmlFileName;
+  HtmlFont::clear();
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return ok ? 0 : 1;
+}
+// Returns a new GooString copy of infoDict[key] when that entry is a string, NULL otherwise; the caller deletes the result.
+static GooString* getInfoString(Dict *infoDict, char *key) {
+  Object obj;
+  GooString *s1 = NULL;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    s1 = new GooString(obj.getString());
+  }
+  obj.free();
+  return s1;
+}
+// Returns infoDict[key] as "YYYY-MM-DDTHH:MM:SS+00:00" when it parses as [D:]YYYYMMDDHHMMSS, the raw string when it does not, NULL when the entry is not a string; the caller deletes the result.
+static GooString* getInfoDate(Dict *infoDict, char *key) {
+  Object obj;
+  char *s;
+  int year, mon, day, hour, min, sec;
+  struct tm tmStruct;
+  GooString *result = NULL;
+  char buf[256];
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    s = obj.getString()->getCString();
+    if (s[0] == 'D' && s[1] == ':') {
+      s += 2;
+    }
+    if (sscanf(s, "%4d%2d%2d%2d%2d%2d",
+               &year, &mon, &day, &hour, &min, &sec) == 6) {
+      tmStruct.tm_year = year - 1900;
+      tmStruct.tm_mon = mon - 1;
+      tmStruct.tm_mday = day;
+      tmStruct.tm_hour = hour;
+      tmStruct.tm_min = min;
+      tmStruct.tm_sec = sec;
+      tmStruct.tm_wday = -1;
+      tmStruct.tm_yday = -1;
+      tmStruct.tm_isdst = -1;
+      mktime(&tmStruct); // compute the tm_wday and tm_yday fields
+      if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S+00:00", &tmStruct)) {
+        result = new GooString(buf);
+      } else {
+        result = new GooString(s);
+      }
+    } else {
+      result = new GooString(s);
+    }
+  }
+  obj.free();
+  return result;
+}
+
diff -urN poppler-0.4.3.orig/utils/pdftoppm.1 poppler-0.4.3/utils/pdftoppm.1
--- poppler-0.4.3.orig/utils/pdftoppm.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdftoppm.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,113 @@ +.\" Copyright 2004 Glyph & Cog, LLC +.TH pdftoppm 1 "22 January 2004" +.SH NAME +pdftoppm \- Portable Document Format (PDF) to Portable Pixmap (PPM) +converter (version 3.00) +.SH SYNOPSIS +.B pdftoppm +[options] +.I PDF-file PPM-root +.SH DESCRIPTION +.B Pdftoppm +converts Portable Document Format (PDF) files to color image files in +Portable Pixmap (PPM) format, grayscale image files in Portable +Graymap (PGM) format, or monochrome image files in Portable Bitmap +(PBM) format. +.PP +Pdftoppm reads the PDF file, +.IR PDF-file , +and writes one PPM file for each page, +.IR PPM-root - nnnnnn .ppm, +where +.I nnnnnn +is the page number. +.SH CONFIGURATION FILE +Pdftoppm reads a configuration file at startup. It first tries to +find the user's private config file, ~/.xpdfrc. If that doesn't +exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc. See the +.BR xpdfrc (5) +man page for details. +.SH OPTIONS +Many of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.BI \-r " number" +Specifies the resolution, in DPI. The default is 150 DPI. +.TP +.B \-mono +Generate a monochrome PBM file (instead of a color PPM file). +.TP +.B \-gray +Generate a grayscale PGM file (instead of a color PPM file). +.TP +.BI \-t1lib " yes | no" +Enable or disable t1lib (a Type 1 font rasterizer). This defaults to +"yes". +.RB "[config file: " enableT1lib ] +.TP +.BI \-freetype " yes | no" +Enable or disable FreeType (a TrueType / Type 1 font rasterizer). +This defaults to "yes". +.RB "[config file: " enableFreeType ] +.TP +.BI \-aa " yes | no" +Enable or disable font anti-aliasing. This defaults to "yes". +.RB "[config file: " antialias ] +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. 
Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.RB "[config file: " errQuiet ] +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftoppm software and documentation are copyright 1996-2004 Glyph +& Cog, LLC. +.SH "SEE ALSO" +.BR xpdf (1), +.BR pdftops (1), +.BR pdftotext (1), +.BR pdfinfo (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR xpdfrc (5) +.br +.B http://www.foolabs.com/xpdf/ diff -urN poppler-0.4.3.orig/utils/pdftoppm.cc poppler-0.4.3/utils/pdftoppm.cc --- poppler-0.4.3.orig/utils/pdftoppm.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/pdftoppm.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,189 @@ +//======================================================================== +// +// pdftoppm.cc +// +// Copyright 2003 Glyph & Cog, LLC +// +//======================================================================== + +#include +#include +#include "parseargs.h" +#include "goo/gmem.h" +#include "goo/GooString.h" +#include "GlobalParams.h" +#include "Object.h" +#include "PDFDoc.h" +#include "splash/SplashBitmap.h" +#include "splash/Splash.h" +#include "SplashOutputDev.h" +#include "config.h" + +static int firstPage = 1; +static int lastPage = 0; +static int resolution = 150; +static GBool mono = gFalse; +static GBool gray = gFalse; +static char enableT1libStr[16] = ""; +static char enableFreeTypeStr[16] = ""; +static char antialiasStr[16] = ""; +static char ownerPassword[33] = ""; +static char userPassword[33] = ""; +static GBool quiet = gFalse; +static char cfgFileName[256] = ""; +static 
GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to print"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to print"},
+  {"-r", argInt, &resolution, 0,
+   "resolution, in DPI (default is 150)"},
+  {"-mono", argFlag, &mono, 0,
+   "generate a monochrome PBM file"},
+  {"-gray", argFlag, &gray, 0,
+   "generate a grayscale PGM file"},
+#if HAVE_T1LIB_H
+  {"-t1lib", argString, enableT1libStr, sizeof(enableT1libStr),
+   "enable t1lib font rasterizer: yes, no"},
+#endif
+#if HAVE_FREETYPE_FREETYPE_H | HAVE_FREETYPE_H
+  {"-freetype", argString, enableFreeTypeStr, sizeof(enableFreeTypeStr),
+   "enable FreeType font rasterizer: yes, no"},
+#endif
+  {"-aa", argString, antialiasStr, sizeof(antialiasStr),
+   "enable font anti-aliasing: yes, no"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-q", argFlag, &quiet, 0,
+   "don't print any messages or errors"},
+  {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+// pdftoppm entry point: renders pages firstPage..lastPage with SplashOutputDev and writes each to <PPM-root>-NNNNNN.{ppm,pgm,pbm}; returns 0 on success, 1 if the PDF cannot be opened, 99 otherwise.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  char *ppmRoot;
+  char ppmFile[512];
+  GooString *ownerPW, *userPW;
+  SplashColor paperColor;
+  SplashOutputDev *splashOut;
+  GBool ok;
+  int exitCode;
+  int pg;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (mono && gray) {  // the two output modes are mutually exclusive
+    ok = gFalse;
+  }
+  if (!ok || argc != 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftoppm version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftoppm", "<PDF-file> <PPM-root>", argDesc);
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+  ppmRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  globalParams->setupBaseFonts(NULL);
+  if (enableT1libStr[0]) {
+    if (!globalParams->setEnableT1lib(enableT1libStr)) {
+      fprintf(stderr, "Bad '-t1lib' value on command line\n");
+    }
+  }
+  if (enableFreeTypeStr[0]) {
+    if (!globalParams->setEnableFreeType(enableFreeTypeStr)) {
+      fprintf(stderr, "Bad '-freetype' value on command line\n");
+    }
+  }
+  if (antialiasStr[0]) {
+    if (!globalParams->setAntialias(antialiasStr)) {
+      fprintf(stderr, "Bad '-aa' value on command line\n");
+    }
+  }
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // write PPM files
+  paperColor.rgb8 = splashMakeRGB8(255, 255, 255);
+  splashOut = new SplashOutputDev(mono ? splashModeMono1 :
+                                  gray ? splashModeMono8 :
+                                         splashModeRGB8,
+                                  gFalse, paperColor);
+  splashOut->startDoc(doc->getXRef());
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    doc->displayPage(splashOut, pg, resolution, resolution, 0, gTrue, gFalse);
+    sprintf(ppmFile, "%.*s-%06d.%s",  // the %.*s precision caps the root so the name fits ppmFile
+            (int)sizeof(ppmFile) - 32, ppmRoot, pg,
+            mono ? "pbm" : gray ? "pgm" : "ppm");
+    splashOut->getBitmap()->writePNMFile(ppmFile);
+  }
+  delete splashOut;
+
+  exitCode = 0;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff -urN poppler-0.4.3.orig/utils/pdftops.1 poppler-0.4.3/utils/pdftops.1
--- poppler-0.4.3.orig/utils/pdftops.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdftops.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,224 @@
+.\" Copyright 1996-2004 Glyph & Cog, LLC
+.TH pdftops 1 "22 January 2004"
+.SH NAME
+pdftops \- Portable Document Format (PDF) to PostScript converter
+(version 3.00)
+.SH SYNOPSIS
+.B pdftops
+[options]
+.RI [ PDF-file
+.RI [ PS-file ]]
+.SH DESCRIPTION
+.B Pdftops
+converts Portable Document Format (PDF) files to PostScript so they
+can be printed.
+.PP
+Pdftops reads the PDF file,
+.IR PDF-file ,
+and writes a PostScript file,
+.IR PS-file .
+If
+.I PS-file
+is not specified, pdftops converts
+.I file.pdf
+to
+.I file.ps
+(or
+.I file.eps
+with the -eps option). If
+.I PS-file
+is \'-', the PostScript is sent to stdout.
+.SH CONFIGURATION FILE
+Pdftops reads a configuration file at startup. It first tries to find
+the user's private config file, ~/.xpdfrc. If that doesn't exist, it
+looks for a system-wide config file, /etc/xpdf/xpdfrc. See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Many of the following options can be set with configuration file
+commands. These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.BI \-f " number"
+Specifies the first page to print.
+.TP
+.BI \-l " number"
+Specifies the last page to print.
+.TP
+.B \-level1
+Generate Level 1 PostScript. The resulting PostScript files will be
+significantly larger (if they contain images), but will print on Level
+1 printers. This also converts all images to black and white.
No +more than one of the PostScript level options (-level1, -level1sep, +-level2, -level2sep, -level3, -level3sep) may be given. +.RB "[config file: " psLevel ] +.TP +.B \-level1sep +Generate Level 1 separable PostScript. All colors are converted to +CMYK. Images are written with separate stream data for the four +components. +.RB "[config file: " psLevel ] +.TP +.B \-level2 +Generate Level 2 PostScript. Level 2 supports color images and image +compression. This is the default setting. +.RB "[config file: " psLevel ] +.TP +.B \-level2sep +Generate Level 2 separable PostScript. All colors are converted to +CMYK. The PostScript separation convention operators are used to +handle custom (spot) colors. +.RB "[config file: " psLevel ] +.TP +.B \-level3 +Generate Level 3 PostScript. This enables all Level 2 features plus +CID font embedding. +.RB "[config file: " psLevel ] +.TP +.B \-level3sep +Generate Level 3 separable PostScript. The separation handling is the +same as for -level2sep. +.RB "[config file: " psLevel ] +.TP +.B \-eps +Generate an Encapsulated PostScript (EPS) file. An EPS file contains +a single image, so if you use this option with a multi-page PDF file, +you must use -f and -l to specify a single page. No more than one of +the mode options (-eps, -form) may be given. +.TP +.B \-form +Generate a PostScript form which can be imported by software that +understands forms. A form contains a single page, so if you use this +option with a multi-page PDF file, you must use -f and -l to specify a +single page. The -level1 option cannot be used with -form. +.TP +.B \-opi +Generate OPI comments for all images and forms which have OPI +information. (This option is only available if pdftops was compiled +with OPI support.) +.RB "[config file: " psOPI ] +.TP +.B \-noembt1 +By default, any Type 1 fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead.
Embedded fonts make PostScript files +larger, but may be necessary for readable output. +.RB "[config file: " psEmbedType1Fonts ] +.TP +.B \-noembtt +By default, any TrueType fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead. Embedded fonts make PostScript files +larger, but may be necessary for readable output. Also, some +PostScript interpreters do not have TrueType rasterizers. +.RB "[config file: " psEmbedTrueTypeFonts ] +.TP +.B \-noembcidps +By default, any CID PostScript fonts which are embedded in the PDF +file are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +PostScript fonts. +.RB "[config file: " psEmbedCIDPostScriptFonts ] +.TP +.B \-noembcidtt +By default, any CID TrueType fonts which are embedded in the PDF file +are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +TrueType fonts. +.RB "[config file: " psEmbedCIDTrueTypeFonts ] +.TP +.BI \-paper " size" +Set the paper size to one of "letter", "legal", "A4", or "A3". This +can also be set to "match", which will set the paper size to match the +size specified in the PDF file. +.RB "[config file: " psPaperSize ] +.TP +.BI \-paperw " size" +Set the paper width, in points. +.RB "[config file: " psPaperSize ] +.TP +.BI \-paperh " size" +Set the paper height, in points. +.RB "[config file: " psPaperSize ] +.TP +.B \-nocrop +By default, output is cropped to the CropBox specified in the PDF +file. This option disables cropping. +.RB "[config file: " psCrop ] +.TP +.B \-expand +Expand PDF pages smaller than the paper to fill the paper. By +default, these pages are not scaled. +.RB "[config file: " psExpandSmaller ] +.TP +.B \-noshrink +Don't scale PDF pages which are larger than the paper. By default, +pages larger than the paper are shrunk to fit. 
+.RB "[config file: " psShrinkLarger ] +.TP +.B \-nocenter +By default, PDF pages smaller than the paper (after any scaling) are +centered on the paper. This option causes them to be aligned to the +lower-left corner of the paper instead. +.RB "[config file: " psCenter ] +.TP +.B \-duplex +Set the Duplex pagedevice entry in the PostScript file. This tells +duplex-capable printers to enable duplexing. +.RB "[config file: " psDuplex ] +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.RB "[config file: " errQuiet ] +.TP +.BI \-cfg " config-file" +Read +.I config-file +in place of ~/.xpdfrc or the system-wide config file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftops software and documentation are copyright 1996-2004 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR xpdf (1), +.BR pdftotext (1), +.BR pdfinfo (1), +.BR pdffonts (1), +.BR pdftoppm (1), +.BR pdfimages (1), +.BR xpdfrc (5) +.br +.B http://www.foolabs.com/xpdf/ diff -urN poppler-0.4.3.orig/utils/pdftops.cc poppler-0.4.3/utils/pdftops.cc --- poppler-0.4.3.orig/utils/pdftops.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/pdftops.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,336 @@ +//======================================================================== +// +// pdftops.cc +// +// Copyright 1996-2003 Glyph & Cog, LLC +// +// Modified for Debian by Hamish Moffatt, 22 May 2002. 
+// +//======================================================================== + +#include +#include +#include +#include +#include +#include "parseargs.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "PSOutputDev.h" +#include "Error.h" +#include "config.h" + +static int firstPage = 1; +static int lastPage = 0; +static GBool level1 = gFalse; +static GBool level1Sep = gFalse; +static GBool level2 = gFalse; +static GBool level2Sep = gFalse; +static GBool level3 = gFalse; +static GBool level3Sep = gFalse; +static GBool doEPS = gFalse; +static GBool doForm = gFalse; +#if OPI_SUPPORT +static GBool doOPI = gFalse; +#endif +static GBool noEmbedT1Fonts = gFalse; +static GBool noEmbedTTFonts = gFalse; +static GBool noEmbedCIDPSFonts = gFalse; +static GBool noEmbedCIDTTFonts = gFalse; +static char paperSize[15] = ""; +static int paperWidth = 0; +static int paperHeight = 0; +static GBool noCrop = gFalse; +static GBool expand = gFalse; +static GBool noShrink = gFalse; +static GBool noCenter = gFalse; +static GBool duplex = gFalse; +static char ownerPassword[33] = "\001"; +static char userPassword[33] = "\001"; +static GBool quiet = gFalse; +static char cfgFileName[256] = ""; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to print"}, + {"-l", argInt, &lastPage, 0, + "last page to print"}, + {"-level1", argFlag, &level1, 0, + "generate Level 1 PostScript"}, + {"-level1sep", argFlag, &level1Sep, 0, + "generate Level 1 separable PostScript"}, + {"-level2", argFlag, &level2, 0, + "generate Level 2 PostScript"}, + {"-level2sep", argFlag, &level2Sep, 0, + "generate Level 2 separable PostScript"}, + {"-level3", argFlag, &level3, 0, + "generate Level 3 PostScript"}, + {"-level3sep", 
argFlag, &level3Sep, 0, + "generate Level 3 separable PostScript"}, + {"-eps", argFlag, &doEPS, 0, + "generate Encapsulated PostScript (EPS)"}, + {"-form", argFlag, &doForm, 0, + "generate a PostScript form"}, +#if OPI_SUPPORT + {"-opi", argFlag, &doOPI, 0, + "generate OPI comments"}, +#endif + {"-noembt1", argFlag, &noEmbedT1Fonts, 0, + "don't embed Type 1 fonts"}, + {"-noembtt", argFlag, &noEmbedTTFonts, 0, + "don't embed TrueType fonts"}, + {"-noembcidps", argFlag, &noEmbedCIDPSFonts, 0, + "don't embed CID PostScript fonts"}, + {"-noembcidtt", argFlag, &noEmbedCIDTTFonts, 0, + "don't embed CID TrueType fonts"}, + {"-paper", argString, paperSize, sizeof(paperSize), + "paper size (letter, legal, A4, A3, match)"}, + {"-paperw", argInt, &paperWidth, 0, + "paper width, in points"}, + {"-paperh", argInt, &paperHeight, 0, + "paper height, in points"}, + {"-nocrop", argFlag, &noCrop, 0, + "don't crop pages to CropBox"}, + {"-expand", argFlag, &expand, 0, + "expand pages smaller than the paper size"}, + {"-noshrink", argFlag, &noShrink, 0, + "don't shrink pages larger than the paper size"}, + {"-nocenter", argFlag, &noCenter, 0, + "don't center pages smaller than the paper size"}, + {"-duplex", argFlag, &duplex, 0, + "enable duplex printing"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + {"-q", argFlag, &quiet, 0, + "don't print any messages or errors"}, + {"-cfg", argString, cfgFileName, sizeof(cfgFileName), + "configuration file to use in place of .xpdfrc"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +int 
main(int argc, char *argv[]) {  // pdftops entry point; exit status: 0 ok, 1 usage error or unreadable PDF, 2 output file error, 3 print permission denied (ENFORCE_PERMISSIONS builds), 99 other
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *psFileName;
+  PSLevel level;
+  PSOutMode mode;
+  GooString *ownerPW, *userPW;
+  PSOutputDev *psOut;
+  GBool ok;
+  char *p;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftops version %s\n", xpdfVersion);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftops", "<PDF-file> [<PS-file>]", argDesc);
+    }
+    exit(1);
+  }
+  if ((level1 ? 1 : 0) +
+      (level1Sep ? 1 : 0) +
+      (level2 ? 1 : 0) +
+      (level2Sep ? 1 : 0) +
+      (level3 ? 1 : 0) +
+      (level3Sep ? 1 : 0) > 1) {
+    fprintf(stderr, "Error: use only one of the 'level' options.\n");
+    exit(1);
+  }
+  if (doEPS && doForm) {
+    fprintf(stderr, "Error: use only one of -eps and -form\n");
+    exit(1);
+  }
+  if (level1) {
+    level = psLevel1;
+  } else if (level1Sep) {
+    level = psLevel1Sep;
+  } else if (level2Sep) {
+    level = psLevel2Sep;
+  } else if (level3) {
+    level = psLevel3;
+  } else if (level3Sep) {
+    level = psLevel3Sep;
+  } else {
+    level = psLevel2;  // default, also selected by an explicit -level2
+  }
+  if (doForm && level < psLevel2) {
+    fprintf(stderr, "Error: forms are only available with Level 2 output.\n");
+    exit(1);
+  }
+  mode = doEPS ? psModeEPS
+               : doForm ? psModeForm
+                        : psModePS;
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (paperSize[0]) {
+    if (!globalParams->setPSPaperSize(paperSize)) {
+      fprintf(stderr, "Invalid paper size\n");
+      delete fileName;
+      goto err0;
+    }
+  } else {
+    if (paperWidth) {
+      globalParams->setPSPaperWidth(paperWidth);
+    }
+    if (paperHeight) {
+      globalParams->setPSPaperHeight(paperHeight);
+    }
+  }
+  if (noCrop) {
+    globalParams->setPSCrop(gFalse);
+  }
+  if (expand) {
+    globalParams->setPSExpandSmaller(gTrue);
+  }
+  if (noShrink) {
+    globalParams->setPSShrinkLarger(gFalse);
+  }
+  if (noCenter) {
+    globalParams->setPSCenter(gFalse);
+  }
+  if (duplex) {
+    globalParams->setPSDuplex(duplex);
+  }
+  if (level1 || level1Sep || level2 || level2Sep || level3 || level3Sep) {
+    globalParams->setPSLevel(level);
+  }
+  if (noEmbedT1Fonts) {
+    globalParams->setPSEmbedType1(!noEmbedT1Fonts);
+  }
+  if (noEmbedTTFonts) {
+    globalParams->setPSEmbedTrueType(!noEmbedTTFonts);
+  }
+  if (noEmbedCIDPSFonts) {
+    globalParams->setPSEmbedCIDPostScript(!noEmbedCIDPSFonts);
+  }
+  if (noEmbedCIDTTFonts) {
+    globalParams->setPSEmbedCIDTrueType(!noEmbedCIDTTFonts);
+  }
+#if OPI_SUPPORT
+  if (doOPI) {
+    globalParams->setPSOPI(doOPI);
+  }
+#endif
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file ('\001' in the first byte is the "no password given" default)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  doc = new PDFDoc(fileName, ownerPW, userPW);
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+#ifdef ENFORCE_PERMISSIONS
+  // check for print permission
+  if (!doc->okToPrint()) {
+    error(-1, "Printing this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // construct PostScript file name
+  if (argc == 3) {
+    psFileName = new GooString(argv[2]);
+  } else {
+    // strip a trailing ".pdf"/".PDF"; names shorter than that are compared whole
+    p = fileName->getCString() + (fileName->getLength() >= 4 ? fileName->getLength() - 4 : 0);
+    if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
+      psFileName = new GooString(fileName->getCString(), fileName->getLength() - 4);
+    } else {
+      psFileName = fileName->copy();
+    }
+    psFileName->append(doEPS ? ".eps" : ".ps");
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // check for multi-page EPS or form
+  if ((doEPS || doForm) && firstPage != lastPage) {
+    error(-1, "EPS and form files can only contain one page.");
+    goto err2;
+  }
+
+  // write PostScript file
+  psOut = new PSOutputDev(psFileName->getCString(), doc->getXRef(),
+                          doc->getCatalog(), firstPage, lastPage, mode);
+  if (psOut->isOk()) {
+    doc->displayPages(psOut, firstPage, lastPage, 72, 72,
+                      0, globalParams->getPSCrop(), gFalse);
+  } else {
+    delete psOut;
+    exitCode = 2;
+    goto err2;
+  }
+  delete psOut;
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  delete psFileName;
+ err1:
+  delete doc;
+ err0:
+  delete globalParams;
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff -urN poppler-0.4.3.orig/utils/pdftotext.1 poppler-0.4.3/utils/pdftotext.1
--- poppler-0.4.3.orig/utils/pdftotext.1 1970-01-01 01:00:00.000000000 +0100
+++ poppler-0.4.3/utils/pdftotext.1 2005-12-30 11:18:50.000000000 +0100
@@ -0,0 +1,135 @@
+.\" Copyright 1997-2004 Glyph & Cog, LLC
+.TH pdftotext 1 "22 January 2004"
+.SH NAME
+pdftotext \- Portable Document Format (PDF) to text converter
+(version 3.00)
+.SH SYNOPSIS
+.B pdftotext
+[options]
+.RI [ PDF-file
+.RI [ text-file ]]
+.SH DESCRIPTION
+.B Pdftotext
+converts Portable Document Format (PDF) files to plain text.
+.PP
+Pdftotext reads the PDF file,
+.IR PDF-file ,
+and writes a text file,
+.IR text-file .
+If
+.I text-file
+is not specified, pdftotext converts
+.I file.pdf
+to
+.IR file.txt .
+If +.I text-file +is \'-', the text is sent to stdout. +.SH CONFIGURATION FILE +Pdftotext reads a configuration file at startup. It first tries to +find the user's private config file, ~/.xpdfrc. If that doesn't +exist, it looks for a system-wide config file, /etc/xpdf/xpdfrc. See the +.BR xpdfrc (5) +man page for details. +.SH OPTIONS +Many of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.B \-layout +Maintain (as best as possible) the original physical layout of the +text. The default is to \'undo' physical layout (columns, +hyphenation, etc.) and output the text in reading order. +.TP +.B \-raw +Keep the text in content stream order. This is a hack which often +"undoes" column formatting, etc. Use of raw mode is no longer +recommended. +.TP +.B \-htmlmeta +Generate a simple HTML file, including the meta information. This +simply wraps the text in
<pre> and </pre>
and prepends the meta +headers. +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. The +.I encoding\-name +must be defined with the unicodeMap command (see +.BR xpdfrc (5)). +The encoding name is case-sensitive. This defaults to "Latin1" (which +is a built-in encoding). +.RB "[config file: " textEncoding ] +.TP +.BI \-eol " unix | dos | mac" +Sets the end-of-line convention to use for text output. +.RB "[config file: " textEOL ] +.TP +.B \-nopgbrk +Don't insert page breaks (form feed characters) between pages. +.RB "[config file: " textPageBreaks ] +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.RB "[config file: " errQuiet ] +.TP +.BI \-cfg " config-file" +Read +.I config-file +in place of ~/.xpdfrc or the system-wide config file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH BUGS +Some PDF files contain fonts whose encodings have been mangled beyond +recognition. There is no way (short of OCR) to extract text from +these files. +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftotext software and documentation are copyright 1996-2004 Glyph +& Cog, LLC. 
+.SH "SEE ALSO" +.BR xpdf (1), +.BR pdftops (1), +.BR pdfinfo (1), +.BR pdffonts (1), +.BR pdftoppm (1), +.BR pdfimages (1), +.BR xpdfrc (5) +.br +.B http://www.foolabs.com/xpdf/ diff -urN poppler-0.4.3.orig/utils/pdftotext.cc poppler-0.4.3/utils/pdftotext.cc --- poppler-0.4.3.orig/utils/pdftotext.cc 1970-01-01 01:00:00.000000000 +0100 +++ poppler-0.4.3/utils/pdftotext.cc 2005-12-30 11:18:50.000000000 +0100 @@ -0,0 +1,336 @@ +//======================================================================== +// +// pdftotext.cc +// +// Copyright 1997-2003 Glyph & Cog, LLC +// +// Modified for Debian by Hamish Moffatt, 22 May 2002. +// +//======================================================================== + +#include +#include +#include +#include +#include +#include "parseargs.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "TextOutputDev.h" +#include "CharTypes.h" +#include "UnicodeMap.h" +#include "Error.h" +#include "config.h" + +static void printInfoString(FILE *f, Dict *infoDict, char *key, + char *text1, char *text2, UnicodeMap *uMap); +static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt); + +static int firstPage = 1; +static int lastPage = 0; +static GBool physLayout = gFalse; +static GBool rawOrder = gFalse; +static GBool htmlMeta = gFalse; +static char textEncName[128] = ""; +static char textEOL[16] = ""; +static GBool noPageBreaks = gFalse; +static char ownerPassword[33] = "\001"; +static char userPassword[33] = "\001"; +static GBool quiet = gFalse; +static char cfgFileName[256] = ""; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + {"-layout", argFlag, &physLayout, 
0, + "maintain original physical layout"}, + {"-raw", argFlag, &rawOrder, 0, + "keep strings in content stream order"}, + {"-htmlmeta", argFlag, &htmlMeta, 0, + "generate a simple HTML file, including the meta information"}, + {"-enc", argString, textEncName, sizeof(textEncName), + "output text encoding name"}, + {"-eol", argString, textEOL, sizeof(textEOL), + "output end-of-line convention (unix, dos, or mac)"}, + {"-nopgbrk", argFlag, &noPageBreaks, 0, + "don't insert page breaks between pages"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + {"-q", argFlag, &quiet, 0, + "don't print any messages or errors"}, + {"-cfg", argString, cfgFileName, sizeof(cfgFileName), + "configuration file to use in place of .xpdfrc"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName; + GooString *textFileName; + GooString *ownerPW, *userPW; + TextOutputDev *textOut; + FILE *f; + UnicodeMap *uMap; + Object info; + GBool ok; + char *p; + int exitCode; + + exitCode = 99; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) { + fprintf(stderr, "pdftotext version %s\n", xpdfVersion); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftotext", " []", argDesc); + } + goto err0; + } + fileName = new GooString(argv[1]); + + // read config file + globalParams = new GlobalParams(cfgFileName); + if (textEncName[0]) { + globalParams->setTextEncoding(textEncName); + } + if 
(textEOL[0]) { + if (!globalParams->setTextEOL(textEOL)) { + fprintf(stderr, "Bad '-eol' value on command line\n"); + } + } + if (noPageBreaks) { + globalParams->setTextPageBreaks(gFalse); + } + if (quiet) { + globalParams->setErrQuiet(quiet); + } + + // get mapping to output encoding + if (!(uMap = globalParams->getTextEncoding())) { + error(-1, "Couldn't get text encoding"); + delete fileName; + goto err1; + } + + // open PDF file + if (ownerPassword[0] != '\001') { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0] != '\001') { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + doc = new PDFDoc(fileName, ownerPW, userPW); + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err2; + } + +#ifdef ENFORCE_PERMISSIONS + // check for copy permission + if (!doc->okToCopy()) { + error(-1, "Copying of text from this document is not allowed."); + exitCode = 3; + goto err2; + } +#endif + + // construct text file name + if (argc == 3) { + textFileName = new GooString(argv[2]); + } else { + p = fileName->getCString() + fileName->getLength() - 4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { + textFileName = new GooString(fileName->getCString(), + fileName->getLength() - 4); + } else { + textFileName = fileName->copy(); + } + textFileName->append(htmlMeta ? 
".html" : ".txt"); + } + + // get page range + if (firstPage < 1) { + firstPage = 1; + } + if (lastPage < 1 || lastPage > doc->getNumPages()) { + lastPage = doc->getNumPages(); + } + + // write HTML header + if (htmlMeta) { + if (!textFileName->cmp("-")) { + f = stdout; + } else { + if (!(f = fopen(textFileName->getCString(), "wb"))) { + error(-1, "Couldn't open text file '%s'", textFileName->getCString()); + exitCode = 2; + goto err3; + } + } + fputs("\n", f); + fputs("\n", f); + doc->getDocInfo(&info); + if (info.isDict()) { + printInfoString(f, info.getDict(), "Title", "", "\n", + uMap); + printInfoString(f, info.getDict(), "Subject", + "\n", uMap); + printInfoString(f, info.getDict(), "Keywords", + "\n", uMap); + printInfoString(f, info.getDict(), "Author", + "\n", uMap); + printInfoString(f, info.getDict(), "Creator", + "\n", uMap); + printInfoString(f, info.getDict(), "Producer", + "\n", uMap); + printInfoDate(f, info.getDict(), "CreationDate", + "\n"); + printInfoDate(f, info.getDict(), "LastModifiedDate", + "\n"); + } + info.free(); + fputs("\n", f); + fputs("\n", f); + fputs("
\n", f);
+    if (f != stdout) {
+      fclose(f);
+    }
+  }
+
+  // write text file
+  textOut = new TextOutputDev(textFileName->getCString(),
+			      physLayout, rawOrder, htmlMeta);
+  if (textOut->isOk()) {
+    doc->displayPages(textOut, firstPage, lastPage, 72, 72, 0, gTrue, gFalse);
+  } else {
+    delete textOut;
+    exitCode = 2;
+    goto err3;
+  }
+  delete textOut;
+
+  // write end of HTML file
+  if (htmlMeta) {
+    if (!textFileName->cmp("-")) {
+      f = stdout;
+    } else {
+      if (!(f = fopen(textFileName->getCString(), "ab"))) {
+	error(-1, "Couldn't open text file '%s'", textFileName->getCString());
+	exitCode = 2;
+	goto err3;
+      }
+    }
+    fputs("
\n", f); + fputs("\n", f); + fputs("\n", f); + if (f != stdout) { + fclose(f); + } + } + + exitCode = 0; + + // clean up + err3: + delete textFileName; + err2: + delete doc; + uMap->decRefCnt(); + err1: + delete globalParams; + err0: + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; +} + +static void printInfoString(FILE *f, Dict *infoDict, char *key, + char *text1, char *text2, UnicodeMap *uMap) { + Object obj; + GooString *s1; + GBool isUnicode; + Unicode u; + char buf[8]; + int i, n; + + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text1, f); + s1 = obj.getString(); + if ((s1->getChar(0) & 0xff) == 0xfe && + (s1->getChar(1) & 0xff) == 0xff) { + isUnicode = gTrue; + i = 2; + } else { + isUnicode = gFalse; + i = 0; + } + while (i < obj.getString()->getLength()) { + if (isUnicode) { + u = ((s1->getChar(i) & 0xff) << 8) | + (s1->getChar(i+1) & 0xff); + i += 2; + } else { + u = s1->getChar(i) & 0xff; + ++i; + } + n = uMap->mapUnicode(u, buf, sizeof(buf)); + fwrite(buf, 1, n, f); + } + fputs(text2, f); + } + obj.free(); +} + +static void printInfoDate(FILE *f, Dict *infoDict, char *key, char *fmt) { + Object obj; + char *s; + + if (infoDict->lookup(key, &obj)->isString()) { + s = obj.getString()->getCString(); + if (s[0] == 'D' && s[1] == ':') { + s += 2; + } + fprintf(f, fmt, s); + } + obj.free(); +}