From 330ed82f46f85a919792d5f68f431073dd4d833c Mon Sep 17 00:00:00 2001 From: Adrian Johnson Date: Sun, 24 Nov 2013 17:16:18 +1030 Subject: [PATCH 10/11] PDFWriter: support multiple document input --- glib/poppler-document.cc | 13 ++-- poppler/PDFWriter.cc | 167 ++++++++++++++++++++++++++++++----------------- poppler/PDFWriter.h | 32 ++++++--- test/pdftopdf.cc | 128 ++++++++++++++++++++++++++---------- 4 files changed, 229 insertions(+), 111 deletions(-) diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index 1dcb5be..43d2a32 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -2771,7 +2771,7 @@ poppler_pdf_file_new (PopplerDocument *document) pdf_file = (PopplerPDFFile *) g_object_new (POPPLER_TYPE_PDF_FILE, NULL); pdf_file->document = (PopplerDocument *) g_object_ref (document); - pdf_file->writer = new PDFWriter(document->doc); + pdf_file->writer = new PDFWriter(); return pdf_file; } @@ -3090,10 +3090,12 @@ poppler_pdf_file_set_center (PopplerPDFFile *pdf_file, * **/ void -poppler_pdf_file_add_page (PopplerPDFFile *pdf_file, - gint page) +poppler_pdf_file_add_pages (PopplerPDFFile *pdf_file, + PopplerDocument *document, + gint first, + gint last) { - pdf_file->writer->addPage(page); + pdf_file->writer->addPages(document->doc, first, last); } /** @@ -3111,8 +3113,7 @@ poppler_pdf_file_write (PopplerPDFFile *pdf_file, const char *filename) { if (filename != NULL) { - GooString s(filename); - return pdf_file->writer->writeFile(&s); + return pdf_file->writer->writeFile(filename); } return FALSE; diff --git a/poppler/PDFWriter.cc b/poppler/PDFWriter.cc index e37c82f..9d85543 100644 --- a/poppler/PDFWriter.cc +++ b/poppler/PDFWriter.cc @@ -15,9 +15,8 @@ #include "DeflateStream.h" #include -PDFWriter::PDFWriter(PDFDoc *docA) +PDFWriter::PDFWriter() { - doc = docA; copies = 1; collate = gTrue; reverse = gFalse; @@ -58,9 +57,24 @@ void PDFWriter::addPaperSize(double width, double height, paperSizes.push_back(paperSize); } -void 
PDFWriter::addPage(int page) +void PDFWriter::addPages(PDFDoc *doc, int first, int last) { - pages.push_back(page); + PagesEntry pe; + pe.doc = doc; + pe.first = first; + pe.last = last; + pages.push_back(pe); + + std::map::iterator it; + it = docToPages.find(doc); + if (it == docToPages.end()) { + DocEntry de; + de.objectOffset = 0; + docToPages.insert(std::pair(doc, de)); + it = docToPages.find(doc); + } + for (int i = first; i <= last; i++) + it->second.pages.push_back(i); } struct PageScale { @@ -347,9 +361,9 @@ void PDFWriter::getNupCTM(int nupPage, PDFRectangle *mediaSize, PDFRectangle *ma ctm->scale(scale, scale); } -void PDFWriter::writeObject(Object *obj) +void PDFWriter::writeObject(Object *obj, int offset) { - PDFDoc::writeObject(obj, outputStr, yRef, 0, + PDFDoc::writeObject(obj, outputStr, yRef, offset, NULL, cryptRC4, 0, 0, 0); } @@ -397,13 +411,15 @@ static GBool includePageEntry(const char *key) // MediaBox and Parent is written. The other *Box keys are removed as // the content may be resized. 
// Return the media size -void PDFWriter::writePageObject(int pageNum, int copy, PDFRectangle *mediaSize) +void PDFWriter::writePageObject(PDFDoc *doc, int pageNum, int copy, int offset, PDFRectangle *mediaSize) { PDFRectangle margins; Matrix ctm; Ref ctmRef; Page *page = doc->getCatalog()->getPage(pageNum); + if (!page) + return; getPaperSize(page, mediaSize, &margins); getPageCTM(page, mediaSize, &margins, &ctm); GBool ctmRequired = !ctm.isIdentity(); @@ -440,11 +456,11 @@ void PDFWriter::writePageObject(int pageNum, int copy, PDFRectangle *mediaSize) for (int i = 0; i < contentsObj.arrayGetLength(); i++) { Object obj2; contentsObj.arrayGetNF(i, &obj2); - writeObject(&obj2); + writeObject(&obj2, offset); obj2.free(); } } else { - writeObject(&contentsObj); + writeObject(&contentsObj, offset); } contentsObj.free(); outputStr->printf(" ]\n"); @@ -454,7 +470,7 @@ void PDFWriter::writePageObject(int pageNum, int copy, PDFRectangle *mediaSize) if (includePageEntry(key)) { outputStr->printf("/%s ", key); Object obj1; - writeObject(pageDict->getValNF(i, &obj1)); + writeObject(pageDict->getValNF(i, &obj1), offset); outputStr->printf("\n"); obj1.free(); } @@ -463,14 +479,14 @@ void PDFWriter::writePageObject(int pageNum, int copy, PDFRectangle *mediaSize) outputStr->printf("endobj\n"); } -void PDFWriter::markPageObjects(Dict *pageDict, GBool markContent) +void PDFWriter::markPageObjects(PDFDoc *doc, Dict *pageDict, int offset, GBool markContent) { for (int i = 0; i < pageDict->getLength(); i++) { Object obj; const char *key = pageDict->getKey(i); if (includePageEntry(key) || (markContent && strcmp(key, "Contents") == 0)) { - doc->markObject(pageDict->getValNF(i, &obj), yRef, countRef, 0); + doc->markObject(pageDict->getValNF(i, &obj), yRef, countRef, offset); obj.free(); } } @@ -482,9 +498,11 @@ void PDFWriter::markPageObjects(Dict *pageDict, GBool markContent) // Group values are copied from the Page. 
As XObjects cannot split the // content into multiple streams, all the Page content streams are // concatenated together into a single XObject stream. -void PDFWriter::writeXObject(int pageNum) +void PDFWriter::writeXObject(PDFDoc *doc, int pageNum, int offset) { Page *page = doc->getCatalog()->getPage(pageNum); + if (!page) + return; Ref *refPage = doc->getCatalog()->getPageRef(pageNum); Object pageObj; doc->getXRef()->fetch(refPage->num, refPage->gen, &pageObj); @@ -504,14 +522,14 @@ void PDFWriter::writeXObject(int pageNum) if (!pageDict->lookupNF("Resources", &obj)->isNull()) { outputStr->printf("/Resources "); - writeObject(&obj); + writeObject(&obj, offset); obj.free(); outputStr->printf("\n"); } if (!pageDict->lookupNF("Group", &obj)->isNull()) { outputStr->printf("/Group "); - writeObject(&obj); + writeObject(&obj, offset); obj.free(); outputStr->printf("\n"); } @@ -670,60 +688,87 @@ void PDFWriter::writePageTree() outputStr->printf("endobj\n"); } -GBool PDFWriter::writeFile(GooString *name) +GBool PDFWriter::writeFile(const char *filename) { - FILE *f = fopen(name->getCString(), "wb"); + FILE *f = fopen(filename, "wb"); OutStream *outStr = new FileOutStream(f, 0); - writeStream(outStr); + writeStream(outStr, filename); outStr->close(); if (ferror(f)) { - error(errIO, -1, "Couldn't write file '{0:t}'", name); + error(errIO, -1, "Couldn't write file '{0:s}'", filename); /* {0:s}: filename is now a plain C string; {0:t} is the GooString* specifier */ return gFalse; } return gTrue; } -void PDFWriter::writeStream(OutStream *outStr) +// filename is only used to generate the catalog ID. 
It can be NULL if not available +void PDFWriter::writeStream(OutStream *outStr, const char *filename) { PDFRectangle mediaSize; int outputPageNum; int nupPageNum; // 0..numberUp-1 - int pageNum; + int numOffset; outputStr = outStr; - // header objectsCount = 0; + numOffset = 0; yRef = new XRef(); countRef = new XRef(); yRef->add(0, 65535, 0, gFalse); - PDFDoc::writeHeader(outputStr, doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); + + // find highest PDF version required + int majorVersion = 1; + int minorVersion = 1; + for (std::map::iterator it = docToPages.begin(); it != docToPages.end(); ++it) { + PDFDoc *doc = it->first; + if (doc->getPDFMajorVersion() > majorVersion) { + majorVersion = doc->getPDFMajorVersion(); + minorVersion = doc->getPDFMinorVersion(); + } else if (doc->getPDFMajorVersion() == majorVersion) { + if (doc->getPDFMinorVersion() > minorVersion) { + minorVersion = doc->getPDFMinorVersion(); + } + } + } + + // header + PDFDoc::writeHeader(outputStr, majorVersion, minorVersion); // Mark all page objects. when printing n-up the content streams // are not marked as they will be included in the XObjects created for // each page. outputPageNum = 1; nupPageNum = 0; - for (int i = 0; i < (int)pages.size(); i++) { - pageNum = pages[i]; - if (pageSet == ALL || - (pageSet == ODD && outputPageNum % 2 == 1) || - (pageSet == EVEN && outputPageNum % 2 == 0)) { - Object pageObj; - Ref *refPage = doc->getCatalog()->getPageRef(pageNum); - doc->getXRef()->fetch(refPage->num, refPage->gen, &pageObj); - markPageObjects(pageObj.getDict(), numberUp > 1 ? 
gFalse : gTrue); - pageObj.free(); - } - nupPageNum++; - if (nupPageNum == numberUp) { - nupPageNum = 0; - outputPageNum++; + for (std::map::iterator it = docToPages.begin(); it != docToPages.end(); ++it) { + PDFDoc *doc = it->first; + DocEntry *de = &it->second; + de->objectOffset = numOffset; + for (int i = 0; i < (int)de->pages.size(); i++) { + int pageNum = de->pages[i]; + if (pageSet == ALL || + (pageSet == ODD && outputPageNum % 2 == 1) || + (pageSet == EVEN && outputPageNum % 2 == 0)) { + Object pageObj; + Ref *refPage = doc->getCatalog()->getPageRef(pageNum); + if (!refPage) + continue; + doc->getXRef()->fetch(refPage->num, refPage->gen, &pageObj); + markPageObjects(doc, pageObj.getDict(), numOffset, numberUp > 1 ? gFalse : gTrue); + pageObj.free(); + } + nupPageNum++; + if (nupPageNum == numberUp) { + nupPageNum = 0; + outputPageNum++; + } } - } // write marked page objects - doc->writePageObjects(outputStr, yRef, 0, gTrue); + doc->writePageObjects(outputStr, yRef, numOffset, gTrue); + numOffset = yRef->getNumObjects() + 1; + } nextObject = yRef->getNumObjects() + 1; + numOffset = 0; // Write a Page object for each page (nup == 1) or // Xobject for each page (nup > 1) + a Page object for each sheet. 
@@ -739,29 +784,32 @@ void PDFWriter::writeStream(OutStream *outStr) outputPageNum = 1; nupPageNum = 0; for (int i = 0; i < (int)pages.size(); i++) { - pageNum = pages[i]; - if (pageSet == ALL || - (pageSet == ODD && outputPageNum % 2 == 1) || - (pageSet == EVEN && outputPageNum % 2 == 0)) { - if (numberUp == 1) { - for (int cp = 0; cp < copies; cp++) - writePageObject(pageNum, cp, &mediaSize); - } else { - writeXObject(pageNum); - if (nupPageNum == numberUp - 1 || i == (int)pages.size() - 1) { + PDFDoc *doc = pages[i].doc; + int offset = docToPages[doc].objectOffset; + for (int pageNum = pages[i].first; pageNum <= pages[i].last; pageNum++) { + if (pageSet == ALL || + (pageSet == ODD && outputPageNum % 2 == 1) || + (pageSet == EVEN && outputPageNum % 2 == 0)) { + if (numberUp == 1) { for (int cp = 0; cp < copies; cp++) - writeSheetPageObject(cp, &mediaSize); - xobjectRefs.clear(); + writePageObject(doc, pageNum, cp, offset, &mediaSize); + } else { + writeXObject(doc, pageNum, offset); + if (nupPageNum == numberUp - 1 || (i == (int)pages.size() - 1 && pageNum == pages[i].last)) { /* sheet is full, or this is the final page of the final range; testing i alone would flush after every page of the last range */ + for (int cp = 0; cp < copies; cp++) + writeSheetPageObject(cp, &mediaSize); + xobjectRefs.clear(); + } } } - } - nupPageNum++; - if (nupPageNum == numberUp) { - nupPageNum = 0; - outputPageNum++; + nupPageNum++; + if (nupPageNum == numberUp) { + nupPageNum = 0; + outputPageNum++; + } } } - // when printing multiple copies in duplex: if there are an odd + // when printing multiple copies in duplex: if there is an odd // number of pages, add a blank page the same size as the last page // to ensure the first page of each copy starts on a new sheet. 
if (duplex && copies > 1 && (pageRefs[0].size() % 2 == 1)) { @@ -775,8 +823,7 @@ void PDFWriter::writeStream(OutStream *outStr) // trailer Goffset uxrefOffset = outputStr->getPos(); Dict *trailerDict = PDFDoc::createTrailerDict(objectsCount, gFalse, 0, &catalogRef, yRef, - doc->getFileName()->getCString(), - outputStr->getPos()); + filename, outputStr->getPos()); PDFDoc::writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */, uxrefOffset, outputStr, yRef); delete trailerDict; diff --git a/poppler/PDFWriter.h b/poppler/PDFWriter.h index d4e5a4c..d834978 100644 --- a/poppler/PDFWriter.h +++ b/poppler/PDFWriter.h @@ -40,7 +40,7 @@ public: enum Orientation { PORTRAIT, LANDSCAPE, REVERSE_PORTRAIT, REVERSE_LANDSCAPE }; enum Resize { NONE, SHRINK, FIT }; - PDFWriter(PDFDoc *docA); + PDFWriter(); ~PDFWriter(); // print options @@ -90,11 +90,12 @@ public: // If true center page within margins of paper void setCenter(GBool centerA) { center = centerA; } - void addPage(int page); + void addPages(PDFDoc *doc, int first, int last); - void writeStream(OutStream *outputStr); + // filename is only used to generate the catalog ID. 
It can be NULL if not available + void writeStream(OutStream *outputStr, const char *filename); - GBool writeFile(GooString *name); + GBool writeFile(const char *filename); private: int getNextPage(); @@ -107,13 +108,13 @@ private: Ref createRef(); void beginIndirectObject(Ref *ref); - void writeObject(Object *obj); - void writePageObject(int pageNum, int copy, PDFRectangle *mediaSize); - void writeXObject(int pageNum); + void writeObject(Object *obj, int offset = 0); + void writePageObject(PDFDoc *doc, int pageNum, int copy, int offset, PDFRectangle *mediaSize); + void writeXObject(PDFDoc *doc, int pageNum, int offset); void writeSheetPageObject(int copy, PDFRectangle *mediaSize); void writeBlankPage(int copy, PDFRectangle *mediaSize); void writePageTree(); - void markPageObjects(Dict *pageDict, GBool markContent); + void markPageObjects(PDFDoc *doc, Dict *pageDict, int offset, GBool markContent); struct PaperSize { double width; @@ -124,7 +125,17 @@ private: double right; }; - PDFDoc *doc; + struct PagesEntry { + PDFDoc *doc; + int first; + int last; + }; + + struct DocEntry { + int objectOffset; + std::vector pages; + }; + int nextObject; int objectsCount; Ref catalogRef; @@ -142,7 +153,8 @@ private: GBool collate; GBool reverse; GBool duplex; - std::vector pages; + std::vector pages; + std::map docToPages; std::vector paperSizes; PageSet pageSet; int numberUp; diff --git a/test/pdftopdf.cc b/test/pdftopdf.cc index 7fae678..9946465 100644 --- a/test/pdftopdf.cc +++ b/test/pdftopdf.cc @@ -25,8 +25,9 @@ #include "goo/GooString.h" #include "goo/gstrtod.h" #include "utils/parseargs.h" +#include -static GooString pageRanges; +static GooString outputName; static int copies = 1; static GBool collate = gFalse; static GBool reverse = gFalse; @@ -53,8 +54,10 @@ static GBool printHelp = gFalse; static const ArgDesc argDesc[] = { - {"-pages", argGooString, &pageRanges, 0, - "Comma separated page ranges. eg \"1,3-5,8,14-\". Default is all pages. 
"}, + {"-p", argStringDummy, NULL, 0, + "Comma separated page ranges. eg \"1,3-5,8,14-\". Default is all pages. This option (if used) must appear after the source file"}, + {"-o", argGooString, &outputName, 0, + "Output file" }, {"-copies", argInt, &copies, 0, "Number of copies. Default is 1."}, {"-collate", argFlag, &collate, 0, @@ -169,7 +172,7 @@ static char *getPaperSize(char *s, double *width, double *height) { } // return string from s up to next ',' for use in error messages -char *extractField(char *s) { +char *extractField(const char *s) { static char buf[1000]; char *p; @@ -321,8 +324,9 @@ static void setOrder(PDFWriter *writer) { } // parse the page range at p. last = -1 means last page in document -static char *getNextPageRange(char *s, int *first, int *last) { - char *p, *end; +static const char *getNextPageRange(const char *s, int *first, int *last) { + const char *p; + char *end; GBool err = gFalse; if (*s == 0) { @@ -375,28 +379,30 @@ static char *getNextPageRange(char *s, int *first, int *last) { exit(99); } +struct Pages { + const char *file; + PDFDoc *doc; + char *pages; +}; + int main (int argc, char *argv[]) { - PDFDoc *doc = NULL; - GooString *inputName = NULL; - GooString *outputName = NULL; GooString *ownerPW = NULL; GooString *userPW = NULL; PDFWriter *writer; - char *p; - int first = 1, last = 1; int res = 0; + std::list pagesList; // parse args if (!parseArgs(argDesc, &argc, argv)) exit(99); - if (argc != 3 || printVersion || printHelp) { + if (printVersion || printHelp) { fprintf(stderr, "pdftopdf version %s\n", PACKAGE_VERSION); fprintf(stderr, "%s\n", popplerCopyright); fprintf(stderr, "%s\n", xpdfCopyright); if (!printVersion) { - printUsage("pdftopdf", " ", argDesc); + printUsage("pdftopdf", " -o ", argDesc); } if (printVersion || printHelp) exit(0); @@ -405,6 +411,11 @@ int main (int argc, char *argv[]) } + if (outputName.getLength() == 0) { + fprintf(stderr, "Error: output file not specified\n"); + exit(99); + } + if (odd && even) 
{ fprintf(stderr, "Error: use only one of the options -odd and -even\n"); exit(99); @@ -430,9 +441,6 @@ int main (int argc, char *argv[]) exit(99); } - inputName = new GooString(argv[1]); - outputName = new GooString(argv[2]); - if (ownerPassword[0]) { ownerPW = new GooString(ownerPassword); } @@ -442,14 +450,8 @@ int main (int argc, char *argv[]) // load input document globalParams = new GlobalParams(); - doc = new PDFDoc(inputName, ownerPW, userPW); - if (!doc->isOk()) { - fprintf(stderr, "Error loading input document\n"); - res = 1; - goto done; - } - writer = new PDFWriter(doc); + writer = new PDFWriter(); // set output options setPaperSizes(writer); @@ -483,23 +485,79 @@ int main (int argc, char *argv[]) writer->setResize(PDFWriter::NONE); writer->setCenter(!nocenter); - p = pageRanges.getCString(); - while (p) { - p = getNextPageRange(p, &first, &last); - if (last == -1) - last = doc->getNumPages(); - for (int i = first; i <= last; i++) { - writer->addPage(i); + argv++; + argc--; + Pages pages; + pages.file = NULL; + pages.doc = NULL; + pages.pages = NULL; + while (argc) { + if (strcmp(argv[0], "-p") == 0) { + if (!pages.doc) { + fprintf(stderr, "Error: no source file specified for -p\n"); + exit(99); + } + if (pages.pages) { + fprintf(stderr, "Error: can not use -p twice for same source file\n"); + exit(99); + } + if (argc > 1) { + pages.pages = argv[1]; + argv++; + argc--; + } else { + fprintf(stderr, "Error: missing page range\n"); + exit(99); + } + } else { + if (pages.doc) { + pagesList.push_back(pages); + pages.file = NULL; + pages.doc = NULL; + pages.pages = NULL; + } + pages.file = argv[0]; + GooString *name = new GooString(argv[0]); + PDFDoc *doc = new PDFDoc(name, ownerPW, userPW); + if (!doc->isOk()) { + fprintf(stderr, "Error loading input document %s\n", name->getCString()); + exit(99); + } + pages.doc = doc; + } + argv++; + argc--; + } + if (pages.doc) { + pagesList.push_back(pages); + } else { + fprintf(stderr, "Error: no source files 
provided\n"); exit(99); } + + for (std::list::iterator it = pagesList.begin(); it != pagesList.end(); ++it) { + const char *p = it->pages ? it->pages : ""; + int first = 1, last = 1; + while (p) { + p = getNextPageRange(p, &first, &last); + if (last == -1) + last = it->doc->getNumPages(); + if (first < 1 || last > it->doc->getNumPages()) { + fprintf(stderr, "Error: page out of range for file %s\n", it->file); + exit(99); + } + writer->addPages(it->doc, first, last); } } - writer->writeFile(outputName); - delete writer; -done: - delete doc; + if (!writer->writeFile(outputName.getCString())) res = 1; /* report a failed write through the exit status instead of discarding the result */ + + for (std::list::iterator it = pagesList.begin(); it != pagesList.end(); ++it) { + delete it->doc; + } + delete writer; delete globalParams; delete userPW; delete ownerPW; return res; } - -- 1.8.3.2