From 0deb05560241d23ca08f420258b84b71b949e106 Mon Sep 17 00:00:00 2001
From: Marek Kasik
Date: Thu, 26 Sep 2013 16:20:18 +0200
Subject: [PATCH] glib: Add poppler_documents_save_pages()

Function poppler_documents_save_pages() saves pages from given documents
to the specified uri. It saves only pages specified in "pages" parameter
which is a list of lists of indices terminated by "-1". Number of the lists
is the same as number of given documents.
This function allows to merge / split PDF documents.
It is based on the code of pdfunite utility.
---
 glib/poppler-document.cc | 170 +++++++++++++++++++++++++++++++++++++++++++++++
 glib/poppler-document.h  |   6 ++
 2 files changed, 176 insertions(+)

diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index 61d92e8..f5f7304 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -18,6 +18,7 @@
 
 #include "config.h"
 #include
+#include /* NOTE(review): the header name seems to have been lost in extraction; the errno use below suggests errno.h - confirm */
 
 #ifndef __GI_SCANNER__
 #include
@@ -1527,6 +1528,175 @@ poppler_document_init (PopplerDocument *document)
 {
 }
 
+/**
+ * poppler_documents_save_pages:
+ * @documents: array of @n_documents #PopplerDocument objects; each must be usable and not encrypted
+ * @pages: array of @n_documents arrays of page numbers to save, each terminated by -1; a %NULL entry contributes no pages
+ * @n_documents: number of entries in @documents (and in @pages)
+ * @uri: name of the file to write; it is passed unchanged to fopen(), so it is not parsed as a URI
+ * @error: (allow-none): return location for an error, or %NULL
+ *
+ * Writes the selected pages of the given documents, in the order given, into one new PDF file.
+ * If @error is set, %FALSE will be returned. Possible errors are #POPPLER_ERROR_INVALID, #POPPLER_ERROR_ENCRYPTED
+ * and #POPPLER_ERROR_DAMAGED, and those in the #G_FILE_ERROR domain when the output file cannot be opened.
+ *
+ * Return value: %TRUE, if the document was successfully saved
+ **/
+gboolean
+poppler_documents_save_pages (PopplerDocument **documents,
+                              int **pages,
+                              int n_documents,
+                              const char *uri,
+                              GError **error)
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector new_pages; /* NOTE(review): element type missing (extraction artifact?); Object values are pushed below */
+  std::vector offsets; /* NOTE(review): element type missing; holds the numOffset used for each saved page */
+  XRef *yRef, *countRef;
+  FILE *f;
+  OutStream *outStr;
+  int i, j, rootNum;
+  std::vectordocs; /* NOTE(review): element type missing; PDFDoc pointers are pushed below */
+  int majorVersion = 0;
+  int minorVersion = 0;
+  Object page;
+  Object value;
+  PDFDoc *doc;
+  Ref *refPage;
+  Dict *pageDict;
+  Goffset uxrefOffset;
+  Ref ref;
+  Dict *trailerDict;
+  const char *key;
+
+  if (documents == NULL || pages == NULL) {
+    g_set_error (error, POPPLER_ERROR,
+                 POPPLER_ERROR_INVALID,
+                 "No document neither pages given");
+    return FALSE;
+  }
+
+  for (i = 0; i < n_documents; i++) { /* type-check every entry before its ->doc member is read */
+    g_return_val_if_fail (POPPLER_IS_DOCUMENT (documents[i]), FALSE);
+  }
+
+  if (!globalParams) { /* create the global parameters object when none exists yet */
+    globalParams = new GlobalParams ();
+  }
+
+  for (i = 0; i < n_documents; i++) { /* validate each input and track the highest PDF version for the output header */
+    doc = documents[i]->doc;
+    if (doc->isOk () && !doc->isEncrypted ()) {
+      docs.push_back (doc);
+      if (doc->getPDFMajorVersion () > majorVersion) {
+        majorVersion = doc->getPDFMajorVersion ();
+        minorVersion = doc->getPDFMinorVersion ();
+      } else if (doc->getPDFMajorVersion () == majorVersion) {
+        if (doc->getPDFMinorVersion () > minorVersion) {
+          minorVersion = doc->getPDFMinorVersion ();
+        }
+      }
+    } else if (doc->isOk ()) { /* usable but encrypted */
+      g_set_error (error, POPPLER_ERROR,
+                   POPPLER_ERROR_ENCRYPTED,
+                   "Document is encrypted");
+      return FALSE;
+    } else {
+      g_set_error (error, POPPLER_ERROR,
+                   POPPLER_ERROR_DAMAGED,
+                   "PDF document is damaged");
+      return FALSE;
+    }
+  }
+
+  if (!(f = fopen (uri, "wb"))) { /* @uri is used as a plain file name here */
+    g_set_error (error, G_FILE_ERROR,
+                 g_file_error_from_errno (errno),
+                 "%s", g_strerror (errno));
+    return FALSE;
+  }
+
+  outStr = new FileOutStream (f, 0);
+  yRef = new XRef ();
+  countRef = new XRef ();
+
+  yRef->add (0, 65535, 0, gFalse); /* presumably the conventional free entry for object 0 - confirm against XRef::add */
+  PDFDoc::writeHeader (outStr, majorVersion, minorVersion);
+
+  for (i = 0; i < (int) docs.size (); i++) { /* docs[i] pairs with pages[i]: every rejected document returned above */
+    if (pages[i] != NULL) {
+      for (j = 0; pages[i][j] != -1; j++) {
+        refPage = docs[i]->getCatalog ()->getPageRef (pages[i][j]); /* NOTE(review): dereferenced unchecked on the next line - confirm getPageRef () cannot return NULL for the page numbers callers pass */
+        docs[i]->getXRef ()->fetch (refPage->num, refPage->gen, &page);
+        new_pages.push_back (page);
+        offsets.push_back (numOffset);
+        pageDict = page.getDict ();
+        docs[i]->markPageObjects (pageDict, yRef, countRef, numOffset);
+      }
+
+      objectsCount += docs[i]->writePageObjects (outStr, yRef, numOffset, gTrue);
+      numOffset = yRef->getNumObjects () + 1;
+    }
+  }
+
+  rootNum = yRef->getNumObjects () + 1; /* catalog is rootNum, the page tree node rootNum + 1, the pages start at rootNum + 2 */
+  yRef->add (rootNum, 0, outStr->getPos (), gTrue);
+  outStr->printf ("%d 0 obj\n", rootNum);
+  outStr->printf ("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf (">>\nendobj\n");
+  objectsCount++;
+
+  yRef->add (rootNum + 1, 0, outStr->getPos (), gTrue);
+  outStr->printf ("%d 0 obj\n", rootNum + 1);
+  outStr->printf ("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) new_pages.size (); j++)
+    outStr->printf (" %d 0 R", rootNum + j + 2);
+  outStr->printf (" ] /Count %zd >>\nendobj\n", new_pages.size ()); /* NOTE(review): size () is unsigned but is printed with %zd */
+  objectsCount++;
+
+  for (i = 0; i < (int) new_pages.size (); i++) { /* write one page object per saved page, copying its dictionary entry by entry */
+    yRef->add (rootNum + i + 2, 0, outStr->getPos (), gTrue);
+    outStr->printf ("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf ("<< ");
+    pageDict = new_pages[i].getDict ();
+    for (j = 0; j < pageDict->getLength (); j++) {
+      if (j > 0)
+        outStr->printf (" ");
+      key = pageDict->getKey (j);
+      pageDict->getValNF (j, &value);
+      if (strcmp (key, "Parent") == 0) { /* re-point the page at the page tree node written above */
+        outStr->printf ("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf ("/%s ", key);
+        PDFDoc::writeObject (&value, outStr, yRef, offsets[i], NULL, cryptRC4, 0, 0, 0);
+      }
+      value.free ();
+    }
+    outStr->printf (" >>\nendobj\n");
+    objectsCount++;
+    new_pages[i].free ();
+  }
+
+  ref.num = rootNum; /* the trailer's root reference is the catalog written above */
+  ref.gen = 0;
+  uxrefOffset = outStr->getPos ();
+  trailerDict = PDFDoc::createTrailerDict (objectsCount, gFalse, 0, &ref, yRef,
+                                           uri, outStr->getPos ());
+
+  PDFDoc::writeXRefTableTrailer (trailerDict, yRef, gFalse /* do not write unnecessary entries */,
+                                 uxrefOffset, outStr, yRef);
+
+  delete trailerDict;
+  delete yRef;
+  delete countRef;
+
+  outStr->close (); /* NOTE(review): outStr is closed but never deleted in this function - looks like a leak */
+  fclose (f);
+
+  return TRUE;
+}
+
 /* PopplerIndexIter: For determining the index of a tree */
 struct _PopplerIndexIter
 {
diff --git a/glib/poppler-document.h b/glib/poppler-document.h
index a34e88c..314c581 100644
--- a/glib/poppler-document.h
+++ b/glib/poppler-document.h
@@ -227,6 +227,12 @@ PopplerDest *poppler_document_find_dest (PopplerDocument *do
 PopplerFormField *poppler_document_get_form_field (PopplerDocument *document,
                                                     gint id);
 
+gboolean poppler_documents_save_pages (PopplerDocument **documents,
+                                       int **pages,
+                                       int n_documents,
+                                       const char *uri,
+                                       GError **error);
+
 /* Interface for getting the Index of a poppler_document */
 #define POPPLER_TYPE_INDEX_ITER (poppler_index_iter_get_type ())
 GType poppler_index_iter_get_type (void) G_GNUC_CONST;
-- 
1.8.3.1