From dd90adae97bd92fc6dacbfeb4966a17a3d6135a0 Mon Sep 17 00:00:00 2001 From: Adrian Perez de Castro Date: Thu, 9 May 2013 12:01:59 +0300 Subject: [PATCH v10 05/12] glib: Expose the document structure tree Implements a new PopplerStructureElement class, which builds upon StructTreeRoot and StructElement to expose the document structure of tagged PDFs in the GLib binding. Navigation of the structure tree is done by an iterator-based interface, using PopplerStructureElementIter. --- glib/Makefile.am | 2 + glib/poppler-private.h | 10 + glib/poppler-structure-element.cc | 685 ++++++++++++++++++++++++++++++++++++ glib/poppler-structure-element.h | 112 ++++++ glib/poppler.h | 3 + glib/reference/poppler-docs.sgml | 2 + glib/reference/poppler-sections.txt | 37 ++ glib/reference/poppler.types | 2 + 8 files changed, 853 insertions(+) create mode 100644 glib/poppler-structure-element.cc create mode 100644 glib/poppler-structure-element.h diff --git a/glib/Makefile.am b/glib/Makefile.am index a38e052..040996a 100644 --- a/glib/Makefile.am +++ b/glib/Makefile.am @@ -41,6 +41,7 @@ poppler_glib_public_headers = \ poppler-layer.h \ poppler-media.h \ poppler-movie.h \ + poppler-structure-element.h \ poppler.h poppler_glib_includedir = $(includedir)/poppler/glib @@ -67,6 +68,7 @@ libpoppler_glib_la_SOURCES = \ poppler-cached-file-loader.h \ poppler-input-stream.cc \ poppler-input-stream.h \ + poppler-structure-element.cc \ poppler.cc \ poppler-private.h diff --git a/glib/poppler-private.h b/glib/poppler-private.h index ab39b49..7b42078 100644 --- a/glib/poppler-private.h +++ b/glib/poppler-private.h @@ -17,6 +17,7 @@ #include #include #include +#include #endif struct _PopplerDocument @@ -95,6 +96,15 @@ struct _PopplerLayer gchar *title; }; + +struct _PopplerStructureElement +{ + /*< private >*/ + GObject parent_instance; + PopplerDocument *document; + StructElement *elem; +}; + GList *_poppler_document_get_layers (PopplerDocument *document); GList
*_poppler_document_get_layer_rbgroup (PopplerDocument *document, Layer *layer); diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc new file mode 100644 index 0000000..f799df5 --- /dev/null +++ b/glib/poppler-structure-element.cc @@ -0,0 +1,685 @@ +/* poppler-structure-element.cc: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "config.h" + +#ifndef __GI_SCANNER__ +#include +#include +#include +#include +#endif /* !__GI_SCANNER__ */ + +#include "poppler.h" +#include "poppler-private.h" +#include "poppler-structure-element.h" + + +/** + * SECTION:poppler-structure-element + * @short_description: Document structure element. + * @title: PopplerStructureElement + * @see_also: #PopplerStructureElementIter + * + * Instances of #PopplerStructureElement are used to describe the structure + * of a #PopplerDocument. To access the elements in the structure of the + * document, use poppler_structure_element_iter_new() to obtain an iterator + * for the top level elements, and then use + * poppler_structure_element_iter_get_element() to get each element.
+ */ + +static PopplerStructureElement * + _poppler_structure_element_new (PopplerDocument *document, StructElement *element); + + +/* Iterator over one level of the structure tree: the children of either the + * StructTreeRoot (when is_root is set) or a StructElement, positioned at index. */ +struct _PopplerStructureElementIter +{ + PopplerDocument *document; + union { + StructElement *elem; + StructTreeRoot *root; + }; + gboolean is_root; + unsigned index; +}; + +POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter, + poppler_structure_element_iter, + poppler_structure_element_iter_copy, + poppler_structure_element_iter_free) + +/** + * poppler_structure_element_iter_copy: + * @iter: a #PopplerStructureElementIter + * + * Creates a new #PopplerStructureElementIter as a copy of @iter. The + * returned value must be freed with poppler_structure_element_iter_free(). + * + * Return value: (transfer full): a new #PopplerStructureElementIter + * + * Since: 0.26 + */ +PopplerStructureElementIter * +poppler_structure_element_iter_copy (PopplerStructureElementIter *iter) +{ + PopplerStructureElementIter *new_iter; + + g_return_val_if_fail (iter != NULL, NULL); + + new_iter = g_slice_dup (PopplerStructureElementIter, iter); + new_iter->document = (PopplerDocument *) g_object_ref (new_iter->document); + + return new_iter; +} + +/** + * poppler_structure_element_iter_free: + * @iter: a #PopplerStructureElementIter + * + * Frees @iter. + * + * Since: 0.26 + */ +void +poppler_structure_element_iter_free (PopplerStructureElementIter *iter) +{ + if (G_UNLIKELY (iter == NULL)) + return; + + g_object_unref (iter->document); + g_slice_free (PopplerStructureElementIter, iter); +} + +/** + * poppler_structure_element_iter_new: + * @poppler_document: a #PopplerDocument. + * + * Returns the root #PopplerStructureElementIter for @poppler_document, or %NULL. The + * returned value must be freed with poppler_structure_element_iter_free(). + * + * Documents may have an associated structure tree &mdash;mostly, Tagged-PDF + * compliant documents&mdash; which can be used to obtain information about + * the document structure and its contents.
Each node in the tree contains + * a #PopplerStructureElement. + * + * Here is a simple example that walks the whole tree: + * + * <informalexample><programlisting> + * static void + * walk_structure (PopplerStructureElementIter *iter) + * { + * do { + * /&ast; Get the element and do something with it &ast;/ + * PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter); + * if (child) + * walk_structure (child); + * poppler_structure_element_iter_free (child); + * } while (poppler_structure_element_iter_next (iter)); + * } + * ... + * { + * iter = poppler_structure_element_iter_new (document); + * walk_structure (iter); + * poppler_structure_element_iter_free (iter); + * } + * </programlisting></informalexample> + * + * Return value: (transfer full): a new #PopplerStructureElementIter + * + * Since: 0.26 + */ +PopplerStructureElementIter * +poppler_structure_element_iter_new (PopplerDocument *poppler_document) +{ + PopplerStructureElementIter *iter; + StructTreeRoot *root; + + g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL); + + root = poppler_document->doc->getStructTreeRoot (); + if (root == NULL) + return NULL; + + if (root->getNumElements () == 0) + return NULL; + + iter = g_slice_new0 (PopplerStructureElementIter); + iter->document = (PopplerDocument *) g_object_ref (poppler_document); + iter->is_root = TRUE; + iter->root = root; + + return iter; +} + +/** + * poppler_structure_element_iter_next: + * @iter: a #PopplerStructureElementIter + * + * Sets @iter to point to the next structure element at the current level + * of the tree, if valid. See poppler_structure_element_iter_new() for more + * information. + * + * Return value: %TRUE, if @iter was set to the next structure element + * + * Since: 0.26 + */ +gboolean +poppler_structure_element_iter_next (PopplerStructureElementIter *iter) +{ + unsigned elements; + + g_return_val_if_fail (iter != NULL, FALSE); + + elements = iter->is_root + ?
iter->root->getNumElements () + : iter->elem->getNumElements (); + + return ++iter->index < elements; +} + +/** + * poppler_structure_element_iter_get_element: + * @iter: a #PopplerStructureElementIter + * + * Returns the #PopplerStructureElement associated with @iter. + * + * Return value: (transfer full): a new #PopplerStructureElement + * + * Since: 0.26 + */ +PopplerStructureElement * +poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter) +{ + StructElement *elem; + + g_return_val_if_fail (iter != NULL, NULL); + + elem = iter->is_root + ? iter->root->getElement (iter->index) + : iter->elem->getElement (iter->index); + + return _poppler_structure_element_new (iter->document, elem); +} + +/** + * poppler_structure_element_iter_get_child: + * @parent: a #PopplerStructureElementIter + * + * Returns a new iterator to the children elements of the + * #PopplerStructureElement associated with @parent. The returned value must + * be freed with poppler_structure_element_iter_free(). + * + * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL + * + * Since: 0.26 + */ +PopplerStructureElementIter * +poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent) +{ + StructElement *elem; + + g_return_val_if_fail (parent != NULL, NULL); + + elem = parent->is_root + ?
parent->root->getElement (parent->index) + : parent->elem->getElement (parent->index); + + if (elem->getNumElements () > 0) + { + PopplerStructureElementIter *child = g_slice_new0 (PopplerStructureElementIter); + child->document = (PopplerDocument *) g_object_ref (parent->document); + child->elem = elem; + return child; + } + + return NULL; +} + + +/* Maps a core StructElement::Type value to the public + * PopplerStructureElementKind value with the same meaning. */ +static PopplerStructureElementKind +_poppler_structelement_type_to_poppler_structure_element_kind (StructElement::Type type) +{ + switch (type) + { + case StructElement::Unknown: + return POPPLER_STRUCTURE_ELEMENT_UNKNOWN; + case StructElement::MCID: + return POPPLER_STRUCTURE_ELEMENT_CONTENT; + case StructElement::OBJR: + return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE; + case StructElement::Document: + return POPPLER_STRUCTURE_ELEMENT_DOCUMENT; + case StructElement::Part: + return POPPLER_STRUCTURE_ELEMENT_PART; + case StructElement::Art: + return POPPLER_STRUCTURE_ELEMENT_ARTICLE; + case StructElement::Sect: + return POPPLER_STRUCTURE_ELEMENT_SECTION; + case StructElement::Div: + return POPPLER_STRUCTURE_ELEMENT_DIV; + case StructElement::Span: + return POPPLER_STRUCTURE_ELEMENT_SPAN; + case StructElement::Quote: + return POPPLER_STRUCTURE_ELEMENT_QUOTE; + case StructElement::Note: + return POPPLER_STRUCTURE_ELEMENT_NOTE; + case StructElement::Reference: + return POPPLER_STRUCTURE_ELEMENT_REFERENCE; + case StructElement::BibEntry: + return POPPLER_STRUCTURE_ELEMENT_BIBENTRY; + case StructElement::Code: + return POPPLER_STRUCTURE_ELEMENT_CODE; + case StructElement::Link: + return POPPLER_STRUCTURE_ELEMENT_LINK; + case StructElement::Annot: + return POPPLER_STRUCTURE_ELEMENT_ANNOT; + case StructElement::Ruby: + return POPPLER_STRUCTURE_ELEMENT_RUBY; + case StructElement::Warichu: + return POPPLER_STRUCTURE_ELEMENT_WARICHU; + case StructElement::BlockQuote: + return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE; + case StructElement::Caption: + return POPPLER_STRUCTURE_ELEMENT_CAPTION; + case StructElement::NonStruct: + return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT; + case StructElement::TOC: +
return POPPLER_STRUCTURE_ELEMENT_TOC; + case StructElement::TOCI: + return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM; + case StructElement::Index: + return POPPLER_STRUCTURE_ELEMENT_INDEX; + case StructElement::Private: + return POPPLER_STRUCTURE_ELEMENT_PRIVATE; + case StructElement::P: + return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH; + case StructElement::H: + return POPPLER_STRUCTURE_ELEMENT_HEADING; + case StructElement::H1: + return POPPLER_STRUCTURE_ELEMENT_HEADING_1; + case StructElement::H2: + return POPPLER_STRUCTURE_ELEMENT_HEADING_2; + case StructElement::H3: + return POPPLER_STRUCTURE_ELEMENT_HEADING_3; + case StructElement::H4: + return POPPLER_STRUCTURE_ELEMENT_HEADING_4; + case StructElement::H5: + return POPPLER_STRUCTURE_ELEMENT_HEADING_5; + case StructElement::H6: + return POPPLER_STRUCTURE_ELEMENT_HEADING_6; + case StructElement::L: + return POPPLER_STRUCTURE_ELEMENT_LIST; + case StructElement::LI: + return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM; + case StructElement::Lbl: + return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL; + case StructElement::LBody: + return POPPLER_STRUCTURE_ELEMENT_LIST_BODY; + case StructElement::Table: + return POPPLER_STRUCTURE_ELEMENT_TABLE; + case StructElement::TR: + return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW; + case StructElement::TH: + return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING; + case StructElement::TD: + return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA; + case StructElement::THead: + return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER; + case StructElement::TFoot: + return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER; + case StructElement::TBody: + return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY; + case StructElement::Figure: + return POPPLER_STRUCTURE_ELEMENT_FIGURE; + case StructElement::Formula: + return POPPLER_STRUCTURE_ELEMENT_FORMULA; + case StructElement::Form: + return POPPLER_STRUCTURE_ELEMENT_FORM; + default: + /* The type comes from the document being read, so an unmapped value + * is reported and degraded to UNKNOWN rather than aborting; this also + * guarantees a return value when assertions are compiled out. */ + g_return_val_if_reached (POPPLER_STRUCTURE_ELEMENT_UNKNOWN); + } +} + + +typedef struct _PopplerStructureElementClass PopplerStructureElementClass; +struct
_PopplerStructureElementClass +{ + GObjectClass parent_class; +}; + +G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT); + + +/* Wraps @element in a new PopplerStructureElement. The wrapper takes a + * reference on @document and stores the @element pointer as-is. */ +static PopplerStructureElement * +_poppler_structure_element_new (PopplerDocument *document, StructElement *element) +{ + PopplerStructureElement *poppler_structure_element; + + g_assert (POPPLER_IS_DOCUMENT (document)); + g_assert (element); + + poppler_structure_element = (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL, NULL); + poppler_structure_element->document = (PopplerDocument *) g_object_ref (document); + poppler_structure_element->elem = element; + + return poppler_structure_element; +} + + +static void +poppler_structure_element_init (PopplerStructureElement *poppler_structure_element) +{ +} + + +/* Drops the document reference taken in _poppler_structure_element_new(). */ +static void +poppler_structure_element_finalize (GObject *object) +{ + PopplerStructureElement *poppler_structure_element = POPPLER_STRUCTURE_ELEMENT (object); + + /* poppler_structure_element->elem is owned by the StructTreeRoot */ + g_object_unref (poppler_structure_element->document); + + G_OBJECT_CLASS (poppler_structure_element_parent_class)->finalize (object); +} + + +static void +poppler_structure_element_class_init (PopplerStructureElementClass *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS (klass); + gobject_class->finalize = poppler_structure_element_finalize; +} + + +/** + * poppler_structure_element_get_kind: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the kind of the structure element. + * + * Return value: A #PopplerStructureElementKind value.
+ * + * Since: 0.26 + */ +PopplerStructureElementKind +poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), POPPLER_STRUCTURE_ELEMENT_UNKNOWN); + g_assert (poppler_structure_element->elem); + + return _poppler_structelement_type_to_poppler_structure_element_kind (poppler_structure_element->elem->getType ()); +} + +/** + * poppler_structure_element_get_page: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the page number in which the element is contained. + * + * Return value: Number of the page that contains the element, or + * -1 if not defined. + * + * Since: 0.26 + */ +gint +poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element) +{ + /* -1 is the documented "not defined" value; 0 would be read as a page number */ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1); + g_assert (poppler_structure_element->elem); + + Ref ref; + if (poppler_structure_element->elem->getPageRef (ref)) + { + return poppler_structure_element->document->doc->findPage(ref.num, ref.gen) - 1; + } + + return -1; +} + +/** + * poppler_structure_element_is_content: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is actual document content. + * + * Return value: Whether the element is content. + * + * Since: 0.26 + */ +gboolean +poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isContent (); +} + +/** + * poppler_structure_element_is_inline: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is an inline element. + * + * Return value: Whether the element is an inline element.
+ * + * Since: 0.26 + */ +gboolean +poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isInline (); +} + +/** + * poppler_structure_element_is_block: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is a block element. + * + * Return value: Whether the element is a block element. + * + * Since: 0.26 + */ +gboolean +poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isBlock (); +} + +/** + * poppler_structure_element_get_id: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the identifier of an element. + * + * Return value: (transfer full): The identifier of the element (if + * defined), or %NULL. + * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getID (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + /* NOTE(review): confirm getID() transfers ownership of the string to the + * caller; if the StructElement keeps owning it, this delete is a double free. */ + delete string; + return result; +} + +/** + * poppler_structure_element_get_title: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the title of an element. + * + * Return value: (transfer full): The title of the element, or %NULL.
+ * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getTitle (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + /* NOTE(review): confirm getTitle() transfers ownership of the string to the + * caller; if the StructElement keeps owning it, this delete is a double free. + * The same applies to getExpandedAbbr() below. */ + delete string; + return result; +} + +/** + * poppler_structure_element_get_abbreviation: + * @poppler_structure_element: A #PopplerStructureElement + * + * Acronyms and abbreviations contained in elements of type + * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded + * text form, which can be retrieved using this function. + * + * Return value: (transfer full): Text of the expanded abbreviation if the + * element text is an abbreviation or acronym, %NULL if not. + * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (poppler_structure_element->elem->getType () != StructElement::Span) + return NULL; + + GooString *string = poppler_structure_element->elem->getExpandedAbbr (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + delete string; + return result; +} + +/** + * poppler_structure_element_get_language: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the language and country code for the content in an element, + * in two-letter ISO format, e.g. en_ES, or %NULL if not + * defined. + * + * Return value: (transfer full): language and country code, or %NULL.
+ * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getLanguage (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + /* NOTE(review): confirm getLanguage() transfers ownership of the string to + * the caller; if the StructElement keeps owning it, this delete is a double + * free. The same applies to getAltText() below. */ + delete string; + return result; +} + +/** + * poppler_structure_element_get_alt_text: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the “alternate” text representation of the element (and its child + * elements). This is mostly used for non-text elements like images and + * figures, to specify a textual description of the element. + * + * Note that for elements containing proper text, the function + * poppler_structure_element_get_text() must be used instead. + * + * Return value: (transfer full): The alternate text representation for the + * element, or %NULL if not defined. + * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getAltText (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + delete string; + return result; +} + +/** + * poppler_structure_element_get_actual_text: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the actual text enclosed by the element (and its child elements). + * The actual text is mostly used for non-text elements like images and + * figures which do have the graphical appearance of text, like + * a logo. For those the actual text is the equivalent text to those + * graphical elements which look like text when rendered.
+ * + * Note that for elements containing proper text, the function + * poppler_structure_element_get_text() must be used instead. + * + * Return value: (transfer full): The actual text for the element, or %NULL + * if not defined. + * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getActualText (); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + /* NOTE(review): confirm getActualText() transfers ownership of the string to + * the caller; if the StructElement keeps owning it, this delete is a double + * free. */ + delete string; + return result; +} + +/** + * poppler_structure_element_get_text: + * @poppler_structure_element: A #PopplerStructureElement + * @recursive: If %TRUE, the text of child elements is gathered recursively + * in logical order and returned as part of the result. + * + * Obtains the text enclosed by an element, or the text enclosed by the + * elements in the subtree (including the element itself). + * + * Return value: (transfer full): A string, or %NULL. + * + * Since: 0.26 + */ +gchar * +poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element, + gboolean recursive) +{ + /* Validate the argument like every other getter before dereferencing it */ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + GooString *string = poppler_structure_element->elem->getText (recursive); + gchar *result = string ? _poppler_goo_string_to_utf8 (string) : NULL; + delete string; + return result; +} diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h new file mode 100644 index 0000000..a5ac04b --- /dev/null +++ b/glib/poppler-structure-element.h @@ -0,0 +1,112 @@ +/* poppler-structure-element.h: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version.
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __POPPLER_STRUCTURE_ELEMENT_H__ +#define __POPPLER_STRUCTURE_ELEMENT_H__ + +#include +#include "poppler.h" + +G_BEGIN_DECLS + +#define POPPLER_TYPE_STRUCTURE_ELEMENT (poppler_structure_element_get_type ()) +#define POPPLER_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement)) +#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT)) + +/** + * PopplerStructureElementKind: + */ +typedef enum { + POPPLER_STRUCTURE_ELEMENT_UNKNOWN, + POPPLER_STRUCTURE_ELEMENT_CONTENT, + POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE, + POPPLER_STRUCTURE_ELEMENT_DOCUMENT, + POPPLER_STRUCTURE_ELEMENT_PART, + POPPLER_STRUCTURE_ELEMENT_ARTICLE, + POPPLER_STRUCTURE_ELEMENT_SECTION, + POPPLER_STRUCTURE_ELEMENT_DIV, + POPPLER_STRUCTURE_ELEMENT_SPAN, + POPPLER_STRUCTURE_ELEMENT_QUOTE, + POPPLER_STRUCTURE_ELEMENT_NOTE, + POPPLER_STRUCTURE_ELEMENT_REFERENCE, + POPPLER_STRUCTURE_ELEMENT_BIBENTRY, + POPPLER_STRUCTURE_ELEMENT_CODE, + POPPLER_STRUCTURE_ELEMENT_LINK, + POPPLER_STRUCTURE_ELEMENT_ANNOT, + POPPLER_STRUCTURE_ELEMENT_RUBY, + POPPLER_STRUCTURE_ELEMENT_WARICHU, + POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE, + POPPLER_STRUCTURE_ELEMENT_CAPTION, + POPPLER_STRUCTURE_ELEMENT_NONSTRUCT, + POPPLER_STRUCTURE_ELEMENT_TOC, + POPPLER_STRUCTURE_ELEMENT_TOC_ITEM, + POPPLER_STRUCTURE_ELEMENT_INDEX, + POPPLER_STRUCTURE_ELEMENT_PRIVATE, + POPPLER_STRUCTURE_ELEMENT_PARAGRAPH, + POPPLER_STRUCTURE_ELEMENT_HEADING, + 
POPPLER_STRUCTURE_ELEMENT_HEADING_1, + POPPLER_STRUCTURE_ELEMENT_HEADING_2, + POPPLER_STRUCTURE_ELEMENT_HEADING_3, + POPPLER_STRUCTURE_ELEMENT_HEADING_4, + POPPLER_STRUCTURE_ELEMENT_HEADING_5, + POPPLER_STRUCTURE_ELEMENT_HEADING_6, + POPPLER_STRUCTURE_ELEMENT_LIST, + POPPLER_STRUCTURE_ELEMENT_LIST_ITEM, + POPPLER_STRUCTURE_ELEMENT_LIST_LABEL, + POPPLER_STRUCTURE_ELEMENT_LIST_BODY, + POPPLER_STRUCTURE_ELEMENT_TABLE, + POPPLER_STRUCTURE_ELEMENT_TABLE_ROW, + POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING, + POPPLER_STRUCTURE_ELEMENT_TABLE_DATA, + POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER, + POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER, + POPPLER_STRUCTURE_ELEMENT_TABLE_BODY, + POPPLER_STRUCTURE_ELEMENT_FIGURE, + POPPLER_STRUCTURE_ELEMENT_FORMULA, + POPPLER_STRUCTURE_ELEMENT_FORM, +} PopplerStructureElementKind; + + +GType poppler_structure_element_get_type (void) G_GNUC_CONST; +PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element); +gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element); +gchar *poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element); +gchar *poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element); +gchar *poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element); +gchar *poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element); +gchar *poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element, + gboolean recursive); +gchar *poppler_structure_element_get_alt_text (PopplerStructureElement 
*poppler_structure_element); +gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element); + +#define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ()) +GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST; +PopplerStructureElementIter *poppler_structure_element_iter_new (PopplerDocument *poppler_document); +PopplerStructureElementIter *poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent); +PopplerStructureElementIter *poppler_structure_element_iter_copy (PopplerStructureElementIter *iter); +PopplerStructureElement *poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter); +gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter); +void poppler_structure_element_iter_free (PopplerStructureElementIter *iter); + +G_END_DECLS + +#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */ diff --git a/glib/poppler.h b/glib/poppler.h index 2d190f3..c8f8670 100644 --- a/glib/poppler.h +++ b/glib/poppler.h @@ -202,6 +202,8 @@ typedef struct _PopplerAnnotFileAttachment PopplerAnnotFileAttachment; typedef struct _PopplerAnnotMovie PopplerAnnotMovie; typedef struct _PopplerAnnotScreen PopplerAnnotScreen; typedef struct _PopplerAnnotCalloutLine PopplerAnnotCalloutLine; +typedef struct _PopplerStructureElement PopplerStructureElement; +typedef struct _PopplerStructureElementIter PopplerStructureElementIter; typedef enum { @@ -227,5 +229,6 @@ G_END_DECLS #include "poppler-date.h" #include "poppler-movie.h" #include "poppler-media.h" +#include "poppler-structure-element.h" #endif /* __POPPLER_GLIB_H__ */ diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml index a9d5158..9617ed7 100644 --- a/glib/reference/poppler-docs.sgml +++ b/glib/reference/poppler-docs.sgml @@ -23,6 +23,8 @@ + + diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt index 6fb14bc..dd86237 100644 --- 
a/glib/reference/poppler-sections.txt +++ b/glib/reference/poppler-sections.txt @@ -535,6 +535,43 @@ poppler_movie_get_type
+poppler-structure-element +PopplerStructureElement +PopplerStructureElement +PopplerStructureElementKind +PopplerStructureElementIter +poppler_structure_element_iter_new +poppler_structure_element_iter_next +poppler_structure_element_iter_copy +poppler_structure_element_iter_free +poppler_structure_element_iter_get_child +poppler_structure_element_iter_get_element +poppler_structure_element_get_kind +poppler_structure_element_get_page +poppler_structure_element_is_content +poppler_structure_element_is_inline +poppler_structure_element_is_block +poppler_structure_element_get_id +poppler_structure_element_get_title +poppler_structure_element_get_abbreviation +poppler_structure_element_get_language +poppler_structure_element_get_text +poppler_structure_element_get_alt_text +poppler_structure_element_get_actual_text + + +POPPLER_STRUCTURE_ELEMENT +POPPLER_IS_STRUCTURE_ELEMENT +POPPLER_TYPE_STRUCTURE_ELEMENT +POPPLER_TYPE_STRUCTURE_ELEMENT_ITER +POPPLER_TYPE_STRUCTURE_ELEMENT_KIND + + +poppler_structure_element_get_type +poppler_structure_element_iter_get_type +
+ +
poppler-features POPPLER_HAS_CAIRO POPPLER_MAJOR_VERSION diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types index eed9849..388852a 100644 --- a/glib/reference/poppler.types +++ b/glib/reference/poppler.types @@ -8,3 +8,5 @@ poppler_annot_get_type poppler_layer_get_type poppler_media_get_type poppler_movie_get_type +poppler_structure_element_get_type +poppler_structure_element_iter_get_type -- 1.8.4.2