From 1a75be83e1f28d5fd4bcd45208b7907bc8cded61 Mon Sep 17 00:00:00 2001 From: Adrian Perez de Castro Date: Thu, 9 May 2013 12:01:59 +0300 Subject: [PATCH v4 5/6] Tagged-PDF: Expose the structure tree in poppler-glib Implements two new GObject classes, which build upon StructTreeRoot and StructElement to expose the document structure of tagged PDFs in the GLib binding: - PopplerStructure wraps StructTreeRoot, and contains additional utility methods to search for elements in the structure tree. - PopplerStructureElement wraps StructElement, and does the heavy lifting of exposing data in GLib-friendly data types. For standard attributes, to avoid cluttering the class with methods, a single poppler_structure_element_get_attribute() method is implemented, which returns either NULL (for undefined attributes) or a GVariant containing a sensible representation of the value. --- glib/Makefile.am | 4 + glib/poppler-document.cc | 22 + glib/poppler-document.h | 1 + glib/poppler-private.h | 31 + glib/poppler-structure-element.cc | 1712 +++++++++++++++++++++++++++++++++++ glib/poppler-structure-element.h | 366 ++++++++ glib/poppler-structure.cc | 362 ++++++++ glib/poppler-structure.h | 43 + glib/poppler.h | 3 + glib/reference/poppler-docs.sgml | 2 + glib/reference/poppler-sections.txt | 86 ++ glib/reference/poppler.types | 2 + 12 files changed, 2634 insertions(+) create mode 100644 glib/poppler-structure-element.cc create mode 100644 glib/poppler-structure-element.h create mode 100644 glib/poppler-structure.cc create mode 100644 glib/poppler-structure.h diff --git a/glib/Makefile.am b/glib/Makefile.am index a38e052..645cfd3 100644 --- a/glib/Makefile.am +++ b/glib/Makefile.am @@ -41,6 +41,8 @@ poppler_glib_public_headers = \ poppler-layer.h \ poppler-media.h \ poppler-movie.h \ + poppler-structure.h \ + poppler-structure-element.h \ poppler.h poppler_glib_includedir = $(includedir)/poppler/glib @@ -67,6 +69,8 @@ libpoppler_glib_la_SOURCES = \ poppler-cached-file-loader.h \
poppler-input-stream.cc \ poppler-input-stream.h \ + poppler-structure.cc \ + poppler-structure-element.cc \ poppler.cc \ poppler-private.h diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc index 61d92e8..bc39314 100644 --- a/glib/poppler-document.cc +++ b/glib/poppler-document.cc @@ -1220,6 +1220,28 @@ poppler_document_get_metadata (PopplerDocument *document) return retval; } +/** + * poppler_document_get_structure: + * @document: A #PopplerDocument + * + * Returns the #PopplerStructure of the document. This object is owned by + * the caller. + * + * Return value: (transfer full): The #PopplerStructure of the document, + * or %NULL if the document has no structure tree. + */ +PopplerStructure * +poppler_document_get_structure (PopplerDocument *document) +{ + StructTreeRoot *tree_root; + + g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL); + + tree_root = document->doc->getStructTreeRoot (); + if (!tree_root) return NULL; + + return _poppler_structure_new (document, tree_root); +} + static void poppler_document_get_property (GObject *object, guint prop_id, diff --git a/glib/poppler-document.h b/glib/poppler-document.h index a34e88c..97cea4a 100644 --- a/glib/poppler-document.h +++ b/glib/poppler-document.h @@ -213,6 +213,7 @@ PopplerPageLayout poppler_document_get_page_layout (PopplerDocument *doc PopplerPageMode poppler_document_get_page_mode (PopplerDocument *document); PopplerPermissions poppler_document_get_permissions (PopplerDocument *document); gchar *poppler_document_get_metadata (PopplerDocument *document); +PopplerStructure *poppler_document_get_structure (PopplerDocument *document); /* Attachments */ guint poppler_document_get_n_attachments (PopplerDocument *document); diff --git a/glib/poppler-private.h b/glib/poppler-private.h index ab39b49..0fee303 100644 --- a/glib/poppler-private.h +++ b/glib/poppler-private.h @@ -17,6 +17,7 @@ #include #include #include +#include #endif struct _PopplerDocument @@ -95,6 +96,32 @@ struct _PopplerLayer gchar *title; }; +struct _PopplerStructure
+{ + /*< private >*/ + GObject parent_instance; + PopplerDocument *document; + StructTreeRoot *root; + PopplerStructureElement **children; +}; + +struct _PopplerStructureElement +{ + /*< private >*/ + GObject parent_instance; + StructElement *elem; + gchar *id; + gchar *title; + gchar *text; + gchar *text_r; + gchar *text_abbrev; + gchar *alt_text; + gchar *actual_text; + GList *text_spans; + PopplerStructure *structure; + PopplerStructureElement **children; +}; + GList *_poppler_document_get_layers (PopplerDocument *document); GList *_poppler_document_get_layer_rbgroup (PopplerDocument *document, Layer *layer); @@ -120,6 +147,10 @@ PopplerAnnot *_poppler_annot_free_text_new (Annot *annot); PopplerAnnot *_poppler_annot_file_attachment_new (Annot *annot); PopplerAnnot *_poppler_annot_movie_new (Annot *annot); PopplerAnnot *_poppler_annot_screen_new (Annot *annot); +PopplerStructure *_poppler_structure_new (PopplerDocument *poppler_document, + StructTreeRoot *struct_tree_root); +PopplerStructureElement *_poppler_structure_element_new (PopplerStructure *structure, + StructElement *struct_element); char *_poppler_goo_string_to_utf8(GooString *s); gboolean _poppler_convert_pdf_date_to_gtime (GooString *date, diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc new file mode 100644 index 0000000..4604eb1 --- /dev/null +++ b/glib/poppler-structure-element.cc @@ -0,0 +1,1712 @@ +/* poppler-structure.cc: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "config.h" + +#ifndef __GI_SCANNER__ +#include +#include +#include +#include +#endif /* !__GI_SCANNER__ */ + +#include "poppler.h" +#include "poppler-private.h" +#include "poppler-structure-element.h" + + +static inline PopplerStructureElementKind +_poppler_structelement_type_to_poppler_structure_element_kind (StructElement::Type type) +{ + switch (type) + { + case StructElement::Unknown: + return POPPLER_STRUCTURE_ELEMENT_UNKNOWN; + case StructElement::MCID: + return POPPLER_STRUCTURE_ELEMENT_CONTENT; + case StructElement::OBJR: + return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE; + case StructElement::Document: + return POPPLER_STRUCTURE_ELEMENT_DOCUMENT; + case StructElement::Part: + return POPPLER_STRUCTURE_ELEMENT_PART; + case StructElement::Sect: + return POPPLER_STRUCTURE_ELEMENT_SECTION; + case StructElement::Div: + return POPPLER_STRUCTURE_ELEMENT_DIV; + case StructElement::Span: + return POPPLER_STRUCTURE_ELEMENT_SPAN; + case StructElement::Quote: + return POPPLER_STRUCTURE_ELEMENT_QUOTE; + case StructElement::Note: + return POPPLER_STRUCTURE_ELEMENT_NOTE; + case StructElement::Reference: + return POPPLER_STRUCTURE_ELEMENT_REFERENCE; + case StructElement::BibEntry: + return POPPLER_STRUCTURE_ELEMENT_BIBENTRY; + case StructElement::Code: + return POPPLER_STRUCTURE_ELEMENT_CODE; + case StructElement::Link: + return POPPLER_STRUCTURE_ELEMENT_LINK; + case StructElement::Annot: + return POPPLER_STRUCTURE_ELEMENT_ANNOT; + case StructElement::Ruby: + return POPPLER_STRUCTURE_ELEMENT_RUBY; + case StructElement::Warichu: + return POPPLER_STRUCTURE_ELEMENT_WARICHU; + case StructElement::BlockQuote: + return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE; + case StructElement::Caption: 
+ return POPPLER_STRUCTURE_ELEMENT_CAPTION; + case StructElement::NonStruct: + return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT; + case StructElement::TOC: + return POPPLER_STRUCTURE_ELEMENT_TOC; + case StructElement::TOCI: + return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM; + case StructElement::Index: + return POPPLER_STRUCTURE_ELEMENT_INDEX; + case StructElement::Private: + return POPPLER_STRUCTURE_ELEMENT_PRIVATE; + case StructElement::P: + return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH; + case StructElement::H: + return POPPLER_STRUCTURE_ELEMENT_HEADING; + case StructElement::H1: + return POPPLER_STRUCTURE_ELEMENT_HEADING_1; + case StructElement::H2: + return POPPLER_STRUCTURE_ELEMENT_HEADING_2; + case StructElement::H3: + return POPPLER_STRUCTURE_ELEMENT_HEADING_3; + case StructElement::H4: + return POPPLER_STRUCTURE_ELEMENT_HEADING_4; + case StructElement::H5: + return POPPLER_STRUCTURE_ELEMENT_HEADING_5; + case StructElement::H6: + return POPPLER_STRUCTURE_ELEMENT_HEADING_6; + case StructElement::L: + return POPPLER_STRUCTURE_ELEMENT_LIST; + case StructElement::LI: + return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM; + case StructElement::Lbl: + return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL; + case StructElement::Table: + return POPPLER_STRUCTURE_ELEMENT_TABLE; + case StructElement::TR: + return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW; + case StructElement::TH: + return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING; + case StructElement::TD: + return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA; + case StructElement::THead: + return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER; + case StructElement::TFoot: + return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER; + case StructElement::TBody: + return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY; + case StructElement::Figure: + return POPPLER_STRUCTURE_ELEMENT_FIGURE; + case StructElement::Formula: + return POPPLER_STRUCTURE_ELEMENT_FORMULA; + case StructElement::Form: + return POPPLER_STRUCTURE_ELEMENT_FORM; + default: + g_assert_not_reached (); + } +} + +static GBool 
+_rgb_array_to_doubles(Array *array, double rgb[3]) +{ + double r, g, b; + Object obj; + + if (array->getLength() != 3) + return FALSE; + + if (!array->getNF(0, &obj)->isReal()) goto not_a_real; + r = obj.getReal(); + obj.free(); + + if (!array->getNF(1, &obj)->isReal()) goto not_a_real; + g = obj.getReal(); + obj.free(); + + if (!array->getNF(2, &obj)->isReal()) goto not_a_real; + b = obj.getReal(); + obj.free(); + + rgb[0] = r; + rgb[1] = g; + rgb[2] = b; + + return TRUE; + +not_a_real: + obj.free(); + return FALSE; +} + + +static GVariant* +_g_variant_new_from_rgb_array (Array *array) +{ + double v[3]; + + g_return_val_if_fail (array->getLength () == 3, NULL); + + if (!_rgb_array_to_doubles (array, v)) + return NULL; + + return g_variant_new ("(ddd)", v[0], v[1], v[2]); +} + + +static GVariant* +_g_variant_new_from_rgb_array_or_x4 (Array *array) +{ + double v[12]; + + if (array->getLength() == 3) + { + if (!_rgb_array_to_doubles (array, v)) + return NULL; + + v[ 9] = v[6] = v[3] = v[0]; + v[10] = v[7] = v[4] = v[1]; + v[11] = v[8] = v[5] = v[2]; + } + else if (array->getLength () == 4) + { + for (int i = 0; i < 4; i++) + { + Object item; + if (!array->get(i, &item)->isArray()) + return NULL; + if (!_rgb_array_to_doubles (item.getArray(), &v[i * 3])) + return NULL; + } + } + else + return NULL; + + return g_variant_new ("((ddd)(ddd)(ddd)(ddd))", + v[ 0], v[ 1], v[ 2], + v[ 3], v[ 4], v[ 5], + v[ 6], v[ 7], v[ 8], + v[ 9], v[10], v[11]); +} + + +template +struct EnumNameValue { + const gchar *name; + EnumType value; + + static const EnumNameValue values[]; + static const EnumType null = static_cast (-1); +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Block", POPPLER_STRUCTURE_PLACEMENT_BLOCK }, + { "Inline", POPPLER_STRUCTURE_PLACEMENT_INLINE }, + { "Before", POPPLER_STRUCTURE_PLACEMENT_BEFORE }, + { "Start", POPPLER_STRUCTURE_PLACEMENT_START }, + { "End", POPPLER_STRUCTURE_PLACEMENT_END }, + { NULL } +}; + +template<> +const 
EnumNameValue EnumNameValue::values[] = +{ + { "LrTb", POPPLER_STRUCTURE_WRITING_MODE_LR_TB }, + { "RlTb", POPPLER_STRUCTURE_WRITING_MODE_RL_TB }, + { "TbRl", POPPLER_STRUCTURE_WRITING_MODE_TB_RL }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "None", POPPLER_STRUCTURE_BORDER_STYLE_NONE }, + { "Hidden", POPPLER_STRUCTURE_BORDER_STYLE_HIDDEN }, + { "Dotted", POPPLER_STRUCTURE_BORDER_STYLE_DOTTED }, + { "Dashed", POPPLER_STRUCTURE_BORDER_STYLE_DASHED }, + { "Solid", POPPLER_STRUCTURE_BORDER_STYLE_SOLID }, + { "Double", POPPLER_STRUCTURE_BORDER_STYLE_DOUBLE }, + { "Groove", POPPLER_STRUCTURE_BORDER_STYLE_GROOVE }, + { "Inset", POPPLER_STRUCTURE_BORDER_STYLE_INSET }, + { "Outset", POPPLER_STRUCTURE_BORDER_STYLE_OUTSET }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Start", POPPLER_STRUCTURE_TEXT_ALIGN_START }, + { "Center", POPPLER_STRUCTURE_TEXT_ALIGN_CENTER }, + { "End", POPPLER_STRUCTURE_TEXT_ALIGN_END }, + { "Justify", POPPLER_STRUCTURE_TEXT_ALIGN_JUSTIFY }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Before", POPPLER_STRUCTURE_BLOCK_ALIGN_BEFORE }, + { "Middle", POPPLER_STRUCTURE_BLOCK_ALIGN_MIDDLE }, + { "After", POPPLER_STRUCTURE_BLOCK_ALIGN_AFTER }, + { "Justify", POPPLER_STRUCTURE_BLOCK_ALIGN_JUSTIFY }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Start", POPPLER_STRUCTURE_INLINE_ALIGN_START }, + { "Center", POPPLER_STRUCTURE_INLINE_ALIGN_CENTER }, + { "End", POPPLER_STRUCTURE_INLINE_ALIGN_END }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "None", POPPLER_STRUCTURE_TEXT_DECORATION_NONE }, + { "Underline", POPPLER_STRUCTURE_TEXT_DECORATION_UNDERLINE }, + { "Overline", POPPLER_STRUCTURE_TEXT_DECORATION_OVERLINE }, + { "LineThrough", POPPLER_STRUCTURE_TEXT_DECORATION_LINETHROUGH }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Start", 
POPPLER_STRUCTURE_RUBY_ALIGN_START }, + { "Center", POPPLER_STRUCTURE_RUBY_ALIGN_CENTER }, + { "End", POPPLER_STRUCTURE_RUBY_ALIGN_END }, + { "Justify", POPPLER_STRUCTURE_RUBY_ALIGN_JUSTIFY }, + { "Distribute", POPPLER_STRUCTURE_RUBY_ALIGN_DISTRIBUTE }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Before", POPPLER_STRUCTURE_RUBY_POSITION_BEFORE }, + { "After", POPPLER_STRUCTURE_RUBY_POSITION_AFTER }, + { "Warichu", POPPLER_STRUCTURE_RUBY_POSITION_WARICHU }, + { "Inline", POPPLER_STRUCTURE_RUBY_POSITION_INLINE }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Auto", POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO }, + { "90", POPPLER_STRUCTURE_GLYPH_ORIENTATION_90 }, + { "180", POPPLER_STRUCTURE_GLYPH_ORIENTATION_180 }, + { "270", POPPLER_STRUCTURE_GLYPH_ORIENTATION_270 }, + { "360", POPPLER_STRUCTURE_GLYPH_ORIENTATION_0 }, + { "-90", POPPLER_STRUCTURE_GLYPH_ORIENTATION_270 }, + { "-180", POPPLER_STRUCTURE_GLYPH_ORIENTATION_180 }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "None", POPPLER_STRUCTURE_LIST_NUMBERING_NONE }, + { "Disc", POPPLER_STRUCTURE_LIST_NUMBERING_DISC }, + { "Circle", POPPLER_STRUCTURE_LIST_NUMBERING_CIRCLE }, + { "Square", POPPLER_STRUCTURE_LIST_NUMBERING_SQUARE }, + { "Decimal", POPPLER_STRUCTURE_LIST_NUMBERING_DECIMAL }, + { "UpperRoman", POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ROMAN }, + { "LowerRoman", POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ROMAN }, + { "UpperAlpha", POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ALPHA }, + { "LowerAlpha", POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ALPHA }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "rb", POPPLER_STRUCTURE_ROLE_RADIO_BUTTON }, + { "cb", POPPLER_STRUCTURE_ROLE_CHECKBOX }, + { "pb", POPPLER_STRUCTURE_ROLE_PUSH_BUTTON }, + { "tv", POPPLER_STRUCTURE_ROLE_TEXT_VALUE }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "on", 
POPPLER_STRUCTURE_CHECKED_ON }, + { "off", POPPLER_STRUCTURE_CHECKED_OFF }, + { "neutral", POPPLER_STRUCTURE_CHECKED_NEUTRAL }, + { NULL } +}; + +template<> +const EnumNameValue EnumNameValue::values[] = +{ + { "Row", POPPLER_STRUCTURE_SCOPE_ROW }, + { "Column", POPPLER_STRUCTURE_SCOPE_COLUMN }, + { "Both", POPPLER_STRUCTURE_SCOPE_BOTH }, + { NULL } +}; + +template +static EnumType +name_to_enum (Object *name_value, + EnumType default_value = EnumType::null) +{ + if (!name_value) + return default_value; + + for (const EnumNameValue *item = EnumNameValue::values ; item->name; item++) + if (name_value->isName (item->name)) + return item->value; + + return default_value; +} + + +template +static GVariant* +name_to_variant_enum (Object *name_value, + EnumType default_value = EnumNameValue::null) +{ + EnumType value = name_to_enum (name_value, default_value); + return value == EnumNameValue::null ? NULL : g_variant_new_uint32 (value); +} + + +static GVariant* +string_to_variant (Object *object) +{ + if (object->isName ()) + return g_variant_new_string (object->getName ()); + if (object->isString ()) + { + gchar *utf8_string = _poppler_goo_string_to_utf8 (object->getString ()); + GVariant* result = g_variant_new_string (utf8_string); + g_free (utf8_string); + return result; + } + return NULL; +} + + +static GVariant* +_g_variant_new_from_border_style (Object *object) +{ + PopplerStructureBorderStyle border_style[4]; + + if (object->isArray () && object->arrayGetLength () == 4) + { + Object item; + for (int i = 0; i < 4; i++) + border_style[i] = name_to_enum (object->arrayGet (i, &item), + POPPLER_STRUCTURE_BORDER_STYLE_NONE); + } + else if (object->isName ()) + { + border_style[0] = border_style[1] = border_style[2] = border_style[3] = + name_to_enum (object, POPPLER_STRUCTURE_BORDER_STYLE_NONE); + } + else + return NULL; + + return g_variant_new ("(uuuu)", + border_style[0], + border_style[1], + border_style[2], + border_style[3]); +} + + +static GVariant* 
+_g_variant_new_from_number_or_x4 (Object *object) +{ + double v[4]; + + if (object->isArray () && object->arrayGetLength () == 4) + { + Object item; + for (int i = 0; i < 4; i++) + { + if (object->arrayGet (i, &item)->isReal ()) + v[i] = item.getReal (); + else if (item.isInt ()) + v[i] = (double) item.getInt (); + else + return NULL; + } + } + else if (object->isReal ()) + v[0] = v[1] = v[2] = v[3] = object->getReal (); + else if (object->isInt ()) + v[0] = v[1] = v[2] = v[3] = (double) object->getInt (); + else + return NULL; + + return g_variant_new ("(dddd)", v[0], v[1], v[2], v[3]); +} + + +static inline GVariant* +_g_variant_new_from_number_x4 (Object *object) +{ + return object->isArray () ? _g_variant_new_from_number_or_x4 (object) : NULL; +} + + +static GVariant* +_g_variant_new_from_number (Object *object) +{ + if (object->isReal ()) + return g_variant_new_double (object->getReal ()); + if (object->isInt ()) + return g_variant_new_double ((double) object->getInt ()); + return NULL; +} + + +static GVariant* +_g_variant_new_from_number_or_auto (Object *object) +{ + if (object->isName ("Auto")) + return g_variant_new ("md", NULL); + if (object->isReal ()) + return g_variant_new ("md", object->getReal ()); + if (object->isInt ()) + return g_variant_new ("md", (double) object->getInt ()); + return NULL; +} + + +static inline GVariant* +_g_variant_new_from_number_or_auto_or_normal (Object *object) +{ + return object->isName ("Normal") ? 
g_variant_new ("md", FALSE, 0.0) /* "md" takes a gboolean plus a double */ + : _g_variant_new_from_number_or_auto (object); +} + + +/* + * Builds an "ad" variant from a single number or from an array of numbers. + * Array items which are not numbers are skipped. + */ +static GVariant* +_g_variant_new_number_array (Object *object) +{ + GVariantBuilder *builder = g_variant_builder_new (G_VARIANT_TYPE ("ad")); + + if (object->isReal ()) + g_variant_builder_add (builder, "d", object->getReal ()); + else if (object->isInt ()) + g_variant_builder_add (builder, "d", (double) object->getInt ()); + else if (object->isArray ()) + { + for (int i = 0; i < object->arrayGetLength (); i++) + { + Object item; + if (object->arrayGet (i, &item)->isReal ()) + g_variant_builder_add (builder, "d", item.getReal ()); + else if (item.isInt ()) + g_variant_builder_add (builder, "d", (double) item.getInt ()); + item.free (); + } + } + + GVariant *value = g_variant_new ("ad", builder); + g_variant_builder_unref (builder); + return value; +} + + +/* + * Builds an "as" variant from a single name/string or from an array of + * names/strings. Array items which are neither names nor strings are skipped. + */ +static GVariant* +_g_variant_new_string_array (Object *object) +{ + GVariantBuilder *builder = g_variant_builder_new (G_VARIANT_TYPE ("as")); + + if (object->isName ()) + g_variant_builder_add (builder, "s", object->getName ()); + else if (object->isString ()) + { + gchar *utf8_string = _poppler_goo_string_to_utf8 (object->getString ()); + g_variant_builder_add (builder, "s", utf8_string); + g_free (utf8_string); + } + else if (object->isArray ()) + { + for (int i = 0; i < object->arrayGetLength (); i++) + { + Object item; + /* Inspect the fetched item, not the enclosing array object. */ + if (object->arrayGet (i, &item)->isName ()) + g_variant_builder_add (builder, "s", item.getName ()); + else if (item.isString ()) + { + gchar *utf8_string = _poppler_goo_string_to_utf8 (item.getString ()); + g_variant_builder_add (builder, "s", utf8_string); + g_free (utf8_string); + } + item.free (); + } + } + + GVariant *value = g_variant_new ("as", builder); + g_variant_builder_unref (builder); + return value; +} + + +static inline Object* +attr_value_or_default (PopplerStructureElement *poppler_structure_element, + Attribute::Type attribute_type, + gboolean inherit) +{ + Object *value = Attribute::getDefaultValue
(attribute_type); + const Attribute *attr; + + if ((attr = poppler_structure_element->elem->findAttribute (attribute_type, inherit))) + value = attr->getValue (); + + return value; +} + + +static void _poppler_text_span_free (gpointer data) +{ + PopplerTextSpan *span = (PopplerTextSpan*) data; + g_free (span->text); + g_free (span->font_name); + g_free (span->link_target); + g_slice_free (PopplerTextSpan, data); +} + + +/** + * SECTION:poppler-structure-element + * @short_description: Document structure element. + * @title: PopplerStructureElement + * @see_also: #PopplerStructure + * + * Instances of #PopplerStructureElement are used to describe the structure + * of a #PopplerDocument. To access the elements in the structure of the + * document, first use poppler_document_get_structure() to obtain its + * #PopplerStructure, and then use poppler_structure_get_n_children() + * and poppler_structure_get_child() to enumerate the top level elements. + */ + +typedef struct _PopplerStructureElementClass PopplerStructureElementClass; +struct _PopplerStructureElementClass +{ + GObjectClass parent_class; +}; + +G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT); + + +PopplerStructureElement* +_poppler_structure_element_new (PopplerStructure *structure, StructElement *element) +{ + PopplerStructureElement *poppler_structure_element; + + g_assert (structure); + g_assert (element); + + poppler_structure_element = (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL, NULL); + poppler_structure_element->text = NULL; + poppler_structure_element->text_r = NULL; + poppler_structure_element->children = NULL; + poppler_structure_element->structure = structure; + poppler_structure_element->elem = element; + + if (element->getNumElements ()) + poppler_structure_element->children = (PopplerStructureElement**) g_new0 (PopplerStructureElement*, + element->getNumElements ()); + return poppler_structure_element; +} + + +static void 
+poppler_structure_element_init (PopplerStructureElement *poppler_structure_element) +{ +} + + +/* + * GObject finalizer: releases the cached strings, the text span list and + * the child wrappers held by the element. + */ +static void +poppler_structure_element_finalize (GObject *object) +{ + PopplerStructureElement *poppler_structure_element = POPPLER_STRUCTURE_ELEMENT (object); + + /* poppler_structure_element->elem is owned by the StructTreeRoot */ + g_free (poppler_structure_element->actual_text); + g_free (poppler_structure_element->alt_text); + g_free (poppler_structure_element->text_abbrev); + g_free (poppler_structure_element->text_r); + g_free (poppler_structure_element->text); + g_free (poppler_structure_element->title); + g_free (poppler_structure_element->id); + g_list_free_full (poppler_structure_element->text_spans, _poppler_text_span_free); + + if (poppler_structure_element->children) + { + /* Child wrappers are created lazily, so unvisited slots are still NULL. */ + for (unsigned i = 0; i < poppler_structure_element->elem->getNumElements (); i++) + if (poppler_structure_element->children[i]) + g_object_unref (poppler_structure_element->children[i]); + g_free (poppler_structure_element->children); + } + + G_OBJECT_CLASS (poppler_structure_element_parent_class)->finalize (object); +} + + +static void +poppler_structure_element_class_init (PopplerStructureElementClass *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS (klass); + gobject_class->finalize = poppler_structure_element_finalize; +} + + +/** + * poppler_structure_element_get_kind: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: A #PopplerStructureElementKind value. + */ +PopplerStructureElementKind +poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), POPPLER_STRUCTURE_ELEMENT_UNKNOWN); + g_assert (poppler_structure_element->elem); + + return _poppler_structelement_type_to_poppler_structure_element_kind (poppler_structure_element->elem->getType ()); +} + +/** + * poppler_structure_element_get_page: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: Number of the page that contains the element, or + * -1 if not defined.
+ */ +gint +poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element) +{ + /* This function returns a page number, so report invalid arguments with the same -1 used for "not defined" rather than with an element kind constant. */ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1); + g_assert (poppler_structure_element->elem); + + if (poppler_structure_element->elem->hasPageRef ()) + { + const Ref ref = poppler_structure_element->elem->getPageRef (); + return poppler_structure_element->structure->document->doc->findPage(ref.num, ref.gen) - 1; + } + + return -1; +} + +/** + * poppler_structure_element_is_content: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is actual document content. + * + * Return value: Whether the element is content. + */ +gboolean +poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isContent (); +} + +/** + * poppler_structure_element_is_inline: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is an inline element. + * + * Return value: Whether the element is inline. + */ +gboolean +poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isInline (); +} + +/** + * poppler_structure_element_is_block: + * @poppler_structure_element: A #PopplerStructureElement + * + * Checks whether an element is a block element. + * + * Return value: Whether the element is block.
+ */ +gboolean +poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->isBlock (); +} + +/** + * poppler_structure_element_get_n_children: + * @poppler_structure_element: A #PopplerStructureElement + * + * Gets the number of children of @structure_element. + * + * Return value: Number of children elements. + */ +guint +poppler_structure_element_get_n_children (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), 0); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->getNumElements (); +} + +/** + * poppler_structure_element_get_child: + * @poppler_structure_element: A #PopplerStructureElement + * @index: Index of the children element to obtain. + * + * Return value: (transfer none): A #PopplerStructureElement. 
+ */ +PopplerStructureElement* +poppler_structure_element_get_child (PopplerStructureElement *poppler_structure_element, + guint index) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + g_assert (poppler_structure_element->elem->getNumElements () >= 0); + g_return_val_if_fail (index < (guint) poppler_structure_element->elem->getNumElements (), NULL); + + if (!poppler_structure_element->children[index]) + { + /* The type derives from plain GObject, so the new instance has no floating reference to sink: the reference returned by the constructor is the one owned by the children array, and it is dropped when the parent is finalized. */ + poppler_structure_element->children[index] = _poppler_structure_element_new (poppler_structure_element->structure, + poppler_structure_element->elem->getElement (index)); + } + return poppler_structure_element->children[index]; +} + +/** + * poppler_structure_element_get_id: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: (transfer none): The identifier of the element (if + * defined), or %NULL. + */ +const gchar* +poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (!poppler_structure_element->id && poppler_structure_element->elem->getID ()) + poppler_structure_element->id = _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getID ()); + + return poppler_structure_element->id; +} + +/** + * poppler_structure_element_get_title: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: (transfer none): The title of the element (if defined), + * or %NULL.
+ */ +const gchar* +poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (!poppler_structure_element->title && poppler_structure_element->elem->getTitle ()) + poppler_structure_element->title = _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getTitle ()); + + return poppler_structure_element->title; +} + +/** + * poppler_structure_element_get_abbreviation: + * @poppler_structure_element: A #PopplerStructureElement + * + * Acronyms and abbreviations contained in elements of type + * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded + * text form, which can be retrieved using this function. + * + * Return value: (transfer none): Text of the expanded abbreviation, if the + * element text is an abbreviation or acronym. + */ +const gchar* +poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (poppler_structure_element->elem->getType () != StructElement::Span) + return NULL; + + if (!poppler_structure_element->text_abbrev && poppler_structure_element->elem->getExpandedAbbr ()) + poppler_structure_element->text_abbrev = _poppler_goo_string_to_utf8 (poppler_structure_element->elem->getExpandedAbbr ()); + + return poppler_structure_element->text_abbrev; +} + +/** + * poppler_structure_element_get_language: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: (transfer none): language and country code, in two-letter + * ISO format, e.g. en_US, or %NULL if not defined.
+ */ +const gchar* +poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + return poppler_structure_element->elem->getLang (); +} + +/** + * poppler_structure_element_get_alt_text: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the “alternate” text representation of the element (and its child + * elements). This is mostly used for non-text elements like images and + * figures, to specify a textual description of the element. + * + * Note that for elements containing proper text, the function + * poppler_structure_element_get_text() must be used instead. + * + * Return value: (transfer none): The alternate text representation for the + * element, or %NULL if not defined. + */ +const gchar* +poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (!poppler_structure_element->alt_text) + { + /* Fetch the string only once: the returned GooString is deleted below, + * so an extra call made just to test for NULL would leak its result. */ + GooString *s = poppler_structure_element->elem->getAltText (); + if (s) + poppler_structure_element->alt_text = _poppler_goo_string_to_utf8 (s); + delete s; + } + + return poppler_structure_element->alt_text; +} + +/** + * poppler_structure_element_get_actual_text: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the actual text enclosed by the element (and its child elements). + * The actual text is mostly used for non-text elements like images and + * figures which do have the graphical appearance of text, like + * a logo. For those the actual text is the equivalent text to those + * graphical elements which look like text when rendered.
+ *
+ * The converted string is cached in the element, so repeated calls return
+ * the same pointer.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer none): The actual text for the element, or %NULL
+ *    if not defined.
+ */
+const gchar*
+poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (!poppler_structure_element->actual_text)
+    {
+      /* Fetch the string exactly once. It used to be fetched a second time
+       * just for the NULL test, and that extra result was never released.
+       */
+      GooString *s = poppler_structure_element->elem->getActualText ();
+      if (s)
+        poppler_structure_element->actual_text = _poppler_goo_string_to_utf8 (s);
+
+      /* NOTE(review): this release assumes getActualText() hands ownership
+       * to the caller. get_title() does not release the result of
+       * getTitle(); confirm against StructElement which of the two is right.
+       */
+      delete s;
+    }
+
+  return poppler_structure_element->actual_text;
+}
+
+/**
+ * poppler_structure_element_get_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @recursive: If %TRUE, the text of child elements is gathered recursively
+ *   in logical order and returned as part of the result.
+ *
+ * Obtains the text enclosed by an element, or the subtree under an element.
+ *
+ * Return value: (transfer none): A string.
+ */ +const gchar* +poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element, + gboolean recursive) +{ + if (recursive) + { + if (!poppler_structure_element->text_r) + { + GooString *s = poppler_structure_element->elem->getText (NULL, gTrue); + if (s) + poppler_structure_element->text_r = _poppler_goo_string_to_utf8 (s); + delete s; + } + return poppler_structure_element->text_r; + } + + if (!poppler_structure_element->text) + { + GooString *s = poppler_structure_element->elem->getText (NULL, gFalse); + if (s) + poppler_structure_element->text = _poppler_goo_string_to_utf8 (s); + delete s; + } + return poppler_structure_element->text; +} + + +class SpanBuilder { +public: + SpanBuilder(): + font(), text(), link(), + map(globalParams->getTextEncoding()), + glist(NULL), + flags(0), + color(0) + {} + + ~SpanBuilder() { + map->decRefCnt(); + g_list_free_full (glist, _poppler_text_span_free); + } + + void process(const MCOpArray& ops) { + for (MCOpArray::const_iterator i = ops.begin(); i != ops.end(); ++i) + process(*i); + } + + void process(const MCOp& op) { + if (op.type == MCOp::Unichar) { + int n = map->mapUnicode(op.unichar, buf, sizeof(buf)); + text.append(buf, n); + return; + } + + Guint oldFlags = flags; + + if (op.type == MCOp::Flags) { + if (op.flags & MCOp::FlagFontBold) + flags |= POPPLER_TEXT_SPAN_BOLD; + else + flags &= ~POPPLER_TEXT_SPAN_BOLD; + + if (op.flags & MCOp::FlagFontFixed) + flags |= POPPLER_TEXT_SPAN_FIXED_WIDTH; + else + flags &= ~POPPLER_TEXT_SPAN_FIXED_WIDTH; + + if (op.flags & MCOp::FlagFontItalic) + flags |= POPPLER_TEXT_SPAN_ITALIC; + else + flags &= ~POPPLER_TEXT_SPAN_ITALIC; + } + + if (op.type == MCOp::Color && (color = op.color.rgbPixel ())) { + flags |= POPPLER_TEXT_SPAN_COLOR; + } else { + flags &= ~POPPLER_TEXT_SPAN_COLOR; + } + + if (op.type == MCOp::FontName) { + if (op.value) { + flags |= POPPLER_TEXT_SPAN_FONT; + font.append(op.value); + } else { + flags &= ~POPPLER_TEXT_SPAN_FONT; + } + } + + if 
(flags != oldFlags) + newSpan(); + } + + void newSpan() { + // If there is no text, do not append a new PopplerTextSpan + // and keep the attributes/flags for the next span. + if (text.getLength ()) { + PopplerTextSpan *span = g_slice_new0 (PopplerTextSpan); + span->color = color; + span->flags = flags; + span->text = _poppler_goo_string_to_utf8 (&text); + text.clear(); + + if (font.getLength()) { + span->font_name = _poppler_goo_string_to_utf8 (&font); + font.clear(); + } + + if (link.getLength()) { + assert(flags & POPPLER_TEXT_SPAN_LINK); + span->link_target = _poppler_goo_string_to_utf8 (&link); + } + + glist = g_list_append (glist, span); + } + + // Link is always cleared + link.clear(); + } + + GList* end() { + GList *result = glist; + glist = NULL; + return result; + } + +private: + GooString font; + GooString text; + GooString link; + UnicodeMap *map; + GList *glist; + char buf[8]; + Guint flags; + Guint color; +}; + + +/** + * poppler_structure_element_get_text_spans: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the text enclosed by an element, as a #GList of #PopplerTextSpan + * structures. Each item in the list is a piece of text which share the same + * attributes, plus its attributes. + * + * Return value: (transfer none) (element-type PopplerTextSpan): A #GList + * of #PopplerTextSpan structures. 
+ */
+GList*
+poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  /* Only content elements are turned into spans. */
+  if (!poppler_structure_element->elem->isContent ())
+    return NULL;
+
+  /* Spans built by an earlier call are kept in the element and reused. */
+  if (poppler_structure_element->text_spans)
+    return poppler_structure_element->text_spans;
+
+  SpanBuilder builder;
+  builder.process(poppler_structure_element->elem->getMCOps ());
+  poppler_structure_element->text_spans = builder.end();
+
+  return poppler_structure_element->text_spans;
+}
+
+/**
+ * poppler_structure_element_get_attribute:
+ * @poppler_structure_element: A #PopplerStructureElement.
+ * @attribute: A #PopplerStructureAttribute value.
+ * @inherit: Whether to look up for inheritable attribute values in the
+ *    ancestors of the element, if the attribute is not defined in the
+ *    element.
+ *
+ * Types returned for each attribute
+ *
AttributesReturned type
POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENTA #PopplerStructurePlacement value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODEA #PopplerStructureWritingMode value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGNA #PopplerStructureTextAlign, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGNA #PopplerStructureBlockAlign, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGNA #PopplerStructureInlineAlign, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATIONA #PopplerStructureTextDecoration value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGNA #PopplerStructureRubyAlign value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITIONA #PopplerStructureRubyPosition value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATIONA #PopplerStructureGlyphOrientation value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERINGA #PopplerStructureListNumbering value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_ROLEA #PopplerStructureRole value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_CHECKEDA #PopplerStructureChecked value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_SCOPEA #PopplerStructureScope value, as a guint32.
POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTIONA string, as a const gchar*.
POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY
POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORENumber, as a double.
POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER
POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT
POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT
POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT
POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT
POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS
POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT
POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN
POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN
POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP + * An array of double numbers. The type of the + * returned #GVariant is ad. + *
POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS
POPPLER_STRUCTURE_ATTRIBUTE_WIDTH + * A maybe-double number. That is, a #GVariant with type + * md. If the number is undefined, the value + * is meant to be calculated automatically. + *
POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT
POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT
POPPLER_STRUCTURE_ATTRIBUTE_COLOR + * A 3-tuple of doubles, with values in the [0, 1] range, + * in red-green-blue (RGB) order. The type of the returned #GVariant is + * (ddd). + *
POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR
POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR
POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLORA 4-tuple of 3-tuples. Each one of the tuples is a RGB color, + * being each color component a double in the [0, 1] + * range. The four returned colors are in top-right-bottom-left + * order. The type of the returned #GVariant is + * ((ddd)(ddd)(ddd)(ddd)). + *
POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE + * A 4-tuple of #PopplerStructureBorderStyle values, each one as a + * %guint32, in top-right-bottom-left order. The type of the + * returned #GVariant is (uuuu). + *
POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE
POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS + * A 4-tuple of double numbers, in top-right-bottom-left order. + * The type of the returned #GVariant is (dddd). + *
POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING
POPPLER_STRUCTURE_ATTRIBUTE_PADDING
POPPLER_STRUCTURE_ATTRIBUTE_BBOX
POPPLER_STRUCTURE_ATTRIBUTE_HEADERSAn array of strings, each string being a const gchar*. + * The type of the returned #GVariant is as.
+ *
+ *
+ * Return value: (transfer full): A #GVariant, with value varying depending
+ *    on the attribute requested, as specified in the table. If the
+ *    attribute is not defined, NULL is returned.
+ *    %POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN and
+ *    %POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY are rejected and yield NULL.
+ */
+GVariant*
+poppler_structure_element_get_attribute (PopplerStructureElement  *poppler_structure_element,
+                                         PopplerStructureAttribute attribute,
+                                         gboolean                  inherit)
+{
+  Object *value = NULL;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (attribute != POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN, NULL);
+  g_return_val_if_fail (attribute != POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY, NULL);
+
+  /* Each case looks the attribute up (optionally through the ancestors)
+   * and converts the result with the helper matching the documented type.
+   */
+  switch (attribute)
+    {
+      case POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::Placement, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::WritingMode, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BackgroundColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array_or_x4 (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderStyle, inherit);
+        return value ? _g_variant_new_from_border_style (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BorderThickness, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Color, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_PADDING:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Padding, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::SpaceBefore, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER:
+        value = attr_value_or_default (poppler_structure_element, Attribute::SpaceAfter, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::StartIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::EndIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextIndent, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::TextAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_BBOX:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BBox, inherit);
+        return value ? _g_variant_new_from_number_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_WIDTH:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Width, inherit);
+        return value ? _g_variant_new_from_number_or_auto (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Height, inherit);
+        return value ? _g_variant_new_from_number_or_auto (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::BlockAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::InlineAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TBorderStyle, inherit);
+        return value ? _g_variant_new_from_border_style (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TPadding, inherit);
+        return value ? _g_variant_new_from_number_or_x4 (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::BaselineShift, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::LineHeight, inherit);
+        return value ? _g_variant_new_from_number_or_auto_or_normal (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR:
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextDecorationColor, inherit);
+        return (value && value->isArray ()) ? _g_variant_new_from_rgb_array (value->getArray ()) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS:
+        /* Was looking up Attribute::LineHeight (copy-paste slip). */
+        value = attr_value_or_default (poppler_structure_element, Attribute::TextDecorationThickness, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::TextDecorationType, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::RubyAlign, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::RubyPosition, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::GlyphOrientationVertical, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnCount, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnGap, inherit);
+        return value ? _g_variant_new_number_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS:
+        /* Was looking up Attribute::ColumnGap (copy-paste slip). */
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColumnWidths, inherit);
+        return value ? _g_variant_new_number_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::ListNumbering, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_ROLE:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::Role, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_CHECKED:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::checked, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTION:
+        return string_to_variant (attr_value_or_default (poppler_structure_element,
+                                                         Attribute::Desc, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN:
+        value = attr_value_or_default (poppler_structure_element, Attribute::RowSpan, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN:
+        value = attr_value_or_default (poppler_structure_element, Attribute::ColSpan, inherit);
+        return value ? _g_variant_new_from_number (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_HEADERS:
+        value = attr_value_or_default (poppler_structure_element, Attribute::Headers, inherit);
+        return value ? _g_variant_new_string_array (value) : NULL;
+
+      case POPPLER_STRUCTURE_ATTRIBUTE_SCOPE:
+        return name_to_variant_enum (attr_value_or_default (poppler_structure_element,
+                                                            Attribute::Scope, inherit));
+      case POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY:
+        return string_to_variant (attr_value_or_default (poppler_structure_element, Attribute::Summary, inherit));
+
+      /* The two values below are rejected by the checks at the top. */
+      case POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY:
+      case POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN:
+      default:
+        g_assert_not_reached ();
+        return NULL;
+    }
+}
+
+/**
+ * poppler_structure_element_is_reference:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Return value: Whether the element is a reference to another object.
+ */
+gboolean
+poppler_structure_element_is_reference (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_assert (poppler_structure_element->elem);
+
+  return poppler_structure_element->elem->isObjectRef ();
+}
+
+/**
+ * poppler_structure_element_get_reference_type:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Classifies the object an object-reference element points to. The
+ * referenced object is fetched from the document: a dictionary of type
+ * "Annot" is reported as %POPPLER_STRUCTURE_REFERENCE_ANNOT, or as
+ * %POPPLER_STRUCTURE_REFERENCE_LINK when its "Subtype" is "Link".
+ *
+ * Return value: The type of object pointed to by the reference, a value of
+ *    #PopplerStructureReference. %POPPLER_STRUCTURE_REFERENCE_UNKNOWN is
+ *    returned for elements which are not references and for any other
+ *    kind of target.
+ */
+PopplerStructureReference
+poppler_structure_element_get_reference_type (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element),
+                        POPPLER_STRUCTURE_REFERENCE_UNKNOWN);
+  g_assert (poppler_structure_element->elem);
+
+  /* Nothing to classify unless the element is an object reference. */
+  if (!poppler_structure_element->elem->isObjectRef ())
+    return POPPLER_STRUCTURE_REFERENCE_UNKNOWN;
+
+  PopplerStructureReference kind = POPPLER_STRUCTURE_REFERENCE_UNKNOWN;
+  Object target;
+  const Ref ref = poppler_structure_element->elem->getObjectRef ();
+  XRef *xref = poppler_structure_element->structure->document->doc->getXRef ();
+
+  if (xref->fetch(ref.num, ref.gen, &target)->isDict("Annot"))
+    {
+      /* Any annotation counts as ANNOT; links are singled out. */
+      Object subtype;
+      kind = target.dictLookup("Subtype", &subtype)->isName("Link")
+           ? POPPLER_STRUCTURE_REFERENCE_LINK
+           : POPPLER_STRUCTURE_REFERENCE_ANNOT;
+      subtype.free();
+    }
+
+  target.free();
+
+  return kind;
+}
+
+/**
+ * poppler_structure_element_get_reference_link:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Return value: (transfer full): A #PopplerLinkMapping for the link pointed
+ *    to by the object reference, or %NULL if the element is not a reference
+ *    pointing to a link.
+ */
+PopplerLinkMapping*
+poppler_structure_element_get_reference_link (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element_get_reference_type (poppler_structure_element)
+      != POPPLER_STRUCTURE_REFERENCE_LINK)
+    return NULL;
+
+  gint num = poppler_structure_element_get_page (poppler_structure_element);
+  if (num < 0 || num >= poppler_document_get_n_pages (poppler_structure_element->structure->document))
+    return NULL;
+
+  /* Look through the link annotations of the page for the one whose
+   * structure parent is this element.
+   */
+  AnnotLink *link = NULL;
+  Page *page = poppler_structure_element->structure->document->doc->getPage (num + 1);
+  Links links(page->getAnnots ());
+
+  for (gint i = 0; i < links.getNumLinks(); i++)
+    {
+      AnnotLink *l = links.getLink (i);
+      const StructElement *parent = poppler_structure_element->structure->root->findParentElement (l->getTreeKey ());
+      if (parent == poppler_structure_element->elem)
+        {
+          link = l;
+          break;
+        }
+    }
+
+  if (!link)
+    return NULL;
+
+  PopplerRectangle rect;
+  LinkAction *link_action = link->getAction ();
+  PopplerLinkMapping *mapping = poppler_link_mapping_new ();
+  mapping->action = _poppler_action_new (poppler_structure_element->structure->document, link_action, NULL);
+
+  link->getRect (&rect.x1, &rect.y1, &rect.x2, &rect.y2);
+
+  /* Make the rectangle relative to the crop box origin. */
+  rect.x1 -= page->getCropBox()->x1;
+  rect.x2 -= page->getCropBox()->x1;
+  rect.y1 -= page->getCropBox()->y1;
+  rect.y2 -= page->getCropBox()->y1;
+
+  /* Account for the page rotation. */
+  switch (page->getRotate ())
+    {
+      case 90:
+        mapping->area.x1 = rect.y1;
+        mapping->area.y1 = page->getCropWidth () - rect.x2;
+        mapping->area.x2 = mapping->area.x1 + (rect.y2 - rect.y1);
+        mapping->area.y2 = mapping->area.y1 + (rect.x2 - rect.x1);
+        break;
+      case 180:
+        mapping->area.x1 = page->getCropWidth () - rect.x2;
+        mapping->area.y1 = page->getCropHeight () - rect.y2;
+        mapping->area.x2 = mapping->area.x1 + (rect.x2 - rect.x1);
+        mapping->area.y2 = mapping->area.y1 + (rect.y2 - rect.y1);
+        break;
+      case 270:
+        mapping->area.x1 = page->getCropHeight () - rect.y2;
+        mapping->area.y1 = rect.x1;
+        mapping->area.x2 = mapping->area.x1 + (rect.y2 - rect.y1);
+        mapping->area.y2 = mapping->area.y1 + (rect.x2 - rect.x1);
+        break;
+      default:
+        /* y1 used to be left unset here: y2 was assigned twice. */
+        mapping->area.x1 = rect.x1;
+        mapping->area.y1 = rect.y1;
+        mapping->area.x2 = rect.x2;
+        mapping->area.y2 = rect.y2;
+    }
+
+  return mapping;
+}
+
+/**
+ * poppler_structure_element_find_link:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * For an element of type %POPPLER_STRUCTURE_ELEMENT_LINK, searches its
+ * direct children for an object reference to a link and builds the
+ * mapping for the first one that can be resolved.
+ *
+ * Return value: (transfer full): A #PopplerLinkMapping, or %NULL if the
+ *    link cannot be found.
+ */
+PopplerLinkMapping*
+poppler_structure_element_find_link (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (poppler_structure_element->elem->getType () != StructElement::Link)
+    return NULL;
+
+  for (unsigned i = 0; i < poppler_structure_element_get_n_children (poppler_structure_element); i++)
+    {
+      /* NOTE(review): if poppler_structure_element_get_child() returns a
+       * new reference, it has to be released here -- confirm its contract.
+       */
+      PopplerStructureElement *child = poppler_structure_element_get_child (poppler_structure_element, i);
+      if (poppler_structure_element_get_reference_type (child) == POPPLER_STRUCTURE_REFERENCE_LINK)
+        {
+          /* Resolve the link through the child that IS the reference. The
+           * parent was passed here before, and because a Link element is
+           * not itself a reference, that call always returned NULL.
+           */
+          PopplerLinkMapping *mapping = poppler_structure_element_get_reference_link (child);
+          if (mapping)
+            return mapping;
+        }
+    }
+
+  return NULL;
+}
+
+/**
+ * poppler_structure_element_get_form_field:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Return value: (transfer full): A #PopplerFormField, or %NULL if
+ *    the element is not a %POPPLER_STRUCTURE_ELEMENT_FORM.
+ */ +PopplerFormField* +poppler_structure_element_get_form_field (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (poppler_structure_element->elem->getType () != StructElement::Form) + return NULL; + + // TODO Handle elements which have a Role attribute (used sometimes for + // non-editable widgets, to describe their appearance). Editable + // fields have only a single child, with the field identifier. + if (poppler_structure_element->elem->getNumElements () != 1) + return NULL; + + gint field_id = -1; + const StructElement *child = poppler_structure_element->elem->getElement (0); + if (child->isContent ()) + { + if (child->isObjectRef ()) + { + // TODO Handle this case -- I have yet to see a PDF using this. + } + else + { + // Element contains the form field ID as the MCID attribute. + field_id = child->getMCID (); + } + } + + if (field_id < 0) + return NULL; + + return (field_id < 0) ? NULL : poppler_document_get_form_field (poppler_structure_element->structure->document, + field_id); +} + +/** + * poppler_structure_element_get_form_field_mapping: + * @poppler_structure_element: A #PopplerStructureElement + * + * Return value: (transfer full): A #PopplerFormFieldMapping, or %NULL if + * the element is not a %POPPLER_STRUCTURE_ELEMENT_FORM. 
+ */ +PopplerFormFieldMapping* +poppler_structure_element_get_form_field_mapping (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + gint page_num = poppler_structure_element_get_page (poppler_structure_element); + g_assert (page_num < poppler_document_get_n_pages (poppler_structure_element->structure->document)); + if (page_num < 0) + return NULL; + + Page *page = poppler_structure_element->structure->document->doc->getPage (page_num + 1); + if (!page) + return NULL; + + FormPageWidgets *forms = page->getFormWidgets (); + if (!forms) + return NULL; + + PopplerFormField *field = poppler_structure_element_get_form_field (poppler_structure_element); + if (!field) + return NULL; + + FormWidget *widget = NULL; + for (int i = 0; i < forms->getNumWidgets (); i++) + { + FormWidget *w = forms->getWidget (i); + if ((gint) w->getID () == poppler_form_field_get_id (field)) + { + widget = w; + break; + } + } + + if (!field) + { + g_object_unref (field); + return NULL; + } + + PopplerFormFieldMapping *mapping = poppler_form_field_mapping_new (); + mapping->field = field; + + widget->getRect (&mapping->area.x1, &mapping->area.y1, + &mapping->area.x2, &mapping->area.y2); + mapping->area.x1 -= page->getCropBox ()->x1; + mapping->area.x2 -= page->getCropBox ()->x1; + mapping->area.y1 -= page->getCropBox ()->y1; + mapping->area.y2 -= page->getCropBox ()->y1; + + return mapping; +} diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h new file mode 100644 index 0000000..b3076d2 --- /dev/null +++ b/glib/poppler-structure-element.h @@ -0,0 +1,366 @@ +/* poppler-structure-element.h: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __POPPLER_STRUCTURE_ELEMENT_H__ +#define __POPPLER_STRUCTURE_ELEMENT_H__ + +#include +#include "poppler.h" + +G_BEGIN_DECLS + +#define POPPLER_TYPE_STRUCTURE_ELEMENT (poppler_structure_element_get_type ()) +#define POPPLER_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement)) +#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT)) + +/** + * PopplerStructureElementKind: + */ +typedef enum { + POPPLER_STRUCTURE_ELEMENT_UNKNOWN, + POPPLER_STRUCTURE_ELEMENT_CONTENT, + POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE, + POPPLER_STRUCTURE_ELEMENT_DOCUMENT, + POPPLER_STRUCTURE_ELEMENT_PART, + POPPLER_STRUCTURE_ELEMENT_ARTICLE, + POPPLER_STRUCTURE_ELEMENT_SECTION, + POPPLER_STRUCTURE_ELEMENT_DIV, + POPPLER_STRUCTURE_ELEMENT_SPAN, + POPPLER_STRUCTURE_ELEMENT_QUOTE, + POPPLER_STRUCTURE_ELEMENT_NOTE, + POPPLER_STRUCTURE_ELEMENT_REFERENCE, + POPPLER_STRUCTURE_ELEMENT_BIBENTRY, + POPPLER_STRUCTURE_ELEMENT_CODE, + POPPLER_STRUCTURE_ELEMENT_LINK, + POPPLER_STRUCTURE_ELEMENT_ANNOT, + POPPLER_STRUCTURE_ELEMENT_RUBY, + POPPLER_STRUCTURE_ELEMENT_WARICHU, + POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE, + POPPLER_STRUCTURE_ELEMENT_CAPTION, + 
POPPLER_STRUCTURE_ELEMENT_NONSTRUCT, + POPPLER_STRUCTURE_ELEMENT_TOC, + POPPLER_STRUCTURE_ELEMENT_TOC_ITEM, + POPPLER_STRUCTURE_ELEMENT_INDEX, + POPPLER_STRUCTURE_ELEMENT_PRIVATE, + POPPLER_STRUCTURE_ELEMENT_PARAGRAPH, + POPPLER_STRUCTURE_ELEMENT_HEADING, + POPPLER_STRUCTURE_ELEMENT_HEADING_1, + POPPLER_STRUCTURE_ELEMENT_HEADING_2, + POPPLER_STRUCTURE_ELEMENT_HEADING_3, + POPPLER_STRUCTURE_ELEMENT_HEADING_4, + POPPLER_STRUCTURE_ELEMENT_HEADING_5, + POPPLER_STRUCTURE_ELEMENT_HEADING_6, + POPPLER_STRUCTURE_ELEMENT_LIST, + POPPLER_STRUCTURE_ELEMENT_LIST_ITEM, + POPPLER_STRUCTURE_ELEMENT_LIST_LABEL, + POPPLER_STRUCTURE_ELEMENT_TABLE, + POPPLER_STRUCTURE_ELEMENT_TABLE_ROW, + POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING, + POPPLER_STRUCTURE_ELEMENT_TABLE_DATA, + POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER, + POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER, + POPPLER_STRUCTURE_ELEMENT_TABLE_BODY, + POPPLER_STRUCTURE_ELEMENT_FIGURE, + POPPLER_STRUCTURE_ELEMENT_FORMULA, + POPPLER_STRUCTURE_ELEMENT_FORM, +} PopplerStructureElementKind; + +/** + * PopplerStructureAttribute: + */ +typedef enum { + POPPLER_STRUCTURE_ATTRIBUTE_UNKNOWN, + POPPLER_STRUCTURE_ATTRIBUTE_USER_PROPERTY, + POPPLER_STRUCTURE_ATTRIBUTE_PLACEMENT, + POPPLER_STRUCTURE_ATTRIBUTE_WRITING_MODE, + POPPLER_STRUCTURE_ATTRIBUTE_BACKGROUND_COLOR, + POPPLER_STRUCTURE_ATTRIBUTE_BORDER_COLOR, + POPPLER_STRUCTURE_ATTRIBUTE_BORDER_STYLE, + POPPLER_STRUCTURE_ATTRIBUTE_BORDER_THICKNESS, + POPPLER_STRUCTURE_ATTRIBUTE_COLOR, + POPPLER_STRUCTURE_ATTRIBUTE_PADDING, + POPPLER_STRUCTURE_ATTRIBUTE_SPACE_BEFORE, + POPPLER_STRUCTURE_ATTRIBUTE_SPACE_AFTER, + POPPLER_STRUCTURE_ATTRIBUTE_START_INDENT, + POPPLER_STRUCTURE_ATTRIBUTE_END_INDENT, + POPPLER_STRUCTURE_ATTRIBUTE_TEXT_INDENT, + POPPLER_STRUCTURE_ATTRIBUTE_TEXT_ALIGN, + POPPLER_STRUCTURE_ATTRIBUTE_BBOX, + POPPLER_STRUCTURE_ATTRIBUTE_WIDTH, + POPPLER_STRUCTURE_ATTRIBUTE_HEIGHT, + POPPLER_STRUCTURE_ATTRIBUTE_BLOCK_ALIGN, + POPPLER_STRUCTURE_ATTRIBUTE_INLINE_ALIGN, + 
POPPLER_STRUCTURE_ATTRIBUTE_TABLE_BORDER_STYLE, + POPPLER_STRUCTURE_ATTRIBUTE_TABLE_PADDING, + POPPLER_STRUCTURE_ATTRIBUTE_BASELINE_SHIFT, + POPPLER_STRUCTURE_ATTRIBUTE_LINE_HEIGHT, + POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_COLOR, + POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION_THICKNESS, + POPPLER_STRUCTURE_ATTRIBUTE_TEXT_DECORATION, + POPPLER_STRUCTURE_ATTRIBUTE_RUBY_ALIGN, + POPPLER_STRUCTURE_ATTRIBUTE_RUBY_POSITION, + POPPLER_STRUCTURE_ATTRIBUTE_GLYPH_ORIENTATION, + POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_COUNT, + POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_GAP, + POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_WIDTHS, + POPPLER_STRUCTURE_ATTRIBUTE_LIST_NUMBERING, + POPPLER_STRUCTURE_ATTRIBUTE_ROLE, + POPPLER_STRUCTURE_ATTRIBUTE_CHECKED, + POPPLER_STRUCTURE_ATTRIBUTE_DESCRIPTION, + POPPLER_STRUCTURE_ATTRIBUTE_ROW_SPAN, + POPPLER_STRUCTURE_ATTRIBUTE_COLUMN_SPAN, + POPPLER_STRUCTURE_ATTRIBUTE_HEADERS, + POPPLER_STRUCTURE_ATTRIBUTE_SCOPE, + POPPLER_STRUCTURE_ATTRIBUTE_SUMMARY, +} PopplerStructureAttribute; + +/** + * PopplerStructurePlacement: + */ +typedef enum { + POPPLER_STRUCTURE_PLACEMENT_BLOCK, + POPPLER_STRUCTURE_PLACEMENT_INLINE, + POPPLER_STRUCTURE_PLACEMENT_BEFORE, + POPPLER_STRUCTURE_PLACEMENT_START, + POPPLER_STRUCTURE_PLACEMENT_END, +} PopplerStructurePlacement; + +/** + * PopplerStructureWritingMode: + */ +typedef enum { + POPPLER_STRUCTURE_WRITING_MODE_LR_TB, + POPPLER_STRUCTURE_WRITING_MODE_RL_TB, + POPPLER_STRUCTURE_WRITING_MODE_TB_RL, +} PopplerStructureWritingMode; + +/** + * PopplerStructureBorderStyle: + */ +typedef enum { + POPPLER_STRUCTURE_BORDER_STYLE_NONE, + POPPLER_STRUCTURE_BORDER_STYLE_HIDDEN, + POPPLER_STRUCTURE_BORDER_STYLE_DOTTED, + POPPLER_STRUCTURE_BORDER_STYLE_DASHED, + POPPLER_STRUCTURE_BORDER_STYLE_SOLID, + POPPLER_STRUCTURE_BORDER_STYLE_DOUBLE, + POPPLER_STRUCTURE_BORDER_STYLE_GROOVE, + POPPLER_STRUCTURE_BORDER_STYLE_INSET, + POPPLER_STRUCTURE_BORDER_STYLE_OUTSET, +} PopplerStructureBorderStyle; + +/** + * PopplerStructureTextAlign: + */ +typedef enum 
{ + POPPLER_STRUCTURE_TEXT_ALIGN_START, + POPPLER_STRUCTURE_TEXT_ALIGN_CENTER, + POPPLER_STRUCTURE_TEXT_ALIGN_END, + POPPLER_STRUCTURE_TEXT_ALIGN_JUSTIFY, +} PopplerStructureTextAlign; + +/** + * PopplerStructureBlockAlign: + */ +typedef enum { + POPPLER_STRUCTURE_BLOCK_ALIGN_BEFORE, + POPPLER_STRUCTURE_BLOCK_ALIGN_MIDDLE, + POPPLER_STRUCTURE_BLOCK_ALIGN_AFTER, + POPPLER_STRUCTURE_BLOCK_ALIGN_JUSTIFY, +} PopplerStructureBlockAlign; + +/** + * PopplerStructureInlineAlign: + */ +typedef enum { + POPPLER_STRUCTURE_INLINE_ALIGN_START, + POPPLER_STRUCTURE_INLINE_ALIGN_CENTER, + POPPLER_STRUCTURE_INLINE_ALIGN_END, +} PopplerStructureInlineAlign; + +/** + * PopplerStructureTextDecoration: + */ +typedef enum { + POPPLER_STRUCTURE_TEXT_DECORATION_NONE, + POPPLER_STRUCTURE_TEXT_DECORATION_UNDERLINE, + POPPLER_STRUCTURE_TEXT_DECORATION_OVERLINE, + POPPLER_STRUCTURE_TEXT_DECORATION_LINETHROUGH, +} PopplerStructureTextDecoration; + +/** + * PopplerStructureRubyAlign: + */ +typedef enum +{ + POPPLER_STRUCTURE_RUBY_ALIGN_START, + POPPLER_STRUCTURE_RUBY_ALIGN_CENTER, + POPPLER_STRUCTURE_RUBY_ALIGN_END, + POPPLER_STRUCTURE_RUBY_ALIGN_JUSTIFY, + POPPLER_STRUCTURE_RUBY_ALIGN_DISTRIBUTE, +} PopplerStructureRubyAlign; + +/** + * PopplerStructureRubyPosition: + */ +typedef enum { + POPPLER_STRUCTURE_RUBY_POSITION_BEFORE, + POPPLER_STRUCTURE_RUBY_POSITION_AFTER, + POPPLER_STRUCTURE_RUBY_POSITION_WARICHU, + POPPLER_STRUCTURE_RUBY_POSITION_INLINE, +} PopplerStructureRubyPosition; + +/** + * PopplerStructureGlyphOrientation: + */ +typedef enum { + POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO, + POPPLER_STRUCTURE_GLYPH_ORIENTATION_0 = POPPLER_STRUCTURE_GLYPH_ORIENTATION_AUTO, + POPPLER_STRUCTURE_GLYPH_ORIENTATION_90, + POPPLER_STRUCTURE_GLYPH_ORIENTATION_180, + POPPLER_STRUCTURE_GLYPH_ORIENTATION_270, +} PopplerStructureGlyphOrientation; + +/** + * PopplerStructureListNumbering: + */ +typedef enum { + POPPLER_STRUCTURE_LIST_NUMBERING_NONE, + POPPLER_STRUCTURE_LIST_NUMBERING_DISC, + 
POPPLER_STRUCTURE_LIST_NUMBERING_CIRCLE, + POPPLER_STRUCTURE_LIST_NUMBERING_SQUARE, + POPPLER_STRUCTURE_LIST_NUMBERING_DECIMAL, + POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ROMAN, + POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ROMAN, + POPPLER_STRUCTURE_LIST_NUMBERING_UPPER_ALPHA, + POPPLER_STRUCTURE_LIST_NUMBERING_LOWER_ALPHA, +} PopplerStructureListNumbering; + +/** + * PopplerStructureRole: + */ +typedef enum { + POPPLER_STRUCTURE_ROLE_RADIO_BUTTON, + POPPLER_STRUCTURE_ROLE_PUSH_BUTTON, + POPPLER_STRUCTURE_ROLE_TEXT_VALUE, + POPPLER_STRUCTURE_ROLE_CHECKBOX, +} PopplerStructureRole; + +/** + * PopplerStructureChecked: + */ +typedef enum { + POPPLER_STRUCTURE_CHECKED_ON, + POPPLER_STRUCTURE_CHECKED_OFF, + POPPLER_STRUCTURE_CHECKED_NEUTRAL, +} PopplerStructureChecked; + +/** + * PopplerStructureScope: + */ +typedef enum { + POPPLER_STRUCTURE_SCOPE_ROW, + POPPLER_STRUCTURE_SCOPE_COLUMN, + POPPLER_STRUCTURE_SCOPE_BOTH, +} PopplerStructureScope; + +/** + * PopplerStructureReference: + */ +typedef enum { + POPPLER_STRUCTURE_REFERENCE_UNKNOWN, + POPPLER_STRUCTURE_REFERENCE_ANNOT, + POPPLER_STRUCTURE_REFERENCE_LINK, +} PopplerStructureReference; + + +typedef struct _PopplerTextSpan PopplerTextSpan; +struct _PopplerTextSpan { + gchar *text; + gchar *font_name; + gchar *link_target; + guint flags; + guint color; /* 0x00RRGGBB */ +}; + +enum { + POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0), + POPPLER_TEXT_SPAN_SERIF_FONT = (1 << 1), + POPPLER_TEXT_SPAN_ITALIC = (1 << 2), + POPPLER_TEXT_SPAN_BOLD = (1 << 3), + POPPLER_TEXT_SPAN_LINK = (1 << 4), + POPPLER_TEXT_SPAN_COLOR = (1 << 5), + POPPLER_TEXT_SPAN_FONT = (1 << 6), +}; + + +static inline gboolean poppler_text_span_is_fixed_width (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH); +} + +static inline gboolean poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF_FONT); +} + +static inline gboolean 
poppler_text_span_is_bold (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD); +} + +static inline gboolean poppler_text_span_is_link (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_LINK); +} + + +GType poppler_structure_element_get_type (void) G_GNUC_CONST; +PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element); +gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element); +gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element); +guint poppler_structure_element_get_n_children (PopplerStructureElement *poppler_structure_element); +PopplerStructureElement *poppler_structure_element_get_child (PopplerStructureElement *poppler_structure_element, + guint index); +const gchar *poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element); +const gchar *poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element); +const gchar *poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element); +const gchar *poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element); +const gchar *poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element, + gboolean recursive); +GList *poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element, + gboolean recursive); +const gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element); +const gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element); 
+GVariant *poppler_structure_element_get_attribute (PopplerStructureElement *poppler_structure_element, + PopplerStructureAttribute attribute, + gboolean inherit); +gboolean poppler_structure_element_is_reference (PopplerStructureElement *poppler_structure_element); +PopplerStructureReference poppler_structure_element_get_reference_type (PopplerStructureElement *poppler_structure_element); +PopplerLinkMapping *poppler_structure_element_get_reference_link (PopplerStructureElement *poppler_structure_element); +PopplerLinkMapping *poppler_structure_element_find_link (PopplerStructureElement *poppler_structure_element); + +PopplerFormField *poppler_structure_element_get_form_field (PopplerStructureElement *poppler_structure_element); +PopplerFormFieldMapping *poppler_structure_element_get_form_field_mapping (PopplerStructureElement *poppler_structure_element); + +G_END_DECLS + +#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */ diff --git a/glib/poppler-structure.cc b/glib/poppler-structure.cc new file mode 100644 index 0000000..f9671a6 --- /dev/null +++ b/glib/poppler-structure.cc @@ -0,0 +1,362 @@ +/* poppler-structure.cc: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "config.h" + +#ifndef __GI_SCANNER__ +#include +#include +#endif /* !__GI_SCANNER__ */ + +#include "poppler.h" +#include "poppler-private.h" +#include "poppler-structure.h" + + +static inline StructElement::Type +_poppler_structure_element_kind_to_structelement_type (PopplerStructureElementKind kind) +{ + switch (kind) + { + case POPPLER_STRUCTURE_ELEMENT_UNKNOWN: + return StructElement::Unknown; + case POPPLER_STRUCTURE_ELEMENT_CONTENT: + return StructElement::MCID; + case POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE: + return StructElement::OBJR; + case POPPLER_STRUCTURE_ELEMENT_DOCUMENT: + return StructElement::Document; + case POPPLER_STRUCTURE_ELEMENT_PART: + return StructElement::Part; + case POPPLER_STRUCTURE_ELEMENT_SECTION: + return StructElement::Sect; + case POPPLER_STRUCTURE_ELEMENT_DIV: + return StructElement::Div; + case POPPLER_STRUCTURE_ELEMENT_SPAN: + return StructElement::Span; + case POPPLER_STRUCTURE_ELEMENT_QUOTE: + return StructElement::Quote; + case POPPLER_STRUCTURE_ELEMENT_NOTE: + return StructElement::Note; + case POPPLER_STRUCTURE_ELEMENT_REFERENCE: + return StructElement::Reference; + case POPPLER_STRUCTURE_ELEMENT_BIBENTRY: + return StructElement::BibEntry; + case POPPLER_STRUCTURE_ELEMENT_CODE: + return StructElement::Code; + case POPPLER_STRUCTURE_ELEMENT_LINK: + return StructElement::Link; + case POPPLER_STRUCTURE_ELEMENT_ANNOT: + return StructElement::Annot; + case POPPLER_STRUCTURE_ELEMENT_RUBY: + return StructElement::Ruby; + case POPPLER_STRUCTURE_ELEMENT_WARICHU: + return StructElement::Warichu; + case POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE: + return StructElement::BlockQuote; + case POPPLER_STRUCTURE_ELEMENT_CAPTION: + return StructElement::Caption; + case POPPLER_STRUCTURE_ELEMENT_NONSTRUCT: + return StructElement::NonStruct; + case POPPLER_STRUCTURE_ELEMENT_TOC: + return StructElement::TOC; + case POPPLER_STRUCTURE_ELEMENT_TOC_ITEM: + return StructElement::TOCI; + case POPPLER_STRUCTURE_ELEMENT_INDEX: + return 
StructElement::Index; + case POPPLER_STRUCTURE_ELEMENT_PRIVATE: + return StructElement::Private; + case POPPLER_STRUCTURE_ELEMENT_PARAGRAPH: + return StructElement::P; + case POPPLER_STRUCTURE_ELEMENT_HEADING: + return StructElement::H; + case POPPLER_STRUCTURE_ELEMENT_HEADING_1: + return StructElement::H1; + case POPPLER_STRUCTURE_ELEMENT_HEADING_2: + return StructElement::H2; + case POPPLER_STRUCTURE_ELEMENT_HEADING_3: + return StructElement::H3; + case POPPLER_STRUCTURE_ELEMENT_HEADING_4: + return StructElement::H4; + case POPPLER_STRUCTURE_ELEMENT_HEADING_5: + return StructElement::H5; + case POPPLER_STRUCTURE_ELEMENT_HEADING_6: + return StructElement::H6; + case POPPLER_STRUCTURE_ELEMENT_LIST: + return StructElement::L; + case POPPLER_STRUCTURE_ELEMENT_LIST_ITEM: + return StructElement::LI; + case POPPLER_STRUCTURE_ELEMENT_LIST_LABEL: + return StructElement::Lbl; + case POPPLER_STRUCTURE_ELEMENT_TABLE: + return StructElement::Table; + case POPPLER_STRUCTURE_ELEMENT_TABLE_ROW: + return StructElement::TR; + case POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING: + return StructElement::TH; + case POPPLER_STRUCTURE_ELEMENT_TABLE_DATA: + return StructElement::TD; + case POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER: + return StructElement::THead; + case POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER: + return StructElement::TFoot; + case POPPLER_STRUCTURE_ELEMENT_TABLE_BODY: + return StructElement::TBody; + case POPPLER_STRUCTURE_ELEMENT_FIGURE: + return StructElement::Figure; + case POPPLER_STRUCTURE_ELEMENT_FORMULA: + return StructElement::Formula; + case POPPLER_STRUCTURE_ELEMENT_FORM: + return StructElement::Form; + default: + g_assert_not_reached (); + } +} + + + +/** + * SECTION:poppler-structure + * @short_description: Document structure + * @title: PopplerStructure + * + * #PopplerStructure is used to represent the structure of a #PopplerDocument. 
+ * If a structure is defined, poppler_document_get_structure() will return a
+ * valid pointer to its #PopplerStructure, which represents the document
+ * tree. Elements can be obtained using poppler_structure_get_n_children()
+ * and poppler_structure_get_child(). Searching for elements of a given
+ * #PopplerStructureElementKind and/or page can be done using
+ * poppler_structure_find_elements().
+ *
+ * The document structure tree is formed by #PopplerStructureElement objects,
+ * each of which describes a relevant element of the document. The
+ * logical order of the document is that of a depth-first traversal
+ * of the tree. Elements may recursively contain other child elements, which
+ * can be obtained using poppler_structure_element_get_n_children() and
+ * poppler_structure_element_get_child().
+ *
+ * Elements may have attached attributes describing additional information
+ * about them. The standard attributes (as defined in the PDF specification,
+ * see #PopplerStructureAttribute for a complete list) can be obtained using
+ * poppler_structure_element_get_attribute(). Elements may also contain
+ * non-standard attributes with arbitrary names, called “user properties”;
+ * see poppler_structure_element_get_user_property() for details.
+ */
+
+/* Class structure for #PopplerStructure; adds nothing to #GObjectClass. */
+typedef struct _PopplerStructureClass PopplerStructureClass;
+struct _PopplerStructureClass
+{
+  GObjectClass parent_class;
+};
+
+G_DEFINE_TYPE (PopplerStructure, poppler_structure, G_TYPE_OBJECT);
+
+
+/* Instance initializer: the fields are filled in by _poppler_structure_new(). */
+static void
+poppler_structure_init (PopplerStructure *poppler_structure)
+{
+}
+
+
+/*
+ * Releases the cached child wrappers and the reference held on the
+ * owning document.
+ */
+static void
+poppler_structure_finalize (GObject *object)
+{
+  PopplerStructure *poppler_structure = POPPLER_STRUCTURE (object);
+
+  /*
+   * Drop the cached children first: the size of the cache is read from
+   * the tree root, which must still be alive at this point (see below).
+   */
+  if (poppler_structure->children)
+    {
+      for (unsigned i = 0; i < poppler_structure->root->getNumElements (); i++)
+        {
+          /* Slots are filled lazily by poppler_structure_get_child(),
+           * so any of them may still be NULL. */
+          if (poppler_structure->children[i])
+            g_object_unref (poppler_structure->children[i]);
+        }
+      g_free (poppler_structure->children);
+      poppler_structure->children = NULL;
+    }
+
+  /*
+   * poppler_structure->root is owned by the catalog, so it is not freed
+   * here. The document is released last because dropping what may be the
+   * final reference to it could destroy the catalog, and the root with
+   * it, while the loop above still needs the root.
+   */
+  poppler_structure->root = NULL;
+  if (poppler_structure->document)
+    {
+      g_object_unref (poppler_structure->document);
+      poppler_structure->document = NULL;
+    }
+
+  G_OBJECT_CLASS (poppler_structure_parent_class)->finalize (object);
+}
+
+
+/* Class initializer: installs the finalize handler. */
+static void
+poppler_structure_class_init (PopplerStructureClass *klass)
+{
+  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+  gobject_class->finalize = poppler_structure_finalize;
+}
+
+
+/*
+ * _poppler_structure_new:
+ * @poppler_document: the #PopplerDocument the structure tree belongs to;
+ *   a reference to it is held for the lifetime of the returned object.
+ * @root: the structure tree root; must not be NULL. It is borrowed, not
+ *   owned, by the returned object.
+ *
+ * Creates the wrapper object for a structure tree root. The array used to
+ * cache the wrappers of the top-level elements is allocated zero-filled
+ * here (only when the root has elements) and populated on demand.
+ *
+ * Returns a new #PopplerStructure, or NULL if @poppler_document is not a
+ * valid #PopplerDocument.
+ */
+PopplerStructure*
+_poppler_structure_new (PopplerDocument *poppler_document,
+                        StructTreeRoot  *root)
+{
+  PopplerStructure *poppler_structure;
+
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL);
+  g_assert (root);
+
+  poppler_structure = (PopplerStructure*) g_object_new (POPPLER_TYPE_STRUCTURE, NULL);
+
+  poppler_structure->document = (PopplerDocument*) g_object_ref (poppler_document);
+  poppler_structure->root = root;
+  poppler_structure->children = NULL;
+
+  if (root->getNumElements ())
+    poppler_structure->children = g_new0 (PopplerStructureElement*,
+                                          root->getNumElements ());
+  return poppler_structure;
+}
+
+
+/**
+ * poppler_structure_get_n_children:
+ * @poppler_structure: A #PopplerStructure
+ *
+ * Return value: Number of structure elements in the root of the
+ * structure tree.
+ */
+guint
+poppler_structure_get_n_children (PopplerStructure *poppler_structure)
+{
+  guint n_children;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), 0);
+  g_assert (poppler_structure->root);
+
+  n_children = poppler_structure->root->getNumElements ();
+  return n_children;
+}
+
+/**
+ * poppler_structure_get_child:
+ * @poppler_structure: A #PopplerStructure
+ * @index: Position of the wanted element among the elements in the root
+ *   of the structure tree.
+ *
+ * The wrapper object for each position is created the first time it is
+ * requested and cached, so repeated calls with the same @index return
+ * the same object.
+ *
+ * Return value: (transfer none): A #PopplerStructureElement, or %NULL if
+ *   @index is out of range.
+ */
+PopplerStructureElement*
+poppler_structure_get_child (PopplerStructure *poppler_structure,
+                             guint             index)
+{
+  PopplerStructureElement **slot;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), NULL);
+  g_assert (poppler_structure->root);
+  g_assert (poppler_structure->root->getNumElements () >= 0);
+  g_return_val_if_fail (index < (guint) poppler_structure->root->getNumElements (), NULL);
+
+  slot = &poppler_structure->children[index];
+  if (*slot == NULL)
+    {
+      /* First request for this position: build the wrapper and cache it. */
+      *slot = _poppler_structure_element_new (poppler_structure,
+                                              poppler_structure->root->getElement (index));
+      /*
+       * NOTE(review): g_object_ref_sink() only takes ownership without
+       * adding a reference when the object is floating. If
+       * PopplerStructureElement does not derive from GInitiallyUnowned
+       * this adds an extra reference which finalize never drops --
+       * confirm against the element class definition.
+       */
+      g_object_ref_sink (*slot);
+    }
+
+  return *slot;
+}
+
+
+/*
+ * XXX PopplerStructure and PopplerStructureElement cache their children
+ * PopplerStructureElement wrapper objects. This find function could
+ * traverse the cache instead of creating a list of new objects, but
+ * it is unclear whether that would be faster -- it will mostly depend
+ * on the use-case.
+ */
+static void
+_poppler_structure_find_elements_helper (PopplerStructure    *structure,
+                                         StructElement       *element,
+                                         StructElement::Type  type,
+                                         Ref                 *pageRef,
+                                         GList              **result)
+{
+  unsigned child = 0;
+
+  g_assert (element);
+  g_assert (result);
+
+  /*
+   * Recurse into every child, in order, before considering the element
+   * itself.
+   *
+   * NOTE(review): this emits descendants ahead of their parent
+   * (post-order). Confirm that this is the intended "logical order".
+   */
+  while (child < element->getNumElements ())
+    {
+      _poppler_structure_find_elements_helper (structure,
+                                               element->getElement (child),
+                                               type,
+                                               pageRef,
+                                               result);
+      child++;
+    }
+
+  /*
+   * Type filter: StructElement::Unknown means "any type"; otherwise
+   * the element must be of exactly the requested type.
+   */
+  const bool any_type = (type == StructElement::Unknown);
+  if (!any_type && element->getType () != type)
+    return;
+
+  /*
+   * Page filter: a NULL pageRef means "any page"; otherwise the page
+   * reference of the element must be the requested one.
+   */
+  if (pageRef != NULL)
+    {
+      Ref elementPage = element->getPageRef ();
+      if (elementPage.num != pageRef->num || elementPage.gen != pageRef->gen)
+        return;
+    }
+
+  /*
+   * The element passed every filter: wrap it and add it to the end of
+   * the list.
+   *
+   * NOTE(review): g_list_append() walks the whole list on each call, so
+   * building a long result this way is quadratic.
+   */
+  PopplerStructureElement *wrapper = _poppler_structure_element_new (structure, element);
+  *result = g_list_append (*result, wrapper);
+}
+
+/**
+ * poppler_structure_find_elements:
+ * @poppler_structure: A #PopplerStructure.
+ * @kind: A #PopplerStructureElementKind value. Use
+ *    #POPPLER_STRUCTURE_ELEMENT_UNKNOWN to return all the structure
+ *    elements, or any other value to obtain only the elements of the
+ *    given type.
+ * @page: Limit the elements returned to those whose content is to
+ *    be displayed in a certain page. Use -1 to obtain
+ *    elements from all the pages.
+ *
+ * The elements are returned in *logical order* as defined in the
+ * PDF specification, that is, the ordering that results from a depth-first
+ * traversal of the structure tree.
+ *
+ * Return value: (element-type PopplerStructureElement) (transfer full):
+ *    A #GList of #PopplerStructureElement objects.
+ */ +GList* +poppler_structure_find_elements (PopplerStructure *poppler_structure, + PopplerStructureElementKind kind, + gint page) +{ + GList *result = NULL; + Ref *pageRef = NULL; + + g_return_val_if_fail (POPPLER_IS_STRUCTURE (poppler_structure), NULL); + + if (page >= 0) + { + pageRef = poppler_structure->document->doc->getCatalog ()->getPageRef(page); + } + + for (unsigned i = 0; i < poppler_structure->root->getNumElements (); i++) + { + _poppler_structure_find_elements_helper (poppler_structure, + poppler_structure->root->getElement (i), + _poppler_structure_element_kind_to_structelement_type (kind), + pageRef, + &result); + } + + return result; +} diff --git a/glib/poppler-structure.h b/glib/poppler-structure.h new file mode 100644 index 0000000..d01d551 --- /dev/null +++ b/glib/poppler-structure.h @@ -0,0 +1,43 @@ +/* poppler-structure.h: glib interface to poppler + * + * Copyright (C) 2013 Igalia S.L. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#ifndef __POPPLER_STRUCTURE_H__ +#define __POPPLER_STRUCTURE_H__ + +#include +#include "poppler.h" +#include "poppler-structure-element.h" + +G_BEGIN_DECLS + +#define POPPLER_TYPE_STRUCTURE (poppler_structure_get_type ()) +#define POPPLER_STRUCTURE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE, PopplerStructure)) +#define POPPLER_IS_STRUCTURE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE)) + +GType poppler_structure_get_type (void) G_GNUC_CONST; +guint poppler_structure_get_n_children (PopplerStructure *poppler_structure); +PopplerStructureElement *poppler_structure_get_child (PopplerStructure *poppler_structure, + guint index); +GList *poppler_structure_find_elements (PopplerStructure *poppler_structure, + PopplerStructureElementKind kind, + gint page); + +G_END_DECLS + +#endif /* !__POPPLER_STRUCTURE_H__ */ diff --git a/glib/poppler.h b/glib/poppler.h index 2d190f3..3cdc077 100644 --- a/glib/poppler.h +++ b/glib/poppler.h @@ -202,6 +202,8 @@ typedef struct _PopplerAnnotFileAttachment PopplerAnnotFileAttachment; typedef struct _PopplerAnnotMovie PopplerAnnotMovie; typedef struct _PopplerAnnotScreen PopplerAnnotScreen; typedef struct _PopplerAnnotCalloutLine PopplerAnnotCalloutLine; +typedef struct _PopplerStructure PopplerStructure; +typedef struct _PopplerStructureElement PopplerStructureElement; typedef enum { @@ -227,5 +229,6 @@ G_END_DECLS #include "poppler-date.h" #include "poppler-movie.h" #include "poppler-media.h" +#include "poppler-structure.h" #endif /* __POPPLER_GLIB_H__ */ diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml index a9d5158..9617ed7 100644 --- a/glib/reference/poppler-docs.sgml +++ b/glib/reference/poppler-docs.sgml @@ -23,6 +23,8 @@ + + diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt index 6fb14bc..6c4ebc9 100644 --- a/glib/reference/poppler-sections.txt +++ b/glib/reference/poppler-sections.txt @@ -145,6 +145,7 @@ 
poppler_document_get_n_attachments poppler_document_has_attachments poppler_document_get_attachments poppler_document_get_form_field +poppler_document_get_structure poppler_index_iter_new poppler_index_iter_copy poppler_index_iter_free @@ -535,6 +536,91 @@ poppler_movie_get_type
+poppler-structure +PopplerStructure +PopplerStructure +poppler_structure_get_n_children +poppler_structure_get_child +poppler_structure_find_elements + + +POPPLER_STRUCTURE +POPPLER_IS_STRUCTURE +POPPLER_TYPE_STRUCTURE + + +poppler_structure_get_type +
+ +
+poppler-structure-element +PopplerStructureElement +PopplerStructureElement +poppler_structure_element_get_element_type +poppler_structure_element_get_n_children +poppler_structure_element_get_child +poppler_structure_element_get_attribute +poppler_structure_element_get_user_property +PopplerStructureElementType +PopplerStructureAttribute +PopplerStructurePlacement +PopplerStructureWritingMode +PopplerStructureBorderStyle +PopplerStructureTextAlign +PopplerStructureBlockAlign +PopplerStructureInlineAlign +PopplerStructureTextDecoration +PopplerStructureRubyAlign +PopplerStructureRubyPosition +PopplerStructureGlyphOrientation +PopplerStructureListNumbering +PopplerStructureRole +PopplerStructureChecked +PopplerStructureScope + + +POPPLER_STRUCTURE_ELEMENT +POPPLER_IS_STRUCTURE_ELEMENT +POPPLER_STRUCTURE_ELEMENT_TYPE +POPPLER_TYPE_STRUCTURE_ATTRIBUTE +POPPLER_TYPE_STRUCTURE_BLOCK_ALIGN +POPPLER_TYPE_STRUCTURE_BORDER_STYLE +POPPLER_TYPE_STRUCTURE_CHECKED +POPPLER_TYPE_STRUCTURE_ELEMENT +POPPLER_TYPE_STRUCTURE_ELEMENT_TYPE +POPPLER_TYPE_STRUCTURE_GLYPH_ORIENTATION +POPPLER_TYPE_STRUCTURE_INLINE_ALIGN +POPPLER_TYPE_STRUCTURE_LIST_NUMBERING +POPPLER_TYPE_STRUCTURE_PLACEMENT +POPPLER_TYPE_STRUCTURE_ROLE +POPPLER_TYPE_STRUCTURE_RUBY_ALIGN +POPPLER_TYPE_STRUCTURE_RUBY_POSITION +POPPLER_TYPE_STRUCTURE_SCOPE +POPPLER_TYPE_STRUCTURE_TEXT_ALIGN +POPPLER_TYPE_STRUCTURE_TEXT_DECORATION +POPPLER_TYPE_STRUCTURE_WRITING_MODE + + +poppler_structure_element_get_type +poppler_structure_attribute_get_type +poppler_structure_block_align_get_type +poppler_structure_border_style_get_type +poppler_structure_checked_get_type +poppler_structure_element_type_get_type +poppler_structure_glyph_orientation_get_type +poppler_structure_inline_align_get_type +poppler_structure_list_numbering_get_type +poppler_structure_placement_get_type +poppler_structure_role_get_type +poppler_structure_ruby_align_get_type +poppler_structure_ruby_position_get_type +poppler_structure_scope_get_type 
+poppler_structure_text_align_get_type +poppler_structure_text_decoration_get_type +poppler_structure_writing_mode_get_type +
+ +
poppler-features POPPLER_HAS_CAIRO POPPLER_MAJOR_VERSION diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types index eed9849..6d40ac3 100644 --- a/glib/reference/poppler.types +++ b/glib/reference/poppler.types @@ -8,3 +8,5 @@ poppler_annot_get_type poppler_layer_get_type poppler_media_get_type poppler_movie_get_type +poppler_structure_get_type +poppler_structure_element_get_type -- 1.8.3.1