From 46ed48e0cd85f65620f87d62f8c3c6acc05dba4e Mon Sep 17 00:00:00 2001 From: Adrian Perez de Castro Date: Thu, 26 Sep 2013 17:50:51 +0300 Subject: [PATCH v11 07/11] glib: Expose inline attributes of structure elements Allows obtaining inline text attributes from structure elements. The text is divided into "spans", which are groups of consecutive glyphs that share their attributes. Each one of those is represented by a PopplerTextSpan, which gives information about the text font and color, and the link target for links. The list of PopplerTextSpans is created lazily when first used. --- glib/poppler-structure-element.cc | 273 ++++++++++++++++++++++++++++++++++++ glib/poppler-structure-element.h | 14 ++ glib/reference/poppler-sections.txt | 13 ++ 3 files changed, 300 insertions(+) diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc index f799df5..918cc97 100644 --- a/glib/poppler-structure-element.cc +++ b/glib/poppler-structure-element.cc @@ -64,6 +64,27 @@ POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter, poppler_structure_element_iter_copy, poppler_structure_element_iter_free) +struct _PopplerTextSpan { + gchar *text; + gchar *font_name; + gchar *link_target; + guint flags; + PopplerColor color; +}; + +POPPLER_DEFINE_BOXED_TYPE (PopplerTextSpan, + poppler_text_span, + poppler_text_span_copy, + poppler_text_span_free) + +enum { + POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0), + POPPLER_TEXT_SPAN_SERIF_FONT = (1 << 1), + POPPLER_TEXT_SPAN_ITALIC = (1 << 2), + POPPLER_TEXT_SPAN_BOLD = (1 << 3), +}; + + /** * poppler_structure_element_iter_copy: * @iter: a #PopplerStructureElementIter @@ -683,3 +704,255 @@ poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_e delete string; return result; } + + +/** + * poppler_structure_element_get_text_spans: + * @poppler_structure_element: A #PopplerStructureElement + * + * Obtains the text enclosed by an element, as a #GList of #PopplerTextSpan + * structures.
Each item in the list is a piece of text whose glyphs share the same + * attributes, plus those attributes. The following example shows how to + * obtain and free the text spans of an element: + * + * |[ + * GList *text_spans = poppler_structure_element_get_text_spans (element); + * // Use the text spans + * g_list_free_full (text_spans, + * (GDestroyNotify) poppler_text_span_free); + * ]| + * + * Return value: (transfer full) (element-type PopplerTextSpan): A #GList + * of #PopplerTextSpan structures. + * + * Since: 0.26 + */ +GList * +poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element) +{ + g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL); + g_assert (poppler_structure_element->elem); + + if (!poppler_structure_element->elem->isContent ()) + return NULL; + + GList *poppler_spans = NULL; + const TextSpanArray spans(poppler_structure_element->elem->getTextSpans ()); + + for (TextSpanArray::const_reverse_iterator i = spans.rbegin (); i != spans.rend (); ++i) + { + PopplerTextSpan *span = g_slice_new0 (PopplerTextSpan); + span->text = _poppler_goo_string_to_utf8 (i->getText ()); + + GfxRGB rgb = i->getColor(); + span->color.red = colToDbl(rgb.r) * 65535; + span->color.green = colToDbl(rgb.g) * 65535; + span->color.blue = colToDbl(rgb.b) * 65535; + + if (i->getLink ()) + span->link_target = _poppler_goo_string_to_utf8 (i->getLink ()); + + if (i->getFont ()) + { + span->font_name = _poppler_goo_string_to_utf8 (i->getFont ()->getFamily ()); + if (i->getFont ()->isFixedWidth ()) + span->flags |= POPPLER_TEXT_SPAN_FIXED_WIDTH; + if (i->getFont ()->isSerif ()) + span->flags |= POPPLER_TEXT_SPAN_SERIF_FONT; + if (i->getFont ()->isItalic ()) + span->flags |= POPPLER_TEXT_SPAN_ITALIC; + if (i->getFont ()->isBold ()) + span->flags |= POPPLER_TEXT_SPAN_BOLD; + + /* isBold() can return false for some fonts whose weight is heavy */ + switch (i->getFont ()->getWeight ()) + { + case GfxFont::W500: + case
GfxFont::W600: + case GfxFont::W700: + case GfxFont::W800: + case GfxFont::W900: + span->flags |= POPPLER_TEXT_SPAN_BOLD; + default: + break; + } + } + + poppler_spans = g_list_prepend (poppler_spans, span); + } + + return poppler_spans; +} + +/** + * poppler_text_span_copy: + * @poppler_text_span: a #PopplerTextSpan + * + * Makes a deep copy of a text span: its strings, font flags and color. + * + * Return value: (transfer full): A new #PopplerTextSpan + * + * Since: 0.26 + */ +PopplerTextSpan * +poppler_text_span_copy (PopplerTextSpan *poppler_text_span) +{ + PopplerTextSpan *new_span = g_slice_dup (PopplerTextSpan, poppler_text_span); + new_span->text = g_strdup (poppler_text_span->text); + if (poppler_text_span->font_name) + new_span->font_name = g_strdup (poppler_text_span->font_name); + if (poppler_text_span->link_target) + new_span->link_target = g_strdup (poppler_text_span->link_target); + /* flags and color are plain values, already duplicated by g_slice_dup() */ + return new_span; +} + +/** + * poppler_text_span_free: + * @poppler_text_span: A #PopplerTextSpan + * + * Frees a text span. + * + * Since: 0.26 + */ +void +poppler_text_span_free (PopplerTextSpan *poppler_text_span) +{ + g_free (poppler_text_span->text); + g_free (poppler_text_span->font_name); + g_free (poppler_text_span->link_target); + g_slice_free (PopplerTextSpan, poppler_text_span); +} + +/** + * poppler_text_span_is_fixed_width_font: + * @poppler_text_span: a #PopplerTextSpan + * + * Check whether a text span is meant to be rendered using a fixed-width font. + * + * Return value: Whether the span uses a fixed-width font. + * + * Since: 0.26 + */ +gboolean +poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH) != 0; +} + +/** + * poppler_text_span_is_serif_font: + * @poppler_text_span: a #PopplerTextSpan + * + * Check whether a text span is meant to be rendered using a serif font. + * + * Return value: Whether the span uses a serif font.
+ * + * Since: 0.26 + */ +gboolean +poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF_FONT) != 0; +} + +/** + * poppler_text_span_is_bold_font: + * @poppler_text_span: a #PopplerTextSpan + * + * Check whether a text span is meant to be rendered using a bold font. + * + * Return value: Whether the span uses a bold font. + * + * Since: 0.26 + */ +gboolean +poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD) != 0; +} + +/** + * poppler_text_span_is_link: + * @poppler_text_span: a #PopplerTextSpan + * + * Check whether a text span is a link (e.g. to another document, + * a different page, or a URL). + * + * Return value: Whether the span is a link. + * + * Since: 0.26 + */ +gboolean +poppler_text_span_is_link (PopplerTextSpan *poppler_text_span) +{ + return (poppler_text_span->link_target != NULL); +} + +/** + * poppler_text_span_get_color: + * @poppler_text_span: a #PopplerTextSpan + * + * Obtains the color in which the text is to be rendered. + * + * Return value: (transfer none): A #PopplerColor. + * + * Since: 0.26 + */ +PopplerColor * +poppler_text_span_get_color (PopplerTextSpan *poppler_text_span) +{ + return &poppler_text_span->color; +} + +/** + * poppler_text_span_get_text: + * @poppler_text_span: a #PopplerTextSpan + * + * Obtains the text contained in the span. + * + * Return value: (transfer none): A string. + * + * Since: 0.26 + */ +const gchar * +poppler_text_span_get_text (PopplerTextSpan *poppler_text_span) +{ + return poppler_text_span->text; +} + +/** + * poppler_text_span_get_font_name: + * @poppler_text_span: a #PopplerTextSpan + * + * Obtains the name of the font in which the span is to be rendered. + * + * Return value: (transfer none): A string containing the font name, or + * %NULL if a font is not defined.
+ * + * Since: 0.26 + */ +const gchar * +poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span) +{ + return poppler_text_span->font_name; +} + +/** + * poppler_text_span_get_link_target: + * @poppler_text_span: a #PopplerTextSpan + * + * Obtains the link target of a span. Note that for any text span for which + * poppler_text_span_is_link() returns %FALSE, the link target is not defined and + * %NULL will be returned. + * + * Return value: (transfer none): A string containing a description of the + * link target, or %NULL if undefined. + * + * Since: 0.26 + */ +const gchar * +poppler_text_span_get_link_target (PopplerTextSpan *poppler_text_span) +{ + return poppler_text_span->link_target; +} diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h index a5ac04b..fee8b12 100644 --- a/glib/poppler-structure-element.h +++ b/glib/poppler-structure-element.h @@ -82,6 +82,8 @@ typedef enum { POPPLER_STRUCTURE_ELEMENT_FORM, } PopplerStructureElementKind; +typedef struct _PopplerTextSpan PopplerTextSpan; + GType poppler_structure_element_get_type (void) G_GNUC_CONST; PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element); @@ -97,6 +99,7 @@ gchar *poppler_structure_element_get_text gboolean recursive); gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element); gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element); +GList *poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element); #define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ()) GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST; @@ -107,6 +110,17 @@ PopplerStructureElement *poppler_structure_element_iter_get_element gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter); void poppler_structure_element_iter_free
(PopplerStructureElementIter *iter); +PopplerTextSpan *poppler_text_span_copy (PopplerTextSpan *poppler_text_span); +void poppler_text_span_free (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_link (PopplerTextSpan *poppler_text_span); +PopplerColor *poppler_text_span_get_color (PopplerTextSpan *poppler_text_span); +const gchar *poppler_text_span_get_text (PopplerTextSpan *poppler_text_span); +const gchar *poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span); +const gchar *poppler_text_span_get_link_target (PopplerTextSpan *poppler_text_span); + G_END_DECLS #endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */ diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt index 5b62566..1905310 100644 --- a/glib/reference/poppler-sections.txt +++ b/glib/reference/poppler-sections.txt @@ -557,6 +557,7 @@ poppler_movie_get_type PopplerStructureElement PopplerStructureElementKind PopplerStructureElementIter +PopplerTextSpan poppler_structure_element_iter_new poppler_structure_element_iter_next poppler_structure_element_iter_copy @@ -575,6 +576,17 @@ poppler_structure_element_get_language poppler_structure_element_get_text poppler_structure_element_get_alt_text poppler_structure_element_get_actual_text +poppler_structure_element_get_text_spans +poppler_text_span_copy +poppler_text_span_free +poppler_text_span_is_fixed_width_font +poppler_text_span_is_serif_font +poppler_text_span_is_bold_font +poppler_text_span_is_link +poppler_text_span_get_color +poppler_text_span_get_text +poppler_text_span_get_font_name +poppler_text_span_get_link_target POPPLER_STRUCTURE_ELEMENT @@ -585,6 +597,7 @@ POPPLER_TYPE_STRUCTURE_ELEMENT_KIND poppler_structure_element_get_type 
+poppler_structure_element_kind_get_type poppler_structure_element_iter_get_type -- 1.8.4.2