From a8d311dc4ddc499b4c7691fca3ea3947d177f3ae Mon Sep 17 00:00:00 2001 From: Adrian Perez de Castro Date: Thu, 26 Sep 2013 17:50:51 +0300 Subject: [PATCH v12 07/11] glib: Expose inline attributes of structure elements Allows obtaining inline text attributes from structure elements. The text is divided into "spans", which are groups of consecutive glyphs that share their attributes. Each one of those is represented by a PopplerTextSpan, which gives information about the text font and color, and the link target for links. The list of PopplerTextSpans is created lazily when first used. --- glib/poppler-structure-element.cc | 266 ++++++++++++++++++++++++++++++++++++ glib/poppler-structure-element.h | 15 ++ glib/reference/poppler-sections.txt | 12 ++ 3 files changed, 293 insertions(+) diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc index f799df5..3ddbaa9 100644 --- a/glib/poppler-structure-element.cc +++ b/glib/poppler-structure-element.cc @@ -64,6 +64,26 @@ POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter, poppler_structure_element_iter_copy, poppler_structure_element_iter_free) +struct _PopplerTextSpan { + gchar *text; + gchar *font_name; + guint flags; + PopplerColor color; +}; + +POPPLER_DEFINE_BOXED_TYPE (PopplerTextSpan, + poppler_text_span, + poppler_text_span_copy, + poppler_text_span_free) + +enum { + POPPLER_TEXT_SPAN_FIXED_WIDTH = (1 << 0), + POPPLER_TEXT_SPAN_SERIF = (1 << 1), + POPPLER_TEXT_SPAN_ITALIC = (1 << 2), + POPPLER_TEXT_SPAN_BOLD = (1 << 3), +}; + + /** * poppler_structure_element_iter_copy: * @iter: a #PopplerStructureElementIter @@ -683,3 +703,249 @@ poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_e delete string; return result; } + + +static PopplerTextSpan * +_poppler_convert_text_span (const TextSpan& span) +{ + PopplerTextSpan *new_span = g_slice_new0 (PopplerTextSpan); + new_span->text = _poppler_goo_string_to_utf8 (span.getText ()); + + new_span->color.red = colToDbl 
(span.getColor ().r) * 65535; + new_span->color.green = colToDbl (span.getColor ().g) * 65535; + new_span->color.blue = colToDbl (span.getColor ().b) * 65535; + + if (span.getFont ()) + { + // GfxFont sometimes does not have a family name but there + // is always a font name that can be used as fallback. + GooString *font_name = span.getFont ()->getFamily (); + if (font_name == NULL) + font_name = span.getFont ()->getName (); + + new_span->font_name = _poppler_goo_string_to_utf8 (font_name); + if (span.getFont ()->isFixedWidth ()) + new_span->flags |= POPPLER_TEXT_SPAN_FIXED_WIDTH; + if (span.getFont ()->isSerif ()) + new_span->flags |= POPPLER_TEXT_SPAN_SERIF; + if (span.getFont ()->isItalic ()) + new_span->flags |= POPPLER_TEXT_SPAN_ITALIC; + if (span.getFont ()->isBold ()) + new_span->flags |= POPPLER_TEXT_SPAN_BOLD; + + /* isBold() can return false for some fonts whose weight is heavy */ + switch (span.getFont ()->getWeight ()) + { + case GfxFont::W500: + case GfxFont::W600: + case GfxFont::W700: + case GfxFont::W800: + case GfxFont::W900: + new_span->flags |= POPPLER_TEXT_SPAN_BOLD; + default: + break; + } + } + + return new_span; +} + + +/** + * poppler_structure_element_get_text_spans: + * @poppler_structure_element: A #PopplerStructureElement + * @n_text_spans: (out): A pointer to the location where the number of elements in + * the returned array will be stored. + * + * Obtains the text enclosed by an element, as an array of #PopplerTextSpan + * structures. Each item in the list is a piece of text which shares the same + * attributes, plus its attributes. 
The following example shows how to
+ * obtain and free the text spans of an element:
+ *
+ *
+ *    guint n_spans;
+ *    PopplerTextSpan **text_spans =
+ *       poppler_structure_element_get_text_spans (element, &n_spans);
+ *    // Use the text spans
+ *    poppler_structure_element_free_text_spans (text_spans);
+ *
+ *
+ * Return value: (transfer full) (array length=n_text_spans):
+ *    Null-terminated array of #PopplerTextSpan structures, or %NULL
+ *    (with @n_text_spans set to 0) if the element is not a content element.
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan **
+poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element,
+                                          guint                   *n_text_spans)
+{
+  /* Define the out parameter up front so that every early return below
+   * leaves it at 0 instead of uninitialised. */
+  if (n_text_spans != NULL)
+    *n_text_spans = 0;
+
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_assert (poppler_structure_element->elem);
+
+  if (!poppler_structure_element->elem->isContent ())
+    return NULL;
+
+  const TextSpanArray spans(poppler_structure_element->elem->getTextSpans ());
+  /* g_new0() zero-fills, so the extra slot is the NULL terminator. */
+  PopplerTextSpan** text_spans = g_new0 (PopplerTextSpan*, spans.size () + 1);
+
+  size_t i = 0;
+  for (TextSpanArray::const_iterator s = spans.begin (); s != spans.end (); ++s)
+    text_spans[i++] = _poppler_convert_text_span (*s);
+
+  if (n_text_spans != NULL)
+    *n_text_spans = spans.size ();
+
+  return text_spans;
+}
+
+/**
+ * poppler_structure_element_free_text_spans:
+ * @poppler_text_spans: Array of #PopplerTextSpan pointers, or %NULL.
+ *
+ * Frees the memory used by an array of #PopplerTextSpan elements, typically
+ * obtained using poppler_structure_element_get_text_spans(). Passing %NULL
+ * is allowed and does nothing.
+ *
+ * Since: 0.26
+ */
+void
+poppler_structure_element_free_text_spans (PopplerTextSpan **poppler_text_spans)
+{
+  /* poppler_structure_element_get_text_spans() returns NULL for elements
+   * that are not content, so NULL must be accepted silently here. */
+  if (poppler_text_spans == NULL)
+    return;
+
+  /* Array is NULL-terminated */
+  for (PopplerTextSpan **span = poppler_text_spans; *span; span++)
+    poppler_text_span_free (*span);
+
+  g_free (poppler_text_spans);
+}
+
+/**
+ * poppler_text_span_copy:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Makes a copy of a text span.
+ *
+ * Return value: (transfer full): A new #PopplerTextSpan
+ *
+ * Since: 0.26
+ */
+PopplerTextSpan *
+poppler_text_span_copy (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  /* Duplicate the structure bitwise, then replace the string pointers so
+   * the copy owns its own strings. */
+  PopplerTextSpan *new_span = g_slice_dup (PopplerTextSpan, poppler_text_span);
+  new_span->text = g_strdup (poppler_text_span->text);
+  /* g_strdup() returns NULL for NULL, so an unset font name stays unset. */
+  new_span->font_name = g_strdup (poppler_text_span->font_name);
+  return new_span;
+}
+
+/**
+ * poppler_text_span_free:
+ * @poppler_text_span: A #PopplerTextSpan, or %NULL
+ *
+ * Frees a text span. Passing %NULL is allowed and does nothing.
+ *
+ * Since: 0.26
+ */
+void
+poppler_text_span_free (PopplerTextSpan *poppler_text_span)
+{
+  /* Registered as the boxed-type free function: tolerate NULL instead of
+   * dereferencing it. */
+  if (G_UNLIKELY (poppler_text_span == NULL))
+    return;
+
+  g_free (poppler_text_span->text);
+  g_free (poppler_text_span->font_name);
+  g_slice_free (PopplerTextSpan, poppler_text_span);
+}
+
+/**
+ * poppler_text_span_is_fixed_width_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a fixed-width font.
+ *
+ * Return value: %TRUE if the span uses a fixed-width font, %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* Compare against 0 so the result is exactly TRUE or FALSE. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_FIXED_WIDTH) != 0;
+}
+
+/**
+ * poppler_text_span_is_serif_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a serif font.
+ *
+ * Return value: %TRUE if the span uses a serif font, %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* The raw mask value is 2; normalise it to TRUE/FALSE. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_SERIF) != 0;
+}
+
+/**
+ * poppler_text_span_is_bold_font:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Check whether a text span is meant to be rendered using a bold font.
+ *
+ * Return value: Whether the span uses bold font.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, FALSE);
+
+  /* The raw mask value is 8; normalise it to TRUE/FALSE. */
+  return (poppler_text_span->flags & POPPLER_TEXT_SPAN_BOLD) != 0;
+}
+
+/**
+ * poppler_text_span_get_color:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the color in which the text is to be rendered.
+ *
+ * Return value: (transfer none): A #PopplerColor stored inside
+ * @poppler_text_span; it must not be freed.
+ *
+ * Since: 0.26
+ */
+PopplerColor *
+poppler_text_span_get_color (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  return &poppler_text_span->color;
+}
+
+/**
+ * poppler_text_span_get_text:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the text contained in the span.
+ *
+ * Return value: (transfer none): A string.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_text (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  return poppler_text_span->text;
+}
+
+/**
+ * poppler_text_span_get_font_name:
+ * @poppler_text_span: a #PopplerTextSpan
+ *
+ * Obtains the name of the font in which the span is to be rendered.
+ *
+ * Return value: (transfer none): A string containing the font name, or
+ * %NULL if a font is not defined.
+ *
+ * Since: 0.26
+ */
+const gchar *
+poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span)
+{
+  g_return_val_if_fail (poppler_text_span != NULL, NULL);
+
+  return poppler_text_span->font_name;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h index a5ac04b..613f6e7 100644 --- a/glib/poppler-structure-element.h +++ b/glib/poppler-structure-element.h @@ -82,6 +82,8 @@ typedef enum { POPPLER_STRUCTURE_ELEMENT_FORM, } PopplerStructureElementKind; +typedef struct _PopplerTextSpan PopplerTextSpan; + GType poppler_structure_element_get_type (void) G_GNUC_CONST; PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element); @@ -97,6 +99,9 @@ gchar *poppler_structure_element_get_text gboolean recursive); gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element); gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element); +PopplerTextSpan **poppler_structure_element_get_text_spans (PopplerStructureElement *poppler_structure_element, + guint *n_text_spans); +void poppler_structure_element_free_text_spans (PopplerTextSpan **poppler_text_spans); #define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ()) GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST; @@ -107,6 +112,16 @@ PopplerStructureElement *poppler_structure_element_iter_get_element gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter); void poppler_structure_element_iter_free (PopplerStructureElementIter *iter); +PopplerTextSpan *poppler_text_span_copy (PopplerTextSpan *poppler_text_span); +void poppler_text_span_free (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_fixed_width_font (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_serif_font (PopplerTextSpan *poppler_text_span); +gboolean poppler_text_span_is_bold_font (PopplerTextSpan *poppler_text_span); +gboolean 
poppler_text_span_is_link (PopplerTextSpan *poppler_text_span); +PopplerColor *poppler_text_span_get_color (PopplerTextSpan *poppler_text_span); +const gchar *poppler_text_span_get_text (PopplerTextSpan *poppler_text_span); +const gchar *poppler_text_span_get_font_name (PopplerTextSpan *poppler_text_span); + G_END_DECLS #endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */ diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt index 5b62566..524c2d5 100644 --- a/glib/reference/poppler-sections.txt +++ b/glib/reference/poppler-sections.txt @@ -557,6 +557,7 @@ poppler_movie_get_type PopplerStructureElement PopplerStructureElementKind PopplerStructureElementIter +PopplerTextSpan poppler_structure_element_iter_new poppler_structure_element_iter_next poppler_structure_element_iter_copy @@ -575,6 +576,16 @@ poppler_structure_element_get_language poppler_structure_element_get_text poppler_structure_element_get_alt_text poppler_structure_element_get_actual_text +poppler_structure_element_get_text_spans +poppler_structure_element_free_text_spans +poppler_text_span_copy +poppler_text_span_free +poppler_text_span_is_fixed_width_font +poppler_text_span_is_serif_font +poppler_text_span_is_bold_font +poppler_text_span_get_color +poppler_text_span_get_text +poppler_text_span_get_font_name POPPLER_STRUCTURE_ELEMENT @@ -585,6 +596,7 @@ POPPLER_TYPE_STRUCTURE_ELEMENT_KIND poppler_structure_element_get_type +poppler_structure_element_kind_get_type poppler_structure_element_iter_get_type -- 1.8.4.2