From 0d35f3759707c689c6474a8888c288752556c92b Mon Sep 17 00:00:00 2001
From: danigm
Date: Tue, 25 Jan 2011 10:28:12 +0100
Subject: [PATCH] [glib] added get_text_attributes

---
 glib/poppler-page.cc                |  120 +++++++++++++++++++++++++++++++++++
 glib/poppler-page.h                 |    3 +
 glib/reference/poppler-sections.txt |    1 +
 3 files changed, 124 insertions(+), 0 deletions(-)

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 064e4af..d0fc218 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -2203,3 +2203,123 @@ poppler_page_get_text_layout (PopplerPage *page,
 
   return TRUE;
 }
+
+/**
+ * poppler_page_get_text_attributes:
+ * @page: A #PopplerPage
+ * @attributes: (out) (array length=n_attributes) (transfer full): return location for an array of #PangoAttrList
+ * @n_attributes: (out): length of returned array
+ *
+ * Obtains the attributes of the text as an array of #PangoAttrList.
+ * Each array element must be freed with pango_attr_list_unref () when done,
+ * and the array itself must be freed with g_free ().
+ *
+ * The array holds one element per character of every word, followed by one
+ * empty #PangoAttrList for the space or line break after that word, so the
+ * position in the array represents an offset in the text returned by
+ * poppler_page_get_text ().
+ *
+ * Every character of a word carries the same attributes: font family (with
+ * any font subset tag such as "ABCDEF+" removed; omitted when the font has
+ * no name), font size in Pango units, underline and foreground color.
+ *
+ * If the page contains no text, @attributes is set to %NULL and
+ * @n_attributes to 0.
+ *
+ * Return value: %TRUE if the page contains text, %FALSE otherwise
+ *
+ * Since: 0.16
+ **/
+gboolean
+poppler_page_get_text_attributes (PopplerPage      *page,
+                                  PangoAttrList  ***attributes,
+                                  guint            *n_attributes)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  guint total = 0;
+  guint offset = 0;
+  gint i, j;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE);
+  g_return_val_if_fail (attributes != NULL, FALSE);
+  g_return_val_if_fail (n_attributes != NULL, FALSE);
+
+  // Leave the out parameters well defined when the page has no text
+  *attributes = NULL;
+  *n_attributes = 0;
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  if (wordlist->getLength () <= 0)
+    {
+      delete wordlist;
+      return FALSE;
+    }
+
+  // Array size: one slot per character plus one separator slot per word
+  for (i = 0; i < wordlist->getLength (); i++)
+    total += wordlist->get (i)->getLength () + 1;
+
+  *attributes = g_new (PangoAttrList *, total);
+  *n_attributes = total;
+
+  // Calculating each word attributes
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      TextWord *word = wordlist->get (i);
+      GooString *font_name = word->getFontName ();
+      const gchar *family = font_name ? font_name->getCString () : NULL;
+      PangoAttribute *word_attrs[4];
+      PangoUnderline underline;
+      guint n_word_attrs = 0;
+      guint k;
+      gdouble r, g, b;
+
+      // Skip a font subset tag: capital letters followed by a '+' sign
+      if (family)
+        {
+          gint tag_len = 0;
+
+          while (family[tag_len] >= 'A' && family[tag_len] <= 'Z')
+            tag_len++;
+          if (tag_len > 0 && family[tag_len] == '+')
+            family += tag_len + 1;
+        }
+
+      // Word attributes; the family is left out when the font has no name
+      if (family)
+        word_attrs[n_word_attrs++] = pango_attr_family_new (family);
+
+      // pango_attr_size_new () expects Pango units, not points
+      word_attrs[n_word_attrs++] = pango_attr_size_new ((int) (word->getFontSize () * PANGO_SCALE));
+
+      underline = word->isUnderlined () ? PANGO_UNDERLINE_SINGLE : PANGO_UNDERLINE_NONE;
+      word_attrs[n_word_attrs++] = pango_attr_underline_new (underline);
+
+      word->getColor (&r, &g, &b);
+      word_attrs[n_word_attrs++] = pango_attr_foreground_new ((int) (r * 65535), (int) (g * 65535), (int) (b * 65535));
+
+      // Each char of the word has the same attributes
+      for (j = 0; j < word->getLength (); j++)
+        {
+          PangoAttrList *attr_list = pango_attr_list_new ();
+
+          // The list takes ownership of what is inserted, hence the copies
+          for (k = 0; k < n_word_attrs; k++)
+            pango_attr_list_insert (attr_list, pango_attribute_copy (word_attrs[k]));
+          (*attributes)[offset++] = attr_list;
+        }
+
+      for (k = 0; k < n_word_attrs; k++)
+        pango_attribute_destroy (word_attrs[k]);
+
+      // Space or line break after the word: empty attribute list
+      (*attributes)[offset++] = pango_attr_list_new ();
+    }
+
+  delete wordlist;
+
+  return TRUE;
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index d40c0ee..f662b15 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -128,6 +128,9 @@ void                   poppler_page_get_crop_box         (PopplerPage        *page,
 gboolean               poppler_page_get_text_layout      (PopplerPage        *page,
                                                           PopplerRectangle  **rectangles,
                                                           guint              *n_rectangles);
+gboolean               poppler_page_get_text_attributes  (PopplerPage        *page,
+                                                          PangoAttrList    ***attributes,
+                                                          guint              *n_attributes);
 
 /* A rectangle on a page, with coordinates in PDF points. */
 #define POPPLER_TYPE_RECTANGLE             (poppler_rectangle_get_type ())
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index 6ad9ee2..14521e5 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -38,6 +38,7 @@ poppler_page_get_selected_text
 poppler_page_find_text
 poppler_page_get_text
 poppler_page_get_text_layout
+poppler_page_get_text_attributes
 poppler_page_get_link_mapping
 poppler_page_free_link_mapping
 poppler_page_get_image_mapping
-- 
1.7.4.rc2.258.g844d7