From 47049764458f5c61b41d18b2c3756ee31455006b Mon Sep 17 00:00:00 2001
From: danigm
Date: Tue, 25 Jan 2011 10:28:12 +0100
Subject: [PATCH] [glib] Added get text attributes

---
 glib/demo/text.c                    |   54 +++++++++-
 glib/poppler-page.cc                |  200 +++++++++++++++++++++++++++++++++++
 glib/poppler-page.h                 |   32 ++++++
 glib/poppler.h                      |    1 +
 glib/reference/poppler-sections.txt |    2 +
 5 files changed, 287 insertions(+), 2 deletions(-)

diff --git a/glib/demo/text.c b/glib/demo/text.c
index 54d140b..b1716b7 100644
--- a/glib/demo/text.c
+++ b/glib/demo/text.c
@@ -37,6 +37,7 @@ typedef struct {
 	GtkTextBuffer   *buffer;
 	GtkWidget       *treeview;
 	GtkListStore    *model;
+	GtkLabel        *textinfo;
 	gint             page;
 } PgdTextDemo;
 
@@ -152,6 +153,11 @@ pgd_text_selection_changed (GtkTreeSelection *treeselection,
 	GtkTreeModel *model;
 	GtkTreeIter   iter;
 
+	PopplerPage *page;
+	GList *text_info_list, *elem;
+	PopplerTextInfo *text_info = NULL;
+	gchar text_info_str[500];
+
 	if (gtk_tree_selection_get_selected (treeselection, &model, &iter)) {
 		gpointer offset;
 		GtkTextIter begin_iter, end_iter;
@@ -164,6 +170,41 @@ pgd_text_selection_changed (GtkTreeSelection *treeselection,
 		end_iter = begin_iter;
 		gtk_text_iter_forward_char (&end_iter);
 		gtk_text_buffer_select_range (demo->buffer, &begin_iter, &end_iter);
+
+		/* Show the attributes of the run that contains the selected offset */
+		page = poppler_document_get_page (demo->doc, demo->page);
+		if (!page)
+			return;
+
+		text_info_list = poppler_page_get_text_attributes (page);
+		g_object_unref (page);
+
+		for (elem = text_info_list; elem; elem = elem->next) {
+			text_info = elem->data;
+			if (GPOINTER_TO_INT (offset) >= text_info->start &&
+			    GPOINTER_TO_INT (offset) <= text_info->end) {
+				break;
+			} else {
+				text_info = NULL;
+			}
+		}
+
+		if (text_info) {
+			g_snprintf (text_info_str, sizeof (text_info_str), "Font: %s\n"
+				    "Size: %d\n"
+				    "Underline: %s\n"
+				    "Color RGB: %d, %d, %d\n",
+				    text_info->font,
+				    text_info->size,
+				    text_info->underline ? "True" : "False",
+				    text_info->color->red,
+				    text_info->color->green,
+				    text_info->color->blue);
+			gtk_label_set_text (demo->textinfo, text_info_str);
+		}
+
+		/* The list is owned by the caller: free it on every path */
+		poppler_page_free_text_attributes (text_info_list);
 	}
 }
 
@@ -232,7 +273,8 @@ pgd_text_create_widget (PopplerDocument *document)
 {
 	PgdTextDemo      *demo;
 	GtkWidget        *label;
-	GtkWidget        *vbox;
+	GtkWidget        *vbox, *vbox2;
+	GtkWidget        *textinfo;
 	GtkWidget        *hbox, *page_selector;
 	GtkWidget        *button;
 	GtkWidget        *swindow, *textview, *treeview;
@@ -249,6 +291,8 @@ pgd_text_create_widget (PopplerDocument *document)
 	n_pages = poppler_document_get_n_pages (document);
 
 	vbox = gtk_vbox_new (FALSE, 12);
+	vbox2 = gtk_vbox_new (FALSE, 12);
+	textinfo = gtk_label_new ("TextInfo");
 
 	hbox = gtk_hbox_new (FALSE, 6);
 
@@ -341,9 +385,15 @@ pgd_text_create_widget (PopplerDocument *document)
 	gtk_container_add (GTK_CONTAINER (swindow), treeview);
 	gtk_widget_show (treeview);
 
-	gtk_paned_add1 (GTK_PANED (hpaned), swindow);
+	gtk_container_add (GTK_CONTAINER (vbox2), swindow);
+	gtk_box_pack_start (GTK_BOX (vbox2), textinfo, FALSE, FALSE, 12);
+	gtk_paned_add1 (GTK_PANED (hpaned), vbox2);
+	gtk_widget_show (vbox2);
+	gtk_widget_show (textinfo);
 	gtk_widget_show (swindow);
 
+	demo->textinfo = GTK_LABEL (textinfo);
+
 	swindow = gtk_scrolled_window_new (NULL, NULL);
 	gtk_scrolled_window_set_policy (GTK_SCROLLED_WINDOW (swindow),
 					GTK_POLICY_AUTOMATIC,
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index d1f1bcf..d00f4ba 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1479,6 +1479,120 @@ poppler_rectangle_free (PopplerRectangle *rectangle)
   g_slice_free (PopplerRectangle, rectangle);
 }
 
+/* PopplerTextInfo type */
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerTextInfo, poppler_text_info,
+			   poppler_text_info_copy,
+			   poppler_text_info_free)
+
+/**
+ * poppler_text_info_new:
+ *
+ * Creates a new #PopplerTextInfo
+ *
+ * Returns: a new #PopplerTextInfo, use
+ * poppler_text_info_free() to free it
+ */
+PopplerTextInfo *
+poppler_text_info_new (void)
+{
+  PopplerTextInfo *tinfo = g_slice_new0 (PopplerTextInfo);
+  tinfo->color = poppler_color_new ();
+  return tinfo;
+}
+
+/*
+ * Allocates a new PopplerTextInfo with word attributes
+ */
+static PopplerTextInfo *
+poppler_text_info_new_from_word (TextWord *word)
+{
+  gchar *fontname;
+  gboolean subset = FALSE;
+  gdouble r, g, b;
+
+  gint tsize;
+  gboolean tunderline;
+  gint tred, tgreen, tblue;
+  GooString *font = NULL;
+
+  PopplerTextInfo *tinfo = NULL;
+
+  font = word->getFontName();
+  fontname = font ? font->getCString() : (gchar*)"NO FONTNAME";
+
+  // check for a font subset name: capital letters followed by a '+' sign
+  if (fontname) {
+    gchar *tmpfontname = fontname;
+    gint i;
+    for (i=0; *(tmpfontname + i); i++) {
+      if (*(tmpfontname + i) < 'A' || *(tmpfontname + i) > 'Z') {
+        break;
+      }
+    }
+    subset = i > 0 && *(tmpfontname + i) == '+';
+  }
+
+  if (subset) {
+    while (*fontname && *fontname != '+') fontname++;
+    fontname++;
+  }
+
+  // word attributes
+  tsize = (int)(word->getFontSize());
+  tunderline = word->isUnderlined();
+  word->getColor (&r, &g, &b);
+  tred = (int)(r * 65535);
+  tgreen = (int)(g * 65535);
+  tblue = (int)(b * 65535);
+
+  // store them in a newly allocated text info
+  tinfo = poppler_text_info_new ();
+  tinfo->font = g_strdup (fontname);
+  tinfo->size = tsize;
+  tinfo->underline = tunderline;
+  tinfo->color->red = tred;
+  tinfo->color->green = tgreen;
+  tinfo->color->blue = tblue;
+
+  return tinfo;
+}
+
+/**
+ * poppler_text_info_copy:
+ * @textinfo: a #PopplerTextInfo to copy
+ *
+ * Creates a copy of @textinfo
+ *
+ * Returns: a new allocated copy of @textinfo
+ */
+PopplerTextInfo *
+poppler_text_info_copy (PopplerTextInfo *textinfo)
+{
+  PopplerTextInfo *tinfo = NULL;
+  g_return_val_if_fail (textinfo != NULL, NULL);
+
+  tinfo = g_slice_dup (PopplerTextInfo, textinfo);
+  tinfo->color = poppler_color_copy (textinfo->color);
+  tinfo->font = g_strdup (textinfo->font);
+  return tinfo;
+}
+
+/**
+ * poppler_text_info_free:
+ * @textinfo: a #PopplerTextInfo
+ *
+ * Frees the given #PopplerTextInfo
+ */
+void
+poppler_text_info_free (PopplerTextInfo *textinfo)
+{
+  g_return_if_fail (textinfo != NULL);
+  poppler_color_free (textinfo->color);
+  g_free (textinfo->font);
+  g_slice_free (PopplerTextInfo, textinfo);
+}
+
 /* PopplerColor type */
 
 POPPLER_DEFINE_BOXED_TYPE (PopplerColor, poppler_color, poppler_color_copy, poppler_color_free)
@@ -1904,3 +2018,89 @@ poppler_page_get_text_layout (PopplerPage *page,
 
   return TRUE;
 }
+
+/**
+ * poppler_page_free_text_attributes:
+ * @list: A list of #PopplerTextInfo<!-- -->s
+ *
+ * Frees a list of #PopplerTextInfo<!-- -->s allocated by
+ * poppler_page_get_text_attributes().
+ **/
+void
+poppler_page_free_text_attributes (GList *list)
+{
+  g_list_free_full (list, (GDestroyNotify)poppler_text_info_free);
+}
+
+/**
+ * poppler_page_get_text_attributes:
+ * @page: A #PopplerPage
+ *
+ * Obtains the attributes of the text as a #GList of #PopplerTextInfo.
+ * Each element describes a run of consecutive words that share the same
+ * font, size, underline state and color. Its start and end fields are
+ * the indexes delimiting that run; these indexes are the same as the
+ * ones returned by poppler_page_get_text().
+ *
+ * This list must be freed with poppler_page_free_text_attributes () when
+ * done.
+ *
+ * Return value: (element-type PopplerTextInfo) (transfer full): A #GList of #PopplerTextInfo
+ *
+ * Since: 0.18
+ **/
+GList *
+poppler_page_get_text_attributes (PopplerPage *page)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  TextWord *word;
+  PopplerTextInfo *textinfo = NULL, *textinfo2 = NULL;
+  gint i, offset = 0;
+  GList *attributes = NULL;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL);
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  if (wordlist->getLength () <= 0)
+    {
+      delete wordlist;
+      return NULL;
+    }
+
+  // Merge consecutive words with identical attributes into a single run
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+
+      // each char of the word has the same attributes
+      textinfo2 = poppler_text_info_new_from_word (word);
+      if (!textinfo ||
+          strcmp (textinfo2->font, textinfo->font) ||
+          textinfo2->size != textinfo->size ||
+          textinfo2->underline != textinfo->underline ||
+          textinfo2->color->red != textinfo->color->red ||
+          textinfo2->color->green != textinfo->color->green ||
+          textinfo2->color->blue != textinfo->color->blue)
+        {
+          // the previous run stops on the separator preceding this word
+          if (textinfo)
+            textinfo->end -= 1;
+          textinfo = textinfo2;
+          textinfo->start = offset;
+          // prepend is O(1); the list is reversed once before returning
+          attributes = g_list_prepend (attributes, textinfo);
+        } else {
+          poppler_text_info_free (textinfo2);
+        }
+      // the offset skips one extra position per word for its separator
+      offset += word->getLength () + 1;
+      textinfo->end = offset;
+    }
+
+  delete wordlist;
+
+  return g_list_reverse (attributes);
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 03e22ec..d5a28c5 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -94,6 +94,8 @@ void                   poppler_page_get_crop_box         (PopplerPage        *page,
 gboolean               poppler_page_get_text_layout      (PopplerPage        *page,
                                                           PopplerRectangle  **rectangles,
                                                           guint              *n_rectangles);
+GList                 *poppler_page_get_text_attributes  (PopplerPage        *page);
+void                   poppler_page_free_text_attributes (GList              *list);
 
 /* A rectangle on a page, with coordinates in PDF points. */
 #define POPPLER_TYPE_RECTANGLE             (poppler_rectangle_get_type ())
@@ -120,6 +122,36 @@ PopplerRectangle *poppler_rectangle_new      (void);
 PopplerRectangle *poppler_rectangle_copy     (PopplerRectangle *rectangle);
 void              poppler_rectangle_free     (PopplerRectangle *rectangle);
 
+/* The attributes of a run of text. */
+#define POPPLER_TYPE_TEXT_INFO             (poppler_text_info_get_type ())
+/**
+ * PopplerTextInfo:
+ * @font: word font name
+ * @size: word font size
+ * @underline: if word is underlined
+ * @color: a #PopplerColor, the word foreground color
+ * @start: start position for this text info
+ * @end: end position for this text info
+ *
+ * A #PopplerTextInfo is used to describe
+ * the text attributes of a word
+ */
+struct _PopplerTextInfo
+{
+  gchar *font;
+  gint size;
+  gboolean underline;
+  PopplerColor *color;
+
+  gint start;
+  gint end;
+};
+
+GType             poppler_text_info_get_type (void) G_GNUC_CONST;
+PopplerTextInfo  *poppler_text_info_new      (void);
+PopplerTextInfo  *poppler_text_info_copy     (PopplerTextInfo *textinfo);
+void              poppler_text_info_free     (PopplerTextInfo *textinfo);
+
 /* A color in RGB */
 #define POPPLER_TYPE_COLOR                 (poppler_color_get_type ())
 
diff --git a/glib/poppler.h b/glib/poppler.h
index 008f5d8..b5574d2 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -158,6 +158,7 @@ typedef struct _PopplerIndexIter           PopplerIndexIter;
 typedef struct _PopplerFontsIter           PopplerFontsIter;
 typedef struct _PopplerLayersIter          PopplerLayersIter;
 typedef struct _PopplerRectangle           PopplerRectangle;
+typedef struct _PopplerTextInfo            PopplerTextInfo;
 typedef struct _PopplerColor               PopplerColor;
 typedef struct _PopplerLinkMapping         PopplerLinkMapping;
 typedef struct _PopplerPageTransition      PopplerPageTransition;
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index 9495052..dc170b6 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -38,6 +38,8 @@ poppler_page_get_selected_text
 poppler_page_find_text
 poppler_page_get_text
 poppler_page_get_text_layout
+poppler_page_get_text_attributes
+poppler_page_free_text_attributes
 poppler_page_get_link_mapping
 poppler_page_free_link_mapping
 poppler_page_get_image_mapping
-- 
1.7.6