From 850e7563b4a50c838749bc728b99ba587e1ebe72 Mon Sep 17 00:00:00 2001
From: Daniel Garcia
Date: Mon, 7 Jun 2010 13:46:03 +0200
Subject: [PATCH] poppler page get text mapping in poppler-glib

---
NOTE(review): line breaks and indentation were restored from a
whitespace-collapsed copy of this patch; context lines may differ from the
tree in whitespace only (apply with `git apply --ignore-whitespace`).

 glib/poppler-page.cc |  105 +++++++++++++++++++++++++++++++++++++++++++++++++-
 glib/poppler-page.h  |   15 +++++++
 glib/poppler.h       |    1 +
 3 files changed, 119 insertions(+), 2 deletions(-)

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index 39645bd..256872f 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -1743,14 +1743,115 @@ poppler_annot_mapping_free (PopplerAnnotMapping *mapping)
   g_free (mapping);
 }
 
-void 
+void
 poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect)
 {
   PDFRectangle* cropBox = page->page->getCropBox ();
-  
+
   rect->x1 = cropBox->x1;
   rect->x2 = cropBox->x2;
   rect->y1 = cropBox->y1;
   rect->y2 = cropBox->y2;
 }
 
+/* PopplerTextMapping boxed type, copied and freed with the functions below */
+POPPLER_DEFINE_BOXED_TYPE (PopplerTextMapping, poppler_text_mapping,
+                           poppler_text_mapping_copy,
+                           poppler_text_mapping_free)
+
+PopplerTextMapping *
+poppler_text_mapping_new (void)
+{
+  return (PopplerTextMapping *) g_new0 (PopplerTextMapping, 1); /* zero-filled */
+}
+
+PopplerTextMapping *
+poppler_text_mapping_copy (PopplerTextMapping *mapping)
+{
+  PopplerTextMapping *new_mapping;
+
+  new_mapping = poppler_text_mapping_new ();
+  *new_mapping = *mapping; /* member-wise struct copy; @mapping must not be NULL */
+
+  return new_mapping;
+}
+
+void
+poppler_text_mapping_free (PopplerTextMapping *mapping)
+{
+  g_free (mapping);
+}
+
+/**
+ * poppler_page_get_text_mapping:
+ * @page: A #PopplerPage
+ *
+ * Returns a list of #PopplerTextMapping items for the text of @page.
+ * This list must be freed with poppler_page_free_text_mapping() when done.
+ *
+ * Return value: A #GList of #PopplerTextMapping, %NULL if no words were found
+ **/
+GList *
+poppler_page_get_text_mapping (PopplerPage *page)
+{
+  TextPage *text;
+  TextWordList *wordlist;
+  TextWord *word;
+  GList *mapping_list = NULL;
+  PopplerTextMapping *mapping;
+  int i, j, offset = 0;
+
+  g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL);
+
+  text = poppler_page_get_text_page (page);
+  wordlist = text->makeWordList (gFalse);
+
+  for (i = 0; i < wordlist->getLength (); i++)
+    {
+      word = wordlist->get (i);
+      /* One mapping per character of the word. */
+      for (j = 0; j < word->getLength (); j++)
+        {
+          mapping = poppler_text_mapping_new ();
+          mapping->offset = offset++;
+          word->getCharBBox (j,
+                             &(mapping->area.x1),
+                             &(mapping->area.y1),
+                             &(mapping->area.x2),
+                             &(mapping->area.y2));
+          mapping_list = g_list_prepend (mapping_list, mapping);
+        }
+
+      /* One zero-size mapping for the space or line break after the word. */
+      mapping = poppler_text_mapping_new ();
+      mapping->offset = offset++;
+      word->getBBox (&(mapping->area.x1),
+                     &(mapping->area.y1),
+                     &(mapping->area.x2),
+                     &(mapping->area.y2));
+      mapping->area.x1 = mapping->area.x2;
+      mapping->area.y1 = mapping->area.y2;
+      mapping_list = g_list_prepend (mapping_list, mapping);
+    }
+
+  delete wordlist; /* makeWordList() returns a new list owned by the caller */
+
+  return g_list_reverse (mapping_list);
+}
+
+/**
+ * poppler_page_free_text_mapping:
+ * @list: A list of #PopplerTextMappings
+ *
+ * Frees a list of #PopplerTextMappings allocated by
+ * poppler_page_get_text_mapping().
+ **/
+void
+poppler_page_free_text_mapping (GList *list)
+{
+  if (!list)
+    return;
+  /* Free every PopplerTextMapping first, then the list nodes themselves. */
+  g_list_foreach (list, (GFunc)poppler_text_mapping_free, NULL);
+  g_list_free (list);
+}
diff --git a/glib/poppler-page.h b/glib/poppler-page.h
index 20dc20f..288276f 100644
--- a/glib/poppler-page.h
+++ b/glib/poppler-page.h
@@ -114,6 +114,8 @@ GList *poppler_page_get_annot_mapping (PopplerPage *pa
 void poppler_page_free_annot_mapping (GList *list);
 void poppler_page_get_crop_box (PopplerPage *page,
                                 PopplerRectangle *rect);
+GList *poppler_page_get_text_mapping (PopplerPage *page);
+void poppler_page_free_text_mapping (GList *list);
 
 
 /* A rectangle on a page, with coordinates in PDF points. */
@@ -215,6 +217,19 @@ PopplerAnnotMapping *poppler_annot_mapping_new (void);
 PopplerAnnotMapping *poppler_annot_mapping_copy (PopplerAnnotMapping *mapping);
 void poppler_annot_mapping_free (PopplerAnnotMapping *mapping);
 
+/* Mapping between areas on the current page and text offsets */
+#define POPPLER_TYPE_TEXT_MAPPING (poppler_text_mapping_get_type ())
+struct _PopplerTextMapping
+{
+  PopplerRectangle area; /* area on the page covered by the text item */
+  gint offset; /* sequential index of the item, counted from 0 */
+};
+
+GType poppler_text_mapping_get_type (void) G_GNUC_CONST;
+PopplerTextMapping *poppler_text_mapping_new (void);
+PopplerTextMapping *poppler_text_mapping_copy (PopplerTextMapping *mapping);
+void poppler_text_mapping_free (PopplerTextMapping *mapping);
+
 G_END_DECLS
 
 #endif /* __POPPLER_PAGE_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 01bcafa..b9cd7dd 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -90,6 +90,7 @@ typedef struct _PopplerPageTransition PopplerPageTransition;
 typedef struct _PopplerImageMapping PopplerImageMapping;
 typedef struct _PopplerFormFieldMapping PopplerFormFieldMapping;
 typedef struct _PopplerAnnotMapping PopplerAnnotMapping;
+typedef struct _PopplerTextMapping PopplerTextMapping;
 typedef struct _PopplerPage PopplerPage;
 typedef struct _PopplerFontInfo PopplerFontInfo;
 typedef struct _PopplerLayer PopplerLayer;
-- 
1.7.1.226.g770c5