From 8b70329855898e8053c62c8be72ffab3b08402b5 Mon Sep 17 00:00:00 2001 From: Adrian Perez de Castro Date: Tue, 4 Feb 2014 19:26:53 +0200 Subject: [PATCH] Allow properly identifying grouping elements Grouping elements in the Tagged-PDF structure should be identifiable as such (see section 14.8.4.2 "Grouping Elements" of the PDF standard). Those were previously reported as inline elements, which is not quite correct. This patch introduces a new StructElement::isGrouping() method which correctly reports grouping elements as such. --- poppler/StructElement.cc | 31 +++++++++++++++++++------------ poppler/StructElement.h | 1 + 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/poppler/StructElement.cc b/poppler/StructElement.cc index 6392433..1c5ba28 100644 --- a/poppler/StructElement.cc +++ b/poppler/StructElement.cc @@ -506,6 +506,7 @@ static GBool ownerHasMorePriority(Attribute::Owner a, Attribute::Owner b) enum ElementType { elementTypeUndefined, + elementTypeGrouping, elementTypeInline, elementTypeBlock, }; @@ -516,16 +517,16 @@ static const struct TypeMapEntry { ElementType elementType; const AttributeMapEntry **attributes; } typeMap[] = { - { StructElement::Document, "Document", elementTypeInline, attributeMapShared }, - { StructElement::Part, "Part", elementTypeInline, attributeMapShared }, - { StructElement::Art, "Art", elementTypeInline, attributeMapColumns }, - { StructElement::Sect, "Sect", elementTypeInline, attributeMapColumns }, - { StructElement::Div, "Div", elementTypeInline, attributeMapColumns }, - { StructElement::BlockQuote, "BlockQuote", elementTypeInline, attributeMapInline }, - { StructElement::Caption, "Caption", elementTypeInline, attributeMapInline }, - { StructElement::NonStruct, "NonStruct", elementTypeInline, attributeMapInline }, - { StructElement::Index, "Index", elementTypeInline, attributeMapInline }, - { StructElement::Private, "Private", elementTypeInline, attributeMapInline }, + { StructElement::Document, "Document", 
elementTypeGrouping, attributeMapShared }, + { StructElement::Part, "Part", elementTypeGrouping, attributeMapShared }, + { StructElement::Art, "Art", elementTypeGrouping, attributeMapColumns }, + { StructElement::Sect, "Sect", elementTypeGrouping, attributeMapColumns }, + { StructElement::Div, "Div", elementTypeGrouping, attributeMapColumns }, + { StructElement::BlockQuote, "BlockQuote", elementTypeGrouping, attributeMapInline }, + { StructElement::Caption, "Caption", elementTypeGrouping, attributeMapInline }, + { StructElement::NonStruct, "NonStruct", elementTypeGrouping, attributeMapInline }, + { StructElement::Index, "Index", elementTypeGrouping, attributeMapInline }, + { StructElement::Private, "Private", elementTypeGrouping, attributeMapInline }, { StructElement::Span, "Span", elementTypeInline, attributeMapInline }, { StructElement::Quote, "Quote", elementTypeInline, attributeMapInline }, { StructElement::Note, "Note", elementTypeInline, attributeMapInline }, @@ -563,8 +564,8 @@ static const struct TypeMapEntry { { StructElement::Figure, "Figure", elementTypeUndefined, attributeMapIllustration }, { StructElement::Formula, "Formula", elementTypeUndefined, attributeMapIllustration }, { StructElement::Form, "Form", elementTypeUndefined, attributeMapIllustration }, - { StructElement::TOC, "TOC", elementTypeUndefined, attributeMapShared }, - { StructElement::TOCI, "TOCI", elementTypeUndefined, attributeMapShared }, + { StructElement::TOC, "TOC", elementTypeGrouping, attributeMapShared }, + { StructElement::TOCI, "TOCI", elementTypeGrouping, attributeMapShared }, }; @@ -913,6 +914,12 @@ GBool StructElement::isInline() const return entry ? (entry->elementType == elementTypeInline) : gFalse; } +GBool StructElement::isGrouping() const +{ + const TypeMapEntry *entry = getTypeMapEntry(type); + return entry ? 
(entry->elementType == elementTypeGrouping) : gFalse; +} + GBool StructElement::hasPageRef() const { return pageRef.isRef() || (parent && parent->hasPageRef()); diff --git a/poppler/StructElement.h b/poppler/StructElement.h index b9eef8a..7de0082 100644 --- a/poppler/StructElement.h +++ b/poppler/StructElement.h @@ -153,6 +153,7 @@ public: GBool isOk() const { return type != Unknown; } GBool isBlock() const; GBool isInline() const; + GBool isGrouping() const; inline GBool isContent() const { return (type == MCID) || isObjectRef(); } inline GBool isObjectRef() const { return (type == OBJR && c->ref.num != -1 && c->ref.gen != -1); } -- 1.8.5.3