Bug 9405 - Enhancements to PSTokenizer performance
Summary: Enhancements to PSTokenizer performance
Status: RESOLVED FIXED
Alias: None
Product: poppler
Classification: Unclassified
Component: general (show other bugs)
Version: unspecified
Hardware: All All
Importance: high enhancement
Assignee: poppler-bugs
QA Contact:
URL:
Whiteboard:
Keywords:
Depends on:
Blocks:
 
Reported: 2006-12-20 08:59 UTC by Scott Turner
Modified: 2006-12-23 05:14 UTC (History)
0 users

See Also:
i915 platform:
i915 features:


Attachments

Description Scott Turner 2006-12-20 08:59:51 UTC
Enhance PSTokenizer::getToken 

1. Reserve space for terminating 0 by decrementing size in advance (rather than always having to 
account for it)
2. Utilize new method consumeChar() to indicate we've used the char returned from lookChar().
3. Make the placement of calls to consumeChar() consistent inside the parsing code.

Add new method PSTokenizer::consumeChar()
This method is more efficient than getChar() for consuming the character returned by lookChar().

Enhance PSTokenizer::getChar()
Changed to reduce writes to charBuf.

*** PSTokenizer.h.~1.1.1.1.~    Thu Mar  3 11:46:01 2005
--- PSTokenizer.h       Wed Dec 20 08:37:09 2006
***************
*** 29,34 ****
--- 29,35 ----
  private:
  
    int lookChar();
+   void consumeChar();
    int getChar();
  
    int (*getCharFunc)(void *);

*** PSTokenizer.cc.~1.1.1.1.~   Thu Mar  3 11:46:03 2005
--- PSTokenizer.cc      Wed Dec 20 08:32:02 2006
***************
*** 55,61 ****
    int c;
    int i;
  
!   // skip whitespace and comments
    comment = gFalse;
    while (1) {
      if ((c = getChar()) == EOF) {
--- 55,61 ----
    int c;
    int i;
  
!   // skip leading whitespace and comments
    comment = gFalse;
    while (1) {
      if ((c = getChar()) == EOF) {
***************
*** 74,89 ****
      }
    }
  
    // read a token
    i = 0;
    buf[i++] = c;
    if (c == '(') {
      backslash = gFalse;
      while ((c = lookChar()) != EOF) {
!       if (i < size - 1) {
        buf[i++] = c;
        }
-       getChar();
        if (c == '\\') {
        backslash = gTrue;
        } else if (!backslash && c == ')') {
--- 74,93 ----
      }
    }
  
+   // Reserve room for terminating '\0'
+   size--;
+ 
    // read a token
    i = 0;
    buf[i++] = c;
+ 
    if (c == '(') {
      backslash = gFalse;
      while ((c = lookChar()) != EOF) {
!       consumeChar();
!       if (i < size) {
        buf[i++] = c;
        }
        if (c == '\\') {
        backslash = gTrue;
        } else if (!backslash && c == ')') {
***************
*** 94,101 ****
      }
    } else if (c == '<') {
      while ((c = lookChar()) != EOF) {
!       getChar();
!       if (i < size - 1) {
        buf[i++] = c;
        }
        if (c == '>') {
--- 98,105 ----
      }
    } else if (c == '<') {
      while ((c = lookChar()) != EOF) {
!       consumeChar();
!       if (i < size) {
        buf[i++] = c;
        }
        if (c == '>') {
***************
*** 104,116 ****
      }
    } else if (c != '[' && c != ']') {
      while ((c = lookChar()) != EOF && !specialChars[c]) {
!       getChar();
!       if (i < size - 1) {
        buf[i++] = c;
        }
      }
    }
    buf[i] = '\0';
    *length = i;
  
    return gTrue;
--- 108,124 ----
      }
    } else if (c != '[' && c != ']') {
      while ((c = lookChar()) != EOF && !specialChars[c]) {
!       consumeChar();
!       if (i < size) {
        buf[i++] = c;
        }
      }
    }
+ 
+   // Zero terminate token string
    buf[i] = '\0';
+ 
+   // Return length of token
    *length = i;
  
    return gTrue;
***************
*** 123,135 ****
    return charBuf;
  }
  
  int PSTokenizer::getChar() {
!   int c;
  
!   if (charBuf < 0) {
!     charBuf = (*getCharFunc)(data);
    }
!   c = charBuf;
!   charBuf = -1;
    return c;
  }
--- 131,148 ----
    return charBuf;
  }
  
+ void PSTokenizer::consumeChar() {
+   charBuf = -1;
+ }
+ 
  int PSTokenizer::getChar() {
!   int c = charBuf;
  
!   if (c < 0) {
!     c = (*getCharFunc)(data);
!   } else {
!     charBuf = -1;
    }
! 
    return c;
  }
Comment 1 Albert Astals Cid 2006-12-20 11:00:37 UTC
Can you please attach a diff made with the -ub options? It's the only thing my eyes 
understand :-D
Comment 2 Scott Turner 2006-12-20 14:29:41 UTC
--- PSTokenizer.cc.~1.1.1.1.~   2005-03-03 11:46:03.000000000 -0800
+++ PSTokenizer.cc      2006-12-20 08:32:02.000000000 -0800
@@ -55,7 +55,7 @@
   int c;
   int i;
 
-  // skip whitespace and comments
+  // skip leading whitespace and comments
   comment = gFalse;
   while (1) {
     if ((c = getChar()) == EOF) {
@@ -74,16 +74,20 @@
     }
   }
 
+  // Reserve room for terminating '\0'
+  size--;
+
   // read a token
   i = 0;
   buf[i++] = c;
+
   if (c == '(') {
     backslash = gFalse;
     while ((c = lookChar()) != EOF) {
-      if (i < size - 1) {
+      consumeChar();
+      if (i < size) {
        buf[i++] = c;
       }
-      getChar();
       if (c == '\\') {
        backslash = gTrue;
       } else if (!backslash && c == ')') {
@@ -94,8 +98,8 @@
     }
   } else if (c == '<') {
     while ((c = lookChar()) != EOF) {
-      getChar();
-      if (i < size - 1) {
+      consumeChar();
+      if (i < size) {
        buf[i++] = c;
       }
       if (c == '>') {
@@ -104,13 +108,17 @@
     }
   } else if (c != '[' && c != ']') {
     while ((c = lookChar()) != EOF && !specialChars[c]) {
-      getChar();
-      if (i < size - 1) {
+      consumeChar();
+      if (i < size) {
        buf[i++] = c;
       }
     }
   }
+
+  // Zero terminate token string
   buf[i] = '\0';
+
+  // Return length of token
   *length = i;
 
   return gTrue;
@@ -123,13 +131,18 @@
   return charBuf;
 }
 
+void PSTokenizer::consumeChar() {
+  charBuf = -1;
+}
+
 int PSTokenizer::getChar() {
-  int c;
+  int c = charBuf;
 
-  if (charBuf < 0) {
-    charBuf = (*getCharFunc)(data);
-  }
-  c = charBuf;
+  if (c < 0) {
+    c = (*getCharFunc)(data);
+  } else {
   charBuf = -1;
+  }
+
   return c;
 }

--- PSTokenizer.h.~1.1.1.1.~    2005-03-03 11:46:01.000000000 -0800
+++ PSTokenizer.h       2006-12-20 08:37:09.000000000 -0800
@@ -29,6 +29,7 @@
 private:
 
   int lookChar();
+  void consumeChar();
   int getChar();
 
   int (*getCharFunc)(void *);
Comment 3 Albert Astals Cid 2006-12-23 05:13:16 UTC
Patch committed, but only to trunk, not to the 0.5 branch: even though it seems 
(and probably is) safe to commit, 0.5 is only open for bugfixes.
Comment 4 Albert Astals Cid 2006-12-23 05:14:21 UTC
Forgot to say thanks.

Thanks :-)


Use of freedesktop.org services, including Bugzilla, is subject to our Code of Conduct. How we collect and use information is described in our Privacy Policy.