Enhance PSTokenizer::getToken 1. Reserve space for terminating 0 by decrementing size in advance (rather than always having to account for it) 2. Utilize new method consumeChar() to indicate we've used the char returned from lookChar(). 3. Made placement of calls to consumeChar() consistent inside parsing code. Add new method PSTokenizer::consumeChar() This method is more efficient than getChar() for consuming character returned by lookChar(). Enhance PSTokenizer::getChar() Changed to reduce writes to charBuffer. *** PSTokenizer.h.~1.1.1.1.~ Thu Mar 3 11:46:01 2005 --- PSTokenizer.h Wed Dec 20 08:37:09 2006 *************** *** 29,34 **** --- 29,35 ---- private: int lookChar(); + void consumeChar(); int getChar(); int (*getCharFunc)(void *); *** PSTokenizer.cc.~1.1.1.1.~ Thu Mar 3 11:46:03 2005 --- PSTokenizer.cc Wed Dec 20 08:32:02 2006 *************** *** 55,61 **** int c; int i; ! // skip whitespace and comments comment = gFalse; while (1) { if ((c = getChar()) == EOF) { --- 55,61 ---- int c; int i; ! // skip leading whitespace and comments comment = gFalse; while (1) { if ((c = getChar()) == EOF) { *************** *** 74,89 **** } } // read a token i = 0; buf[i++] = c; if (c == '(') { backslash = gFalse; while ((c = lookChar()) != EOF) { ! if (i < size - 1) { buf[i++] = c; } - getChar(); if (c == '\\') { backslash = gTrue; } else if (!backslash && c == ')') { --- 74,93 ---- } } + // Reserve room for terminating '\0' + size--; + // read a token i = 0; buf[i++] = c; + if (c == '(') { backslash = gFalse; while ((c = lookChar()) != EOF) { ! consumeChar(); ! if (i < size) { buf[i++] = c; } if (c == '\\') { backslash = gTrue; } else if (!backslash && c == ')') { *************** *** 94,101 **** } } else if (c == '<') { while ((c = lookChar()) != EOF) { ! getChar(); ! if (i < size - 1) { buf[i++] = c; } if (c == '>') { --- 98,105 ---- } } else if (c == '<') { while ((c = lookChar()) != EOF) { ! consumeChar(); ! 
if (i < size) { buf[i++] = c; } if (c == '>') { *************** *** 104,116 **** } } else if (c != '[' && c != ']') { while ((c = lookChar()) != EOF && !specialChars[c]) { ! getChar(); ! if (i < size - 1) { buf[i++] = c; } } } buf[i] = '\0'; *length = i; return gTrue; --- 108,124 ---- } } else if (c != '[' && c != ']') { while ((c = lookChar()) != EOF && !specialChars[c]) { ! consumeChar(); ! if (i < size) { buf[i++] = c; } } } + + // Zero terminate token string buf[i] = '\0'; + + // Return length of token *length = i; return gTrue; *************** *** 123,135 **** return charBuf; } int PSTokenizer::getChar() { ! int c; ! if (charBuf < 0) { ! charBuf = (*getCharFunc)(data); } ! c = charBuf; ! charBuf = -1; return c; } --- 131,148 ---- return charBuf; } + void PSTokenizer::consumeChar() { + charBuf = -1; + } + int PSTokenizer::getChar() { ! int c = charBuf; ! if (c < 0) { ! c = (*getCharFunc)(data); ! } else { ! charBuf = -1; } ! return c; }
Can you please attach a diff made with the -ub parameters? It's the only thing my eyes understand :-D
--- PSTokenizer.cc.~1.1.1.1.~ 2005-03-03 11:46:03.000000000 -0800 +++ PSTokenizer.cc 2006-12-20 08:32:02.000000000 -0800 @@ -55,7 +55,7 @@ int c; int i; - // skip whitespace and comments + // skip leading whitespace and comments comment = gFalse; while (1) { if ((c = getChar()) == EOF) { @@ -74,16 +74,20 @@ } } + // Reserve room for terminating '\0' + size--; + // read a token i = 0; buf[i++] = c; + if (c == '(') { backslash = gFalse; while ((c = lookChar()) != EOF) { - if (i < size - 1) { + consumeChar(); + if (i < size) { buf[i++] = c; } - getChar(); if (c == '\\') { backslash = gTrue; } else if (!backslash && c == ')') { @@ -94,8 +98,8 @@ } } else if (c == '<') { while ((c = lookChar()) != EOF) { - getChar(); - if (i < size - 1) { + consumeChar(); + if (i < size) { buf[i++] = c; } if (c == '>') { @@ -104,13 +108,17 @@ } } else if (c != '[' && c != ']') { while ((c = lookChar()) != EOF && !specialChars[c]) { - getChar(); - if (i < size - 1) { + consumeChar(); + if (i < size) { buf[i++] = c; } } } + + // Zero terminate token string buf[i] = '\0'; + + // Return length of token *length = i; return gTrue; @@ -123,13 +131,18 @@ return charBuf; } +void PSTokenizer::consumeChar() { + charBuf = -1; +} + int PSTokenizer::getChar() { - int c; + int c = charBuf; - if (charBuf < 0) { - charBuf = (*getCharFunc)(data); - } - c = charBuf; + if (c < 0) { + c = (*getCharFunc)(data); + } else { charBuf = -1; + } + return c; } --- PSTokenizer.h.~1.1.1.1.~ 2005-03-03 11:46:01.000000000 -0800 +++ PSTokenizer.h 2006-12-20 08:37:09.000000000 -0800 @@ -29,6 +29,7 @@ private: int lookChar(); + void consumeChar(); int getChar(); int (*getCharFunc)(void *);
Patch committed, but only to trunk and not to the 0.5 branch: even though it seems (and probably is) safe to commit, 0.5 is only open for bugfixes.
Forgot to say thanks. Thanks :-)
Use of freedesktop.org services, including Bugzilla, is subject to our Code of Conduct. How we collect and use information is described in our Privacy Policy.