Index: goo/FastAlloc.cc
===================================================================
--- goo/FastAlloc.cc	(revision 0)
+++ goo/FastAlloc.cc	(revision 0)
@@ -0,0 +1,300 @@
+/* Written by: Krzysztof Kowalczyk (http://blog.kowalczyk.info)
+   License: Public Domain (http://creativecommons.org/licenses/publicdomain/)
+   Take all the code you want, I'll just write more.
+*/
+
+#include "FastAlloc.h"
+#include "FastFixedAllocator.h"
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "gtypes.h"
+
+//#define DO_MEM_STATS 1
+#ifdef DO_MEM_STATS
+
+#define ALLOCS_AT_A_TIME 1024
+
+typedef struct {
+    size_t size;
+    void * p;
+} AllocRecord;
+
+static AllocRecord gAllocs[ALLOCS_AT_A_TIME];
+static int gAllocsCount = 0;
+
+#define ITS_A_FREE (size_t)-1
+
+#define MAX_BUF_FILENAME_SIZE 256
+#define LOG_BUF_SIZE 2048
+
+static char gLogFileName[MAX_BUF_FILENAME_SIZE] = {0};
+static char gLogBuf[LOG_BUF_SIZE];
+static const char * gLogFileNamePattern = "fast_alloc_%d.txt";
+static int gLogBufUsed = 0;
+
+static GBool FileExists(const char *fileName)
+{
+    struct stat buf;
+    int res;
+
+    res = stat(fileName, &buf);
+    if (0 != res)
+        return gFalse;
+    return gTrue;
+}
+
+void LogSetFileNamePattern(const char *fileNamePattern)
+{
+    assert(fileNamePattern);
+    gLogFileNamePattern = fileNamePattern;
+}
+
+static GBool LogGenUniqueFile(void)
+{
+    static GBool tried = gFalse;
+    int n = 0;
+
+    assert(gLogFileNamePattern);
+    if (gLogFileName[0])
+        return gTrue; /* already generated */
+    if (tried)
+        return gFalse;
+    tried = gTrue;
+    while (n < 999) {
+        sprintf(gLogFileName, gLogFileNamePattern, n);
+        if (!FileExists(gLogFileName))
+            return gTrue;
+        ++n;
+    }
+    gLogFileName[0] = 0;
+    assert(0);
+    return gFalse;
+}
+
+void LogAllocs(void)
+{
+    FILE * fp;
+    int i;
+    char buf[256];
+    int len;
+
+    assert(gAllocsCount <= ALLOCS_AT_A_TIME);
+    if (0 == gAllocsCount)
+        return;
+
+    if (!LogGenUniqueFile())
+        return;
+    fp = fopen(gLogFileName, "a");
+    if (!fp)
+        return;
+
+    for (i = 0; i < gAllocsCount; i++) {
+        if (ITS_A_FREE == gAllocs[i].size) {
+            len = sprintf(buf, "- 0x%p\n", gAllocs[i].p);
+        } else {
+            len = sprintf(buf, "+ 0x%p 0x%x\n", gAllocs[i].p, (unsigned)gAllocs[i].size);
+        }
+        assert(len >= 0);
+        if (len > 0)
+            fwrite(buf, 1, len, fp);
+    }
+    fclose(fp);
+}
+
+/* Just to make sure that we log everything without explicitly calling
+   LogAllocs() at the end, this class' destructor should be called by the C++
+   runtime at the end of the program */
+class EnsureLastAllocsLogged {
+public:
+    ~EnsureLastAllocsLogged(void) {
+        LogAllocs();
+        gAllocsCount = 0;
+    }
+};
+
+static EnsureLastAllocsLogged gEnsureLastAllocsLogged;
+
+void RecordEntry(void *p, size_t size)
+{
+    if (ALLOCS_AT_A_TIME == gAllocsCount) {
+        LogAllocs();
+        gAllocsCount = 0;
+    }
+    assert(gAllocsCount < ALLOCS_AT_A_TIME);
+    gAllocs[gAllocsCount].size = size;
+    gAllocs[gAllocsCount].p = p;
+    ++gAllocsCount;
+}
+
+void RecordMalloc(void *p, size_t size)
+{
+    assert(size != ITS_A_FREE);
+    if (p)
+        RecordEntry(p, size);
+}
+
+void RecordFree(void *p)
+{
+    if (p)
+        RecordEntry(p, ITS_A_FREE);
+}
+
+void RecordRealloc(void *oldP, void *newP, size_t newSize)
+{
+    RecordFree(oldP);
+    RecordMalloc(newP, newSize);
+}
+#else
+#define RecordMalloc(p, size)
+#define RecordFree(p)
+#define RecordRealloc(oldP, newP, newSize)
+#endif
+
+void malloc_hook(void *p, size_t size)
+{
+    RecordMalloc(p, size);
+}
+
+void free_hook(void *p)
+{
+    RecordFree(p);
+}
+
+void realloc_hook(void *oldP, void *newP, size_t newSize)
+{
+    RecordRealloc(oldP, newP, newSize);
+}
+
+void *operator new(size_t size) {
+    return fast_malloc(size);
+}
+
+void *operator new[](size_t size) {
+    return fast_malloc(size);
+}
+
+void operator delete(void *p) {
+    fast_free(p);
+}
+
+void operator delete[](void *p) {
+    fast_free(p);
+}
+
+#define MAGIC_COOKIE_FOR_0_16  (FreeListNode*)0xa1b2c3d4
+#define MAGIC_COOKIE_FOR_32    (FreeListNode*)0x11326374
+#define MAGIC_COOKIE_FOR_88    (FreeListNode*)0x91a2c8d9
+#define MAGIC_COOKIE_FOR_20_24 (FreeListNode*)0x51783c4d
+
+/* According to my profiles, covering allocations in the ranges <0-16>, <20-24>,
+   32 and 88 covers 89% of allocations (in the scenario I've measured, which was
+   loading the PDF document PDFReference16.pdf, available at the Adobe website).
+   They're also ordered by frequency (most frequently allocated sizes first) */
+#define ALLOCATORS_COUNT 4
+static FastFixedAllocator gAllocators[ALLOCATORS_COUNT] = {
+    FastFixedAllocator(0,16,4096,MAGIC_COOKIE_FOR_0_16),
+    FastFixedAllocator(32,32,1024,MAGIC_COOKIE_FOR_32),
+    FastFixedAllocator(88,88,1024,MAGIC_COOKIE_FOR_88),
+    FastFixedAllocator(20,24,1024,MAGIC_COOKIE_FOR_20_24),
+};
+
+/* Return an allocator for a given size or NULL if one doesn't exist.
+   Speed of this function is critical so it was inlined */
+static inline FastFixedAllocator *GetAllocatorForSize(size_t size)
+{
+    if ((size > 0) && (size <= 16))
+        return &gAllocators[0];
+    else if (32 == size)
+        return &gAllocators[1];
+    else if (88 == size)
+        return &gAllocators[2];
+    else if ((size >= 20) && (size <= 24))
+        return &gAllocators[3];
+    else
+        return NULL;
+}
+
+/* Return the allocator that allocated this pointer or NULL if it comes
+   from the system allocator.
+   Speed of this function is critical so it was inlined */
+static inline FastFixedAllocator *GetAllocatorForPointer(void *p)
+{
+    FreeListNode * freeListNode;
+    if (!p)
+        return NULL;
+    freeListNode = (FreeListNode*)((char*)p - sizeof(FreeListNode));
+    if (MAGIC_COOKIE_FOR_0_16 == freeListNode->next) {
+        if (gAllocators[0].AllocatedPointerFast(p)) {
+            return &gAllocators[0];
+        }
+    } else if (MAGIC_COOKIE_FOR_32 == freeListNode->next) {
+        if (gAllocators[1].AllocatedPointerFast(p)) {
+            return &gAllocators[1];
+        }
+    } else if (MAGIC_COOKIE_FOR_88 == freeListNode->next) {
+        if (gAllocators[2].AllocatedPointerFast(p)) {
+            return &gAllocators[2];
+        }
+    } else if (MAGIC_COOKIE_FOR_20_24 == freeListNode->next) {
+        if (gAllocators[3].AllocatedPointerFast(p)) {
+            return &gAllocators[3];
+        }
+    }
+    return NULL;
+}
+
+void *fast_malloc(size_t size)
+{
+    FastFixedAllocator *allocator;
+    void * p = NULL;
+
+    allocator = GetAllocatorForSize(size);
+    if (allocator)
+        p = allocator->Alloc(size);
+
+    if (!p)
+        p = malloc(size);
+    malloc_hook(p, size);
+    return p;
+}
+
+void fast_free(void *p)
+{
+    FastFixedAllocator *allocator;
+    free_hook(p);
+    allocator = GetAllocatorForPointer(p);
+    if (allocator)
+        allocator->Free(p);
+    else
+        free(p);
+}
+
+void *fast_realloc(void *oldP, size_t size)
+{
+    FastFixedAllocator * allocator;
+    void * newP = NULL;
+    size_t oldSize;
+
+    if (!oldP)
+        return fast_malloc(size);
+
+    allocator = GetAllocatorForPointer(oldP);
+    if (allocator) {
+        if (size > 0) {
+            oldSize = allocator->AllocationSize();
+            if (size > oldSize) {
+                /* doesn't fit in the fixed-size chunk anymore: allocate a new
+                   one, copy the data over, then release the old chunk */
+                newP = fast_malloc(size);
+                if (newP) {
+                    memcpy(newP, oldP, oldSize);
+                    allocator->Free(oldP);
+                }
+            } else {
+                /* still fits in the existing chunk, keep the old pointer */
+                newP = oldP;
+            }
+        } else {
+            /* realloc() with size 0 behaves like free() */
+            allocator->Free(oldP);
+        }
+    }
+    else
+        newP = realloc(oldP, size);
+    realloc_hook(oldP, newP, size);
+    return newP;
+}
Index: goo/FastAlloc.h
===================================================================
--- goo/FastAlloc.h	(revision 0)
+++ goo/FastAlloc.h	(revision 0)
@@ -0,0 +1,24 @@
+#ifndef FAST_ALLOC_H_
+#define FAST_ALLOC_H_
+
+/* Written by: Krzysztof Kowalczyk (http://blog.kowalczyk.info)
+   License: Public Domain (http://creativecommons.org/licenses/publicdomain/)
+   Take all the code you want, I'll just write more.
+*/
+
+#include <stddef.h>
+
+#define USE_FAST_ALLOC 1
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+void * fast_malloc(size_t size);
+void fast_free(void *p);
+void * fast_realloc(void *oldP, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
Index: goo/FastFixedAllocator.cc
===================================================================
--- goo/FastFixedAllocator.cc	(revision 0)
+++ goo/FastFixedAllocator.cc	(revision 0)
@@ -0,0 +1,133 @@
+/* Written by: Krzysztof Kowalczyk (http://blog.kowalczyk.info)
+   License: Public Domain (http://creativecommons.org/licenses/publicdomain/)
+   Take all the code you want, I'll just write more.
+*/
+
+#include "FastFixedAllocator.h"
+#include <assert.h>
+#include <stdlib.h>
+
+void *FastFixedAllocator::Alloc(size_t size) {
+    char * p;
+    void * toReturn;
+    ChunkBlockNode * currChunk;
+    int totalBlockSize;
+    bool needsToAllocateNewBlock;
+    FreeListNode * freeListNode;
+
+    /* can we satisfy the allocation from the free list? */
+    if (freeListRoot) {
+        freeListNode = freeListRoot;
+        freeListRoot = freeListRoot->next;
+        freeListNode->next = magicCookie; /* mark as being used */
+        p = (char*)freeListNode;
+        toReturn = (void*) (p + sizeof(FreeListNode));
+#ifdef FAST_ALLOC_STATS
+        totalAllocs++;
+        allocsFromFreeList++;
+#endif
+        return toReturn;
+    }
+
+    /* can we satisfy the allocation from the last allocated block?
+       We only have to look at the last allocated block since the lack of
+       free-list entries means all chunks in older blocks are currently used */
+    currChunk = chunkBlockListRoot;
+    needsToAllocateNewBlock = true;
+
+#ifdef FAST_ALLOC_STATS
+    totalAllocs++;
+#endif
+
+    if (currChunk) {
+        assert(currChunk->chunksUsed >= 0);
+        assert(currChunk->chunksUsed <= chunksAtATime);
+        if (currChunk->chunksUsed < chunksAtATime) {
+            /* we still have space in the last allocated block */
+            needsToAllocateNewBlock = false;
+        }
+    }
+
+    if (needsToAllocateNewBlock) {
+        totalBlockSize = GetBlockSize();
+        currChunk = (ChunkBlockNode*)malloc(totalBlockSize);
+        if (!currChunk)
+            return NULL;
+        currChunk->chunksUsed = 0;
+        currChunk->next = chunkBlockListRoot;
+        chunkBlockListRoot = currChunk;
+    }
+#ifdef FAST_ALLOC_STATS
+    else {
+        allocsFromBlock++;
+    }
+#endif
+
+    p = (char*)currChunk;
+    p += sizeof(ChunkBlockNode);
+    p += (GetTotalChunkSize() * currChunk->chunksUsed);
+    freeListNode = (FreeListNode*)p;
+    freeListNode->next = magicCookie; /* mark as being used */
+    toReturn = (void*)(p + sizeof(FreeListNode));
+    currChunk->chunksUsed++;
+    assert(currChunk->chunksUsed <= chunksAtATime);
+    return toReturn;
+}
+
+void FastFixedAllocator::Free(void *p) {
+    FreeListNode * freeListNode;
+
+    assert(AllocatedPointerSlow(p));
+
+    freeListNode = (FreeListNode*)((char*)p - sizeof(FreeListNode));
+    assert(magicCookie == freeListNode->next);
+    freeListNode->next = freeListRoot;
+    freeListRoot = freeListNode;
+}
+
+/* Return true if pA is a pointer that belongs to memory that we allocated
+   (and is currently in use; it doesn't work for pointers on our free list).
+   This check must be fast. */
+bool FastFixedAllocator::AllocatedPointerSlow(void *pA) {
+    FreeListNode * freeListNode;
+    ChunkBlockNode * currChunk;
+    char * p, *blockStart, *blockEnd;
+
+    if (!pA)
+        return false;
+
+    /* note: this might actually fail on some systems because we're looking
+       at memory of unknown origin.
+       If the memory right before the pointer is not accessible, we'll crash.
+       Some debugging memory allocators use a trick of protecting the memory
+       right before and after the allocated chunk, to catch accesses beyond
+       allocated chunks.
+       This shouldn't be a problem when running on a regular Unix or Windows
+       system. I don't have a better idea on how to make this check fast. */
+    freeListNode = (FreeListNode*)((char*)pA - sizeof(FreeListNode));
+    if (magicCookie != freeListNode->next) {
+        /* no magic value so it's not a pointer we allocated */
+        return false;
+    }
+
+    /* It's possible (although highly unlikely) that a random pointer will have
+       our magic value as well, so we still have to traverse all blocks to see
+       if the pointer lies within memory allocated for the blocks */
+    p = (char*)pA - sizeof(FreeListNode);
+    currChunk = chunkBlockListRoot;
+    while (currChunk) {
+        blockStart = (char*)currChunk;
+        blockEnd = blockStart + GetBlockSize();
+        blockStart += sizeof(ChunkBlockNode);
+        if ((p >= blockStart) && (p < blockEnd)) {
+#ifdef DEBUG
+            /* something's wrong if it looks like it was allocated by us but is
+               not on a proper boundary */
+            unsigned long pos = (unsigned long)(p - blockStart);
+            assert(0 == pos % GetTotalChunkSize());
+#endif
+            return true;
+        }
+        currChunk = currChunk->next;
+    }
+    return false;
+}
Index: goo/FastFixedAllocator.h
===================================================================
--- goo/FastFixedAllocator.h	(revision 0)
+++ goo/FastFixedAllocator.h	(revision 0)
@@ -0,0 +1,150 @@
+#ifndef FAST_FIXED_ALLOCATOR_H_
+#define FAST_FIXED_ALLOCATOR_H_
+
+/* Written by: Krzysztof Kowalczyk (http://blog.kowalczyk.info)
+   License: Public Domain (http://creativecommons.org/licenses/publicdomain/)
+   Take all the code you want, I'll just write more.
+*/
+
+#include <stdlib.h>
+#include <assert.h>
+
+/*
+This is a specialized allocator designed to allocate large numbers of small
+objects (chunks) quickly.
+
+It does that by employing two techniques:
+- allocating blocks of objects (chunks) at a time
+- keeping a free list of all freed allocations
+
+By allocating memory in blocks it makes fewer trips to the system memory
+allocator.
+
+By using a free list it can satisfy new allocations from memory that has been
+previously allocated and freed. This helps especially if there's a lot of
+thrashing (allocating/freeing).
+
+It adds sizeof(char*) of overhead to each allocation, i.e. on a 32-bit machine
+allocating 4-byte chunks uses 8 bytes. The overhead is for storing a pointer
+to the next free chunk when the chunk is on the free list.
+
+This is ok, because we actually use less memory than most OS allocators, since:
+- OS allocators also have per-allocation bookkeeping overhead, which is
+  likely to be bigger than sizeof(char*)
+- OS allocators round allocations up (e.g.
+  to a 16-byte boundary on Ubuntu 6 libc)
+*/
+
+/* Define if you want statistics about how many allocations there were and
+   what our hit ratio is (the number of allocations served from the free list
+   and from an existing block of memory) */
+#define FAST_ALLOC_STATS 1
+
+/* We prepend this to every allocated chunk */
+typedef struct FreeListNode {
+    struct FreeListNode *next;
+} FreeListNode;
+
+/* Keeps info about a bunch of chunks allocated as one block */
+typedef struct ChunkBlockNode {
+    struct ChunkBlockNode * next;
+    int chunksUsed;
+    /* data comes inline here */
+} ChunkBlockNode;
+
+class FastFixedAllocator {
+
+public:
+    FastFixedAllocator(size_t chunkSizeLowA, size_t chunkSizeHighA,
+        int chunksAtATimeA, FreeListNode *magicCookieA) :
+        chunkSizeLow(chunkSizeLowA),
+        chunkSizeHigh(chunkSizeHighA),
+        chunksAtATime(chunksAtATimeA),
+        magicCookie(magicCookieA),
+        freeListRoot(NULL), chunkBlockListRoot(NULL)
+#ifdef FAST_ALLOC_STATS
+        , totalAllocs(0), allocsFromFreeList(0), allocsFromBlock(0)
+#endif
+    {}
+    ~FastFixedAllocator(void) {
+    }
+
+    bool HandlesSize(size_t size) {
+        if ((size <= chunkSizeHigh) && (size >= chunkSizeLow))
+            return true;
+        return false;
+    }
+
+    size_t AllocationSize(void) { return chunkSizeHigh; }
+    void * Alloc(size_t size);
+    void Free(void* p);
+    bool AllocatedPointerSlow(void *pA);
+
+    /* like AllocatedPointerSlow but assumes that the NULL check and magicCookie
+       check have already been performed. Also, forced inline by being defined
+       in the header file. Speed of this function is critical to the allocator */
+    bool AllocatedPointerFast(void *pA) {
+        ChunkBlockNode * currChunk;
+        char * p, *blockStart, *blockEnd;
+
+#ifdef DEBUG
+        assert(pA);
+        FreeListNode * freeListNode;
+        freeListNode = (FreeListNode*)((char*)pA - sizeof(FreeListNode));
+        assert(magicCookie == freeListNode->next);
+#endif
+
+        p = (char*)pA - sizeof(FreeListNode);
+        currChunk = chunkBlockListRoot;
+        while (currChunk) {
+            blockStart = (char*)currChunk;
+            blockEnd = blockStart + GetBlockSize();
+            blockStart += sizeof(ChunkBlockNode);
+            if ((p >= blockStart) && (p < blockEnd)) {
+#ifdef DEBUG
+                /* something's wrong if it looks like it was allocated by us but
+                   is not on a proper boundary */
+                unsigned long pos = (unsigned long)(p - blockStart);
+                assert(0 == pos % GetTotalChunkSize());
+#endif
+                return true;
+            }
+            currChunk = currChunk->next;
+        }
+        return false;
+    }
+
+private:
+    int GetTotalChunkSize(void) {
+        return sizeof(FreeListNode) + chunkSizeHigh;
+    }
+    int GetBlockSize(void) {
+        return sizeof(ChunkBlockNode) + (GetTotalChunkSize() * chunksAtATime);
+    }
+
+#ifdef FAST_ALLOC_STATS
+    /* gather statistics about allocations */
+    int totalAllocs;
+    int allocsFromFreeList;
+    int allocsFromBlock;
+#endif
+
+    /* This allocator handles allocations for sizes in the range
+       [chunkSizeLow, chunkSizeHigh] */
+    size_t chunkSizeLow;
+    size_t chunkSizeHigh;
+
+    /* We need a really fast check for deciding whether a given pointer was
+       allocated by us or not, so that we know we can put it on a free list
+       when it's being freed.
+       We do that by putting a magic value (magicCookie) in the slot that is
+       otherwise used to store the pointer to the next free chunk when the
+       chunk is on the free list. That way, if the magic value is not there,
+       we know it's not a pointer to memory we allocated (which should be the
+       majority of cases).
+       The magic value for each instance should be unique (otherwise the
+       check will be slow).
+    */
+    FreeListNode * magicCookie;
+
+    int chunksAtATime;
+    FreeListNode * freeListRoot;
+    ChunkBlockNode * chunkBlockListRoot;
+};
+
+#endif
Index: goo/gmem.c
===================================================================
--- goo/gmem.c	(revision 42)
+++ goo/gmem.c	(working copy)
@@ -13,6 +13,7 @@
 #include
 #include
 #include
 #include "gmem.h"
+#include "FastAlloc.h"
 
 #ifdef DEBUG_MEM
@@ -85,6 +86,9 @@
   *p = gMemDeadVal;
   return data;
 #else
+#ifdef USE_FAST_ALLOC
+  return fast_malloc(size);
+#else
   void *p;
 
   if (size <= 0)
@@ -95,6 +99,7 @@
   }
   return p;
 #endif
+#endif
 }
 
 void *grealloc(void *p, size_t size) {
@@ -119,6 +124,9 @@
   }
   return q;
 #else
+#ifdef USE_FAST_ALLOC
+  return fast_realloc(p, size);
+#else
   void *q;
 
   if (size <= 0) {
@@ -136,6 +144,7 @@
   }
   return q;
 #endif
+#endif
 }
 
 void *gmallocn(int nObjs, int objSize) {
@@ -196,8 +205,12 @@
     }
   }
 #else
+#ifdef USE_FAST_ALLOC
+  fast_free(p);
+#else
   free(p);
 #endif
+#endif
 }
 
 #ifdef DEBUG_MEM
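
For anyone who wants to poke at the allocator without building the whole tree, here's a minimal sketch of how the fast_* entry points are meant to be exercised. It is not part of the patch: the test driver (main(), the sizes array, the doubling realloc) is hypothetical and only assumes that FastAlloc.cc and FastFixedAllocator.cc from above are compiled into the same program.

```cpp
// Hypothetical standalone test driver (not in the patch): exercises the size
// classes served by the fixed-size allocators plus one size that falls
// through to the system malloc().
#include <stdio.h>
#include <string.h>
#include "FastAlloc.h"

int main(void)
{
    /* sizes 1-16, 20-24, 32 and 88 are handled by a FastFixedAllocator;
       200 is not covered and falls back to plain malloc() */
    size_t sizes[] = { 16, 24, 32, 88, 200 };

    for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
        char *p = (char *)fast_malloc(sizes[i]);
        if (!p)
            return 1;
        memset(p, 0xab, sizes[i]);

        /* growing past the fixed chunk size forces a copy into a new chunk,
           or into malloc'ed memory if no allocator covers the new size */
        char *q = (char *)fast_realloc(p, sizes[i] * 2);
        if (!q)
            return 1;
        printf("size %u -> %u, ptr %p -> %p\n",
               (unsigned)sizes[i], (unsigned)(sizes[i] * 2),
               (void *)p, (void *)q);

        fast_free(q);
    }
    return 0;
}
```

Note that because FastAlloc.cc also overrides the global operator new/delete, any C++ objects created by such a test go through the same size-class logic automatically.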