From 0434aa3ee8f794628d9e10c90442236bbe6d0ead Mon Sep 17 00:00:00 2001 From: Stef Walter Date: Tue, 2 Apr 2013 16:33:24 +0200 Subject: [PATCH] Update to MurmurHash3 This should also fix problems with accessing memory in a non-aligned fashion on platforms where this causes problems. https://bugs.freedesktop.org/show_bug.cgi?id=62819 --- common/attrs.c | 2 +- common/dict.c | 2 +- common/hash.c | 149 +++++++++++++++++++++++++---------------------- common/hash.h | 4 +- common/tests/test-hash.c | 18 +++--- 5 files changed, 91 insertions(+), 84 deletions(-) diff --git a/common/attrs.c b/common/attrs.c index e656189..c1e060a 100644 --- a/common/attrs.c +++ b/common/attrs.c @@ -505,7 +505,7 @@ p11_attr_hash (const void *data) const CK_ATTRIBUTE *attr = data; uint32_t hash; - p11_hash_murmur2 (&hash, + p11_hash_murmur3 (&hash, &attr->type, sizeof (attr->type), attr->pValue, (size_t)attr->ulValueLen, NULL); diff --git a/common/dict.c b/common/dict.c index 409e3a4..db7b575 100644 --- a/common/dict.c +++ b/common/dict.c @@ -329,7 +329,7 @@ unsigned int p11_dict_str_hash (const void *string) { uint32_t hash; - p11_hash_murmur2 (&hash, string, strlen (string), NULL); + p11_hash_murmur3 (&hash, string, strlen (string), NULL); return hash; } diff --git a/common/hash.c b/common/hash.c index 2041f1f..9fe3668 100644 --- a/common/hash.c +++ b/common/hash.c @@ -541,70 +541,69 @@ p11_hash_md5 (unsigned char *hash, md5_invalidate (&md5); } -/* - * MurmurHash.c - * MYUtilities - * - * This file created by Jens Alfke on 3/17/08. - * Algorithm & source code by Austin Appleby, released to public domain. - * - * Downloaded 3/16/2008. - * Modified slightly by Jens Alfke (use standard uint32_t and size_t types; - * change 'm' and 'r' to #defines for better C compatibility.) +/* This code is based on the public domain MurmurHash3 from Austin Appleby: + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp * + * We use only the 32 bit variant, and slow it down a bit to support unaligned + * reads. */ -/*----------------------------------------------------------------------------- - * MurmurHash2, by Austin Appleby - * - * Note - This code makes a few assumptions about how your machine behaves - - * - * 1. We can read a 4-byte value from any address without crashing - * 2. sizeof(int) == 4 **Jens: I fixed this by changing 'unsigned int' to 'uint32_t'** - * - * And it has a few limitations - - * - * 1. It will not work incrementally. - * 2. It will not produce the same results on little-endian and big-endian - * machines. +#if !defined(__cplusplus) && (__GNUC__ > 2) +#define GNUC_INLINE __attribute__((always_inline)) +#else +#define GNUC_INLINE +#endif + +GNUC_INLINE static inline uint32_t +rotl (uint32_t x, + int8_t r) +{ + return (x << r) | (x >> (32 - r)); +} + +/* + * Finalization mix - force all bits of a hash block to avalanche */ +GNUC_INLINE static inline uint32_t +fmix (uint32_t h) +{ + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + + void -p11_hash_murmur2 (void *hash, +p11_hash_murmur3 (void *hash, const void *input, size_t len, ...) { - /* - * 'm' and 'r' are mixing constants generated offline. - * They're not really 'magic', they just happen to work well. - * seed is arbitrarily chosen - */ - - #define m 0x5bd1e995 - #define r 24 - #define seed 42 - - const unsigned char * data = input; - unsigned char overflow[4]; + uint8_t overflow[4]; + const uint8_t *data; va_list va; - uint32_t h; + uint32_t h1; + uint32_t k1; + uint32_t c1; + uint32_t c2; - /* Initialize the hash based on the length */ - va_start (va, len); - h = len; - while (va_arg (va, const void *)) - h += va_arg (va, size_t); - h ^= seed; - va_end (va); + h1 = 42; /* arbitrary choice of seed */ + c1 = 0xcc9e2d51; + c2 = 0x1b873593; + data = input; + + /* body */ /* Mix 4 bytes at a time into the hash */ va_start (va, len); for (;;) { - uint32_t k; - if (len >= 4) { - k = *(uint32_t *)data; + memcpy (&k1, data, 4); data += 4; len -= 4; @@ -635,34 +634,42 @@ p11_hash_murmur2 (void *hash, break; } - k = *(uint32_t *)overflow; + memcpy (&k1, overflow, 4); } - k *= m; - k ^= k >> r; - k *= m; + k1 *= c1; + k1 = rotl (k1, 15); + k1 *= c2; - h *= m; - h ^= k; + h1 ^= k1; + h1 = rotl (h1, 13); + h1 = h1 * 5 + 0xe6546b64; } - va_end (va); - /* Handle the last few bytes of the input array */ - switch(len) { - case 3: h ^= overflow[2] << 16; - case 2: h ^= overflow[1] << 8; - case 1: h ^= overflow[0]; - h *= m; - }; - - /* - * Do a few final mixes of the hash to ensure the last few - * bytes are well-incorporated. - */ - h ^= h >> 13; - h *= m; - h ^= h >> 15; + /* tail */ + + k1 = 0; + + switch (len) { + case 3: + k1 ^= overflow[2] << 16; + case 2: + k1 ^= overflow[1] << 8; + case 1: + k1 ^= overflow[0]; + k1 *= c1; + k1 = rotl (k1, 15); + k1 *= c2; + h1 ^= k1; + default: + break; + } + + /* finalization */ + + h1 ^= len; + h1 = fmix(h1); - assert (sizeof (h) == P11_HASH_MURMUR2_LEN); - memcpy (hash, &h, sizeof (h)); + assert (sizeof (h1) == P11_HASH_MURMUR3_LEN); + memcpy (hash, &h1, sizeof (h1)); } diff --git a/common/hash.h b/common/hash.h index b06438d..09f7646 100644 --- a/common/hash.h +++ b/common/hash.h @@ -57,9 +57,9 @@ void p11_hash_sha1 (unsigned char *hash, size_t length, ...) GNUC_NULL_TERMINATED; -#define P11_HASH_MURMUR2_LEN 4 +#define P11_HASH_MURMUR3_LEN 4 -void p11_hash_murmur2 (void *hash, +void p11_hash_murmur3 (void *hash, const void *input, size_t length, ...) GNUC_NULL_TERMINATED; diff --git a/common/tests/test-hash.c b/common/tests/test-hash.c index a1cb917..eecf09b 100644 --- a/common/tests/test-hash.c +++ b/common/tests/test-hash.c @@ -137,14 +137,14 @@ test_murmur2 (CuTest *cu) { uint32_t one, two, four, seven, eleven, split; - assert (sizeof (one) == P11_HASH_MURMUR2_LEN); + assert (sizeof (one) == P11_HASH_MURMUR3_LEN); - p11_hash_murmur2 ((unsigned char *)&one, "one", 3, NULL); - p11_hash_murmur2 ((unsigned char *)&two, "two", 3, NULL); - p11_hash_murmur2 ((unsigned char *)&four, "four", 4, NULL); - p11_hash_murmur2 ((unsigned char *)&seven, "seven", 5, NULL); - p11_hash_murmur2 ((unsigned char *)&eleven, "eleven", 6, NULL); - p11_hash_murmur2 ((unsigned char *)&split, "ele", 3, "ven", 3, NULL); + p11_hash_murmur3 ((unsigned char *)&one, "one", 3, NULL); + p11_hash_murmur3 ((unsigned char *)&two, "two", 3, NULL); + p11_hash_murmur3 ((unsigned char *)&four, "four", 4, NULL); + p11_hash_murmur3 ((unsigned char *)&seven, "seven", 5, NULL); + p11_hash_murmur3 ((unsigned char *)&eleven, "eleven", 6, NULL); + p11_hash_murmur3 ((unsigned char *)&split, "ele", 3, "ven", 3, NULL); CuAssertTrue (cu, one != two); CuAssertTrue (cu, one != four); @@ -166,11 +166,11 @@ test_murmur2_incr (CuTest *cu) { uint32_t first, second; - p11_hash_murmur2 ((unsigned char *)&first, + p11_hash_murmur3 ((unsigned char *)&first, "this is the long input!", (size_t)23, NULL); - p11_hash_murmur2 ((unsigned char *)&second, + p11_hash_murmur3 ((unsigned char *)&second, "this", (size_t)4, " ", (size_t)1, "is ", (size_t)3, -- 1.8.2