diff --git a/vlib/builtin/map.v b/vlib/builtin/map.v index ba0db25ef3..7ec50424f4 100644 --- a/vlib/builtin/map.v +++ b/vlib/builtin/map.v @@ -10,26 +10,26 @@ import ( ) /* -This is a very fast hashmap implementation. It has several properties that in -combination makes it very fast. Here is a short explanation of each property. +This is a very fast hashmap implementation. It has several properties that in +combination make it very fast. Here is a short explanation of each property. After reading this you should have a basic understanding of how it works: 1. |Hash-function (Wyhash)|. Wyhash is the fastest hash-function passing SMHash- er, so it was an easy choice. -2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is +2. |Open addressing (Robin Hood Hashing)|. With this method a hash collision is resolved by probing. As opposed to linear probing, Robin Hood hashing has a sim- ple but clever twist: As new keys are inserted, old keys are shifted around in a way such that all keys stay reasonably close to the slot they originally hash to. 3. |Memory layout|. Key-value pairs are stored in a `DenseArray`, with an avera- ge of roughly 6.25% unused memory, as opposed to most other dynamic array imple- -mentations with a growth factor of 1.5 or 2. The key-values keep their index in -the array - they are not probed. Instead, this implementation uses another array -"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A +mentations with a growth factor of 1.5 or 2. The key-values keep their index in +the array - they are not probed. Instead, this implementation uses another array +"metas" storing "metas" (meta-data). Each Key-value has a corresponding meta. A meta stores a reference to its key-value, and its index in "metas" is determined -by the hash of the key and probing. A meta also stores bits from the hash (for -faster rehashing etc.) 
and how far away it is from the index it was originally +by the hash of the key and probing. A meta also stores bits from the hash (for +faster rehashing etc.) and how far away it is from the index it was originally hashed to (probe_count). probe_count is 0 if empty, 1 if not probed, 2 if probed by 1. @@ -37,22 +37,22 @@ meta (64 bit) = kv_index (32 bit) | probe_count (8 bits) | hashbits (24 bits) metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...] key_values = [kv, kv, kv, kv, kv, ...] -4. |Power of two size array|. The size of metas is a power of two. This makes it -possible to find a bucket from a hash code you can use hash & (SIZE -1) instead +4. |Power of two size array|. The size of metas is a power of two. This makes it +possible to find a bucket from a hash code using hash & (SIZE - 1) instead of abs(hash) % SIZE. Modulo is extremely expensive so using '&' is a big perfor- mance improvement. The general concern with this is that you only use the lower bits of the hash and can cause many collisions. This is solved by using very go- -od hash-function. +od hash-function. -5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick +5. |Extra metas|. The hashmap keeps track of the highest probe_count. The trick is to allocate extra metas > max(probe_count), so you never have to do any boun- -ds-checking because the extra metas ensures that an element will never go beyond -index the last index. +ds-checking because the extra metas ensure that an element will never go beyond +the last index. 6. |Cached rehashing|. When the load_factor of the map exceeds the max_load_fac- tor the size of metas is doubled and all the elements need to be "rehashed" to -find the index in the new array. Instead of rehashing complete, it simply uses -the hashbits stored in the meta. +find the index in the new array. Instead of rehashing completely, it simply uses +the hashbits stored in the meta. 
*/ const ( @@ -66,7 +66,7 @@ const ( init_capicity = 1 << init_log_capicity // Initial max load-factor init_max_load_factor = 0.8 - // Minimum Load-factor. + // Minimum Load-factor. // Number is picked to make delete O(1) amortized min_load_factor = 0.3 // Initial range cap @@ -77,7 +77,7 @@ const ( // Bitmask to select all the hashbits hash_mask = u32(0x00FFFFFF) // Used for incrementing the probe-count - probe_inc = u32(0x01000000) + probe_inc = u32(0x01000000) // Bitmask for maximum probe count max_probe = u32(0xFF000000) ) @@ -209,7 +209,7 @@ fn meta_less(metas &u32, i u64, m u32) (u64, u32){ [inline] fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 { mut metas := ms - mut meta := me + mut meta := me mut index := i mut kv_index := kvi for metas[index] != 0 { @@ -230,13 +230,13 @@ fn (m mut map) meta_greater(ms &u32, i u64, me u32, kvi u32) &u32 { if (probe_count << 1) == m.extra_metas { m.extra_metas += extra_metas_inc mem_size := (m.cap + 2 + m.extra_metas) - metas = &u32(realloc(metas, sizeof(u32) * mem_size)) - memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc) + metas = &u32(C.realloc(metas, sizeof(u32) * mem_size)) + C.memset(metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc) // Should almost never happen if probe_count == 252 { panic("Probe overflow") } - } + } return metas } @@ -259,7 +259,7 @@ fn (m mut map) set(key string, value voidptr) { } // Match not possible anymore kv := KeyValue{ - key: key + key: key value: malloc(m.value_bytes) } C.memcpy(kv.value, value, m.value_bytes) @@ -445,4 +445,4 @@ pub fn (m map_string) str() string { } sb.writeln('}') return sb.str() -} \ No newline at end of file +}