mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
494 lines
12 KiB
V
494 lines
12 KiB
V
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
|
|
// Use of this source code is governed by an MIT license
|
|
// that can be found in the LICENSE file.
|
|
|
|
module builtin
|
|
|
|
import (
|
|
strings
|
|
hash.wyhash
|
|
)
|
|
|
|
/*
|
|
This is a very fast hashmap implementation. It has several properties
|
|
that in combination makes it very fast. Here is a short explanation of
|
|
each property. After reading this you should have a basic understanding
|
|
of how it works:
|
|
|
|
1. Hash-function (Wyhash). Wyhash is the fastest hash-function
|
|
passing SMHasher, so it was an easy choice.
|
|
|
|
2. Open addressing (Robin Hood Hashing). With this method a hash
|
|
collision is resolved by probing. As opposed to linear probing,
|
|
Robin Hood hashing has simple but clever twist: As new keys are
|
|
inserted, old keys are shifted around in a way such that all keys
|
|
stay reasonably close to the slot they originally hash to.
|
|
|
|
3. Memory layout. Key-value pairs are stored in a "DenseArray",
|
|
with an average of rougly 6.25% unused memory as opposed to
|
|
most other dynamic array implementation with a growth factor
|
|
of 1.5 or 2. The key-values keep their index in the array -
|
|
they and are not probed. Instead, this implementation uses another
|
|
array "metas" storing "metas" (meta-data). Each Key-value has
|
|
a corresponding meta. A meta stores a reference to its key-value, and
|
|
its index in "metas" is determined by the hash of the key and probing.
|
|
A meta also stores bits from the hash (for faster rehashing etc.)
|
|
and how far away it is from the index it was originally hashed to
|
|
(probe count).
|
|
|
|
meta (64 bit) = probe_count (8 bits) | hashbits (24 bits) | kv_index (32 bit)
|
|
metas = [meta, 0, meta, 0, meta, meta, meta, 0, ...]
|
|
key_values = [kv, kv, kv, kv, kv, ...]
|
|
|
|
4. Power of two. TODO: explain
|
|
5. Extra metas. TODO: explain
|
|
6. Cached rehashing TODO: explain
|
|
7. Load-factor. TODO: explain
|
|
8. Deletion. TODO: explain
|
|
*/
|
|
|
|
const (
|
|
// Number of bits from the hash stored for each entry
|
|
hashbits = 24
|
|
// Number of bits from the hash stored for rehasing
|
|
cached_hashbits = 16
|
|
// Initial log-number of buckets in the hashtable
|
|
init_log_capicity = 5
|
|
// Initial number of buckets in the hashtable
|
|
init_capicity = 1 << init_log_capicity
|
|
// Initial max load-factor
|
|
init_max_load_factor = 0.8
|
|
// Minimum Load-factor.
|
|
// Number is picked to make delete O(1) amortized
|
|
min_load_factor = 0.3
|
|
// Initial range cap
|
|
init_cap = init_capicity - 2
|
|
// Used for incrementing `extra_metas` when max
|
|
// probe count is too high, to avoid overflow
|
|
extra_metas_inc = 4
|
|
// Bitmask to select all the hashbits
|
|
hash_mask = u32(0x00FFFFFF)
|
|
// Used for incrementing the probe-count
|
|
probe_inc = u32(0x01000000)
|
|
// Bitmask for maximum probe count
|
|
max_probe = u32(0xFF000000)
|
|
)
|
|
|
|
struct KeyValue {
|
|
key string
|
|
mut:
|
|
value voidptr
|
|
}
|
|
|
|
// Dynamic array with very low growth factor
|
|
struct DenseArray {
|
|
mut:
|
|
data &KeyValue
|
|
cap u32
|
|
size u32
|
|
}
|
|
|
|
[inline]
|
|
fn new_dense_array() DenseArray {
|
|
unsafe {
|
|
return DenseArray {
|
|
data: &KeyValue(malloc(8 * sizeof(KeyValue)))
|
|
cap: 8
|
|
size: 0
|
|
}
|
|
}
|
|
}
|
|
|
|
// Push element to array and return index
|
|
// The growth-factor is roughly 12.5 `(x + (x >> 3))`
|
|
[inline]
|
|
fn (d mut DenseArray) push(kv KeyValue) u32 {
|
|
if d.cap == d.size {
|
|
d.cap += d.cap >> 3
|
|
d.data = &KeyValue(C.realloc(d.data, sizeof(KeyValue) * d.cap))
|
|
}
|
|
push_index := d.size
|
|
d.data[push_index] = kv
|
|
d.size++
|
|
return push_index
|
|
}
|
|
|
|
// Move all zeros to the end of the array
|
|
// and resize array
|
|
fn (d mut DenseArray) zeros_to_end() {
|
|
mut count := u32(0)
|
|
for i in 0..d.size {
|
|
if d.data[i].key.str != 0 {
|
|
tmp := d.data[count]
|
|
d.data[count] = d.data[i]
|
|
d.data[i] = tmp
|
|
count++
|
|
}
|
|
}
|
|
d.size = count
|
|
d.cap = if count < 8 {8} else {count}
|
|
d.data = &KeyValue(C.realloc(d.data, sizeof(KeyValue) * d.cap))
|
|
}
|
|
|
|
pub struct map {
|
|
// Byte size of value
|
|
value_bytes int
|
|
mut:
|
|
// Index of the highest index in the hashtable
|
|
cap u32
|
|
// Number of cached hashbits left for rehasing
|
|
window byte
|
|
// Used for right-shifting out used hashbits
|
|
shift byte
|
|
// Pointer to Key-value memory
|
|
key_values DenseArray
|
|
// Pointer to meta-data
|
|
metas &u32
|
|
// Measure that decides when to increase the capacity
|
|
max_load_factor f32
|
|
// Extra metas that allows for no ranging when incrementing
|
|
// index in the hashmap
|
|
extra_metas u32
|
|
pub mut:
|
|
// Number of key-values currently in the hashmap
|
|
size int
|
|
}
|
|
|
|
fn new_map(n, value_bytes int) map {
|
|
return map{
|
|
value_bytes: value_bytes
|
|
cap: init_cap
|
|
window: cached_hashbits
|
|
shift: init_log_capicity
|
|
key_values: new_dense_array()
|
|
metas: &u32(vcalloc(sizeof(u32) * (init_capicity + extra_metas_inc)))
|
|
max_load_factor: init_max_load_factor
|
|
extra_metas: extra_metas_inc
|
|
size: 0
|
|
}
|
|
}
|
|
|
|
fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
|
|
mut out := new_map(n, value_bytes)
|
|
for i in 0 .. n {
|
|
out.set(keys[i], values + i * value_bytes)
|
|
}
|
|
return out
|
|
}
|
|
|
|
fn (m mut map) set(key string, value voidptr) {
|
|
if (f32(m.size << 1) / f32(m.cap)) > m.max_load_factor {
|
|
m.expand()
|
|
}
|
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
|
mut meta := u32(((hash >> m.shift) & hash_mask) | probe_inc)
|
|
mut index := hash & m.cap
|
|
// While probe count is less
|
|
for meta < m.metas[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
// While we might have a match
|
|
for meta == m.metas[index] {
|
|
kv_index := m.metas[index + 1]
|
|
if key == m.key_values.data[kv_index].key {
|
|
C.memcpy(m.key_values.data[kv_index].value, value, m.value_bytes)
|
|
return
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
// Match is not possible anymore.
|
|
// Probe until an empty index is found.
|
|
// Swap when probe count is higher/richer (Robin Hood).
|
|
kv := KeyValue{
|
|
key: key
|
|
value: malloc(m.value_bytes)
|
|
}
|
|
C.memcpy(kv.value, value, m.value_bytes)
|
|
mut kv_index := m.key_values.push(kv)
|
|
for m.metas[index] != 0 {
|
|
if meta > m.metas[index] {
|
|
tmp_meta := m.metas[index]
|
|
m.metas[index] = meta
|
|
meta = tmp_meta
|
|
tmp_index := m.metas[index + 1]
|
|
m.metas[index + 1] = kv_index
|
|
kv_index = tmp_index
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
probe_count := (meta >> hashbits) - 1
|
|
if (probe_count << 1) == m.extra_metas {
|
|
// Should almost never happen
|
|
if (meta & max_probe) == max_probe {
|
|
m.expand()
|
|
m.set(kv.key, kv.value)
|
|
return
|
|
}
|
|
m.extra_metas += extra_metas_inc
|
|
mem_size := (m.cap + 2 + m.extra_metas)
|
|
m.metas = &u32(C.realloc(m.metas, sizeof(u32) * mem_size))
|
|
C.memset(m.metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
|
|
}
|
|
m.metas[index] = meta
|
|
m.metas[index + 1] = kv_index
|
|
m.size++
|
|
}
|
|
|
|
// Doubles the size of the hashmap
|
|
fn (m mut map) expand() {
|
|
old_cap := m.cap
|
|
m.cap = ((m.cap + 2)<<1) - 2
|
|
// Check if any hashbits are left
|
|
if m.window == 0 {
|
|
m.shift += cached_hashbits
|
|
m.rehash()
|
|
m.window = cached_hashbits
|
|
}
|
|
else {
|
|
m.cached_rehash(old_cap)
|
|
}
|
|
m.window--
|
|
}
|
|
|
|
// Halves the size of the hashmap
|
|
fn (m mut map) shrink() {
|
|
m.key_values.zeros_to_end()
|
|
m.cap = ((m.cap + 2)>>1) - 2
|
|
if m.window == 16 {
|
|
m.shift -= cached_hashbits
|
|
m.window = 0
|
|
}
|
|
m.rehash()
|
|
m.window++
|
|
}
|
|
|
|
fn (m mut map) rehash() {
|
|
meta_bytes := sizeof(u32) * (m.cap + 2 + m.extra_metas)
|
|
m.metas = &u32(C.realloc(m.metas, meta_bytes))
|
|
C.memset(m.metas, 0, meta_bytes)
|
|
for i := u32(0); i < m.key_values.size; i++ {
|
|
if m.key_values.data[i].key.str == 0 {
|
|
continue
|
|
}
|
|
kv := m.key_values.data[i]
|
|
hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
|
|
mut meta := u32(((hash>>m.shift) & hash_mask) | probe_inc)
|
|
mut index := hash & m.cap
|
|
// While probe count is less
|
|
for meta < m.metas[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
// Match is not possible anymore.
|
|
// Probe until an empty index is found.
|
|
// Swap when probe count is higher/richer (Robin Hood).
|
|
mut kv_index := i
|
|
for m.metas[index] != 0 {
|
|
if meta > m.metas[index] {
|
|
tmp_meta := m.metas[index]
|
|
m.metas[index] = meta
|
|
meta = tmp_meta
|
|
tmp_index := m.metas[index + 1]
|
|
m.metas[index + 1] = kv_index
|
|
kv_index = tmp_index
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
probe_count := (meta >> hashbits) - 1
|
|
if (probe_count << 1) == m.extra_metas {
|
|
// Should almost never happen
|
|
if (meta & max_probe) == max_probe {
|
|
m.expand()
|
|
return
|
|
}
|
|
m.extra_metas += extra_metas_inc
|
|
mem_size := (m.cap + 2 + m.extra_metas)
|
|
m.metas = &u32(C.realloc(m.metas, sizeof(u32) * mem_size))
|
|
C.memset(m.metas + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
|
|
}
|
|
m.metas[index] = meta
|
|
m.metas[index + 1] = kv_index
|
|
}
|
|
}
|
|
|
|
fn (m mut map) cached_rehash(old_cap u32) {
|
|
mut new_meta := &u32(vcalloc(sizeof(u32) * (m.cap + 2 + m.extra_metas)))
|
|
old_extra_metas := m.extra_metas
|
|
for i := 0; i <= old_cap + old_extra_metas; i += 2 {
|
|
if m.metas[i] == 0 {
|
|
continue
|
|
}
|
|
old_meta := m.metas[i]
|
|
old_probe_count := u64((old_meta>>hashbits) - 1) << 1
|
|
old_index := (i - old_probe_count) & (m.cap >> 1)
|
|
mut index := u64(old_index) | (old_meta << m.shift) & m.cap
|
|
mut meta := (old_meta & hash_mask) | probe_inc
|
|
// While probe count is less
|
|
for meta < new_meta[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
// Match is not possible anymore.
|
|
// Probe until an empty index is found.
|
|
// Swap when probe count is higher/richer (Robin Hood).
|
|
mut kv_index := m.metas[i + 1]
|
|
for new_meta[index] != 0 {
|
|
if meta > new_meta[index] {
|
|
tmp_meta := new_meta[index]
|
|
new_meta[index] = meta
|
|
meta = tmp_meta
|
|
tmp_index := new_meta[index + 1]
|
|
new_meta[index + 1] = kv_index
|
|
kv_index = tmp_index
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
probe_count := (meta >> hashbits) - 1
|
|
if (probe_count << 1) == m.extra_metas {
|
|
// Should almost never happen
|
|
if (meta & max_probe) == max_probe {
|
|
free(new_meta)
|
|
m.expand()
|
|
return
|
|
}
|
|
m.extra_metas += extra_metas_inc
|
|
mem_size := (m.cap + 2 + m.extra_metas)
|
|
new_meta = &u32(C.realloc(new_meta, sizeof(u32) * mem_size))
|
|
C.memset(new_meta + mem_size - extra_metas_inc, 0, sizeof(u32) * extra_metas_inc)
|
|
}
|
|
new_meta[index] = meta
|
|
new_meta[index + 1] = kv_index
|
|
}
|
|
unsafe{
|
|
free(m.metas)
|
|
}
|
|
m.metas = new_meta
|
|
}
|
|
|
|
[inline]
|
|
fn (m map) get(key string, out voidptr) bool {
|
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
|
mut index := hash & m.cap
|
|
mut meta := u32(((hash>>m.shift) & hash_mask) | probe_inc)
|
|
for meta < m.metas[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
for meta == m.metas[index] {
|
|
kv_index := m.metas[index + 1]
|
|
if key == m.key_values.data[kv_index].key {
|
|
C.memcpy(out, m.key_values.data[kv_index].value, m.value_bytes)
|
|
return true
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
return false
|
|
}
|
|
|
|
[inline]
|
|
fn (m map) exists(key string) bool {
|
|
if m.value_bytes == 0 {
|
|
return false
|
|
}
|
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
|
mut index := hash & m.cap
|
|
mut meta := u32(((hash>>m.shift) & hash_mask) | probe_inc)
|
|
for meta < m.metas[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
for meta == m.metas[index] {
|
|
kv_index := m.metas[index + 1]
|
|
if key == m.key_values.data[kv_index].key {
|
|
return true
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
return false
|
|
}
|
|
|
|
pub fn (m mut map) delete(key string) {
|
|
hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
|
|
mut index := hash & m.cap
|
|
mut meta := u32(((hash>>m.shift) & hash_mask) | probe_inc)
|
|
for meta < m.metas[index] {
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
// Perform backwards shifting
|
|
for meta == m.metas[index] {
|
|
kv_index := m.metas[index + 1]
|
|
if key == m.key_values.data[kv_index].key {
|
|
C.memset(&m.key_values.data[kv_index], 0, sizeof(KeyValue))
|
|
mut old_index := index
|
|
index += 2
|
|
mut cur_meta := m.metas[index]
|
|
mut cur_index := m.metas[index + 1]
|
|
for (cur_meta >> hashbits) > 1 {
|
|
m.metas[old_index] = cur_meta - probe_inc
|
|
m.metas[old_index + 1] = cur_index
|
|
old_index = index
|
|
index += 2
|
|
cur_meta = m.metas[index]
|
|
cur_index = m.metas[index + 1]
|
|
}
|
|
m.metas[old_index] = 0
|
|
m.size--
|
|
if m.cap == 30 {return}
|
|
if (f32(m.size << 1) / f32(m.cap)) < min_load_factor {
|
|
m.shrink()
|
|
}
|
|
return
|
|
}
|
|
index += 2
|
|
meta += probe_inc
|
|
}
|
|
}
|
|
|
|
pub fn (m &map) keys() []string {
|
|
mut keys := [''].repeat(m.size)
|
|
if m.value_bytes == 0 {
|
|
return keys
|
|
}
|
|
mut j := 0
|
|
for i := u32(0); i < m.key_values.size; i++ {
|
|
if m.key_values.data[i].key.str == 0 {
|
|
continue
|
|
}
|
|
keys[j] = m.key_values.data[i].key
|
|
j++
|
|
}
|
|
return keys
|
|
}
|
|
|
|
pub fn (m map) free() {
|
|
unsafe {
|
|
free(m.metas)
|
|
free(m.key_values.data)
|
|
}
|
|
}
|
|
|
|
pub fn (m map) print() {
|
|
println('TODO')
|
|
}
|
|
|
|
pub fn (m map_string) str() string {
|
|
if m.size == 0 {
|
|
return '{}'
|
|
}
|
|
mut sb := strings.new_builder(50)
|
|
sb.writeln('{')
|
|
for key, val in m {
|
|
sb.writeln(' "$key" => "$val"')
|
|
}
|
|
sb.writeln('}')
|
|
return sb.str()
|
|
}
|