1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

hashmap: new load_factor and optimizations

This commit is contained in:
ka-weihe 2020-01-27 16:56:18 +01:00 committed by Alexander Medvednikov
parent bf6b206fca
commit 31899eac2a

View File

@ -6,29 +6,18 @@ module hashmap
// Sizing and probing parameters for Hashmap, followed by the FNV hash constants.
const ( const (
// Slot count of a freshly created map: 2 << 4 = 32.
initial_size = 2 << 4 initial_size = 2 << 4
// Highest valid slot index (31); stored in Hashmap.cap and used as the `&` mask that wraps indices.
initial_cap = initial_size - 1 initial_cap = initial_size - 1
// NOTE(review): load_factor is listed twice (0.5 here, 0.8 below) - presumably the value
// before and after a change; confirm that only one definition is live.
load_factor = 0.5
// Added to a slot's info word for every slot stepped past while probing (256 = 1 << 8),
// so the probe count lives above the low 8 bits.
probe_offset = u16(256) probe_offset = u16(256)
load_factor = 0.8
)
// hash-function should not be in this file
const (
// 1099511628211 and 14695981039346656037 are the published 64-bit FNV prime and offset basis.
// fnv1a64 seeds its running hash with fnv64_offset_basis.
fnv64_prime = 1099511628211 fnv64_prime = 1099511628211
fnv64_offset_basis = 14695981039346656037 fnv64_offset_basis = 14695981039346656037
// Published 32-bit FNV offset basis and prime; no use of them is visible in this excerpt.
fnv32_offset_basis = u32(2166136261) fnv32_offset_basis = u32(2166136261)
fnv32_prime = u32(16777619) fnv32_prime = u32(16777619)
) )
// Hashmap is an open-addressing table from string keys to int values.
// Slot metadata (info) and the stored entries (key_values) are kept in two parallel arrays.
pub struct Hashmap {
mut:
// One u16 word per slot; 0 marks an empty slot. A non-zero word combines a probe count
// (stepped by probe_offset) with the top byte of the key's 64-bit hash.
info &u16
// Entry held in each slot, indexed in step with info.
key_values &KeyValue
// Slot count minus one; used as the bit mask that wraps slot indices.
cap int
pub mut:
// Number of entries currently stored; set increments it when it places a new entry.
size int
}
// KeyValue is one stored entry: a string key together with its int value.
struct KeyValue {
// Declared before `mut:`, so the key cannot be reassigned on an existing entry.
key string
mut:
// Mutable so the value of an existing entry can be overwritten in place.
value int
}
[inline] [inline]
fn fnv1a64(data string) u64 { fn fnv1a64(data string) u64 {
mut hash := fnv64_offset_basis mut hash := fnv64_offset_basis
@ -38,11 +27,28 @@ fn fnv1a64(data string) u64 {
return hash return hash
} }
// Hashmap is an open-addressing table from string keys to int values.
// Slot metadata (info) and the stored entries (key_values) are kept in two parallel arrays.
pub struct Hashmap {
mut:
// One u16 word per slot; 0 marks an empty slot. A non-zero word combines a probe count
// (stepped by probe_offset) with the top byte of the key's 64-bit hash.
info &u16
// Entry held in each slot, indexed in step with info.
key_values &KeyValue
// Slot count minus one; used as the bit mask that wraps slot indices.
cap int
pub mut:
// Fill-ratio threshold: set calls rehash once f32(size) / f32(cap) exceeds this value.
load_factor f32
// Number of entries currently stored; set increments it when it places a new entry.
size int
}
// KeyValue is one stored entry: a string key together with its int value.
struct KeyValue {
// Declared before `mut:`, so the key cannot be reassigned on an existing entry.
key string
mut:
// Mutable so the value of an existing entry can be overwritten in place.
value int
}
// new_hashmap returns an empty Hashmap with room for initial_size slots.
// Both backing arrays come from calloc, so every info word starts at 0 (the empty-slot marker).
pub fn new_hashmap() Hashmap { pub fn new_hashmap() Hashmap {
return Hashmap{ return Hashmap{
info: &u16(calloc(sizeof(u16) * initial_size)) info: &u16(calloc(sizeof(u16) * initial_size))
key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size)) key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size))
// cap is the index mask (initial_size - 1), not the slot count.
cap: initial_cap cap: initial_cap
// NOTE(review): the literal 0.8 repeats the value of the load_factor constant instead of referencing it.
load_factor: 0.8
size: 0 size: 0
} }
} }
@ -51,7 +57,7 @@ pub fn (h mut Hashmap) set(key string, value int) {
// The load factor is 0.5. // The load factor is 0.5.
// It will be adjustable in the future and with // It will be adjustable in the future and with
// a higher default setting to lower memory usage. // a higher default setting to lower memory usage.
if (h.size<<1) == (h.cap - 1) { if (f32(h.size) / f32(h.cap)) > h.load_factor {
h.rehash() h.rehash()
} }
// Hash-function will be swapped for wyhash // Hash-function will be swapped for wyhash
@ -75,18 +81,17 @@ pub fn (h mut Hashmap) set(key string, value int) {
// Match is not possible anymore. // Match is not possible anymore.
// Probe until an empty index is found. // Probe until an empty index is found.
// Swap when probe count is higher/richer (Robin Hood). // Swap when probe count is higher/richer (Robin Hood).
mut current_key := key mut current_kv := KeyValue{key, value}
mut current_value := value
for h.info[index] != 0 { for h.info[index] != 0 {
if info > h.info[index] { if info > h.info[index] {
tmp_kv := h.key_values[index] // Swap info word
tmp_info := h.info[index] tmp_info := h.info[index]
h.key_values[index] = KeyValue{
current_key,current_value}
h.info[index] = info h.info[index] = info
current_key = tmp_kv.key
current_value = tmp_kv.value
info = tmp_info info = tmp_info
// Swap KeyValue
tmp_kv := h.key_values[index]
h.key_values[index] = current_kv
current_kv = tmp_kv
} }
index = (index + 1) & h.cap index = (index + 1) & h.cap
info += probe_offset info += probe_offset
@ -94,12 +99,11 @@ pub fn (h mut Hashmap) set(key string, value int) {
// Should almost never happen // Should almost never happen
if (info & 0xFF00) == 0xFF00 { if (info & 0xFF00) == 0xFF00 {
h.rehash() h.rehash()
h.set(current_key, current_value) h.set(current_kv.key, current_kv.value)
return return
} }
h.info[index] = info h.info[index] = info
h.key_values[index] = KeyValue{ h.key_values[index] = current_kv
current_key,current_value}
h.size++ h.size++
} }
@ -110,9 +114,8 @@ fn (h mut Hashmap) rehash() {
mut new_info := &u16(calloc(sizeof(u16) * (h.cap + 1))) mut new_info := &u16(calloc(sizeof(u16) * (h.cap + 1)))
for i in 0 .. (old_cap + 1) { for i in 0 .. (old_cap + 1) {
if h.info[i] != 0 { if h.info[i] != 0 {
key := h.key_values[i].key mut kv := h.key_values[i]
value := h.key_values[i].value hash := fnv1a64(kv.key)
hash := fnv1a64(key)
mut info := u16((hash >> 56) | probe_offset) mut info := u16((hash >> 56) | probe_offset)
mut index := hash & h.cap mut index := hash & h.cap
// While probe count is less // While probe count is less
@ -120,30 +123,18 @@ fn (h mut Hashmap) rehash() {
index = (index + 1) & h.cap index = (index + 1) & h.cap
info += probe_offset info += probe_offset
} }
// While we might have a match
for info == new_info[index] {
if key == new_key_values[index].key {
new_key_values[index].value = value
return
}
index = (index + 1) & h.cap
info += probe_offset
}
// Match is not possible anymore.
// Probe until an empty index is found. // Probe until an empty index is found.
// Swap when probe count is higher/richer (Robin Hood). // Swap when probe count is higher/richer (Robin Hood).
mut current_key := key
mut current_value := value
for new_info[index] != 0 { for new_info[index] != 0 {
if info > new_info[index] { if info > new_info[index] {
tmp_kv := new_key_values[index] // Swap info word
tmp_info := new_info[index] tmp_info := new_info[index]
new_key_values[index] = KeyValue{
current_key,current_value}
new_info[index] = info new_info[index] = info
current_key = tmp_kv.key
current_value = tmp_kv.value
info = tmp_info info = tmp_info
// Swap KeyValue
tmp_kv := new_key_values[index]
new_key_values[index] = kv
kv = tmp_kv
} }
index = (index + 1) & h.cap index = (index + 1) & h.cap
info += probe_offset info += probe_offset
@ -151,12 +142,11 @@ fn (h mut Hashmap) rehash() {
// Should almost never happen // Should almost never happen
if (info & 0xFF00) == 0xFF00 { if (info & 0xFF00) == 0xFF00 {
h.rehash() h.rehash()
h.set(current_key, current_value) h.set(kv.key, kv.value)
return return
} }
new_info[index] = info new_info[index] = info
new_key_values[index] = KeyValue{ new_key_values[index] = kv
current_key,current_value}
} }
} }
h.key_values = new_key_values h.key_values = new_key_values
@ -230,8 +220,7 @@ pub fn (h Hashmap) exists(key string) bool {
} }
pub fn (h Hashmap) keys() []string { pub fn (h Hashmap) keys() []string {
size := h.size mut keys := [''].repeat(h.size)
mut keys := [''].repeat(size)
mut j := 0 mut j := 0
for i in 0 .. (h.cap + 1) { for i in 0 .. (h.cap + 1) {
if h.info[i] != 0 { if h.info[i] != 0 {