2019-10-01 02:14:12 +03:00
|
|
|
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
|
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
2019-12-22 00:38:43 +03:00
|
|
|
module hashmap
|
2019-10-01 02:14:12 +03:00
|
|
|
|
2020-01-29 06:06:05 +03:00
|
|
|
import hash.wyhash
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
const (
	// log2 of the number of slots a fresh map starts with; also the initial `shift`.
	log_size = 5
	// Width, in bits, of the hash fragment cached in the low part of each info word.
	n_hashbits = 24
	// Starting value of `Hashmap.window` and the amount `shift` grows by in rehash().
	window_size = 16
	// Number of slots allocated by new_hashmap().
	initial_size = 1 << log_size
	// Index mask of a fresh map (slot count minus one).
	initial_cap = initial_size - 1
	// size/cap ratio above which set() grows the table first.
	default_load_factor = 0.8
	// Selects the cached hash bits (low 24 bits) of an info word.
	hashbit_mask = u32(0x00FFFFFF)
	// One probe step in the upper byte of an info word; a freshly hashed entry starts at one step.
	probe_offset = u32(0x01000000)
	// Upper byte fully set: the probe count is saturated and the table must grow.
	max_probe = u32(0xFF000000)
)
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// Hashmap is an open-addressing hash table from string keys to int values.
// Slot metadata (`info`) and slot contents (`key_values`) live in two
// parallel, manually allocated arrays of `cap + 1` elements each.
pub struct Hashmap {
mut:
	// Index mask: number of slots minus one.
	cap u32
	// Right shift applied to a key's hash before its cached bits are taken.
	shift byte
	// Counted down once per rehash; when it reaches zero `shift` is advanced
	// and every hash is recomputed from its key.
	window byte
	// One word per slot: probe count in the upper 8 bits, cached hash bits in
	// the lower 24. A value of 0 marks an empty slot.
	info &u32
	// Key/value stored in each slot; only meaningful where `info` is non-zero.
	key_values &KeyValue
pub mut:
	// Fill ratio (size / cap) above which set() triggers a rehash.
	load_factor f32
	// Number of entries currently stored.
	size int
}
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// KeyValue is the payload of one occupied slot.
// Field order matters: set() builds it positionally as KeyValue{key, value}.
struct KeyValue {
	// Key of the entry; never modified once the entry is stored.
	key string
mut:
	// Value associated with `key`; overwritten in place by set().
	value int
}
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// new_hashmap returns an empty map with `initial_size` zeroed slots and the
// default load factor.
pub fn new_hashmap() Hashmap {
	// calloc zero-fills, so every info word starts as 0 (= empty slot).
	slot_info := &u32(calloc(sizeof(u32) * initial_size))
	slot_data := &KeyValue(calloc(sizeof(KeyValue) * initial_size))
	return Hashmap{
		cap: initial_cap
		shift: log_size
		window: window_size
		info: slot_info
		key_values: slot_data
		load_factor: default_load_factor
		size: 0
	}
}
|
2019-12-21 09:59:12 +03:00
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// set stores `value` under `key`, overwriting the previous value if `key` is
// already present. The table is grown first when size/cap exceeds
// `load_factor`, and again if an entry's probe count would saturate.
pub fn (h mut Hashmap) set(key string, value int) {
	// load_factor can be adjusted.
	if (f32(h.size) / f32(h.cap)) > h.load_factor {
		h.rehash()
	}
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	// Info word: probe count (starting at 1) in the upper byte, cached hash
	// bits in the lower 24 bits.
	mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	mut index := hash & h.cap
	// While probe count is less
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// While we might have a match
	for info == h.info[index] {
		if key == h.key_values[index].key {
			h.key_values[index].value = value
			return
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Match is not possible anymore.
	// Probe until an empty index is found.
	// Swap when probe count is higher/richer (Robin Hood).
	mut current_kv := KeyValue{key, value}
	// Stop as soon as the probe count saturates: an info word carrying
	// max_probe must never be stored, and one more `+= probe_offset` would
	// wrap the u32 around to a tiny (possibly zero = "empty") value.
	for h.info[index] != 0 && (info & max_probe) != max_probe {
		if info > h.info[index] {
			// Swap info word
			tmp_info := h.info[index]
			h.info[index] = info
			info = tmp_info
			// Swap KeyValue
			tmp_kv := h.key_values[index]
			h.key_values[index] = current_kv
			current_kv = tmp_kv
		}
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Should almost never happen
	if (info & max_probe) == max_probe {
		// `current_kv` is whichever entry is still homeless (the new one, or
		// one it displaced). Every other entry is in the table, so growing
		// and re-inserting it adds exactly one to `size`.
		h.rehash()
		h.set(current_kv.key, current_kv.value)
		return
	}
	h.info[index] = info
	h.key_values[index] = current_kv
	h.size++
}
|
2019-10-01 02:14:12 +03:00
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// rehash doubles the number of slots and re-inserts every entry into freshly
// allocated arrays. The old arrays are only read, and are freed only once a
// complete new table has been built. If an entry's probe count saturates
// while building, the partial table is thrown away and the capacity is
// doubled again.
fn (h mut Hashmap) rehash() {
	old_cap := h.cap
	mut first_attempt := true
	for {
		h.window--
		// check if any hashbits are left
		if h.window == 0 {
			h.shift += window_size
		}
		// double the size of the hashmap
		h.cap = ((h.cap + 1) << 1) - 1
		// The bits cached in the old info words can be reused only when the
		// new capacity is exactly twice the old one and `shift` is unchanged.
		// On a retry, or once the window is exhausted, hash the key again.
		use_cached := first_attempt && h.window != 0
		mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (h.cap + 1)))
		mut new_info := &u32(calloc(sizeof(u32) * (h.cap + 1)))
		mut overflow := false
		for i in 0 .. (old_cap + 1) {
			if h.info[i] == 0 {
				continue
			}
			mut kv := h.key_values[i]
			mut hash := u64(0)
			mut info := u32(0)
			if use_cached {
				// Home slot in the old table = current slot minus the
				// distance probed (stored probe count minus one).
				original := u64(i - ((h.info[i] >> n_hashbits) - 1)) & old_cap
				hash = original | (h.info[i] << h.shift)
				info = (h.info[i] & hashbit_mask) | probe_offset
			}
			else {
				hash = wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
				info = u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
			}
			mut index := hash & h.cap
			// While probe count is less
			for info < new_info[index] {
				index = (index + 1) & h.cap
				info += probe_offset
			}
			// Probe until an empty index is found.
			// Swap when probe count is higher/richer (Robin Hood).
			// Stop once the probe count saturates so it can neither be
			// stored nor wrap around.
			for new_info[index] != 0 && (info & max_probe) != max_probe {
				if info > new_info[index] {
					// Swap info word
					tmp_info := new_info[index]
					new_info[index] = info
					info = tmp_info
					// Swap KeyValue
					tmp_kv := new_key_values[index]
					new_key_values[index] = kv
					kv = tmp_kv
				}
				index = (index + 1) & h.cap
				info += probe_offset
			}
			// Should almost never happen
			if (info & max_probe) == max_probe {
				overflow = true
				break
			}
			new_info[index] = info
			new_key_values[index] = kv
		}
		if h.window == 0 {
			h.window = window_size
		}
		if overflow {
			// Nothing from the old table has been lost: drop the partial
			// arrays and try again with twice the capacity.
			free(new_key_values)
			free(new_info)
			first_attempt = false
			continue
		}
		free(h.key_values)
		free(h.info)
		h.key_values = new_key_values
		h.info = new_info
		return
	}
}
|
2019-10-01 02:14:12 +03:00
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// delete removes `key` from the map; it does nothing when `key` is absent.
// The gap left behind is closed by moving the following displaced entries
// one slot back, so no tombstones are needed.
pub fn (h mut Hashmap) delete(key string) {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut index := hash & h.cap
	mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	// Skip slots whose info word is greater than ours.
	for info < h.info[index] {
		index = (index + 1) & h.cap
		info += probe_offset
	}
	// Candidates share both probe count and cached hash bits.
	for info == h.info[index] {
		if key != h.key_values[index].key {
			index = (index + 1) & h.cap
			info += probe_offset
			continue
		}
		// Perform backwards shifting
		mut hole := index
		mut next := (index + 1) & h.cap
		mut next_info := h.info[next]
		// A probe count above 1 means the entry is not in its home slot
		// and can move one step closer to it.
		for (next_info >> n_hashbits) > 1 {
			h.info[hole] = next_info - probe_offset
			h.key_values[hole] = h.key_values[next]
			hole = next
			next = (next + 1) & h.cap
			next_info = h.info[next]
		}
		h.info[hole] = 0
		h.size--
		return
	}
}
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// get returns the value stored under `key`, or 0 when `key` is not present.
pub fn (h Hashmap) get(key string) int {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut slot := hash & h.cap
	mut wanted := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	// Walk past slots whose info word is greater than the one we look for.
	for wanted < h.info[slot] {
		slot = (slot + 1) & h.cap
		wanted += probe_offset
	}
	// Only slots with an identical info word can hold the key.
	for wanted == h.info[slot] {
		if key != h.key_values[slot].key {
			slot = (slot + 1) & h.cap
			wanted += probe_offset
			continue
		}
		return h.key_values[slot].value
	}
	return 0
}
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// exists reports whether `key` is currently stored in the map.
pub fn (h Hashmap) exists(key string) bool {
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut slot := hash & h.cap
	mut wanted := u32(((hash >> h.shift) & hashbit_mask) | probe_offset)
	// Walk past slots whose info word is greater than the one we look for.
	for wanted < h.info[slot] {
		slot = (slot + 1) & h.cap
		wanted += probe_offset
	}
	// Only slots with an identical info word can hold the key.
	for wanted == h.info[slot] {
		if key != h.key_values[slot].key {
			slot = (slot + 1) & h.cap
			wanted += probe_offset
			continue
		}
		return true
	}
	return false
}
|
|
|
|
|
2020-01-24 22:13:17 +03:00
|
|
|
// keys returns all stored keys in slot order (not insertion order).
pub fn (h Hashmap) keys() []string {
	// Pre-size the result to the number of stored entries.
	mut keys := [''].repeat(h.size)
	mut filled := 0
	for i in 0 .. (h.cap + 1) {
		// An info word of 0 marks an empty slot.
		if h.info[i] == 0 {
			continue
		}
		keys[filled] = h.key_values[i].key
		filled++
	}
	return keys
}
|