2020-02-03 07:00:36 +03:00
|
|
|
// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
|
2019-06-23 05:21:30 +03:00
|
|
|
// Use of this source code is governed by an MIT license
|
|
|
|
// that can be found in the LICENSE file.
|
2020-01-24 22:13:59 +03:00
|
|
|
|
2019-06-22 21:20:28 +03:00
|
|
|
module builtin
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
import (
|
|
|
|
strings
|
|
|
|
hash.wyhash
|
|
|
|
)
|
2020-01-24 22:13:59 +03:00
|
|
|
|
|
|
|
// Tuning constants for the hashmap. Every bucket has a u32 of metadata
// ("probe_hash"): the upper 8 bits hold the probe count and the lower
// 24 bits hold bits taken from the key's hash.
const (
	// Number of bits from the hash stored for each entry
	hashbits = 24
	// Number of bits from the hash stored for rehashing
	cached_hashbits = 16
	// Initial log-number of buckets in the hashtable
	init_log_capicity = 5
	// Initial number of buckets in the hashtable
	init_capicity = 1<<init_log_capicity
	// Initial load-factor
	init_load_factor = 0.8
	// Initial range cap (highest bucket index, also used as the index bitmask)
	init_range_cap = init_capicity - 1
	// Bitmask to select all the hashbits
	hash_mask = u32(0x00FFFFFF)
	// Used for incrementing the probe-count
	probe_inc = u32(0x01000000)
	// Bitmask for maximum probe count
	max_probe = u32(0xFF000000)
)
|
|
|
|
|
2019-10-24 12:47:21 +03:00
|
|
|
// map is a string-keyed hashmap using open addressing with Robin Hood
// probing. Values are stored as opaque byte buffers of `value_bytes` bytes.
pub struct map {
	// Byte size of value
	value_bytes int
mut:
	// Highest bucket index in the hashtable (number of buckets - 1);
	// also used as the bitmask that maps a hash to a bucket index
	range_cap u32
	// Number of cached hashbits left for rehashing
	window byte
	// Used for right-shifting out used hashbits
	shift byte
	// Pointer to Key-value memory. This is also the start of the single
	// allocation that holds the probe_hash table behind it.
	key_values &KeyValue
	// Pointer to probe_hash memory. Each Key-value has a
	// corresponding probe_hash-DWORD. Upper-bits are the
	// probe-count and lower-bits are bits from the hash.
	// A DWORD of 0 marks an empty bucket.
	probe_hash &u32
	// Measure that decides when to increase the capacity
	load_factor f32
pub mut:
	// Number of key-values currently in the hashmap
	size int
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// KeyValue is one bucket of the key-value table.
struct KeyValue {
	// Key the entry was inserted under
	key string
mut:
	// Pointer to the entry's value bytes; set() allocates this buffer
	// with malloc(m.value_bytes) and copies the caller's value into it
	value voidptr
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// new_map returns an empty map whose values occupy `value_bytes` bytes each.
// The table always starts with init_capicity buckets; `n` is not used to
// size it.
fn new_map(n, value_bytes int) map {
	kv_region_bytes := sizeof(KeyValue) * init_capicity
	meta_region_bytes := sizeof(u32) * init_capicity
	// One zeroed allocation backs both tables: the key-values come first
	// and the probe_hash DWORDs follow directly behind them.
	block := calloc(kv_region_bytes + meta_region_bytes)
	return map{
		value_bytes: value_bytes
		range_cap: init_range_cap
		shift: init_log_capicity
		window: cached_hashbits
		key_values: &KeyValue(block)
		probe_hash: &u32(block + kv_region_bytes)
		load_factor: init_load_factor
		size: 0
	}
}
|
|
|
|
|
2020-01-24 22:13:59 +03:00
|
|
|
// new_map_init builds a map from `n` keys and `n` values.
// `values` is read as a packed array: the i-th value starts at byte
// offset i * value_bytes.
fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
	mut result := new_map(n, value_bytes)
	for i := 0; i < n; i++ {
		result.set(keys[i], values + i * value_bytes)
	}
	return result
}
|
2019-08-03 10:44:08 +03:00
|
|
|
|
2020-01-24 22:13:59 +03:00
|
|
|
// set stores a copy of the `m.value_bytes` bytes at `value` under `key`.
// If `key` is already present its stored bytes are overwritten in place;
// otherwise a new entry is inserted using Robin Hood probing and m.size
// is incremented.
fn (m mut map) set(key string, value voidptr) {
	// load_factor can be adjusted.
	if (f32(m.size) / f32(m.range_cap)) > m.load_factor {
		m.expand()
	}
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	// Metadata for the first probe: cached hash bits plus a probe count of 1
	mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
	mut index := hash & m.range_cap
	// While probe count is less
	for probe_hash < m.probe_hash[index] {
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	// While we might have a match
	for probe_hash == m.probe_hash[index] {
		if key == m.key_values[index].key {
			C.memcpy(m.key_values[index].value, value, m.value_bytes)
			return
		}
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	// Match is not possible anymore.
	// Probe until an empty index is found.
	// Swap when probe count is higher/richer (Robin Hood).
	mut current_kv := KeyValue{
		key:key
		value:malloc(m.value_bytes)
	}
	C.memcpy(current_kv.value, value, m.value_bytes)
	for m.probe_hash[index] != 0 {
		if probe_hash > m.probe_hash[index] {
			// Swap probe_hash
			tmp_probe_hash := m.probe_hash[index]
			m.probe_hash[index] = probe_hash
			probe_hash = tmp_probe_hash
			// Swap KeyValue
			tmp_kv := m.key_values[index]
			m.key_values[index] = current_kv
			current_kv = tmp_kv
		}
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	// Should almost never happen
	if (probe_hash & max_probe) == max_probe {
		m.expand()
		m.set(current_kv.key, current_kv.value)
		// The recursive set() copies the bytes into storage of its own,
		// so the buffer still held by current_kv is no longer referenced
		// by the table and must be released here.
		unsafe{
			free(current_kv.value)
		}
		return
	}
	m.probe_hash[index] = probe_hash
	m.key_values[index] = current_kv
	m.size++
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// expand doubles the number of buckets and redistributes every entry.
// While cached hash bits remain (m.window != 0) the entries are moved with
// cached_rehash(); once they are used up, the shift is advanced by
// cached_hashbits, a full rehash() is done and the window is refilled.
fn (m mut map) expand() {
	previous_cap := m.range_cap
	// range_cap is (bucket count - 1), so this doubles the bucket count
	m.range_cap = ((previous_cap + 1)<<1) - 1
	if m.window != 0 {
		m.cached_rehash(previous_cap)
	}
	else {
		// no hashbits are left
		m.shift += cached_hashbits
		m.rehash(previous_cap)
		m.window = cached_hashbits
	}
	m.window--
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// rehash moves every entry into freshly allocated tables sized for the
// current m.range_cap, recomputing each key's hash from scratch.
// `old_range_cap` is the highest bucket index of the tables being replaced.
fn (m mut map) rehash(old_range_cap u32) {
	probe_hash_bytes := sizeof(u32) * (m.range_cap + 1)
	key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1)
	// One zeroed allocation: key-values first, probe_hash DWORDs behind them
	memory := calloc(probe_hash_bytes + key_value_bytes)
	mut new_key_values := &KeyValue(memory)
	mut new_probe_hash := &u32(memory + key_value_bytes)
	for i := u32(0); i < old_range_cap + 1; i++ {
		// A probe_hash of 0 marks an empty bucket
		if m.probe_hash[i] != 0 {
			mut kv := m.key_values[i]
			hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0)
			mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
			mut index := hash & m.range_cap
			// While probe count is less
			for probe_hash < new_probe_hash[index] {
				index = (index + 1) & m.range_cap
				probe_hash += probe_inc
			}
			// Probe until an empty index is found.
			// Swap when probe count is higher/richer (Robin Hood).
			for new_probe_hash[index] != 0 {
				if probe_hash > new_probe_hash[index] {
					// Swap probe_hash
					tmp_probe_hash := new_probe_hash[index]
					new_probe_hash[index] = probe_hash
					probe_hash = tmp_probe_hash
					// Swap KeyValue
					tmp_kv := new_key_values[index]
					new_key_values[index] = kv
					kv = tmp_kv
				}
				index = (index + 1) & m.range_cap
				probe_hash += probe_inc
			}
			// Should almost never happen
			// NOTE(review): at this point m.key_values/m.probe_hash still
			// refer to the old tables while m.range_cap was already enlarged
			// by the calling expand(), so the nested expand() would scan the
			// old tables using the enlarged cap. The early return also skips
			// the free/assignment below, so `memory` and the entries already
			// moved into it are dropped. Looks unsafe - confirm and rework.
			if (probe_hash & max_probe) == max_probe {
				m.expand()
				m.set(kv.key, kv.value)
				return
			}
			new_probe_hash[index] = probe_hash
			new_key_values[index] = kv
		}
	}
	// key_values is the start of the old combined allocation, so this
	// releases the old probe_hash table as well
	unsafe{
		free(m.key_values)
	}
	m.key_values = new_key_values
	m.probe_hash = new_probe_hash
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// cached_rehash moves every entry into freshly allocated tables sized for
// the current m.range_cap without hashing the keys again: the needed hash
// bits are rebuilt from each entry's old position and its cached hash bits.
// `old_range_cap` is the highest bucket index of the tables being replaced.
fn (m mut map) cached_rehash(old_range_cap u32) {
	probe_hash_bytes := sizeof(u32) * (m.range_cap + 1)
	key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1)
	// One zeroed allocation: key-values first, probe_hash DWORDs behind them
	memory := calloc(probe_hash_bytes + key_value_bytes)
	mut new_probe_hash := &u32(memory + key_value_bytes)
	mut new_key_values := &KeyValue(memory)
	for i := u32(0); i < old_range_cap + 1; i++ {
		// A probe_hash of 0 marks an empty bucket
		if m.probe_hash[i] != 0 {
			mut kv := m.key_values[i]
			mut probe_hash := m.probe_hash[i]
			// Step back by (probe count - 1) to get the bucket the entry
			// hashed to in the old table; m.range_cap>>1 is the old
			// range cap because expand() doubled the bucket count
			original := u64(i - ((probe_hash>>hashbits) - 1)) & (m.range_cap>>1)
			// Rebuild the low hash bits: old home index plus the cached bits
			hash := original | (probe_hash<<m.shift)
			// Keep the cached hash bits and restart the probe count at 1
			probe_hash = (probe_hash & hash_mask) | probe_inc
			mut index := hash & m.range_cap
			// While probe count is less
			for probe_hash < new_probe_hash[index] {
				index = (index + 1) & m.range_cap
				probe_hash += probe_inc
			}
			// Probe until an empty index is found.
			// Swap when probe count is higher/richer (Robin Hood).
			for new_probe_hash[index] != 0 {
				if probe_hash > new_probe_hash[index] {
					// Swap probe_hash
					tmp_probe_hash := new_probe_hash[index]
					new_probe_hash[index] = probe_hash
					probe_hash = tmp_probe_hash
					// Swap KeyValue
					tmp_kv := new_key_values[index]
					new_key_values[index] = kv
					kv = tmp_kv
				}
				index = (index + 1) & m.range_cap
				probe_hash += probe_inc
			}
			// Should almost never happen
			// NOTE(review): at this point m.key_values/m.probe_hash still
			// refer to the old tables while m.range_cap was already enlarged
			// by the calling expand(), so the nested expand() would scan the
			// old tables using the enlarged cap. The early return also skips
			// the free/assignment below, so `memory` and the entries already
			// moved into it are dropped. Looks unsafe - confirm and rework.
			if (probe_hash & max_probe) == max_probe {
				m.expand()
				m.set(kv.key, kv.value)
				return
			}
			new_probe_hash[index] = probe_hash
			new_key_values[index] = kv
		}
	}
	// key_values is the start of the old combined allocation, so this
	// releases the old probe_hash table as well
	unsafe{
		free(m.key_values)
	}
	m.key_values = new_key_values
	m.probe_hash = new_probe_hash
}
|
2019-07-30 22:27:31 +03:00
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// delete removes `key` and its value from the map. Keys that are not
// present are ignored. The following entries of the probe sequence are
// shifted one bucket back so lookups never have to cross a gap.
pub fn (m mut map) delete(key string) {
	// A zero-valued map has no tables to search; exists() and keys()
	// apply the same guard.
	if m.value_bytes == 0 {
		return
	}
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut index := hash & m.range_cap
	mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
	// While probe count is less
	for probe_hash < m.probe_hash[index] {
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	// While we might have a match
	for probe_hash == m.probe_hash[index] {
		if key == m.key_values[index].key {
			// Release the value buffer that set() allocated for this entry;
			// the slot is overwritten or marked empty below, so nothing
			// refers to the buffer afterwards.
			unsafe{
				free(m.key_values[index].value)
			}
			// Perform backwards shifting
			mut old_index := index
			index = (index + 1) & m.range_cap
			mut current_probe_hash := m.probe_hash[index]
			// Entries with a probe count above 1 sit behind their home
			// bucket and can move one step closer to it
			for (current_probe_hash>>hashbits) > 1 {
				m.probe_hash[old_index] = current_probe_hash - probe_inc
				m.key_values[old_index] = m.key_values[index]
				old_index = index
				index = (index + 1) & m.range_cap
				current_probe_hash = m.probe_hash[index]
			}
			// The last vacated bucket becomes empty
			m.probe_hash[old_index] = 0
			m.size--
			return
		}
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// get looks up `key`. When found, the entry's `m.value_bytes` value bytes
// are copied to `out` and true is returned; otherwise `out` is left
// untouched and false is returned.
fn (m map) get(key string, out voidptr) bool {
	// A zero-valued map has no tables to search; exists() and keys()
	// apply the same guard.
	if m.value_bytes == 0 {
		return false
	}
	hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut index := hash & m.range_cap
	mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc)
	// While probe count is less
	for probe_hash < m.probe_hash[index] {
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	// While we might have a match
	for probe_hash == m.probe_hash[index] {
		if key == m.key_values[index].key {
			C.memcpy(out, m.key_values[index].value, m.value_bytes)
			return true
		}
		index = (index + 1) & m.range_cap
		probe_hash += probe_inc
	}
	return false
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// exists reports whether `key` is stored in the map.
// A map whose value_bytes is 0 is treated as empty.
fn (m map) exists(key string) bool {
	if m.value_bytes == 0 {
		return false
	}
	key_hash := wyhash.wyhash_c(key.str, u64(key.len), 0)
	mut slot := key_hash & m.range_cap
	// Expected metadata for the first probe: cached hash bits, probe count 1
	mut meta := u32(((key_hash>>m.shift) & hash_mask) | probe_inc)
	// Skip buckets whose metadata is larger than ours
	for meta < m.probe_hash[slot] {
		slot = (slot + 1) & m.range_cap
		meta += probe_inc
	}
	// Only buckets with identical metadata can hold the key
	for meta == m.probe_hash[slot] {
		if key == m.key_values[slot].key {
			return true
		}
		slot = (slot + 1) & m.range_cap
		meta += probe_inc
	}
	return false
}
|
|
|
|
|
2020-01-24 22:13:59 +03:00
|
|
|
// keys returns the keys of all stored entries, in bucket order.
pub fn (m &map) keys() []string {
	mut result := [''].repeat(m.size)
	if m.value_bytes == 0 {
		return result
	}
	mut filled := 0
	for slot := u32(0); slot < m.range_cap + 1; slot++ {
		// A probe_hash of 0 marks an empty bucket
		if m.probe_hash[slot] == 0 {
			continue
		}
		result[filled] = m.key_values[slot].key
		filled++
	}
	return result
}
|
|
|
|
|
2020-02-20 22:04:06 +03:00
|
|
|
// set_load_factor changes the fill ratio at which set() grows the table.
// Values above 1.0 are stored as 1.0 and values below 0.1 as 0.1.
pub fn (m mut map) set_load_factor(new_load_factor f32) {
	if new_load_factor > 1.0 {
		m.load_factor = 1.0
		return
	}
	if new_load_factor < 0.1 {
		m.load_factor = 0.1
		return
	}
	m.load_factor = new_load_factor
}
|
|
|
|
|
|
|
|
// free releases the memory owned by the map: the value buffer of every
// stored entry and the allocation holding the key-value and probe_hash
// tables. The map must not be used afterwards.
pub fn (m mut map) free() {
	// A zero-valued map has no tables to walk (same check as exists()
	// and keys()); the table pointer is still passed to free() below.
	if m.value_bytes != 0 {
		for i := u32(0); i < m.range_cap + 1; i++ {
			// Occupied buckets own a value buffer allocated by set()
			if m.probe_hash[i] != 0 {
				unsafe{
					free(m.key_values[i].value)
				}
			}
		}
	}
	// key_values is the start of the combined allocation, so this
	// releases the probe_hash table as well
	unsafe{
		free(m.key_values)
	}
}
|
|
|
|
|
2019-06-27 14:14:59 +03:00
|
|
|
// print is a placeholder: it does not output the map's contents yet and
// only prints the literal text 'TODO'.
pub fn (m map) print() {
	println('TODO')
}
|
|
|
|
|
2019-06-27 14:14:59 +03:00
|
|
|
// str renders the map as text: '{}' when it is empty, otherwise an opening
// brace line, one `"key" => "value"` line per entry and a closing brace line.
pub fn (m map_string) str() string {
	if m.size == 0 {
		return '{}'
	}
	mut builder := strings.new_builder(50)
	builder.writeln('{')
	for key, val in m {
		builder.writeln(' "$key" => "$val"')
	}
	builder.writeln('}')
	return builder.str()
}
|