From f413b2fa49045bac5195e9b7c12f2a43f0cab662 Mon Sep 17 00:00:00 2001 From: Alexander Medvednikov Date: Thu, 20 Feb 2020 17:58:13 +0100 Subject: [PATCH] Revert "map: use hashmap instead of b-tree" This reverts commit 34d926350b30c4577fc0ab5cacc10114960e05ed. --- cmd/tools/bench/hashmap.v | 112 +++++ vlib/builtin/hashmap/hashmap.v | 243 +++++++++++ vlib/builtin/hashmap/hashmap_test.v | 33 ++ vlib/builtin/map.v | 649 ++++++++++++++++------------ vlib/builtin/sorted_map.v | 416 ------------------ 5 files changed, 749 insertions(+), 704 deletions(-) create mode 100644 cmd/tools/bench/hashmap.v create mode 100644 vlib/builtin/hashmap/hashmap.v create mode 100644 vlib/builtin/hashmap/hashmap_test.v delete mode 100644 vlib/builtin/sorted_map.v diff --git a/cmd/tools/bench/hashmap.v b/cmd/tools/bench/hashmap.v new file mode 100644 index 0000000000..ba9070aa6e --- /dev/null +++ b/cmd/tools/bench/hashmap.v @@ -0,0 +1,112 @@ +import rand +import time +import builtin.hashmap + +fn hashmap_set_bench(arr []string, repeat int) { + start_time := time.ticks() + for _ in 0..repeat { + mut b := hashmap.new_hashmap() + for x in arr { + b.set(x, 1) + } + } + end_time := time.ticks() - start_time + println("* hashmap_set: ${end_time} ms") +} + +fn map_set_bench(arr []string, repeat int) { + start_time := time.ticks() + for _ in 0..repeat { + mut b := map[string]int + for x in arr { + b[x] = 1 + } + } + end_time := time.ticks() - start_time + println("* map_set: ${end_time} ms") +} + +fn hashmap_get_bench(arr []string, repeat int) { + mut b := hashmap.new_hashmap() + for x in arr { + b.set(x, 1) + } + start_time := time.ticks() + for _ in 0..repeat { + for x in arr { + b.get(x) + } + } + end_time := time.ticks() - start_time + println("* hashmap_get: ${end_time} ms") +} + +fn map_get_bench(arr []string, repeat int) { + mut b := map[string]int + for x in arr { + b[x] = 1 + } + start_time := time.ticks() + for _ in 0..repeat { + for x in arr { + b[x] + } + } + end_time := time.ticks() - start_time + println("* map_get: ${end_time} ms") +} + +fn benchmark_many_keys() { + key_len := 30 + repeat := 1 + for i := 2048; i <= 10000000; i = i * 2 { + mut arr := []string + for _ in 0..i { + mut buf := []byte + for j in 0..key_len { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + arr << s + } + println("$arr.len keys of length $key_len") + // Uncomment the benchmark you would like to benchmark + // Run one or two at a time while memory leaks is a thing + hashmap_get_bench(arr, repeat) + map_get_bench(arr, repeat) + // hashmap_set_bench(arr, repeat) + // map_set_bench(arr, repeat) + println('') + } +} + +fn benchmark_few_keys() { + key_len := 30 + repeat := 10000 + println("Benchmarks are repeated $repeat times") + for i := 16; i <= 2048; i = i * 2 { + mut arr := []string + for _ in 0..i { + mut buf := []byte + for j in 0..key_len { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + arr << s + } + println("$arr.len keys of length $key_len") + // Uncomment the benchmark you would like to benchmark + // Run one or two at a time while memory leaks is a thing + hashmap_get_bench(arr, repeat) + map_get_bench(arr, repeat) + // hashmap_set_bench(arr, repeat) + // map_set_bench(arr, repeat) + println('') + } +} + +fn main() { + // Uncomment below to benchmark on many keys + // benchmark_many_keys() + benchmark_few_keys() +} \ No newline at end of file diff --git a/vlib/builtin/hashmap/hashmap.v b/vlib/builtin/hashmap/hashmap.v new file mode 100644 index 0000000000..178e6f92a9 --- /dev/null +++ b/vlib/builtin/hashmap/hashmap.v @@ -0,0 +1,243 @@ +// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module hashmap + +import hash.wyhash + +const ( + log_size = 5 + n_hashbits = 24 + window_size = 16 + initial_size = 1 << log_size + initial_cap = initial_size - 1 + default_load_factor = 0.8 + hashbit_mask = u32(0xFFFFFF) + probe_offset = u32(0x1000000) + max_probe = u32(0xFF000000) +) + +pub struct Hashmap { +mut: + cap u32 + shift byte + window byte + info &u32 + key_values &KeyValue +pub mut: + load_factor f32 + size int +} + +struct KeyValue { + key string +mut: + value int +} + +pub fn new_hashmap() Hashmap { + return Hashmap{ + cap: initial_cap + shift: log_size + window: window_size + info: &u32(calloc(sizeof(u32) * initial_size)) + key_values: &KeyValue(calloc(sizeof(KeyValue) * initial_size)) + load_factor: default_load_factor + size: 0 + } +} + +pub fn (h mut Hashmap) set(key string, value int) { + // load_factor can be adjusted. + if (f32(h.size) / f32(h.cap)) > h.load_factor { + h.rehash() + } + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + mut index := hash & h.cap + // While probe count is less + for info < h.info[index] { + index = (index + 1) & h.cap + info += probe_offset + } + // While we might have a match + for info == h.info[index] { + if key == h.key_values[index].key { + h.key_values[index].value = value + return + } + index = (index + 1) & h.cap + info += probe_offset + } + // Match is not possible anymore. + // Probe until an empty index is found. + // Swap when probe count is higher/richer (Robin Hood). + mut current_kv := KeyValue{key, value} + for h.info[index] != 0 { + if info > h.info[index] { + // Swap info word + tmp_info := h.info[index] + h.info[index] = info + info = tmp_info + // Swap KeyValue + tmp_kv := h.key_values[index] + h.key_values[index] = current_kv + current_kv = tmp_kv + } + index = (index + 1) & h.cap + info += probe_offset + } + // Should almost never happen + if (info & max_probe) == max_probe { + h.rehash() + h.set(current_kv.key, current_kv.value) + return + } + h.info[index] = info + h.key_values[index] = current_kv + h.size++ +} + +fn (h mut Hashmap) rehash() { + old_cap := h.cap + h.window-- + // check if any hashbits are left + if h.window == 0 { + h.shift += window_size + } + // double the size of the hashmap + h.cap = ((h.cap + 1) << 1) - 1 + mut new_key_values := &KeyValue(calloc(sizeof(KeyValue) * (h.cap + 1))) + mut new_info := &u32(calloc(sizeof(u32) * (h.cap + 1))) + for i in 0 .. (old_cap + 1) { + if h.info[i] != 0 { + mut kv := h.key_values[i] + mut hash := u64(0) + mut info := u32(0) + if h.window == 0 { + hash = wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) + info = u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + } + else { + original := u64(i - ((h.info[i] >> n_hashbits) - 1)) & (h.cap >> 1) + hash = original | (h.info[i] << h.shift) + info = (h.info[i] & hashbit_mask) | probe_offset + } + mut index := hash & h.cap + // While probe count is less + for info < new_info[index] { + index = (index + 1) & h.cap + info += probe_offset + } + // Probe until an empty index is found. + // Swap when probe count is higher/richer (Robin Hood). + for new_info[index] != 0 { + if info > new_info[index] { + // Swap info word + tmp_info := new_info[index] + new_info[index] = info + info = tmp_info + // Swap KeyValue + tmp_kv := new_key_values[index] + new_key_values[index] = kv + kv = tmp_kv + } + index = (index + 1) & h.cap + info += probe_offset + } + // Should almost never happen + if (info & max_probe) == max_probe { + h.rehash() + h.set(kv.key, kv.value) + return + } + new_info[index] = info + new_key_values[index] = kv + } + } + if h.window == 0 { + h.window = window_size + } + free(h.key_values) + free(h.info) + h.key_values = new_key_values + h.info = new_info +} + +pub fn (h mut Hashmap) delete(key string) { + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & h.cap + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + for info < h.info[index] { + index = (index + 1) & h.cap + info += probe_offset + } + // Perform backwards shifting + for info == h.info[index] { + if key == h.key_values[index].key { + mut old_index := index + index = (index + 1) & h.cap + mut current_info := h.info[index] + for (current_info >> n_hashbits) > 1 { + h.info[old_index] = current_info - probe_offset + h.key_values[old_index] = h.key_values[index] + old_index = index + index = (index + 1) & h.cap + current_info = h.info[index] + } + h.info[old_index] = 0 + h.size-- + return + } + index = (index + 1) & h.cap + info += probe_offset + } +} + +pub fn (h Hashmap) get(key string) int { + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & h.cap + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + for info < h.info[index] { + index = (index + 1) & h.cap + info += probe_offset + } + for info == h.info[index] { + if key == h.key_values[index].key { + return h.key_values[index].value + } + index = (index + 1) & h.cap + info += probe_offset + } + return 0 +} + +pub fn (h Hashmap) exists(key string) bool { + hash := wyhash.wyhash_c(key.str, u64(key.len), 0) + mut index := hash & h.cap + mut info := u32(((hash >> h.shift) & hashbit_mask) | probe_offset) + for info < h.info[index] { + index = (index + 1) & h.cap + info += probe_offset + } + for info == h.info[index] { + if key == h.key_values[index].key { + return true + } + index = (index + 1) & h.cap + info += probe_offset + } + return false +} + +pub fn (h Hashmap) keys() []string { + mut keys := [''].repeat(h.size) + mut j := 0 + for i in 0 .. (h.cap + 1) { + if h.info[i] != 0 { + keys[j] = h.key_values[i].key + j++ + } + } + return keys +} diff --git a/vlib/builtin/hashmap/hashmap_test.v b/vlib/builtin/hashmap/hashmap_test.v new file mode 100644 index 0000000000..cb0de6de86 --- /dev/null +++ b/vlib/builtin/hashmap/hashmap_test.v @@ -0,0 +1,33 @@ +module hashmap + +import rand + +fn test_random_strings() { + mut m := new_hashmap() + for i in 0..1000 { + mut buf := []byte + for j in 0..10 { + buf << byte(rand.next(int(`z`) - int(`a`)) + `a`) + } + s := string(buf) + //println(s) + m.set(s, i) + assert m.get(s) == i + } + m.set('foo', 12) + val := m.get('foo') + assert val == 12 +} + +fn test_large_hashmap() { + N := 300 * 1000 + mut nums := new_hashmap() + for i := 0; i < N; i++ { + key := i.str() + nums.set(key, i) + } + for i := 0; i < N; i++ { + key := i.str() + assert nums.get(key) == i + } +} diff --git a/vlib/builtin/map.v b/vlib/builtin/map.v index e36364d193..e50ca0f982 100644 --- a/vlib/builtin/map.v +++ b/vlib/builtin/map.v @@ -4,73 +4,44 @@ module builtin -import ( - strings - hash.wyhash -) +import strings + +// B-trees are balanced search trees with all leaves at +// the same level. B-trees are generally faster than +// binary search trees due to the better locality of +// reference, since multiple keys are stored in one node. + +// The number for `degree` has been picked through vigor- +// ous benchmarking but can be changed to any number > 1. +// `degree` determines the size of each node. const ( - // Number of bits from the hash stored for each entry - hashbits = 24 - // Number of bits from the hash stored for rehasing - cached_hashbits = 16 - // Initial log-number of buckets in the hashtable - init_log_capicity = 5 - // Initial number of buckets in the hashtable - init_capicity = 1< m.load_factor { - m.expand() - } - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) - mut index := hash & m.range_cap - // While probe count is less - for probe_hash < m.probe_hash[index] { - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // While we might have a match - for probe_hash == m.probe_hash[index] { - if key == m.key_values[index].key { - C.memcpy(m.key_values[index].value, value, m.value_bytes) - return - } - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // Match is not possible anymore. - // Probe until an empty index is found. - // Swap when probe count is higher/richer (Robin Hood). - mut current_kv := KeyValue{key, malloc(m.value_bytes)} - C.memcpy(current_kv.value, value, m.value_bytes) - for m.probe_hash[index] != 0 { - if probe_hash > m.probe_hash[index] { - // Swap probe_hash - tmp_probe_hash := m.probe_hash[index] - m.probe_hash[index] = probe_hash - probe_hash = tmp_probe_hash - // Swap KeyValue - tmp_kv := m.key_values[index] - m.key_values[index] = current_kv - current_kv = tmp_kv - } - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // Should almost never happen - if (probe_hash & max_probe) == max_probe { - m.expand() - m.set(current_kv.key, current_kv.value) - return - } - m.probe_hash[index] = probe_hash - m.key_values[index] = current_kv - m.size++ -} + mut node := m.root + mut child_index := 0 + mut parent := &mapnode(0) + for { -fn (m mut map) expand() { - old_range_cap := m.range_cap - // double the size of the hashmap - m.range_cap = ((m.range_cap + 1)<<1) - 1 - // check if no hashbits are left - if m.window == 0 { - m.shift += cached_hashbits - m.rehash(old_range_cap) - m.window = cached_hashbits - } - else { - m.cached_rehash(old_range_cap) - } - m.window-- -} - -fn (m mut map) rehash(old_range_cap u32) { - probe_hash_bytes := sizeof(u32) * (m.range_cap + 1) - key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1) - memory := calloc(probe_hash_bytes + key_value_bytes) - mut new_key_values := &KeyValue(memory) - mut new_probe_hash := &u32(memory + key_value_bytes) - for i in 0 .. (old_range_cap + 1) { - if m.probe_hash[i] != 0 { - mut kv := m.key_values[i] - hash := wyhash.wyhash_c(kv.key.str, u64(kv.key.len), 0) - mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) - mut index := hash & m.range_cap - // While probe count is less - for probe_hash < new_probe_hash[index] { - index = (index + 1) & m.range_cap - probe_hash += probe_inc + if node.size == max_size { + if isnil(parent) { + parent = new_node() + m.root = parent } - // Probe until an empty index is found. - // Swap when probe count is higher/richer (Robin Hood). - for new_probe_hash[index] != 0 { - if probe_hash > new_probe_hash[index] { - // Swap probe_hash - tmp_probe_hash := new_probe_hash[index] - new_probe_hash[index] = probe_hash - probe_hash = tmp_probe_hash - // Swap KeyValue - tmp_kv := new_key_values[index] - new_key_values[index] = kv - kv = tmp_kv - } - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // Should almost never happen - if (probe_hash & max_probe) == max_probe { - m.expand() - m.set(kv.key, kv.value) + parent.split_child(child_index, mut node) + if key == parent.keys[child_index] { + C.memcpy(parent.values[child_index], value, m.value_bytes) return } - new_probe_hash[index] = probe_hash - new_key_values[index] = kv + node = if key < parent.keys[child_index] { + &mapnode(parent.children[child_index]) + } else { + &mapnode(parent.children[child_index + 1]) + } } - } - unsafe{ - free(m.key_values) - } - m.key_values = new_key_values - m.probe_hash = new_probe_hash -} - -fn (m mut map) cached_rehash(old_range_cap u32) { - probe_hash_bytes := sizeof(u32) * (m.range_cap + 1) - key_value_bytes := sizeof(KeyValue) * (m.range_cap + 1) - memory := calloc(probe_hash_bytes + key_value_bytes) - mut new_probe_hash := &u32(memory + key_value_bytes) - mut new_key_values := &KeyValue(memory) - for i in 0 .. (old_range_cap + 1) { - if m.probe_hash[i] != 0 { - mut kv := m.key_values[i] - mut probe_hash := m.probe_hash[i] - original := u64(i - ((probe_hash>>hashbits) - 1)) & (m.range_cap>>1) - hash := original | (probe_hash< new_probe_hash[index] { - // Swap probe_hash - tmp_probe_hash := new_probe_hash[index] - new_probe_hash[index] = probe_hash - probe_hash = tmp_probe_hash - // Swap KeyValue - tmp_kv := new_key_values[index] - new_key_values[index] = kv - kv = tmp_kv - } - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // Should almost never happen - if (probe_hash & max_probe) == max_probe { - m.expand() - m.set(kv.key, kv.value) - return - } - new_probe_hash[index] = probe_hash - new_key_values[index] = kv - } - } - unsafe{ - free(m.key_values) - } - m.key_values = new_key_values - m.probe_hash = new_probe_hash -} - -pub fn (m mut map) delete(key string) { - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & m.range_cap - mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) - for probe_hash < m.probe_hash[index] { - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - // Perform backwards shifting - for probe_hash == m.probe_hash[index] { - if key == m.key_values[index].key { - mut old_index := index - index = (index + 1) & m.range_cap - mut current_probe_hash := m.probe_hash[index] - for (current_probe_hash>>hashbits) > 1 { - m.probe_hash[old_index] = current_probe_hash - probe_inc - m.key_values[old_index] = m.key_values[index] - old_index = index - index = (index + 1) & m.range_cap - current_probe_hash = m.probe_hash[index] - } - m.probe_hash[old_index] = 0 - m.size-- + mut i := 0 + for i < node.size && key > node.keys[i] { i++ } + if i != node.size && key == node.keys[i] { + C.memcpy(node.values[i], value, m.value_bytes) return } - index = (index + 1) & m.range_cap - probe_hash += probe_inc + if isnil(node.children) { + mut j := node.size - 1 + for j >= 0 && key < node.keys[j] { + node.keys[j + 1] = node.keys[j] + node.values[j + 1] = node.values[j] + j-- + } + node.keys[j + 1] = key + node.values[j + 1] = malloc(m.value_bytes) + C.memcpy(node.values[j + 1], value, m.value_bytes) + node.size++ + m.size++ + return + } + parent = node + child_index = i + node = &mapnode(node.children[child_index]) } } +fn (n mut mapnode) split_child(child_index int, y mut mapnode) { + mut z := new_node() + z.size = mid_index + y.size = mid_index + for j := mid_index - 1; j >= 0; j-- { + z.keys[j] = y.keys[j + degree] + z.values[j] = y.values[j + degree] + } + if !isnil(y.children) { + z.children = &voidptr(malloc(children_bytes)) + for jj := degree - 1; jj >= 0; jj-- { + z.children[jj] = y.children[jj + degree] + } + } + if isnil(n.children) { + n.children = &voidptr(malloc(children_bytes)) + } + n.children[n.size + 1] = n.children[n.size] + for j := n.size; j > child_index; j-- { + n.keys[j] = n.keys[j - 1] + n.values[j] = n.values[j - 1] + n.children[j] = n.children[j - 1] + } + n.keys[child_index] = y.keys[mid_index] + n.values[child_index] = y.values[mid_index] + n.children[child_index] = voidptr(y) + n.children[child_index + 1] = voidptr(z) + n.size++ +} + fn (m map) get(key string, out voidptr) bool { - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & m.range_cap - mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) - for probe_hash < m.probe_hash[index] { - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - for probe_hash == m.probe_hash[index] { - if key == m.key_values[index].key { - C.memcpy(out, m.key_values[index].value, m.value_bytes) + mut node := m.root + for { + mut i := node.size - 1 + for i >= 0 && key < node.keys[i] { + i-- + } + if i != -1 && key == node.keys[i] { + C.memcpy(out, node.values[i], m.value_bytes) return true } - index = (index + 1) & m.range_cap - probe_hash += probe_inc + if isnil(node.children) { + break + } + node = &mapnode(node.children[i + 1]) } return false } fn (m map) exists(key string) bool { - if m.value_bytes == 0 { + if isnil(m.root) { // TODO: find out why root can be nil return false } - hash := wyhash.wyhash_c(key.str, u64(key.len), 0) - mut index := hash & m.range_cap - mut probe_hash := u32(((hash>>m.shift) & hash_mask) | probe_inc) - for probe_hash < m.probe_hash[index] { - index = (index + 1) & m.range_cap - probe_hash += probe_inc - } - for probe_hash == m.probe_hash[index] { - if key == m.key_values[index].key { + mut node := m.root + for { + mut i := node.size - 1 + for i >= 0 && key < node.keys[i] { i-- } + if i != -1 && key == node.keys[i] { return true } - index = (index + 1) & m.range_cap - probe_hash += probe_inc + if isnil(node.children) { + break + } + node = &mapnode(node.children[i + 1]) } return false } -pub fn (m &map) keys() []string { - mut keys := [''].repeat(m.size) - if m.value_bytes == 0 { - return keys +fn (n mapnode) find_key(k string) int { + mut idx := 0 + for idx < n.size && n.keys[idx] < k { + idx++ } - mut j := 0 - for i in 0 .. (m.range_cap + 1) { - if m.probe_hash[i] != 0 { - keys[j] = m.key_values[i].key - j++ + return idx +} + +fn (n mut mapnode) remove_key(k string) bool { + idx := n.find_key(k) + if idx < n.size && n.keys[idx] == k { + if isnil(n.children) { + n.remove_from_leaf(idx) + } else { + n.remove_from_non_leaf(idx) + } + return true + } else { + if isnil(n.children) { + return false + } + flag := if idx == n.size {true} else {false} + if (&mapnode(n.children[idx])).size < degree { + n.fill(idx) + } + + if flag && idx > n.size { + return (&mapnode(n.children[idx - 1])).remove_key(k) + } else { + return (&mapnode(n.children[idx])).remove_key(k) } } +} + +fn (n mut mapnode) remove_from_leaf(idx int) { + for i := idx + 1; i < n.size; i++ { + n.keys[i - 1] = n.keys[i] + n.values[i - 1] = n.values[i] + } + n.size-- +} + +fn (n mut mapnode) remove_from_non_leaf(idx int) { + k := n.keys[idx] + if &mapnode(n.children[idx]).size >= degree { + mut current := &mapnode(n.children[idx]) + for !isnil(current.children) { + current = &mapnode(current.children[current.size]) + } + predecessor := current.keys[current.size - 1] + n.keys[idx] = predecessor + n.values[idx] = current.values[current.size - 1] + (&mapnode(n.children[idx])).remove_key(predecessor) + } else if &mapnode(n.children[idx + 1]).size >= degree { + mut current := &mapnode(n.children[idx + 1]) + for !isnil(current.children) { + current = &mapnode(current.children[0]) + } + successor := current.keys[0] + n.keys[idx] = successor + n.values[idx] = current.values[0] + (&mapnode(n.children[idx + 1])).remove_key(successor) + } else { + n.merge(idx) + (&mapnode(n.children[idx])).remove_key(k) + } +} + +fn (n mut mapnode) fill(idx int) { + if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree { + n.borrow_from_prev(idx) + } else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree { + n.borrow_from_next(idx) + } else if idx != n.size { + n.merge(idx) + } else { + n.merge(idx - 1) + } +} + +fn (n mut mapnode) borrow_from_prev(idx int) { + mut child := &mapnode(n.children[idx]) + mut sibling := &mapnode(n.children[idx - 1]) + for i := child.size - 1; i >= 0; i-- { + child.keys[i + 1] = child.keys[i] + child.values[i + 1] = child.values[i] + } + if !isnil(child.children) { + for i := child.size; i >= 0; i-- { + child.children[i + 1] = child.children[i] + } + } + child.keys[0] = n.keys[idx - 1] + child.values[0] = n.values[idx - 1] + if !isnil(child.children) { + child.children[0] = sibling.children[sibling.size] + } + n.keys[idx - 1] = sibling.keys[sibling.size - 1] + n.values[idx - 1] = sibling.values[sibling.size - 1] + child.size++ + sibling.size-- +} + +fn (n mut mapnode) borrow_from_next(idx int) { + mut child := &mapnode(n.children[idx]) + mut sibling := &mapnode(n.children[idx + 1]) + child.keys[child.size] = n.keys[idx] + child.values[child.size] = n.values[idx] + if !isnil(child.children) { + child.children[child.size + 1] = sibling.children[0] + } + n.keys[idx] = sibling.keys[0] + n.values[idx] = sibling.values[0] + for i := 1; i < sibling.size; i++ { + sibling.keys[i - 1] = sibling.keys[i] + sibling.values[i - 1] = sibling.values[i] + } + if !isnil(sibling.children) { + for i := 1; i <= sibling.size; i++ { + sibling.children[i - 1] = sibling.children[i] + } + } + child.size++ + sibling.size-- +} + +fn (n mut mapnode) merge(idx int) { + mut child := &mapnode(n.children[idx]) + sibling := &mapnode(n.children[idx + 1]) + child.keys[mid_index] = n.keys[idx] + child.values[mid_index] = n.values[idx] + for i := 0; i < sibling.size; i++ { + child.keys[i + degree] = sibling.keys[i] + child.values[i + degree] = sibling.values[i] + } + if !isnil(child.children) { + for i := 0; i <= sibling.size; i++ { + child.children[i + degree] = sibling.children[i] + } + } + for i := idx + 1; i < n.size; i++ { + n.keys[i - 1] = n.keys[i] + n.values[i - 1] = n.values[i] + } + for i := idx + 2; i <= n.size; i++ { + n.children[i - 1] = n.children[i] + } + child.size += sibling.size + 1 + n.size-- + // free(sibling) +} + +pub fn (m mut map) delete(key string) { + if m.root.size == 0 { + return + } + + removed := m.root.remove_key(key) + if removed { + m.size-- + } + + if m.root.size == 0 { + // tmp := t.root + if isnil(m.root.children) { + return + } else { + m.root = &mapnode(m.root.children[0]) + } + // free(tmp) + } +} + +// Insert all keys of the subtree into array `keys` +// starting at `at`. Keys are inserted in order. +fn (n mapnode) subkeys(keys mut []string, at int) int { + mut position := at + if !isnil(n.children) { + // Traverse children and insert + // keys inbetween children + for i in 0..n.size { + child := &mapnode(n.children[i]) + position += child.subkeys(mut keys, position) + keys[position] = n.keys[i] + position++ + } + // Insert the keys of the last child + child := &mapnode(n.children[n.size]) + position += child.subkeys(mut keys, position) + } else { + // If leaf, insert keys + for i in 0..n.size { + keys[position + i] = n.keys[i] + } + position += n.size + } + // Return # of added keys + return position - at +} + +pub fn (m &map) keys() []string { + mut keys := [''].repeat(m.size) + if isnil(m.root) || m.root.size == 0 { + return keys + } + m.root.subkeys(mut keys, 0) return keys } -pub fn (m mut map) set_load_factor(new_load_factor f32) { - if new_load_factor > 1.0 { - m.load_factor = 1.0 - } - else if new_load_factor < 0.1 { - m.load_factor = 0.1 - } - else { - m.load_factor = new_load_factor +fn (n mut mapnode) free() { + mut i := 0 + if isnil(n.children) { + i = 0 + for i < n.size { + i++ + } + } else { + i = 0 + for i < n.size { + &mapnode(n.children[i]).free() + i++ + } + &mapnode(n.children[i]).free() } + // free(n) } pub fn (m mut map) free() { - unsafe{ - free(m.key_values) + if isnil(m.root) { + return } + m.root.free() } pub fn (m map) print() { - println('TODO') + println('<<<<<<<<') + //for i := 0; i < m.entries.len; i++ { + // entry := m.entries[i] + // println('$entry.key => $entry.val') + //} + /* + for i := 0; i < m.cap * m.value_bytes; i++ { + b := m.table[i] + print('$i: ') + C.printf('%02x', b) + println('') + } +*/ + println('>>>>>>>>>>') } pub fn (m map_string) str() string { @@ -371,4 +444,4 @@ pub fn (m map_string) str() string { } sb.writeln('}') return sb.str() -} \ No newline at end of file +} diff --git a/vlib/builtin/sorted_map.v b/vlib/builtin/sorted_map.v deleted file mode 100644 index d2e9fad020..0000000000 --- a/vlib/builtin/sorted_map.v +++ /dev/null @@ -1,416 +0,0 @@ -// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved. -// Use of this source code is governed by an MIT license -// that can be found in the LICENSE file. - -module builtin - -// import strings - -// B-trees are balanced search trees with all leaves at -// the same level. B-trees are generally faster than -// binary search trees due to the better locality of -// reference, since multiple keys are stored in one node. - -// The number for `degree` has been picked through vigor- -// ous benchmarking but can be changed to any number > 1. -// `degree` determines the size of each node. -const ( - degree = 6 - mid_index = degree - 1 - max_size = 2 * degree - 1 - children_bytes = sizeof(voidptr) * (max_size + 1) -) - -pub struct SortedMap { - value_bytes int -mut: - root &mapnode -pub mut: - size int -} - -struct mapnode { -mut: - keys [11]string // TODO: Should use `max_size` - values [11]voidptr // TODO: Should use `max_size` - children &voidptr - size int -} - -fn new_sorted_map(n, value_bytes int) SortedMap { // TODO: Remove `n` - return SortedMap { - value_bytes: value_bytes - root: new_node() - size: 0 - } -} - -fn new_sorted_map_init(n, value_bytes int, keys &string, values voidptr) SortedMap { - mut out := new_sorted_map(n, value_bytes) - for i in 0 .. n { - out.set(keys[i], values + i * value_bytes) - } - return out -} - -// The tree is initialized with an empty node as root to -// avoid having to check whether the root is null for -// each insertion. -fn new_node() &mapnode { - return &mapnode { - children: 0 - size: 0 - } -} - -// This implementation does proactive insertion, meaning -// that splits are done top-down and not bottom-up. -fn (m mut SortedMap) set(key string, value voidptr) { - mut node := m.root - mut child_index := 0 - mut parent := &mapnode(0) - for { - if node.size == max_size { - if isnil(parent) { - parent = new_node() - m.root = parent - } - parent.split_child(child_index, mut node) - if key == parent.keys[child_index] { - C.memcpy(parent.values[child_index], value, m.value_bytes) - return - } - node = if key < parent.keys[child_index] { - &mapnode(parent.children[child_index]) - } else { - &mapnode(parent.children[child_index + 1]) - } - } - mut i := 0 - for i < node.size && key > node.keys[i] { i++ } - if i != node.size && key == node.keys[i] { - C.memcpy(node.values[i], value, m.value_bytes) - return - } - if isnil(node.children) { - mut j := node.size - 1 - for j >= 0 && key < node.keys[j] { - node.keys[j + 1] = node.keys[j] - node.values[j + 1] = node.values[j] - j-- - } - node.keys[j + 1] = key - node.values[j + 1] = malloc(m.value_bytes) - C.memcpy(node.values[j + 1], value, m.value_bytes) - node.size++ - m.size++ - return - } - parent = node - child_index = i - node = &mapnode(node.children[child_index]) - } -} - -fn (n mut mapnode) split_child(child_index int, y mut mapnode) { - mut z := new_node() - z.size = mid_index - y.size = mid_index - for j := mid_index - 1; j >= 0; j-- { - z.keys[j] = y.keys[j + degree] - z.values[j] = y.values[j + degree] - } - if !isnil(y.children) { - z.children = &voidptr(malloc(children_bytes)) - for jj := degree - 1; jj >= 0; jj-- { - z.children[jj] = y.children[jj + degree] - } - } - if isnil(n.children) { - n.children = &voidptr(malloc(children_bytes)) - } - n.children[n.size + 1] = n.children[n.size] - for j := n.size; j > child_index; j-- { - n.keys[j] = n.keys[j - 1] - n.values[j] = n.values[j - 1] - n.children[j] = n.children[j - 1] - } - n.keys[child_index] = y.keys[mid_index] - n.values[child_index] = y.values[mid_index] - n.children[child_index] = voidptr(y) - n.children[child_index + 1] = voidptr(z) - n.size++ -} - -fn (m SortedMap) get(key string, out voidptr) bool { - mut node := m.root - for { - mut i := node.size - 1 - for i >= 0 && key < node.keys[i] { i-- } - if i != -1 && key == node.keys[i] { - C.memcpy(out, node.values[i], m.value_bytes) - return true - } - if isnil(node.children) { - break - } - node = &mapnode(node.children[i + 1]) - } - return false -} - -fn (m SortedMap) exists(key string) bool { - if isnil(m.root) { // TODO: find out why root can be nil - return false - } - mut node := m.root - for { - mut i := node.size - 1 - for i >= 0 && key < node.keys[i] { i-- } - if i != -1 && key == node.keys[i] { - return true - } - if isnil(node.children) { - break - } - node = &mapnode(node.children[i + 1]) - } - return false -} - -fn (n mapnode) find_key(k string) int { - mut idx := 0 - for idx < n.size && n.keys[idx] < k { - idx++ - } - return idx -} - -fn (n mut mapnode) remove_key(k string) bool { - idx := n.find_key(k) - if idx < n.size && n.keys[idx] == k { - if isnil(n.children) { - n.remove_from_leaf(idx) - } else { - n.remove_from_non_leaf(idx) - } - return true - } else { - if isnil(n.children) { - return false - } - flag := if idx == n.size {true} else {false} - if (&mapnode(n.children[idx])).size < degree { - n.fill(idx) - } - - if flag && idx > n.size { - return (&mapnode(n.children[idx - 1])).remove_key(k) - } else { - return (&mapnode(n.children[idx])).remove_key(k) - } - } -} - -fn (n mut mapnode) remove_from_leaf(idx int) { - for i := idx + 1; i < n.size; i++ { - n.keys[i - 1] = n.keys[i] - n.values[i - 1] = n.values[i] - } - n.size-- -} - -fn (n mut mapnode) remove_from_non_leaf(idx int) { - k := n.keys[idx] - if &mapnode(n.children[idx]).size >= degree { - mut current := &mapnode(n.children[idx]) - for !isnil(current.children) { - current = &mapnode(current.children[current.size]) - } - predecessor := current.keys[current.size - 1] - n.keys[idx] = predecessor - n.values[idx] = current.values[current.size - 1] - (&mapnode(n.children[idx])).remove_key(predecessor) - } else if &mapnode(n.children[idx + 1]).size >= degree { - mut current := &mapnode(n.children[idx + 1]) - for !isnil(current.children) { - current = &mapnode(current.children[0]) - } - successor := current.keys[0] - n.keys[idx] = successor - n.values[idx] = current.values[0] - (&mapnode(n.children[idx + 1])).remove_key(successor) - } else { - n.merge(idx) - (&mapnode(n.children[idx])).remove_key(k) - } -} - -fn (n mut mapnode) fill(idx int) { - if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree { - n.borrow_from_prev(idx) - } else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree { - n.borrow_from_next(idx) - } else if idx != n.size { - n.merge(idx) - } else { - n.merge(idx - 1) - } -} - -fn (n mut mapnode) borrow_from_prev(idx int) { - mut child := &mapnode(n.children[idx]) - mut sibling := &mapnode(n.children[idx - 1]) - for i := child.size - 1; i >= 0; i-- { - child.keys[i + 1] = child.keys[i] - child.values[i + 1] = child.values[i] - } - if !isnil(child.children) { - for i := child.size; i >= 0; i-- { - child.children[i + 1] = child.children[i] - } - } - child.keys[0] = n.keys[idx - 1] - child.values[0] = n.values[idx - 1] - if !isnil(child.children) { - child.children[0] = sibling.children[sibling.size] - } - n.keys[idx - 1] = sibling.keys[sibling.size - 1] - n.values[idx - 1] = sibling.values[sibling.size - 1] - child.size++ - sibling.size-- -} - -fn (n mut mapnode) borrow_from_next(idx int) { - mut child := &mapnode(n.children[idx]) - mut sibling := &mapnode(n.children[idx + 1]) - child.keys[child.size] = n.keys[idx] - child.values[child.size] = n.values[idx] - if !isnil(child.children) { - child.children[child.size + 1] = sibling.children[0] - } - n.keys[idx] = sibling.keys[0] - n.values[idx] = sibling.values[0] - for i := 1; i < sibling.size; i++ { - sibling.keys[i - 1] = sibling.keys[i] - sibling.values[i - 1] = sibling.values[i] - } - if !isnil(sibling.children) { - for i := 1; i <= sibling.size; i++ { - sibling.children[i - 1] = sibling.children[i] - } - } - child.size++ - sibling.size-- -} - -fn (n mut mapnode) merge(idx int) { - mut child := &mapnode(n.children[idx]) - sibling := &mapnode(n.children[idx + 1]) - child.keys[mid_index] = n.keys[idx] - child.values[mid_index] = n.values[idx] - for i := 0; i < sibling.size; i++ { - child.keys[i + degree] = sibling.keys[i] - child.values[i + degree] = sibling.values[i] - } - if !isnil(child.children) { - for i := 0; i <= sibling.size; i++ { - child.children[i + degree] = sibling.children[i] - } - } - for i := idx + 1; i < n.size; i++ { - n.keys[i - 1] = n.keys[i] - n.values[i - 1] = n.values[i] - } - for i := idx + 2; i <= n.size; i++ { - n.children[i - 1] = n.children[i] - } - child.size += sibling.size + 1 - n.size-- - // free(sibling) -} - -pub fn (m mut SortedMap) delete(key string) { - if m.root.size == 0 { - return - } - - removed := m.root.remove_key(key) - if removed { - m.size-- - } - - if m.root.size == 0 { - // tmp := t.root - if isnil(m.root.children) { - return - } else { - m.root = &mapnode(m.root.children[0]) - } - // free(tmp) - } -} - -// Insert all keys of the subtree into array `keys` -// starting at `at`. Keys are inserted in order. -fn (n mapnode) subkeys(keys mut []string, at int) int { - mut position := at - if !isnil(n.children) { - // Traverse children and insert - // keys inbetween children - for i in 0..n.size { - child := &mapnode(n.children[i]) - position += child.subkeys(mut keys, position) - keys[position] = n.keys[i] - position++ - } - // Insert the keys of the last child - child := &mapnode(n.children[n.size]) - position += child.subkeys(mut keys, position) - } else { - // If leaf, insert keys - for i in 0..n.size { - keys[position + i] = n.keys[i] - } - position += n.size - } - // Return # of added keys - return position - at -} - -pub fn (m &SortedMap) keys() []string { - mut keys := [''].repeat(m.size) - if isnil(m.root) || m.root.size == 0 { - return keys - } - m.root.subkeys(mut keys, 0) - return keys -} - -fn (n mut mapnode) free() { - println('TODO') -} - -pub fn (m mut SortedMap) free() { - if isnil(m.root) { - return - } - m.root.free() -} - -pub fn (m SortedMap) print() { - println('TODO') -} - -// pub fn (m map_string) str() string { -// if m.size == 0 { -// return '{}' -// } -// mut sb := strings.new_builder(50) -// sb.writeln('{') -// for key, val in m { -// sb.writeln(' "$key" => "$val"') -// } -// sb.writeln('}') -// return sb.str() -// } \ No newline at end of file