1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

map: bring back b-tree

This commit is contained in:
ka-weihe 2020-01-24 20:13:59 +01:00 committed by Alexander Medvednikov
parent 6fd175d9be
commit 7d797090ff
2 changed files with 355 additions and 198 deletions

View File

@ -1,284 +1,444 @@
// Copyright (c) 2019 Alexander Medvednikov. All rights reserved. // Copyright (c) 2019 Alexander Medvednikov. All rights reserved.
// Use of this source code is governed by an MIT license // Use of this source code is governed by an MIT license
// that can be found in the LICENSE file. // that can be found in the LICENSE file.
module builtin module builtin
import strings import strings
// B-trees are balanced search trees with all leaves at
// the same level. B-trees are generally faster than
// binary search trees due to better locality of
// reference, since multiple keys are stored in one node.
// The value of `degree` was picked through thorough
// benchmarking; the algorithm itself works for any value > 1.
// `degree` determines the size of each node: a node holds
// at most `2 * degree - 1` keys.
// NOTE: `mapnode.keys` and `mapnode.values` are currently
// hard-coded to 11 slots (`max_size` for `degree = 6`), so
// changing `degree` requires updating those arrays as well.
const (
// Branching parameter of the tree. Splitting a full node
// leaves `degree - 1` keys in each half.
degree = 6
// Index of the median key of a full node; `split_child`
// moves the key at this index up into the parent.
mid_index = degree - 1
// Maximum number of keys in a node; `set` splits a node
// once it has reached this size.
max_size = 2 * degree - 1
// Size in bytes of a node's child-pointer array
// (`max_size + 1` pointers).
children_bytes = sizeof(voidptr) * (max_size + 1)
)
pub struct map { pub struct map {
element_size int value_bytes int
root &mapnode mut:
pub: root &mapnode
size int pub mut:
size int
} }
struct mapnode { struct mapnode {
left &mapnode mut:
right &mapnode keys [11]string // TODO: Should use `max_size`
is_empty bool // set by delete() values [11]voidptr // TODO: Should use `max_size`
key string children &voidptr
val voidptr size int
} }
fn new_map(cap, elm_size int) map { fn new_map(n, value_bytes int) map { // TODO: Remove `n`
res := map{ return map {
element_size: elm_size value_bytes: value_bytes
root: 0 root: new_node()
size: 0
} }
return res
} }
// `m := { 'one': 1, 'two': 2 }` fn new_map_init(n, value_bytes int, keys &string, values voidptr) map {
fn new_map_init(cap, elm_size int, keys &string, vals voidptr) map { mut out := new_map(n, value_bytes)
mut res := map{ for i in 0 .. n {
element_size: elm_size out.set(keys[i], values + i * value_bytes)
root: 0
} }
for i in 0 .. cap { return out
res.set(keys[i], vals + i * elm_size)
}
return res
} }
fn new_node(key string, val voidptr, element_size int) &mapnode { // The tree is initialized with an empty node as root to
new_e := &mapnode{ // avoid having to check whether the root is null for
key: key // each insertion.
val: malloc(element_size) fn new_node() &mapnode {
left: 0 return &mapnode {
right: 0 children: 0
size: 0
} }
C.memcpy(new_e.val, val, element_size)
return new_e
} }
fn (m mut map) insert(n mut mapnode, key string, val voidptr) { // This implementation does proactive insertion, meaning
if n.key == key { // that splits are done top-down and not bottom-up.
C.memcpy(n.val, val, m.element_size) fn (m mut map) set(key string, value voidptr) {
if n.is_empty { mut node := m.root
mut child_index := 0
mut parent := &mapnode(0)
for {
if node.size == max_size {
if isnil(parent) {
parent = new_node()
m.root = parent
}
parent.split_child(child_index, mut node)
if key == parent.keys[child_index] {
C.memcpy(parent.values[child_index], value, m.value_bytes)
return
}
node = if key < parent.keys[child_index] {
&mapnode(parent.children[child_index])
} else {
&mapnode(parent.children[child_index + 1])
}
}
mut i := 0
for i < node.size && key > node.keys[i] { i++ }
if i != node.size && key == node.keys[i] {
C.memcpy(node.values[i], value, m.value_bytes)
return
}
if isnil(node.children) {
mut j := node.size - 1
for j >= 0 && key < node.keys[j] {
node.keys[j + 1] = node.keys[j]
node.values[j + 1] = node.values[j]
j--
}
node.keys[j + 1] = key
node.values[j + 1] = malloc(m.value_bytes)
C.memcpy(node.values[j + 1], value, m.value_bytes)
node.size++
m.size++ m.size++
n.is_empty = false return
} }
return parent = node
} child_index = i
if n.key > key { node = &mapnode(node.children[child_index])
if n.left == 0 {
n.left = new_node(key, val, m.element_size)
m.size++
}
else {
m.insert(mut n.left, key, val)
}
return
}
if n.right == 0 {
n.right = new_node(key, val, m.element_size)
m.size++
}
else {
m.insert(mut n.right, key, val)
} }
} }
fn (n &mapnode) find(key string, out voidptr, element_size int) bool { fn (n mut mapnode) split_child(child_index int, y mut mapnode) {
if n.key == key { mut z := new_node()
C.memcpy(out, n.val, element_size) z.size = mid_index
y.size = mid_index
for j := mid_index - 1; j >= 0; j-- {
z.keys[j] = y.keys[j + degree]
z.values[j] = y.values[j + degree]
}
if !isnil(y.children) {
z.children = &voidptr(malloc(children_bytes))
for j := degree - 1; j >= 0; j-- {
z.children[j] = y.children[j + degree]
}
}
if isnil(n.children) {
n.children = &voidptr(malloc(children_bytes))
}
n.children[n.size + 1] = n.children[n.size]
for j := n.size; j > child_index; j-- {
n.keys[j] = n.keys[j - 1]
n.values[j] = n.values[j - 1]
n.children[j] = n.children[j - 1]
}
n.keys[child_index] = y.keys[mid_index]
n.values[child_index] = y.values[mid_index]
n.children[child_index] = voidptr(y)
n.children[child_index + 1] = voidptr(z)
n.size++
}
// get looks up `key` and, if it is present, copies its value
// (`m.value_bytes` bytes) into `out` and returns true.
// If `key` is absent, `out` is left untouched and false is
// returned. `out` must point to at least `m.value_bytes`
// writable bytes.
fn (m map) get(key string, out voidptr) bool {
	// `exists` and `keys` guard against a nil root; do the
	// same here instead of dereferencing a null pointer.
	if isnil(m.root) {
		return false
	}
	mut node := m.root
	for {
		// Find the last key in this node that is <= `key`.
		mut i := node.size - 1
		for i >= 0 && key < node.keys[i] {
			i--
		}
		if i != -1 && key == node.keys[i] {
			C.memcpy(out, node.values[i], m.value_bytes)
			return true
		}
		// A node without a children array is a leaf:
		// there is nowhere left to search.
		if isnil(node.children) {
			break
		}
		// Descend into the subtree right after keys[i].
		node = &mapnode(node.children[i + 1])
	}
	return false
}
// exists reports whether `key` is stored in the map.
// It performs the same descent as a lookup, but never
// touches the stored value.
fn (m map) exists(key string) bool {
	if isnil(m.root) { // TODO: find out why root can be nil
		return false
	}
	mut current := m.root
	for {
		// Skip every key in this node that sorts before `key`.
		mut pos := 0
		for pos < current.size && key > current.keys[pos] {
			pos++
		}
		if pos < current.size && key == current.keys[pos] {
			return true
		}
		// Reached a leaf without finding the key.
		if isnil(current.children) {
			break
		}
		// Continue in the subtree just before keys[pos].
		current = &mapnode(current.children[pos])
	}
	return false
}
// find_key returns the index of the first key in `n` that
// is not smaller than `k`, or `n.size` if every key is smaller.
fn (n mapnode) find_key(k string) int {
	for pos in 0..n.size {
		if !(n.keys[pos] < k) {
			return pos
		}
	}
	return n.size
}
fn (n mut mapnode) remove_key(k string) bool {
idx := n.find_key(k)
if idx < n.size && n.keys[idx] == k {
if isnil(n.children) {
n.remove_from_leaf(idx)
} else {
n.remove_from_non_leaf(idx)
}
return true return true
} } else {
else if n.key > key { if isnil(n.children) {
if n.left == 0 {
return false return false
} }
else { flag := if idx == n.size {true} else {false}
return n.left.find(key, out, element_size) if (&mapnode(n.children[idx])).size < degree {
n.fill(idx)
} }
}
else { if flag && idx > n.size {
if n.right == 0 { return (&mapnode(n.children[idx - 1])).remove_key(k)
return false } else {
} return (&mapnode(n.children[idx])).remove_key(k)
else {
return n.right.find(key, out, element_size)
} }
} }
} }
// same as `find`, but doesn't return a value. Used by `exists` fn (n mut mapnode) remove_from_leaf(idx int) {
fn (n &mapnode) find2(key string, element_size int) bool { for i := idx + 1; i < n.size; i++ {
if n.key == key && !n.is_empty { n.keys[i - 1] = n.keys[i]
return true n.values[i - 1] = n.values[i]
} }
else if n.key > key { n.size--
if isnil(n.left) { }
return false
fn (n mut mapnode) remove_from_non_leaf(idx int) {
k := n.keys[idx]
if &mapnode(n.children[idx]).size >= degree {
mut current := &mapnode(n.children[idx])
for !isnil(current.children) {
current = &mapnode(current.children[current.size])
} }
else { predecessor := current.keys[current.size - 1]
return n.left.find2(key, element_size) n.keys[idx] = predecessor
} n.values[idx] = current.values[current.size - 1]
} (&mapnode(n.children[idx])).remove_key(predecessor)
else { } else if &mapnode(n.children[idx + 1]).size >= degree {
if isnil(n.right) { mut current := &mapnode(n.children[idx + 1])
return false for !isnil(current.children) {
} current = &mapnode(current.children[0])
else {
return n.right.find2(key, element_size)
} }
successor := current.keys[0]
n.keys[idx] = successor
n.values[idx] = current.values[0]
(&mapnode(n.children[idx + 1])).remove_key(successor)
} else {
n.merge(idx)
(&mapnode(n.children[idx])).remove_key(k)
} }
} }
fn (m mut map) set(key string, val voidptr) { fn (n mut mapnode) fill(idx int) {
if isnil(m.root) { if idx != 0 && &mapnode(n.children[idx - 1]).size >= degree {
m.root = new_node(key, val, m.element_size) n.borrow_from_prev(idx)
m.size++ } else if idx != n.size && &mapnode(n.children[idx + 1]).size >= degree {
return n.borrow_from_next(idx)
} else if idx != n.size {
n.merge(idx)
} else {
n.merge(idx - 1)
} }
m.insert(mut m.root, key, val)
} }
/* fn (n mut mapnode) borrow_from_prev(idx int) {
fn (m map) bs(query string, start, end int, out voidptr) { mut child := &mapnode(n.children[idx])
// println('bs "$query" $start -> $end') mut sibling := &mapnode(n.children[idx - 1])
mid := start + ((end - start) / 2) for i := child.size - 1; i >= 0; i-- {
if end - start == 0 { child.keys[i + 1] = child.keys[i]
last := m.entries[end] child.values[i + 1] = child.values[i]
C.memcpy(out, last.val, m.element_size)
return
} }
if end - start == 1 { if !isnil(child.children) {
first := m.entries[start] for i := child.size; i >= 0; i-- {
C.memcpy(out, first.val, m.element_size) child.children[i + 1] = child.children[i]
return }
} }
if mid >= m.entries.len { child.keys[0] = n.keys[idx - 1]
return child.values[0] = n.values[idx - 1]
if !isnil(child.children) {
child.children[0] = sibling.children[sibling.size]
} }
mid_msg := m.entries[mid] n.keys[idx - 1] = sibling.keys[sibling.size - 1]
// println('mid.key=$mid_msg.key') n.values[idx - 1] = sibling.values[sibling.size - 1]
if query < mid_msg.key { child.size++
m.bs(query, start, mid, out) sibling.size--
return
}
m.bs(query, mid, end, out)
} }
*/
// borrow_from_next moves one key from the right sibling
// (`children[idx + 1]`) into `children[idx]` by rotating it
// through the parent: the separator `n.keys[idx]` is appended
// to the child and the sibling's first key becomes the new
// separator. `fill` calls this when the right sibling holds
// at least `degree` keys.
fn (n mut mapnode) borrow_from_next(idx int) {
mut child := &mapnode(n.children[idx])
mut sibling := &mapnode(n.children[idx + 1])
// The parent's separator becomes the child's last key.
child.keys[child.size] = n.keys[idx]
child.values[child.size] = n.values[idx]
// For an internal node, the sibling's first subtree
// is attached after the key that was just appended.
if !isnil(child.children) {
child.children[child.size + 1] = sibling.children[0]
}
// The sibling's first key moves up to replace the separator.
n.keys[idx] = sibling.keys[0]
n.values[idx] = sibling.values[0]
// Close the gap at the front of the sibling's keys/values...
for i := 1; i < sibling.size; i++ {
sibling.keys[i - 1] = sibling.keys[i]
sibling.values[i - 1] = sibling.values[i]
}
// ...and at the front of its child pointers, if it has any.
if !isnil(sibling.children) {
for i := 1; i <= sibling.size; i++ {
sibling.children[i - 1] = sibling.children[i]
}
}
child.size++
sibling.size--
}
fn preorder_keys(node &mapnode, keys mut []string, key_i int) int { fn (n mut mapnode) merge(idx int) {
mut i := key_i mut child := &mapnode(n.children[idx])
if !node.is_empty { sibling := &mapnode(n.children[idx + 1])
keys[i] = node.key child.keys[mid_index] = n.keys[idx]
i++ child.values[mid_index] = n.values[idx]
for i := 0; i < sibling.size; i++ {
child.keys[i + degree] = sibling.keys[i]
child.values[i + degree] = sibling.values[i]
} }
if !isnil(node.left) { if !isnil(child.children) {
i = preorder_keys(node.left, mut keys, i) for i := 0; i <= sibling.size; i++ {
child.children[i + degree] = sibling.children[i]
}
} }
if !isnil(node.right) { for i := idx + 1; i < n.size; i++ {
i = preorder_keys(node.right, mut keys, i) n.keys[i - 1] = n.keys[i]
n.values[i - 1] = n.values[i]
} }
return i for i := idx + 2; i <= n.size; i++ {
n.children[i - 1] = n.children[i]
}
child.size += sibling.size + 1
n.size--
// free(sibling)
}
// delete removes `key` from the map. Deleting a key that is
// not present is a no-op. `m.size` is decremented only when
// a key was actually removed.
pub fn (m mut map) delete(key string) {
	// `exists` and `keys` guard against a nil root; do the
	// same here instead of dereferencing a null pointer.
	if isnil(m.root) || m.root.size == 0 {
		return
	}
	removed := m.root.remove_key(key)
	if removed {
		m.size--
	}
	// If the removal emptied an internal root, its only
	// remaining child becomes the new root. An empty leaf
	// root is kept so the map always has a root node.
	if m.root.size == 0 && !isnil(m.root.children) {
		// TODO: the old root node is not freed here.
		m.root = &mapnode(m.root.children[0])
	}
}
// Insert all keys of the subtree into array `keys`
// starting at `at`. Keys are inserted in order.
fn (n mapnode) subkeys(keys mut []string, at int) int {
mut position := at
if !isnil(n.children) {
// Traverse children and insert
// keys in between children
for i in 0..n.size {
child := &mapnode(n.children[i])
position += child.subkeys(mut keys, position)
keys[position] = n.keys[i]
position++
}
// Insert the keys of the last child
child := &mapnode(n.children[n.size])
position += child.subkeys(mut keys, position)
} else {
// If leaf, insert keys
for i in 0..n.size {
keys[position + i] = n.keys[i]
}
position += n.size
}
// Return # of added keys
return position - at
} }
pub fn (m &map) keys() []string { pub fn (m &map) keys() []string {
mut keys := [''].repeat(m.size) mut keys := [''].repeat(m.size)
if isnil(m.root) { if isnil(m.root) || m.root.size == 0 {
return keys return keys
} }
preorder_keys(m.root, mut keys, 0) m.root.subkeys(mut keys, 0)
return keys return keys
} }
fn (m map) get(key string, out voidptr) bool { fn (n mut mapnode) free() {
// println('g') mut i := 0
if m.root == 0 { if isnil(n.children) {
return false i = 0
for i < n.size {
i++
}
} else {
i = 0
for i < n.size {
&mapnode(n.children[i]).free()
i++
}
&mapnode(n.children[i]).free()
} }
return m.root.find(key, out, m.element_size) // free(n)
} }
pub fn (n mut mapnode) delete(key string, element_size int) { pub fn (m mut map) free() {
if n.key == key { if isnil(m.root) {
C.memset(n.val, 0, element_size)
n.is_empty = true
return return
} }
else if n.key > key { m.root.free()
if isnil(n.left) {
return
}
else {
n.left.delete(key, element_size)
}
}
else {
if isnil(n.right) {
return
}
else {
n.right.delete(key, element_size)
}
}
}
pub fn (m mut map) delete(key string) {
if m.exists(key) {
m.root.delete(key, m.element_size)
m.size--
}
}
fn (m map) exists(key string) bool {
return !isnil(m.root) && m.root.find2(key, m.element_size)
} }
pub fn (m map) print() { pub fn (m map) print() {
println('<<<<<<<<') println('<<<<<<<<')
// for i := 0; i < m.entries.len; i++ { //for i := 0; i < m.entries.len; i++ {
// entry := m.entries[i] // entry := m.entries[i]
// println('$entry.key => $entry.val') // println('$entry.key => $entry.val')
// } //}
/* /*
for i := 0; i < m.cap * m.element_size; i++ { for i := 0; i < m.cap * m.value_bytes; i++ {
b := m.table[i] b := m.table[i]
print('$i: ') print('$i: ')
C.printf('%02x', b) C.printf('%02x', b)
println('') println('')
} }
*/ */
println('>>>>>>>>>>') println('>>>>>>>>>>')
} }
fn (n mut mapnode) free() {
if n.val != 0 {
free(n.val)
}
if n.left != 0 {
n.left.free()
}
if n.right != 0 {
n.right.free()
}
free(n)
}
pub fn (m mut map) free() {
if m.root == 0 {
return
}
m.root.free()
// C.free(m.table)
// C.free(m.keys_table)
}
pub fn (m map_string) str() string { pub fn (m map_string) str() string {
if m.size == 0 { if m.size == 0 {
return '{}' return '{}'
} }
mut sb := strings.new_builder(50) mut sb := strings.new_builder(50)
sb.writeln('{') sb.writeln('{')
for key, val in m { for key, val in m {
sb.writeln(' "$key" => "$val"') sb.writeln(' "$key" => "$val"')
} }
sb.writeln('}') sb.writeln('}')
return sb.str() return sb.str()
} }

View File

@ -23,19 +23,16 @@ fn test_map() {
assert m.size == 2 assert m.size == 2
assert 'hi' in m assert 'hi' in m
mut sum := 0 mut sum := 0
mut key_sum := ''
// Test `for in` // Test `for in`
for key, val in m { for key, val in m {
sum += val sum += val
key_sum += key
} }
assert sum == 80 + 101 assert sum == 80 + 101
assert key_sum == 'hihello'
// Test `.keys()` // Test `.keys()`
keys := m.keys() keys := m.keys()
assert keys.len == 2 assert keys.len == 2
assert keys[0] == 'hi' assert 'hi' in keys
assert keys[1] == 'hello' assert 'hello' in keys
m.delete('hi') m.delete('hi')
assert m.size == 1 assert m.size == 1
m.delete('aloha') m.delete('aloha')