From 0d28f12c54e402a972c6d19421677b9d4d0a2579 Mon Sep 17 00:00:00 2001 From: Nick Treleaven Date: Sat, 5 Dec 2020 21:53:50 +0000 Subject: [PATCH] map: use untyped keys for DenseArray and interleave keys and values (#7142) --- vlib/builtin/cfns.c.v | 2 +- vlib/builtin/map.v | 171 ++++++++++++++++------------------ vlib/v/gen/auto_str_methods.v | 2 +- vlib/v/gen/cgen.v | 2 +- 4 files changed, 81 insertions(+), 96 deletions(-) diff --git a/vlib/builtin/cfns.c.v b/vlib/builtin/cfns.c.v index 82dd3292aa..7e5b06dc81 100644 --- a/vlib/builtin/cfns.c.v +++ b/vlib/builtin/cfns.c.v @@ -1,7 +1,7 @@ module builtin // -fn C.memcpy(byteptr, byteptr, int) voidptr +fn C.memcpy(dest byteptr, src byteptr, n int) voidptr fn C.memcmp(byteptr, byteptr, int) int diff --git a/vlib/builtin/map.v b/vlib/builtin/map.v index d2ca3e59fb..b962e8cbe6 100644 --- a/vlib/builtin/map.v +++ b/vlib/builtin/map.v @@ -94,107 +94,92 @@ fn fast_string_eq(a string, b string) bool { // Dynamic array with very low growth factor struct DenseArray { + key_bytes int value_bytes int + slot_bytes int // sum of 2 fields above mut: - cap u32 - len u32 - deletes u32 - keys &string - values byteptr + cap int + len int + deletes u32 // count + data byteptr // array of interspersed key data and value data } [inline] [unsafe] -fn new_dense_array(value_bytes int) DenseArray { - s8size := int(8 * sizeof(string)) +fn new_dense_array(key_bytes int, value_bytes int) DenseArray { + slot_bytes := key_bytes + value_bytes + cap := 8 return DenseArray{ + key_bytes: key_bytes value_bytes: value_bytes - cap: 8 + slot_bytes: slot_bytes + cap: cap len: 0 deletes: 0 - keys: &string(malloc(s8size)) - values: malloc(8 * value_bytes) + data: malloc(cap * slot_bytes) } } [inline] fn (d &DenseArray) key(i int) voidptr { - return unsafe {voidptr(d.keys + i)} + return unsafe {d.data + i * d.slot_bytes} } // for cgen [inline] fn (d &DenseArray) value(i int) voidptr { - return unsafe {voidptr(d.values + i * d.value_bytes)} + return unsafe {d.data + i * d.slot_bytes + d.key_bytes} } [inline] fn (d &DenseArray) has_index(i int) bool { - pkey := unsafe {d.keys + i} + // assume string keys for now + pkey := unsafe {&string(d.key(i))} return pkey.str != 0 } // Push element to array and return index // The growth-factor is roughly 1.125 `(x + (x >> 3))` [inline] -fn (mut d DenseArray) push(key string, value voidptr) u32 { +fn (mut d DenseArray) push(key voidptr, value voidptr) int { if d.cap == d.len { d.cap += d.cap >> 3 unsafe { - x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap)) - d.keys = &string(x) - d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap)) + d.data = v_realloc(d.data, d.slot_bytes * d.cap) } } push_index := d.len unsafe { - d.keys[push_index] = key - C.memcpy(d.values + push_index * u32(d.value_bytes), value, d.value_bytes) + ptr := d.key(push_index) + C.memcpy(ptr, key, d.key_bytes) + C.memcpy(byteptr(ptr) + d.key_bytes, value, d.value_bytes) } d.len++ return push_index } -fn (d DenseArray) get(i int) voidptr { - $if !no_bounds_checking? { - if i < 0 || i >= int(d.len) { - panic('DenseArray.get: index out of range (i == $i, d.len == $d.len)') - } - } - unsafe { - return byteptr(d.keys) + i * int(sizeof(string)) - } -} - // Move all zeros to the end of the array and resize array fn (mut d DenseArray) zeros_to_end() { - mut tmp_value := malloc(d.value_bytes) - mut count := u32(0) - for i in 0 .. int(d.len) { - if unsafe {d.keys[i]}.str != 0 { - // swap keys + // TODO alloca? + mut tmp_buf := malloc(d.slot_bytes) + mut count := 0 + for i in 0 .. d.len { + if d.has_index(i) { + // swap (TODO: optimize) unsafe { - tmp_key := d.keys[count] - d.keys[count] = d.keys[i] - d.keys[i] = tmp_key - } - // swap values (TODO: optimize) - unsafe { - C.memcpy(tmp_value, d.values + count * u32(d.value_bytes), d.value_bytes) - C.memcpy(d.values + count * u32(d.value_bytes), d.values + i * d.value_bytes, d.value_bytes) - C.memcpy(d.values + i * d.value_bytes, tmp_value, d.value_bytes) + C.memcpy(tmp_buf, d.key(count), d.slot_bytes) + C.memcpy(d.key(count), d.key(i), d.slot_bytes) + C.memcpy(d.key(i), tmp_buf, d.slot_bytes) } count++ } } - free(tmp_value) + free(tmp_buf) d.deletes = 0 d.len = count - d.cap = if count < 8 { u32(8) } else { count } + d.cap = if count < 8 { 8 } else { count } unsafe { - x := v_realloc(byteptr(d.keys), int(sizeof(string) * d.cap)) - d.keys = &string(x) - d.values = v_realloc(byteptr(d.values), d.value_bytes * int(d.cap)) + d.data = v_realloc(d.data, d.slot_bytes * d.cap) } } @@ -229,7 +214,7 @@ fn new_map_1(value_bytes int) map { cap: init_cap cached_hashbits: max_cached_hashbits shift: init_log_capicity - key_values: new_dense_array(value_bytes) + key_values: new_dense_array(int(sizeof(string)), value_bytes) metas: &u32(vcalloc(metasize)) extra_metas: extra_metas_inc len: 0 @@ -324,10 +309,12 @@ fn (mut m map) set(k string, value voidptr) { index,meta = m.meta_less(index, meta) // While we might have a match for meta == unsafe {m.metas[index]} { - kv_index := unsafe {m.metas[index + 1]} - if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { + kv_index := int(unsafe {m.metas[index + 1]}) + pkey := unsafe {&string(m.key_values.key(kv_index))} + if fast_string_eq(key, *pkey) { unsafe { - C.memcpy(m.key_values.values + kv_index * u32(m.value_bytes), value, m.value_bytes) + pval := pkey + 1 // skip string + C.memcpy(pval, value, m.value_bytes) } return } @@ -335,7 +322,7 @@ fn (mut m map) set(k string, value voidptr) { meta += probe_inc } kv_index := m.key_values.push(key, value) - m.meta_greater(index, meta, kv_index) + m.meta_greater(index, meta, u32(kv_index)) m.len++ } @@ -367,13 +354,14 @@ fn (mut m map) rehash() { m.metas = &u32(x) C.memset(m.metas, 0, meta_bytes) } - for i := u32(0); i < m.key_values.len; i++ { - if unsafe {m.key_values.keys[i]}.str == 0 { + for i := 0; i < m.key_values.len; i++ { + if !m.key_values.has_index(i) { continue } - mut index,mut meta := m.key_to_index(unsafe {m.key_values.keys[i]}) + pkey := unsafe {&string(m.key_values.key(i))} + mut index,mut meta := m.key_to_index(*pkey) index,meta = m.meta_less(index, meta) - m.meta_greater(index, meta, i) + m.meta_greater(index, meta, u32(i)) } } @@ -403,18 +391,17 @@ fn (mut m map) cached_rehash(old_cap u32) { } // This method is used for assignment operators. If the argument-key -// does not exist in the map, it's added to the map along with the zero/dafault value. +// does not exist in the map, it's added to the map along with the zero/default value. // If the key exists, its respective value is returned. fn (mut m map) get_and_set(key string, zero voidptr) voidptr { for { mut index,mut meta := m.key_to_index(key) for { if meta == unsafe {m.metas[index]} { - kv_index := unsafe {m.metas[index + 1]} - if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { - unsafe { - return voidptr(m.key_values.values + kv_index * u32(m.value_bytes)) - } + kv_index := int(unsafe {m.metas[index + 1]}) + pkey := unsafe {&string(m.key_values.key(kv_index))} + if fast_string_eq(key, *pkey) { + return unsafe {byteptr(pkey) + m.key_values.key_bytes} } } index += 2 @@ -435,11 +422,10 @@ fn (m map) get(key string, zero voidptr) voidptr { mut index,mut meta := m.key_to_index(key) for { if meta == unsafe {m.metas[index]} { - kv_index := unsafe {m.metas[index + 1]} - if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { - unsafe { - return voidptr(m.key_values.values + kv_index * u32(m.value_bytes)) - } + kv_index := int(unsafe {m.metas[index + 1]}) + pkey := unsafe {&string(m.key_values.key(kv_index))} + if fast_string_eq(key, *pkey) { + return unsafe {byteptr(pkey) + m.key_values.key_bytes} } } index += 2 @@ -454,9 +440,10 @@ fn (m map) exists(key string) bool { mut index,mut meta := m.key_to_index(key) for { if meta == unsafe {m.metas[index]} { - kv_index := unsafe {m.metas[index + 1]} - if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { - return true + kv_index := int(unsafe {m.metas[index + 1]}) + pkey := unsafe {&string(m.key_values.key(kv_index))} + if fast_string_eq(key, *pkey) { + return true } } index += 2 @@ -472,8 +459,9 @@ pub fn (mut m map) delete(key string) { index,meta = m.meta_less(index, meta) // Perform backwards shifting for meta == unsafe {m.metas[index]} { - kv_index := unsafe {m.metas[index + 1]} - if fast_string_eq(key, unsafe {m.key_values.keys[kv_index]}) { + kv_index := int(unsafe {m.metas[index + 1]}) + pkey := unsafe {&string(m.key_values.key(kv_index))} + if fast_string_eq(key, *pkey) { for (unsafe {m.metas[index + 2]} >> hashbits) > 1 { unsafe { m.metas[index] = m.metas[index + 2] - probe_inc @@ -488,8 +476,8 @@ pub fn (mut m map) delete(key string) { m.key_values.deletes++ // Mark key as deleted unsafe { - m.key_values.keys[kv_index].free() - C.memset(&m.key_values.keys[kv_index], 0, sizeof(string)) + (*pkey).free() + C.memset(pkey, 0, sizeof(string)) } if m.key_values.len <= 32 { return @@ -512,11 +500,12 @@ pub fn (mut m map) delete(key string) { pub fn (m &map) keys() []string { mut keys := []string{ len:m.len } mut j := 0 - for i := u32(0); i < m.key_values.len; i++ { - if unsafe {m.key_values.keys[i]}.str == 0 { + for i := 0; i < m.key_values.len; i++ { + if !m.key_values.has_index(i) { continue } - keys[j] = unsafe {m.key_values.keys[i]}.clone() + pkey := unsafe {&string(m.key_values.key(i))} + keys[j] = pkey.clone() j++ } return keys @@ -524,20 +513,16 @@ pub fn (m &map) keys() []string { [unsafe] pub fn (d DenseArray) clone() DenseArray { - ksize := int(d.cap * sizeof(string)) - vsize := int(d.cap * u32(d.value_bytes)) res := DenseArray { + key_bytes: d.key_bytes value_bytes: d.value_bytes + slot_bytes: d.slot_bytes cap: d.cap len: d.len deletes: d.deletes - keys: unsafe {&string(malloc(ksize))} - values: unsafe {byteptr(malloc(vsize))} - } - unsafe { - C.memcpy(res.keys, d.keys, ksize) - C.memcpy(res.values, d.values, vsize) + data: unsafe {memdup(d.data, d.cap * d.slot_bytes)} } + // FIXME clone each key return res } @@ -552,7 +537,7 @@ pub fn (m map) clone() map { key_values: unsafe {m.key_values.clone()} metas: &u32(malloc(metasize)) extra_metas: m.extra_metas - len: m.len + len: m.len } unsafe { C.memcpy(res.metas, m.metas, metasize) @@ -565,17 +550,17 @@ pub fn (m &map) free() { unsafe { free(m.metas) } - for i := u32(0); i < m.key_values.len; i++ { - if unsafe {m.key_values.keys[i]}.str == 0 { + for i := 0; i < m.key_values.len; i++ { + if !m.key_values.has_index(i) { continue } unsafe { - m.key_values.keys[i].free() + pkey := &string(m.key_values.key(i)) + (*pkey).free() } } unsafe { - free(m.key_values.keys) - free(m.key_values.values) + free(m.key_values.data) } } diff --git a/vlib/v/gen/auto_str_methods.v b/vlib/v/gen/auto_str_methods.v index 0481a6b0d9..5b2f7f3d99 100644 --- a/vlib/v/gen/auto_str_methods.v +++ b/vlib/v/gen/auto_str_methods.v @@ -264,7 +264,7 @@ fn (mut g Gen) gen_str_for_map(info table.Map, styp string, str_fn_name string) g.auto_str_funcs.writeln('static string indent_${str_fn_name}($styp m, int indent_count) { /* gen_str_for_map */') g.auto_str_funcs.writeln('\tstrings__Builder sb = strings__new_builder(m.key_values.len*10);') g.auto_str_funcs.writeln('\tstrings__Builder_write(&sb, _SLIT("{"));') - g.auto_str_funcs.writeln('\tfor (unsigned int i = 0; i < m.key_values.len; ++i) {') + g.auto_str_funcs.writeln('\tfor (int i = 0; i < m.key_values.len; ++i) {') g.auto_str_funcs.writeln('\t\tif (!DenseArray_has_index(&m.key_values, i)) { continue; }') g.auto_str_funcs.writeln('\t\tstring key = *(string*)DenseArray_key(&m.key_values, i);') g.auto_str_funcs.writeln('\t\tstrings__Builder_write(&sb, _STR("\'%.*s\\000\'", 2, key));') diff --git a/vlib/v/gen/cgen.v b/vlib/v/gen/cgen.v index 460946be2f..ff1163cba6 100644 --- a/vlib/v/gen/cgen.v +++ b/vlib/v/gen/cgen.v @@ -1233,7 +1233,7 @@ fn (mut g Gen) for_in(it ast.ForInStmt) { g.write('$atmp_styp $atmp = ') g.expr(it.cond) g.writeln(';') - g.writeln('for (int $idx = 0; $idx < (int)${atmp}.key_values.len; ++$idx) {') + g.writeln('for (int $idx = 0; $idx < ${atmp}.key_values.len; ++$idx) {') g.writeln('\tif (!DenseArray_has_index(&${atmp}.key_values, $idx)) {continue;}') if it.key_var != '_' { key_styp := g.typ(it.key_type)