From f75ce9dd822b972bd13f1cef23ccfe69a5377971 Mon Sep 17 00:00:00 2001 From: Subhomoy Haldar Date: Sun, 27 Mar 2022 20:08:59 +0530 Subject: [PATCH] rand: move functions from rand.util to the main module (#13840) --- vlib/rand/rand.v | 107 +++++++++++++++--- vlib/rand/random_numbers_test.v | 48 +++++++- vlib/rand/util/util.v | 52 --------- vlib/rand/util/util_test.v | 57 ---------- ...cs_with_nested_external_generics_fn_test.v | 9 +- 5 files changed, 142 insertions(+), 131 deletions(-) delete mode 100644 vlib/rand/util/util.v delete mode 100644 vlib/rand/util/util_test.v diff --git a/vlib/rand/rand.v b/vlib/rand/rand.v index bf2abc0641..15ac15c013 100644 --- a/vlib/rand/rand.v +++ b/vlib/rand/rand.v @@ -274,6 +274,76 @@ pub fn (mut rng PRNG) ascii(len int) string { return internal_string_from_set(mut rng, rand.ascii_chars, len) } +// Configuration struct for the shuffle functions. +// The start index is inclusive and the end index is exclusive. +// Set the end to 0 to shuffle until the end of the array. +[params] +pub struct ShuffleConfigStruct { +pub: + start int + end int +} + +fn (config ShuffleConfigStruct) validate_for(a []T) ? { + if config.start < 0 || config.start >= a.len { + return error("argument 'config.start' must be in range [0, a.len)") + } + if config.end < 0 || config.end > a.len { + return error("argument 'config.end' must be in range [0, a.len]") + } +} + +// shuffle randomly permutates the elements in `a`. The range for shuffling is +// optional and the entire array is shuffled by default. Leave the end as 0 to +// shuffle all elements until the end. +[direct_array_access] +pub fn (mut rng PRNG) shuffle(mut a []T, config ShuffleConfigStruct) ? { + config.validate_for(a) ? + new_end := if config.end == 0 { a.len } else { config.end } + for i in config.start .. new_end { + x := rng.int_in_range(i, new_end) or { config.start } + // swap + a_i := a[i] + a[i] = a[x] + a[x] = a_i + } +} + +// shuffle_clone returns a random permutation of the elements in `a`. +// The permutation is done on a fresh clone of `a`, so `a` remains unchanged. +pub fn (mut rng PRNG) shuffle_clone(a []T, config ShuffleConfigStruct) ?[]T { + mut res := a.clone() + rng.shuffle(mut res, config) ? + return res +} + +// choose samples k elements from the array without replacement. +// This means the indices cannot repeat and it restricts the sample size to be less than or equal to the size of the given array. +// Note that if the array has repeating elements, then the sample may have repeats as well. +pub fn (mut rng PRNG) choose(array []T, k int) ?[]T { + n := array.len + if k > n { + return error('Cannot choose $k elements without replacement from a $n-element array.') + } + mut results := []T{len: k} + mut indices := []int{len: n, init: it} + rng.shuffle(mut indices) ? + for i in 0 .. k { + results[i] = array[indices[i]] + } + return results +} + +// sample samples k elements from the array with replacement. +// This means the elements can repeat and the size of the sample may exceed the size of the array. +pub fn (mut rng PRNG) sample(array []T, k int) []T { + mut results := []T{len: k} + for i in 0 .. k { + results[i] = array[rng.intn(array.len) or { 0 }] + } + return results +} + __global default_rng &PRNG // new_default returns a new instance of the default RNG. If the seed is not provided, the current time will be used to seed the instance. @@ -440,17 +510,17 @@ const ( // users or business transactions. // (https://news.ycombinator.com/item?id=14526173) pub fn ulid() string { - return internal_ulid_at_millisecond(mut default_rng, u64(time.utc().unix_time_milli())) + return default_rng.ulid() } // ulid_at_millisecond does the same as `ulid` but takes a custom Unix millisecond timestamp via `unix_time_milli`. pub fn ulid_at_millisecond(unix_time_milli u64) string { - return internal_ulid_at_millisecond(mut default_rng, unix_time_milli) + return default_rng.ulid_at_millisecond(unix_time_milli) } // string_from_set returns a string of length `len` containing random characters sampled from the given `charset` pub fn string_from_set(charset string, len int) string { - return internal_string_from_set(mut default_rng, charset, len) + return default_rng.string_from_set(charset, len) } // string returns a string of length `len` containing random characters in range `[a-zA-Z]`. @@ -468,19 +538,28 @@ pub fn ascii(len int) string { return string_from_set(rand.ascii_chars, len) } -// shuffle randomly permutates the elements in `a`. -pub fn shuffle(mut a []T) { - len := a.len - for i in 0 .. len { - si := i + intn(len - i) or { len } - a[si], a[i] = a[i], a[si] - } +// shuffle randomly permutates the elements in `a`. The range for shuffling is +// optional and the entire array is shuffled by default. Leave the end as 0 to +// shuffle all elements until the end. +pub fn shuffle(mut a []T, config ShuffleConfigStruct) ? { + default_rng.shuffle(mut a, config) ? } // shuffle_clone returns a random permutation of the elements in `a`. // The permutation is done on a fresh clone of `a`, so `a` remains unchanged. -pub fn shuffle_clone(a []T) []T { - mut res := a.clone() - shuffle(mut res) - return res +pub fn shuffle_clone(a []T, config ShuffleConfigStruct) ?[]T { + return default_rng.shuffle_clone(a, config) +} + +// choose samples k elements from the array without replacement. +// This means the indices cannot repeat and it restricts the sample size to be less than or equal to the size of the given array. +// Note that if the array has repeating elements, then the sample may have repeats as well. +pub fn choose(array []T, k int) ?[]T { + return default_rng.choose(array, k) +} + +// sample samples k elements from the array with replacement. +// This means the elements can repeat and the size of the sample may exceed the size of the array. +pub fn sample(array []T, k int) []T { + return default_rng.sample(array, k) } diff --git a/vlib/rand/random_numbers_test.v b/vlib/rand/random_numbers_test.v index 06a38cfc2e..b7f5ba7c1e 100644 --- a/vlib/rand/random_numbers_test.v +++ b/vlib/rand/random_numbers_test.v @@ -326,7 +326,6 @@ fn test_shuffle() { a := get_n_random_ints(seed, 10) arrays << a } - // mut digits := []map[int]int{len: 10} for digit in 0 .. 10 { digits[digit] = {} @@ -337,7 +336,7 @@ fn test_shuffle() { for mut a in arrays { o := a.clone() for _ in 0 .. 100 { - rand.shuffle(mut a) + rand.shuffle(mut a) or { panic('shuffle failed') } assert *a != o for idx in 0 .. 10 { digits[idx][a[idx]]++ @@ -355,12 +354,25 @@ fn test_shuffle() { } } +fn test_shuffle_partial() ? { + mut a := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + mut b := a.clone() + + rand.shuffle(mut a, start: 4) ? + assert a[..4] == b[..4] + + a = b.clone() + rand.shuffle(mut a, start: 3, end: 7) ? + assert a[..3] == b[..3] + assert a[7..] == b[7..] +} + fn test_shuffle_clone() { original := [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mut a := original.clone() mut results := [][]int{} for _ in 0 .. 10 { - results << rand.shuffle_clone(a) + results << rand.shuffle_clone(a) or { panic('shuffle failed') } } assert original == a for idx in 1 .. 10 { @@ -369,3 +381,33 @@ fn test_shuffle_clone() { assert results[idx] != original } } + +fn test_choose() ? { + lengths := [1, 3, 4, 5, 6, 7] + a := ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] + for length in lengths { + b := rand.choose(a, length) ? + assert b.len == length + for element in b { + assert element in a + // make sure every element occurs once + mut count := 0 + for e in b { + if e == element { + count++ + } + } + assert count == 1 + } + } +} + +fn test_sample() { + k := 20 + a := ['heads', 'tails'] + b := rand.sample(a, k) + assert b.len == k + for element in b { + assert element in a + } +} diff --git a/vlib/rand/util/util.v b/vlib/rand/util/util.v deleted file mode 100644 index 82a8c73185..0000000000 --- a/vlib/rand/util/util.v +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. -// Use of this source code is governed by an MIT license -// that can be found in the LICENSE file. -module util - -import rand - -// sample_nr returns a sample of the array without replacement. This means the indices cannot repeat and it restricts the sample size to be less than or equal to the size of the given array. Note that if the array has repeating elements, then the sample may have repeats as well. -pub fn sample_nr(array []T, k int) []T { - n := array.len - if k > n { - panic('Cannot sample $k elements without replacement from a $n-element array.') - } - mut results := []T{len: k} - mut indices := []int{len: n} - // Initialize with all indices - // temporary workaround for issue #10343 - for i in 0 .. indices.len { - indices[i] = i - } - shuffle(mut indices, k) - for i in 0 .. k { - results[i] = array[indices[i]] - } - return results -} - -// sample_r returns a sample of the array with replacement. This means the elements can repeat and the size of the sample may exceed the size of the array -pub fn sample_r(array []T, k int) []T { - n := array.len - mut results := []T{len: k} - for i in 0 .. k { - results[i] = array[rand.intn(n) or { 0 }] - } - return results -} - -// shuffle randomizes the first `n` items of an array in place (all if `n` is 0) -[direct_array_access] -pub fn shuffle(mut a []T, n int) { - if n < 0 || n > a.len { - panic("argument 'n' must be in range [0, a.len]") - } - cnt := if n == 0 { a.len - 1 } else { n } - for i in 0 .. cnt { - x := rand.int_in_range(i, a.len) or { 0 } - // swap - a_i := a[i] - a[i] = a[x] - a[x] = a_i - } -} diff --git a/vlib/rand/util/util_test.v b/vlib/rand/util/util_test.v deleted file mode 100644 index f92e70bad7..0000000000 --- a/vlib/rand/util/util_test.v +++ /dev/null @@ -1,57 +0,0 @@ -import rand -import rand.util - -fn test_sample_nr() { - lengths := [1, 3, 4, 5, 6, 7] - a := ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] - for length in lengths { - b := util.sample_nr(a, length) - assert b.len == length - for element in b { - assert element in a - // make sure every element occurs once - mut count := 0 - for e in b { - if e == element { - count++ - } - } - assert count == 1 - } - } -} - -fn test_sample_r() { - k := 20 - a := ['heads', 'tails'] - b := util.sample_r(a, k) - assert b.len == k - for element in b { - assert element in a - } -} - -fn test_shuffle() { - rand.seed([u32(1), 2]) // set seed to produce same results in order - a := [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - mut b := a.clone() - mut c := a.clone() - util.shuffle(mut b, 0) - util.shuffle(mut c, 0) - assert b == [6, 4, 5, 1, 9, 2, 10, 3, 8, 7] - assert c == [1, 6, 5, 8, 7, 2, 10, 9, 3, 4] - // test shuffling a slice - mut d := a.clone() - util.shuffle(mut d[..5], 0) - assert d == [5, 2, 1, 3, 4, 6, 7, 8, 9, 10] - assert d[5..] == a[5..] - // test shuffling n items - mut e := a.clone() - util.shuffle(mut e, 5) - assert e[..5] == [10, 3, 1, 8, 4] - assert e[5..] == [6, 7, 5, 9, 2] - // test shuffling empty array - mut f := a[..0] - util.shuffle(mut f, 0) - assert f == []int{} -} diff --git a/vlib/v/tests/generics_with_nested_external_generics_fn_test.v b/vlib/v/tests/generics_with_nested_external_generics_fn_test.v index 410e863e60..b98b6be90c 100644 --- a/vlib/v/tests/generics_with_nested_external_generics_fn_test.v +++ b/vlib/v/tests/generics_with_nested_external_generics_fn_test.v @@ -1,20 +1,19 @@ -import rand.util import rand -pub fn sample(arr []T, k int) []T { +pub fn sample(arr []T, k int) ?[]T { mut result := arr.clone() rand.seed([u32(1), 2]) // set seed to produce same results in order - util.shuffle(mut result, k) + rand.shuffle(mut result) ? return result[0..k] } -fn test_generics_with_nested_external_generics_fn() { +fn test_generics_with_nested_external_generics_fn() ? { mut arr := [11, 32, 24, 45, 57, 32, 37, 52, 37, 24] println(arr) - ret := sample(arr, 5) + ret := sample(arr, 5) ? println(ret) assert ret == [32, 45, 57, 11, 37]