rand: reorganize: phase 2

2023-08-10 21:13:21 +03:00 · 2020-06-09 18:36:07 +05:30
parent 67fcce2d46
commit e649cf84e3
28 changed files with 603 additions and 408 deletions
--- a/vlib/rand/sys/system_rng.c.v
+++ b/vlib/rand/sys/system_rng.c.v
@ -0,0 +1,278 @@
+// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module sys
+
+import math.bits
+import rand.util
+
+// Implementation note:
+// ====================
+// C.rand() is okay to use within its defined range of C.RAND_MAX.
+// (See: https://web.archive.org/web/20180801210127/http://eternallyconfuzzled.com/arts/jsw_art_rand.aspx)
+// The problem is, this value varies with the libc implementation. On windows,
+// for example, RAND_MAX is usually a measly 32767, whereas on (newer) linux it's generaly
+// 2147483647. The repetition period also varies wildly. In order to provide more entropy
+// without altering the underlying algorithm too much, this implementation simply
+// requests for more random bits until the necessary width for the integers is achieved.
+const (
+	rand_limit     = u64(C.RAND_MAX)
+	rand_bitsize   = bits.len_64(rand_limit)
+	u32_iter_count = calculate_iterations_for(32)
+	u64_iter_count = calculate_iterations_for(64)
+)
+
+fn calculate_iterations_for(bits int) int {
+	base := bits / rand_bitsize
+	extra := if bits % rand_bitsize == 0 { 0 } else { 1 }
+	return base + extra
+}
+
+// C.rand returns a pseudorandom integer from 0 (inclusive) to C.RAND_MAX (exclusive)
+fn C.rand() int
+
+// C.srand seeds the internal PRNG with the given int seed.
+// fn C.srand(seed int)
+// SysRNG is the PRNG provided by default in the libc implementiation that V uses.
+pub struct SysRNG {
+mut:
+	seed u32 = util.time_seed_32()
+}
+
+// r.seed() sets the seed of the accepting SysRNG to the given data.
+pub fn (mut r SysRNG) seed(seed_data []u32) {
+	if seed_data.len != 1 {
+		eprintln('SysRNG needs one 32-bit unsigned integer as the seed.')
+		exit(1)
+	}
+	r.seed = seed_data[0]
+	C.srand(int(r.seed))
+}
+
+// r.default_rand() exposes the default behavior of the system's RNG
+// (equivalent to calling C.rand()). Recommended for testing/comparison
+// b/w V and other languages using libc and not for regular use.
+// This is also a one-off feature of SysRNG, similar to the global seed
+// situation. Other generators will not have this.
+[inline]
+pub fn (r SysRNG) default_rand() int {
+	return C.rand()
+}
+
+// r.u32() returns a pseudorandom u32 value less than 2^32
+[inline]
+pub fn (r SysRNG) u32() u32 {
+	mut result := u32(C.rand())
+	for i in 1 .. u32_iter_count {
+		result = result ^ (u32(C.rand()) << (rand_bitsize * i))
+	}
+	return result
+}
+
+// r.u64() returns a pseudorandom u64 value less than 2^64
+[inline]
+pub fn (r SysRNG) u64() u64 {
+	mut result := u64(C.rand())
+	for i in 1 .. u64_iter_count {
+		result = result ^ (u64(C.rand()) << (rand_bitsize * i))
+	}
+	return result
+}
+
+// r.u32n(max) returns a pseudorandom u32 value that is guaranteed to be less than max
+[inline]
+pub fn (r SysRNG) u32n(max u32) u32 {
+	if max == 0 {
+		eprintln('max must be positive integer')
+		exit(1)
+	}
+	// Owing to the pigeon-hole principle, we can't simply do
+	// val := rng.u32() % max.
+	// It'll wreck the properties of the distribution unless
+	// max evenly divides 2^32. So we divide evenly to
+	// the closest power of two. Then we loop until we find
+	// an int in the required range
+	bit_len := bits.len_32(max)
+	if bit_len == 32 {
+		for {
+			value := r.u32()
+			if value < max {
+				return value
+			}
+		}
+	} else {
+		mask := (u32(1) << (bit_len + 1)) - 1
+		for {
+			value := r.u32() & mask
+			if value < max {
+				return value
+			}
+		}
+	}
+	return u32(0)
+}
+
+// r.u64n(max) returns a pseudorandom u64 value that is guaranteed to be less than max
+[inline]
+pub fn (r SysRNG) u64n(max u64) u64 {
+	if max == 0 {
+		eprintln('max must be positive integer')
+		exit(1)
+	}
+	// Similar procedure for u64s
+	bit_len := bits.len_64(max)
+	if bit_len == 64 {
+		for {
+			value := r.u64()
+			if value < max {
+				return value
+			}
+		}
+	} else {
+		mask := (u64(1) << (bit_len + 1)) - 1
+		for {
+			value := r.u64() & mask
+			if value < max {
+				return value
+			}
+		}
+	}
+	return u64(0)
+}
+
+// r.u32n(min, max) returns a pseudorandom u32 value that is guaranteed to be in [min, max)
+[inline]
+pub fn (r SysRNG) u32_in_range(min, max u32) u32 {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	return min + r.u32n(max - min)
+}
+
+// r.u64n(min, max) returns a pseudorandom u64 value that is guaranteed to be in [min, max)
+[inline]
+pub fn (r SysRNG) u64_in_range(min, max u64) u64 {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	return min + r.u64n(max - min)
+}
+
+// r.int() returns a pseudorandom 32-bit int (which may be negative)
+[inline]
+pub fn (r SysRNG) int() int {
+	return int(r.u32())
+}
+
+// r.i64() returns a pseudorandom 64-bit i64 (which may be negative)
+[inline]
+pub fn (r SysRNG) i64() i64 {
+	return i64(r.u64())
+}
+
+// r.int31() returns a pseudorandom 31-bit int which is non-negative
+[inline]
+pub fn (r SysRNG) int31() int {
+	return int(r.u32() & util.u31_mask) // Set the 32nd bit to 0.
+}
+
+// r.int63() returns a pseudorandom 63-bit int which is non-negative
+[inline]
+pub fn (r SysRNG) int63() i64 {
+	return i64(r.u64() & util.u63_mask) // Set the 64th bit to 0.
+}
+
+// r.intn(max) returns a pseudorandom int that lies in [0, max)
+[inline]
+pub fn (r SysRNG) intn(max int) int {
+	if max <= 0 {
+		eprintln('max has to be positive.')
+		exit(1)
+	}
+	return int(r.u32n(u32(max)))
+}
+
+// r.i64n(max) returns a pseudorandom i64 that lies in [0, max)
+[inline]
+pub fn (r SysRNG) i64n(max i64) i64 {
+	if max <= 0 {
+		eprintln('max has to be positive.')
+		exit(1)
+	}
+	return i64(r.u64n(u64(max)))
+}
+
+// r.int_in_range(min, max) returns a pseudorandom int that lies in [min, max)
+[inline]
+pub fn (r SysRNG) int_in_range(min, max int) int {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	// This supports negative ranges like [-10, -5) because the difference is positive
+	return min + r.intn(max - min)
+}
+
+// r.i64_in_range(min, max) returns a pseudorandom i64 that lies in [min, max)
+[inline]
+pub fn (r SysRNG) i64_in_range(min, max i64) i64 {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	return min + r.i64n(max - min)
+}
+
+// r.f32() returns a pseudorandom f32 value between 0.0 (inclusive) and 1.0 (exclusive) i.e [0, 1)
+[inline]
+pub fn (r SysRNG) f32() f32 {
+	return f32(r.u32()) / util.max_u32_as_f32
+}
+
+// r.f64() returns a pseudorandom f64 value between 0.0 (inclusive) and 1.0 (exclusive) i.e [0, 1)
+[inline]
+pub fn (r SysRNG) f64() f64 {
+	return f64(r.u64()) / util.max_u64_as_f64
+}
+
+// r.f32n() returns a pseudorandom f32 value in [0, max)
+[inline]
+pub fn (r SysRNG) f32n(max f32) f32 {
+	if max <= 0 {
+		eprintln('max has to be positive.')
+		exit(1)
+	}
+	return r.f32() * max
+}
+
+// r.f64n() returns a pseudorandom f64 value in [0, max)
+[inline]
+pub fn (r SysRNG) f64n(max f64) f64 {
+	if max <= 0 {
+		eprintln('max has to be positive.')
+		exit(1)
+	}
+	return r.f64() * max
+}
+
+// r.f32_in_range(min, max) returns a pseudorandom f32 that lies in [min, max)
+[inline]
+pub fn (r SysRNG) f32_in_range(min, max f32) f32 {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	return min + r.f32n(max - min)
+}
+
+// r.i64_in_range(min, max) returns a pseudorandom i64 that lies in [min, max)
+[inline]
+pub fn (r SysRNG) f64_in_range(min, max f64) f64 {
+	if max <= min {
+		eprintln('max must be greater than min')
+		exit(1)
+	}
+	return min + r.f64n(max - min)
+}
--- a/vlib/rand/sys/system_rng.js.v
+++ b/vlib/rand/sys/system_rng.js.v
@ -0,0 +1,15 @@
+// Copyright (c) 2019-2020 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module sys
+
+// Until there's a portable, JS has a seeded way to produce random numbers
+// and not just Math.random(), use any of the existing implementations
+// as the System's RNG
+type SysRNG WyRandRNG
+
+// In the JS version, we simply return the same int as is normally generated.
+[inline]
+pub fn (r SysRNG) default_rand() int {
+	return r.int()
+}
--- a/vlib/rand/sys/system_rng_test.v
+++ b/vlib/rand/sys/system_rng_test.v
@ -0,0 +1,354 @@
+import math
+import sys
+
+const (
+	range_limit = 40
+	value_count = 1000
+	seeds       = [u32(42), 256]
+)
+
+const (
+	sample_size   = 1000
+	stats_epsilon = 0.05
+	inv_sqrt_12   = 1.0 / math.sqrt(12)
+)
+
+fn get_n_randoms(n int, r sys.SysRNG) []int {
+	mut ints := []int{cap: n}
+	for _ in 0 .. n {
+		ints << r.int()
+	}
+	return ints
+}
+
+fn test_sys_rng_reproducibility() {
+	// Note that C.srand() sets the seed globally.
+	// So the order of seeding matters. It is recommended
+	// to obtain all necessary data first, then set the
+	// seed for another batch of data.
+	for seed in seeds {
+		seed_data := [seed]
+		mut r1 := sys.SysRNG{}
+		mut r2 := sys.SysRNG{}
+		r1.seed(seed_data)
+		ints1 := get_n_randoms(value_count, r1)
+		r2.seed(seed_data)
+		ints2 := get_n_randoms(value_count, r2)
+		assert ints1 == ints2
+	}
+}
+
+// TODO: use the `in` syntax and remove this function
+// after generics has been completely implemented
+fn found(value u64, arr []u64) bool {
+	for item in arr {
+		if value == item {
+			return true
+		}
+	}
+	return false
+}
+
+fn test_sys_rng_variability() {
+	// If this test fails and if it is certainly not the implementation
+	// at fault, try changing the seed values. Repeated values are
+	// improbable but not impossible.
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		mut values := []u64{cap: value_count}
+		for i in 0 .. value_count {
+			value := rng.u64()
+			assert !found(value, values)
+			assert values.len == i
+			values << value
+		}
+	}
+}
+
+fn check_uniformity_u64(rng sys.SysRNG, range u64) {
+	range_f64 := f64(range)
+	expected_mean := range_f64 / 2.0
+	mut variance := 0.0
+	for _ in 0 .. sample_size {
+		diff := f64(rng.u64n(range)) - expected_mean
+		variance += diff * diff
+	}
+	variance /= sample_size - 1
+	sigma := math.sqrt(variance)
+	expected_sigma := range_f64 * inv_sqrt_12
+	error := (sigma - expected_sigma) / expected_sigma
+	assert math.abs(error) < stats_epsilon
+}
+
+fn test_sys_rng_uniformity_u64() {
+	// This assumes that C.rand() produces uniform results to begin with.
+	// If the failure persists, report an issue on GitHub
+	ranges := [14019545, 80240, 130]
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for range in ranges {
+			check_uniformity_u64(rng, u64(range))
+		}
+	}
+}
+
+fn check_uniformity_f64(rng sys.SysRNG) {
+	expected_mean := 0.5
+	mut variance := 0.0
+	for _ in 0 .. sample_size {
+		diff := rng.f64() - expected_mean
+		variance += diff * diff
+	}
+	variance /= sample_size - 1
+	sigma := math.sqrt(variance)
+	expected_sigma := inv_sqrt_12
+	error := (sigma - expected_sigma) / expected_sigma
+	assert math.abs(error) < stats_epsilon
+}
+
+fn test_sys_rng_uniformity_f64() {
+	// The f64 version
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		check_uniformity_f64(rng)
+	}
+}
+
+fn test_sys_rng_u32n() {
+	max := u32(16384)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.u32n(max)
+			assert value >= 0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_u64n() {
+	max := u64(379091181005)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.u64n(max)
+			assert value >= 0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_u32_in_range() {
+	max := u32(484468466)
+	min := u32(316846)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.u32_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_u64_in_range() {
+	max := u64(216468454685163)
+	min := u64(6848646868)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.u64_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_intn() {
+	max := 2525642
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.intn(max)
+			assert value >= 0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_i64n() {
+	max := i64(3246727724653636)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.i64n(max)
+			assert value >= 0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_int_in_range() {
+	min := -4252
+	max := 23054962
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.int_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_i64_in_range() {
+	min := i64(-24095)
+	max := i64(324058)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.i64_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_int31() {
+	max_u31 := 0x7FFFFFFF
+	sign_mask := 0x80000000
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.int31()
+			assert value >= 0
+			assert value <= max_u31
+			// This statement ensures that the sign bit is zero
+			assert (value & sign_mask) == 0
+		}
+	}
+}
+
+fn test_sys_rng_int63() {
+	max_u63 := i64(0x7FFFFFFFFFFFFFFF)
+	sign_mask := i64(0x8000000000000000)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.int63()
+			assert value >= 0
+			assert value <= max_u63
+			assert (value & sign_mask) == 0
+		}
+	}
+}
+
+fn test_sys_rng_f32() {
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f32()
+			assert value >= 0.0
+			assert value < 1.0
+		}
+	}
+}
+
+fn test_sys_rng_f64() {
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f64()
+			assert value >= 0.0
+			assert value < 1.0
+		}
+	}
+}
+
+fn test_sys_rng_f32n() {
+	max := f32(357.0)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f32n(max)
+			assert value >= 0.0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_f64n() {
+	max := 1.52e6
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f64n(max)
+			assert value >= 0.0
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_f32_in_range() {
+	min := f32(-24.0)
+	max := f32(125.0)
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f32_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}
+
+fn test_sys_rng_f64_in_range() {
+	min := -548.7
+	max := 5015.2
+	for seed in seeds {
+		seed_data := [seed]
+		mut rng := sys.SysRNG{}
+		rng.seed(seed_data)
+		for _ in 0 .. range_limit {
+			value := rng.f64_in_range(min, max)
+			assert value >= min
+			assert value < max
+		}
+	}
+}