mirror of
https://github.com/vlang/v.git
synced 2023-08-10 21:13:21 +03:00
math/stats: added basic stats operations
This commit is contained in:
parent
d6ddfa124d
commit
760034b6b1
251
vlib/math/stats/stats.v
Normal file
251
vlib/math/stats/stats.v
Normal file
@ -0,0 +1,251 @@
|
||||
module stats
|
||||
|
||||
import math
|
||||
|
||||
// This module defines the following statistical operations on f64 array
|
||||
// ---------------------------
|
||||
// | Summary of Functions |
|
||||
// ---------------------------
|
||||
// -----------------------------------------------------------------------
|
||||
// freq - Frequency
|
||||
// mean - Mean
|
||||
// geometric_mean - Geometric Mean
|
||||
// harmonic_mean - Harmonic Mean
|
||||
// median - Median
|
||||
// mode - Mode
|
||||
// rms - Root Mean Square
|
||||
// population_variance - Population Variance
|
||||
// sample_variance - Sample Variance
|
||||
// population_stddev - Population Standard Deviation
|
||||
// sample_stddev - Sample Standard Deviation
|
||||
// mean_absdev - Mean Absolute Deviation
|
||||
// min - Minimum of the Array
|
||||
// max - Maximum of the Array
|
||||
// range - Range of the Array ( max - min )
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
|
||||
// freq is a measure of occurrence: it returns how many elements of `arr`
// compare equal (`==`) to `val`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq(arr []f64, val f64) int {
	mut hits := 0
	for i := 0; i < arr.len; i++ {
		if arr[i] == val {
			hits++
		}
	}
	return hits
}
|
||||
|
||||
// mean is a measure of central tendency: it returns the arithmetic mean
// (sum of the elements divided by their count) of `arr`.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	mut total := f64(0)
	for i := 0; i < n; i++ {
		total += arr[i]
	}
	return total / f64(n)
}
|
||||
|
||||
// geometric_mean is a measure of central tendency: it returns the product of
// all elements of `arr` raised to the power 1/len.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut product := f64(1)
	for x in arr {
		product *= x
	}
	// the n-th root is taken as a power with exponent 1/n
	exponent := f64(1) / f64(arr.len)
	return math.pow(product, exponent)
}
|
||||
|
||||
// harmonic_mean is a measure of central tendency: it returns the element
// count divided by the sum of the reciprocals of the elements of `arr`.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean(arr []f64) f64 {
	count := arr.len
	if count == 0 {
		return f64(0)
	}
	mut reciprocal_sum := f64(0)
	for i := 0; i < count; i++ {
		reciprocal_sum += f64(1) / arr[i]
	}
	return f64(count) / reciprocal_sum
}
|
||||
|
||||
// median is a measure of central tendency: it returns the middle value of
// `arr`. The input array is assumed to be sorted already; it is not sorted here.
// For an odd length the middle element is returned, for an even length the
// average of the two middle elements. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	if n % 2 != 0 {
		return arr[(n - 1) / 2]
	}
	// even length: average the two elements around the centre
	upper := n / 2
	return (arr[upper - 1] + arr[upper]) / f64(2)
}
|
||||
|
||||
// mode is a measure of central tendency: it returns the element of `arr`
// that occurs most often, using `freq` to count occurrences.
// When several values share the highest count, the one that appears first
// in `arr` is returned. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut best_idx := 0
	mut best_count := freq(arr, arr[0])
	for idx := 1; idx < arr.len; idx++ {
		count := freq(arr, arr[idx])
		// strict comparison keeps the earliest element on ties
		if count > best_count {
			best_idx = idx
			best_count = count
		}
	}
	return arr[best_idx]
}
|
||||
|
||||
// rms returns the root mean square of `arr`: the square root of the mean of
// the squared elements. An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	mut squares := f64(0)
	for i := 0; i < n; i++ {
		squares += math.pow(arr[i], 2)
	}
	mean_square := squares / f64(n)
	return math.sqrt(mean_square)
}
|
||||
|
||||
// population_variance is a measure of dispersion / spread: it returns the
// mean of the squared deviations of the elements of `arr` from their mean
// (divisor is the element count). An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut squared_dev := f64(0)
	for i := 0; i < n; i++ {
		squared_dev += math.pow(arr[i] - center, 2)
	}
	return squared_dev / f64(n)
}
|
||||
|
||||
// sample_variance is a measure of dispersion / spread: it returns the sum of
// the squared deviations of the elements of `arr` from their mean, divided
// by (len - 1).
// Arrays with fewer than two elements yield 0: for an empty array there is
// nothing to measure, and for a single element the (len - 1) divisor would
// be 0 and the division 0/0 would produce NaN.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance(arr []f64) f64 {
	if arr.len < 2 {
		return f64(0)
	}
	m := mean(arr)
	mut sum := f64(0)
	for v in arr {
		sum += math.pow(v - m, 2)
	}
	return sum / f64(arr.len - 1)
}
|
||||
|
||||
// population_stddev is a measure of dispersion / spread: it returns the
// square root of `population_variance(arr)`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_stddev(arr []f64) f64 {
	if arr.len > 0 {
		variance := population_variance(arr)
		return math.sqrt(variance)
	}
	return f64(0)
}
|
||||
|
||||
// sample_stddev is a measure of dispersion / spread: it returns the square
// root of `sample_variance(arr)`.
// Arrays with fewer than two elements yield 0; the sample variance divides
// by (len - 1), which is 0 for a single element, so the guard here keeps the
// result from becoming NaN.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_stddev(arr []f64) f64 {
	if arr.len < 2 {
		return f64(0)
	}
	return math.sqrt(sample_variance(arr))
}
|
||||
|
||||
// mean_absdev is a measure of dispersion / spread: it returns the mean of
// the absolute deviations of the elements of `arr` from their mean.
// An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn mean_absdev(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut total_dev := f64(0)
	for i := 0; i < n; i++ {
		total_dev += math.abs(arr[i] - center)
	}
	return total_dev / f64(n)
}
|
||||
|
||||
// min returns the smallest element of `arr`. An empty array yields 0.
pub fn min(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut smallest := arr[0]
	// the first element is the initial candidate, so scanning starts at index 1
	for i := 1; i < arr.len; i++ {
		if arr[i] < smallest {
			smallest = arr[i]
		}
	}
	return smallest
}
|
||||
|
||||
// max returns the largest element of `arr`. An empty array yields 0.
pub fn max(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut largest := arr[0]
	// the first element is the initial candidate, so scanning starts at index 1
	for i := 1; i < arr.len; i++ {
		if arr[i] > largest {
			largest = arr[i]
		}
	}
	return largest
}
|
||||
|
||||
// range is a measure of dispersion / spread: it returns the difference
// between the maximum and the minimum of `arr`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range(arr []f64) f64 {
	if arr.len > 0 {
		highest := max(arr)
		lowest := min(arr)
		return highest - lowest
	}
	return f64(0)
}
|
259
vlib/math/stats_test.v
Normal file
259
vlib/math/stats_test.v
Normal file
@ -0,0 +1,259 @@
|
||||
import math.stats as stats
|
||||
|
||||
// test_freq checks freq for a repeated value, a value present once and an
// absent value.
fn test_freq() {
	// Expected values were also verified on Wolfram Alpha
	samples := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)]
	repeated := stats.freq(samples, 10.0)
	assert repeated == 2
	single := stats.freq(samples, 2.7)
	assert single == 1
	absent := stats.freq(samples, 15)
	assert absent == 0
}
|
||||
|
||||
// test_mean checks mean on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_mean() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.mean([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('5.762500')
	second := stats.mean([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('17.650000')
	third := stats.mean([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('37.708000')
}
|
||||
|
||||
// test_geometric_mean checks geometric_mean on three data sets, one of which
// contains a negative value.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_geometric_mean() {
	// Tests were also verified on Wolfram Alpha
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.geometric_mean(data)
	assert o.str().eq('5.159932')
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.geometric_mean(data)
	// In math this case yields a complex number, so NaN (either sign) or 0 is accepted
	assert o.str().eq('nan') || o.str().eq('-nan') || o == f64(0)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.geometric_mean(data)
	assert o.str().eq('25.064496')
}
|
||||
|
||||
// test_harmonic_mean checks harmonic_mean on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_harmonic_mean() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.harmonic_mean([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('4.626519')
	second := stats.harmonic_mean([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('9.134577')
	third := stats.harmonic_mean([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('16.555477')
}
|
||||
|
||||
// test_median checks median on even-length and odd-length inputs.
// median assumes a sorted array, so every fixture below is given in
// ascending order.
// Even-length results are compared through their string form because
// comparing the f64 values with == ran into precision issues.
fn test_median() {
	// Tests were also verified on Wolfram Alpha

	// Even
	mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	mut o := stats.median(data)
	assert o.str().eq('5.175000')
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)]
	o = stats.median(data)
	assert o.str().eq('3.145000')
	data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert o.str().eq('33.415000')

	// Odd
	data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)]
	o = stats.median(data)
	assert o == f64(5.9)
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)]
	o = stats.median(data)
	assert o == f64(4.4)
	// sorted to honour the precondition; the middle element is still 12.0
	data = [f64(3.3), f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert o == f64(12.0)
}
|
||||
|
||||
// test_mode checks mode on data with a single most frequent value and on
// data where two values tie.
fn test_mode() {
	// Expected values were also verified on Wolfram Alpha
	pair := stats.mode([f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)])
	assert pair == f64(2.7)
	triple := stats.mode([f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9), f64(67.31)])
	assert triple == f64(1.89)
	// Greedy nature: on a tie the value that appears first wins
	tied := stats.mode([f64(2.0), f64(4.0), f64(2.0), f64(4.0)])
	assert tied == f64(2.0)
}
|
||||
|
||||
// test_rms checks rms on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_rms() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.rms([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('6.362046')
	second := stats.rms([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('33.773393')
	third := stats.rms([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('47.452561')
}
|
||||
|
||||
// test_population_variance checks population_variance on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_population_variance() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.population_variance([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('7.269219')
	second := stats.population_variance([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('829.119550')
	third := stats.population_variance([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('829.852282')
}
|
||||
|
||||
// test_sample_variance checks sample_variance on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_sample_variance() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.sample_variance([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('9.692292')
	second := stats.sample_variance([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('1105.492733')
	third := stats.sample_variance([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('1106.469709')
}
|
||||
|
||||
// test_population_stddev checks population_stddev on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_population_stddev() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.population_stddev([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('2.696149')
	second := stats.population_stddev([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('28.794436')
	third := stats.population_stddev([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('28.807157')
}
|
||||
|
||||
// test_sample_stddev checks sample_stddev on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_sample_stddev() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.sample_stddev([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('3.113245')
	second := stats.sample_stddev([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('33.248951')
	third := stats.sample_stddev([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('33.263639')
}
|
||||
|
||||
// test_mean_absdev checks mean_absdev on three data sets.
// Results are compared through their string form because comparing the f64
// values with == ran into precision issues.
fn test_mean_absdev() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.mean_absdev([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first.str().eq('2.187500')
	second := stats.mean_absdev([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second.str().eq('24.830000')
	third := stats.mean_absdev([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third.str().eq('27.768000')
}
|
||||
|
||||
// test_min checks min on three data sets, including one with a negative value.
fn test_min() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.min([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first == f64(2.7)
	second := stats.min([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second == f64(-3.0)
	third := stats.min([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third == f64(7.88)
}
|
||||
|
||||
// test_max checks max on three data sets.
fn test_max() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.max([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first == f64(10.0)
	second := stats.max([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second == f64(67.31)
	third := stats.max([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third == f64(76.122)
}
|
||||
|
||||
// test_range checks range (maximum - minimum) on three data sets.
fn test_range() {
	// Expected values were also verified on Wolfram Alpha
	first := stats.range([f64(10.0), f64(4.45), f64(5.9), f64(2.7)])
	assert first == f64(7.3)
	second := stats.range([f64(-3.0), f64(67.31), f64(4.4), f64(1.89)])
	assert second == f64(70.31)
	third := stats.range([f64(12.0), f64(7.88), f64(76.122), f64(54.83)])
	assert third == f64(68.242)
}
|
||||
|
||||
// test_passing_empty checks that every function of the stats module returns
// 0 when it is given an empty array.
fn test_passing_empty() {
	empty := []f64
	// occurrence
	assert stats.freq(empty, 0) == 0
	// central tendency
	assert stats.mean(empty) == f64(0)
	assert stats.geometric_mean(empty) == f64(0)
	assert stats.harmonic_mean(empty) == f64(0)
	assert stats.median(empty) == f64(0)
	assert stats.mode(empty) == f64(0)
	assert stats.rms(empty) == f64(0)
	// dispersion / spread
	assert stats.population_variance(empty) == f64(0)
	assert stats.sample_variance(empty) == f64(0)
	assert stats.population_stddev(empty) == f64(0)
	assert stats.sample_stddev(empty) == f64(0)
	assert stats.mean_absdev(empty) == f64(0)
	// extremes
	assert stats.min(empty) == f64(0)
	assert stats.max(empty) == f64(0)
	assert stats.range(empty) == f64(0)
}
|
Loading…
Reference in New Issue
Block a user