1
0
mirror of https://github.com/vlang/v.git synced 2023-08-10 21:13:21 +03:00

math/stats: added basic stats operations

This commit is contained in:
Archan Patkar 2019-07-19 15:20:32 +05:30 committed by Alexander Medvednikov
parent d6ddfa124d
commit 760034b6b1
2 changed files with 510 additions and 0 deletions

251
vlib/math/stats/stats.v Normal file
View File

@ -0,0 +1,251 @@
module stats
import math
// This module defines the following statistical operations on f64 array
// ---------------------------
// | Summary of Functions |
// ---------------------------
// -----------------------------------------------------------------------
// freq - Frequency
// mean - Mean
// geometric_mean - Geometric Mean
// harmonic_mean - Harmonic Mean
// median - Median
// mode - Mode
// rms - Root Mean Square
// population_variance - Population Variance
// sample_variance - Sample Variance
// population_stddev - Population Standard Deviation
// sample_stddev - Sample Standard Deviation
// mean_absdev - Mean Absolute Deviation
// min - Minimum of the Array
// max - Maximum of the Array
// range - Range of the Array ( max - min )
// -----------------------------------------------------------------------
// Measure of Occurrence
// freq counts how many elements of `arr` compare equal (`==`) to `val`.
// An empty array produces 0 because the loop body never runs.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq(arr []f64, val f64) int {
	mut occurrences := 0
	for item in arr {
		if item == val {
			occurrences++
		}
	}
	return occurrences
}
// Measure of Central Tendency
// mean returns the arithmetic mean of `arr`: the sum of all elements
// divided by the element count. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut total := f64(0)
	for item in arr {
		total += item
	}
	count := f64(arr.len)
	return total / count
}
// Measure of Central Tendency
// geometric_mean returns the n-th root of the product of the n elements
// of `arr`, computed as pow(product, 1/n). An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut product := f64(1)
	for item in arr {
		product *= item
	}
	exponent := f64(1) / f64(arr.len)
	return math.pow(product, exponent)
}
// Measure of Central Tendency
// harmonic_mean returns the element count divided by the sum of the
// reciprocals of the elements of `arr`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut reciprocal_total := f64(0)
	for item in arr {
		reciprocal_total += f64(1) / item
	}
	count := f64(arr.len)
	return count / reciprocal_total
}
// Measure of Central Tendency
// median returns the middle element of `arr`, or the average of the two
// middle elements when the length is even. The input is NOT sorted here;
// the caller is expected to pass an already sorted array.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	if n % 2 == 1 {
		// Odd length: exactly one middle element.
		return arr[(n - 1) / 2]
	}
	// Even length: average the two elements around the centre.
	upper := n / 2
	return (arr[upper - 1] + arr[upper]) / f64(2)
}
// Measure of Central Tendency
// mode returns the element of `arr` with the highest frequency, where the
// frequency of each element is obtained through `freq`. On a tie the
// element that appears first in `arr` is returned, because a later element
// only replaces the current pick when its count is strictly greater.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut best_idx := 0
	mut best_count := 0
	mut idx := 0
	for idx < arr.len {
		count := freq(arr, arr[idx])
		if count > best_count {
			best_idx = idx
			best_count = count
		}
		idx++
	}
	return arr[best_idx]
}
// rms returns the root mean square of `arr`: the square root of the
// average of the squared elements. An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut square_total := f64(0)
	for item in arr {
		square_total += math.pow(item, 2)
	}
	mean_square := square_total / f64(arr.len)
	return math.sqrt(mean_square)
}
// Measure of Dispersion / Spread
// population_variance returns the average squared deviation of the
// elements of `arr` from their mean (divisor: element count).
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut squared_dev_total := f64(0)
	for item in arr {
		squared_dev_total += math.pow(item - center, 2)
	}
	return squared_dev_total / f64(arr.len)
}
// Measure of Dispersion / Spread
// sample_variance returns the sum of squared deviations of the elements of
// `arr` from their mean, divided by (element count - 1).
// Fewer than two elements yield 0: the empty case follows the convention
// used throughout this module, and a single element would otherwise
// evaluate 0/0 and return NaN.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance(arr []f64) f64 {
	// The n-1 divisor is zero for a single element, so guard it explicitly.
	if arr.len < 2 {
		return f64(0)
	}
	m := mean(arr)
	mut sum := f64(0)
	for v in arr {
		sum += math.pow(v - m, 2)
	}
	return sum / f64(arr.len - 1)
}
// Measure of Dispersion / Spread
// population_stddev returns the square root of `population_variance(arr)`.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_stddev(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	variance := population_variance(arr)
	return math.sqrt(variance)
}
// Measure of Dispersion / Spread
// sample_stddev returns the square root of `sample_variance(arr)`.
// Fewer than two elements yield 0: with a single element the sample
// variance divides by (1 - 1), which would otherwise surface here as NaN.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_stddev(arr []f64) f64 {
	// Guard locally so the result does not depend on how sample_variance
	// treats the single-element case.
	if arr.len < 2 {
		return f64(0)
	}
	return math.sqrt(sample_variance(arr))
}
// Measure of Dispersion / Spread
// mean_absdev returns the average of the absolute differences between
// each element of `arr` and the mean of `arr`. An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn mean_absdev(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut abs_dev_total := f64(0)
	for item in arr {
		abs_dev_total += math.abs(item - center)
	}
	return abs_dev_total / f64(arr.len)
}
// min returns the smallest element of `arr` using a linear scan.
// An empty array yields 0.
pub fn min(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut smallest := arr[0]
	// Element 0 is already the starting candidate, so scanning begins at 1.
	for i := 1; i < arr.len; i++ {
		if arr[i] < smallest {
			smallest = arr[i]
		}
	}
	return smallest
}
// max returns the largest element of `arr` using a linear scan.
// An empty array yields 0.
pub fn max(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut largest := arr[0]
	mut idx := 1
	// Element 0 is already the starting candidate, so scanning begins at 1.
	for idx < arr.len {
		if arr[idx] > largest {
			largest = arr[idx]
		}
		idx++
	}
	return largest
}
// Measure of Dispersion / Spread
// range returns `max(arr) - min(arr)`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	highest := max(arr)
	lowest := min(arr)
	return highest - lowest
}

259
vlib/math/stats_test.v Normal file
View File

@ -0,0 +1,259 @@
import math.stats as stats
fn test_freq() {
	// Expected counts were also verified on Wolfram Alpha.
	samples := [f64(10.0), f64(10.0), f64(5.9), f64(2.7)]
	// Value present twice.
	mut hits := stats.freq(samples, 10.0)
	assert hits == 2
	// Value present once.
	hits = stats.freq(samples, 2.7)
	assert hits == 1
	// Value absent from the array.
	hits = stats.freq(samples, 15)
	assert hits == 0
}
fn test_mean() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.mean(samples)
	assert got.str().eq('5.762500')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.mean(samples)
	assert got.str().eq('17.650000')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.mean(samples)
	assert got.str().eq('37.708000')
}
fn test_geometric_mean() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut data := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut o := stats.geometric_mean(data)
	assert o.str().eq('5.159932')
	// The product of this data set is negative, so its real 4th root does
	// not exist (mathematically the result is a complex number). Accept any
	// of the representations the platform may produce for that case.
	data = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	o = stats.geometric_mean(data)
	assert o.str().eq('nan') || o.str().eq('-nan') || o == f64(0)
	data = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	o = stats.geometric_mean(data)
	assert o.str().eq('25.064496')
}
fn test_harmonic_mean() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.harmonic_mean(samples)
	assert got.str().eq('4.626519')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.harmonic_mean(samples)
	assert got.str().eq('9.134577')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.harmonic_mean(samples)
	assert got.str().eq('16.555477')
}
fn test_median() {
	// Expected values were also verified on Wolfram Alpha.
	// stats.median assumes a sorted array, so every fixture below is sorted
	// in ascending order.
	// Even length: the result is the average of the two middle elements.
	// Compared through the string form because direct `==` on f64 ran into
	// precision issues.
	mut data := [f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	mut o := stats.median(data)
	assert o.str().eq('5.175000')
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(67.31)]
	o = stats.median(data)
	assert o.str().eq('3.145000')
	data = [f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert o.str().eq('33.415000')
	// Odd length: the result is exactly the middle element.
	data = [f64(2.7), f64(4.45), f64(5.9), f64(10.0), f64(22)]
	o = stats.median(data)
	assert o == f64(5.9)
	data = [f64(-3.0), f64(1.89), f64(4.4), f64(9), f64(67.31)]
	o = stats.median(data)
	assert o == f64(4.4)
	// This fixture was previously unsorted (7.88 before 3.3) and passed
	// only because the middle element happened to be the true median.
	data = [f64(3.3), f64(7.88), f64(12.0), f64(54.83), f64(76.122)]
	o = stats.median(data)
	assert o == f64(12.0)
}
fn test_mode() {
	// Expected values were also verified on Wolfram Alpha.
	mut samples := [f64(2.7), f64(2.7), f64(4.45), f64(5.9), f64(10.0)]
	mut got := stats.mode(samples)
	assert got == f64(2.7)
	samples = [f64(-3.0), f64(1.89), f64(1.89), f64(1.89), f64(9), f64(4.4), f64(4.4), f64(9), f64(67.31)]
	got = stats.mode(samples)
	assert got == f64(1.89)
	// Tie between 2.0 and 4.0: the value seen first is expected to win.
	samples = [f64(2.0), f64(4.0), f64(2.0), f64(4.0)]
	got = stats.mode(samples)
	assert got == f64(2.0)
}
fn test_rms() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.rms(samples)
	assert got.str().eq('6.362046')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.rms(samples)
	assert got.str().eq('33.773393')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.rms(samples)
	assert got.str().eq('47.452561')
}
fn test_population_variance() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.population_variance(samples)
	assert got.str().eq('7.269219')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.population_variance(samples)
	assert got.str().eq('829.119550')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.population_variance(samples)
	assert got.str().eq('829.852282')
}
fn test_sample_variance() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.sample_variance(samples)
	assert got.str().eq('9.692292')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.sample_variance(samples)
	assert got.str().eq('1105.492733')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.sample_variance(samples)
	assert got.str().eq('1106.469709')
}
fn test_population_stddev() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.population_stddev(samples)
	assert got.str().eq('2.696149')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.population_stddev(samples)
	assert got.str().eq('28.794436')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.population_stddev(samples)
	assert got.str().eq('28.807157')
}
fn test_sample_stddev() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.sample_stddev(samples)
	assert got.str().eq('3.113245')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.sample_stddev(samples)
	assert got.str().eq('33.248951')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.sample_stddev(samples)
	assert got.str().eq('33.263639')
}
fn test_mean_absdev() {
	// Expected values were also verified on Wolfram Alpha.
	// Results are compared through their string form because direct `==`
	// on f64 ran into precision issues.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.mean_absdev(samples)
	assert got.str().eq('2.187500')
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.mean_absdev(samples)
	assert got.str().eq('24.830000')
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.mean_absdev(samples)
	assert got.str().eq('27.768000')
}
fn test_min() {
	// Expected values were also verified on Wolfram Alpha.
	// The result is one of the input elements, so direct `==` is used.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.min(samples)
	assert got == f64(2.7)
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.min(samples)
	assert got == f64(-3.0)
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.min(samples)
	assert got == f64(7.88)
}
fn test_max() {
	// Expected values were also verified on Wolfram Alpha.
	// The result is one of the input elements, so direct `==` is used.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.max(samples)
	assert got == f64(10.0)
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.max(samples)
	assert got == f64(67.31)
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.max(samples)
	assert got == f64(76.122)
}
fn test_range() {
	// Expected values were also verified on Wolfram Alpha.
	// Each expectation is the largest element minus the smallest one.
	mut samples := [f64(10.0), f64(4.45), f64(5.9), f64(2.7)]
	mut got := stats.range(samples)
	assert got == f64(7.3)
	samples = [f64(-3.0), f64(67.31), f64(4.4), f64(1.89)]
	got = stats.range(samples)
	assert got == f64(70.31)
	samples = [f64(12.0), f64(7.88), f64(76.122), f64(54.83)]
	got = stats.range(samples)
	assert got == f64(68.242)
}
fn test_passing_empty() {
	// Every function of the module is expected to return zero for an
	// empty input array.
	empty := []f64
	// Occurrence
	assert stats.freq(empty, 0) == 0
	// Central tendency
	assert stats.mean(empty) == f64(0)
	assert stats.geometric_mean(empty) == f64(0)
	assert stats.harmonic_mean(empty) == f64(0)
	assert stats.median(empty) == f64(0)
	assert stats.mode(empty) == f64(0)
	assert stats.rms(empty) == f64(0)
	// Dispersion / spread
	assert stats.population_variance(empty) == f64(0)
	assert stats.sample_variance(empty) == f64(0)
	assert stats.population_stddev(empty) == f64(0)
	assert stats.sample_stddev(empty) == f64(0)
	assert stats.mean_absdev(empty) == f64(0)
	// Extremes
	assert stats.min(empty) == f64(0)
	assert stats.max(empty) == f64(0)
	assert stats.range(empty) == f64(0)
}