2019-07-19 12:50:32 +03:00
|
|
|
module stats
|
|
|
|
|
|
|
|
import math
|
|
|
|
|
2020-04-26 14:49:31 +03:00
|
|
|
// TODO: Implement all of them with generics
|
2019-08-26 13:32:53 +03:00
|
|
|
|
2020-04-26 14:49:31 +03:00
|
|
|
// This module defines the following statistical operations on f64 arrays:
|
2019-07-19 12:50:32 +03:00
|
|
|
// ---------------------------
|
|
|
|
// | Summary of Functions |
|
|
|
|
// ---------------------------
|
|
|
|
// -----------------------------------------------------------------------
|
|
|
|
// freq - Frequency
|
2020-04-26 14:49:31 +03:00
|
|
|
// mean - Mean
|
2019-07-19 12:50:32 +03:00
|
|
|
// geometric_mean - Geometric Mean
|
|
|
|
// harmonic_mean - Harmonic Mean
|
|
|
|
// median - Median
|
|
|
|
// mode - Mode
|
|
|
|
// rms - Root Mean Square
|
2020-04-26 14:49:31 +03:00
|
|
|
// population_variance - Population Variance
|
2019-07-19 12:50:32 +03:00
|
|
|
// sample_variance - Sample Variance
|
|
|
|
// population_stddev - Population Standard Deviation
|
|
|
|
// sample_stddev - Sample Standard Deviation
|
|
|
|
// mean_absdev - Mean Absolute Deviation
|
|
|
|
// min - Minimum of the Array
|
|
|
|
// max - Maximum of the Array
|
|
|
|
// range - Range of the Array ( max - min )
|
|
|
|
// -----------------------------------------------------------------------
|
|
|
|
|
|
|
|
// freq is a measure of occurrence: it returns how many elements of `arr`
// compare equal (`==`) to `val`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/frequency-distribution.html
pub fn freq(arr []f64, val f64) int {
	mut hits := 0
	for i in 0 .. arr.len {
		if arr[i] == val {
			hits++
		}
	}
	return hits
}
|
|
|
|
|
|
|
|
// mean is a measure of central tendency: it returns the arithmetic mean of
// `arr`, i.e. the sum of all elements divided by the element count.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mean(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	mut total := f64(0)
	for i in 0 .. n {
		total += arr[i]
	}
	return total / f64(n)
}
|
|
|
|
|
|
|
|
// geometric_mean is a measure of central tendency: it multiplies all
// elements of `arr` together and raises the product to the power 1/n,
// where n is the element count. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/geometric-mean.html
pub fn geometric_mean(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut product := f64(1)
	for x in arr {
		product *= x
	}
	// n-th root expressed as a fractional exponent.
	exponent := f64(1) / f64(arr.len)
	return math.pow(product, exponent)
}
|
|
|
|
|
|
|
|
// harmonic_mean is a measure of central tendency: it returns the element
// count divided by the sum of the reciprocals of the elements of `arr`.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/numbers/harmonic-mean.html
pub fn harmonic_mean(arr []f64) f64 {
	count := arr.len
	if count == 0 {
		return f64(0)
	}
	mut reciprocal_sum := f64(0)
	for i in 0 .. count {
		reciprocal_sum += f64(1) / arr[i]
	}
	return f64(count) / reciprocal_sum
}
|
|
|
|
|
|
|
|
// median is a measure of central tendency: it returns the middle element
// of `arr`, or the average of the two middle elements when the element
// count is even. The input array is assumed to be sorted already; this
// function does not sort it. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn median(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	upper := n / 2
	if n % 2 == 1 {
		// Odd count: integer division lands exactly on the middle element.
		return arr[upper]
	}
	// Even count: average the two elements around the midpoint.
	return (arr[upper - 1] + arr[upper]) / f64(2)
}
|
|
|
|
|
|
|
|
// mode is a measure of central tendency: it returns the element of `arr`
// with the highest frequency, as counted by `freq`. When several elements
// share the highest frequency, the one that appears first in `arr` wins.
// An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/central-measures.html
pub fn mode(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	mut best_idx := 0
	mut best_count := 0
	for i, x in arr {
		occurrences := freq(arr, x)
		// Strict comparison keeps the earliest element on ties.
		if occurrences > best_count {
			best_count = occurrences
			best_idx = i
		}
	}
	return arr[best_idx]
}
|
|
|
|
|
|
|
|
// rms returns the root mean square of `arr`: the square root of the
// average of the squared elements. An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Root_mean_square
pub fn rms(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	mut square_sum := f64(0)
	for i in 0 .. n {
		square_sum += math.pow(arr[i], 2)
	}
	mean_square := square_sum / f64(n)
	return math.sqrt(mean_square)
}
|
|
|
|
|
|
|
|
// population_variance is a measure of dispersion / spread: it returns the
// average of the squared deviations of the elements of `arr` from their
// mean, dividing by the full element count. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_variance(arr []f64) f64 {
	n := arr.len
	if n == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut squared_dev := f64(0)
	for i in 0 .. n {
		squared_dev += math.pow(arr[i] - center, 2)
	}
	return squared_dev / f64(n)
}
|
|
|
|
|
|
|
|
// sample_variance is a measure of dispersion / spread: it returns the sum
// of the squared deviations of the elements of `arr` from their mean,
// divided by (n - 1), where n is the element count.
// Arrays with fewer than two elements yield 0: the empty case matches the
// rest of this module, and for a single element the (n - 1) divisor would
// be zero, which would otherwise produce NaN (0 / 0).
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_variance(arr []f64) f64 {
	if arr.len < 2 {
		return f64(0)
	}
	m := mean(arr)
	mut sum := f64(0)
	for v in arr {
		sum += math.pow(v - m, 2)
	}
	return sum / f64(arr.len - 1)
}
|
|
|
|
|
|
|
|
// population_stddev is a measure of dispersion / spread: it returns the
// square root of `population_variance(arr)`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn population_stddev(arr []f64) f64 {
	if arr.len > 0 {
		variance := population_variance(arr)
		return math.sqrt(variance)
	}
	return f64(0)
}
|
|
|
|
|
|
|
|
// sample_stddev is a measure of dispersion / spread: it returns the
// square root of `sample_variance(arr)`. An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/standard-deviation.html
pub fn sample_stddev(arr []f64) f64 {
	if arr.len > 0 {
		variance := sample_variance(arr)
		return math.sqrt(variance)
	}
	return f64(0)
}
|
|
|
|
|
|
|
|
// mean_absdev is a measure of dispersion / spread: it returns the average
// of the absolute deviations of the elements of `arr` from their mean.
// An empty array yields 0.
// Based on
// https://en.wikipedia.org/wiki/Average_absolute_deviation
pub fn mean_absdev(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	center := mean(arr)
	mut total_dev := f64(0)
	for i in 0 .. arr.len {
		total_dev += math.abs(arr[i] - center)
	}
	return total_dev / f64(arr.len)
}
|
|
|
|
|
|
|
|
// min returns the smallest element of `arr`. An empty array yields 0.
pub fn min(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	// Seed with the first element, then scan the remainder.
	mut smallest := arr[0]
	for i in 1 .. arr.len {
		if arr[i] < smallest {
			smallest = arr[i]
		}
	}
	return smallest
}
|
|
|
|
|
|
|
|
// max returns the largest element of `arr`. An empty array yields 0.
pub fn max(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	// Seed with the first element, then scan the remainder.
	mut largest := arr[0]
	for i in 1 .. arr.len {
		candidate := arr[i]
		if candidate > largest {
			largest = candidate
		}
	}
	return largest
}
|
|
|
|
|
|
|
|
// range is a measure of dispersion / spread: it returns the difference
// between the largest and the smallest element of `arr`
// ( maximum - minimum ). An empty array yields 0.
// Based on
// https://www.mathsisfun.com/data/range.html
pub fn range(arr []f64) f64 {
	if arr.len == 0 {
		return f64(0)
	}
	highest := max(arr)
	lowest := min(arr)
	return highest - lowest
}
|