use num_traits::{Num, NumCast, ToPrimitive};
use std::ops::Sub;
pub fn mu<T>(data: &[T]) -> T
where
T: Num + NumCast + Copy,
{
if data.is_empty() {
return T::zero();
}
let sum = data.iter().copied().fold(T::zero(), |sum, x| sum + x);
let n = NumCast::from(data.len()).unwrap();
sum / n
}
/// Arithmetic mean of `data`.
///
/// Thin alias that forwards to `mu(data)` and returns its result unchanged.
pub fn mean<T>(data: &[T]) -> T
where
T: Num + NumCast + Copy,
{
mu(data)
}
/// Median of a slice that is **already sorted**.
///
/// The slice is not reordered or checked: the function simply reads the
/// middle element (odd length) or averages the two middle elements (even
/// length) using `T`'s own `+` and `/`. Use `median_unsorted` when the
/// input order is unknown.
///
/// Returns `T::zero()` for an empty slice.
pub fn median<T>(data: &[T]) -> T
where
    T: Num + NumCast + Copy,
{
    let len = data.len();
    if len == 0 {
        return T::zero();
    }
    let upper_mid = data[len / 2];
    if len % 2 != 0 {
        // Odd length: there is a single middle element.
        return upper_mid;
    }
    // Even length: average the two elements around the centre.
    let pair_sum = upper_mid + data[len / 2 - 1];
    let two = T::one() + T::one();
    pair_sum / two
}
/// Median of a slice in arbitrary order.
///
/// Copies the input, sorts the copy with `partial_cmp`, and delegates to
/// `median`. The input slice itself is left untouched.
///
/// Returns `T::zero()` for an empty slice. If any element is not comparable
/// with itself (for floating-point types: NaN), the first such element is
/// returned as-is, so a NaN in the input propagates to the result instead of
/// aborting the sort.
///
/// # Panics
///
/// Panics if two elements that are each comparable with themselves still
/// have no defined ordering relative to one another.
pub fn median_unsorted<T>(data: &[T]) -> T
where
    T: Num + NumCast + Copy + PartialOrd,
{
    if data.is_empty() {
        return T::zero();
    }
    // An element with no ordering relative to itself cannot be sorted;
    // hand it back rather than panicking inside the comparator.
    if let Some(unordered) = data
        .iter()
        .copied()
        .find(|x| x.partial_cmp(x).is_none())
    {
        return unordered;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("median_unsorted: elements must be totally ordered")
    });
    median(&sorted)
}
/// Smallest element of `data`, or `None` when the slice is empty.
///
/// The first element is the starting candidate; a later element replaces it
/// only when it compares strictly less (`<`). Elements for which that
/// comparison is false (including unordered ones) are skipped.
pub fn min<T>(data: &[T]) -> Option<T>
where
    T: PartialOrd + Copy,
{
    let (&first, rest) = data.split_first()?;
    let mut smallest = first;
    for &candidate in rest {
        if candidate < smallest {
            smallest = candidate;
        }
    }
    Some(smallest)
}
/// Largest element of `data`, or `None` when the slice is empty.
///
/// The first element is the starting candidate; a later element replaces it
/// only when it compares strictly greater (`>`). Elements for which that
/// comparison is false (including unordered ones) are skipped.
pub fn max<T>(data: &[T]) -> Option<T>
where
    T: PartialOrd + Copy,
{
    let (&first, rest) = data.split_first()?;
    let mut largest = first;
    for &candidate in rest {
        if candidate > largest {
            largest = candidate;
        }
    }
    Some(largest)
}
pub fn range<T>(data: &[T]) -> Option<T>
where
T: PartialOrd + Copy + Sub<Output = T>,
{
if data.is_empty() {
return None;
}
let minimum = min(data).unwrap();
let maximum = max(data).unwrap();
Some(maximum - minimum)
}
/// First, second and third quartile of `data` as `(q1, q2, q3)`.
///
/// The input is copied and sorted; each quantile `p` is then read at the
/// fractional position `p * (n - 1)` of the sorted copy, interpolating
/// linearly (in `f64`) between the two neighbouring elements.
///
/// When both neighbours are equal (which includes the case where the
/// position is an exact index) that element is returned directly. This keeps
/// the value exact instead of round-tripping it through `f64`, which could
/// otherwise fail for integers that `f64` cannot represent or turn an
/// infinite value into NaN.
///
/// Returns `None` when:
/// * `data` is empty,
/// * any element converts to an `f64` NaN,
/// * an element needed for interpolation cannot be converted to `f64`, or
/// * an interpolated value cannot be converted back to `T`.
///
/// # Panics
///
/// Panics if two elements have no defined ordering even though neither
/// converts to NaN.
pub fn quartiles<T>(data: &[T]) -> Option<(T, T, T)>
where
    T: Num + NumCast + Copy + PartialOrd,
{
    // Validate first so that rejected input never pays for the copy.
    if data.is_empty() || data.iter().any(|x| x.to_f64().is_some_and(f64::is_nan)) {
        return None;
    }
    let mut sorted = data.to_vec();
    sorted.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("quartiles: elements must be totally ordered once NaNs are rejected")
    });
    let last_index = (sorted.len() - 1) as f64;
    let quantile = |p: f64| -> Option<T> {
        let position = p * last_index;
        let below = *sorted.get(position.floor() as usize)?;
        let above = *sorted.get(position.ceil() as usize)?;
        if below == above {
            // Nothing to interpolate; return the element itself, exactly.
            return Some(below);
        }
        let weight = position - position.floor();
        let lower = below.to_f64()?;
        let upper = above.to_f64()?;
        NumCast::from(lower + weight * (upper - lower))
    };
    Some((quantile(0.25)?, quantile(0.50)?, quantile(0.75)?))
}
/// Interquartile range of `data`: third quartile minus first quartile.
///
/// Both quartiles come from `quartiles(data)`; whenever that call returns
/// `None`, this function returns `None` as well.
pub fn interquartile_range<T>(data: &[T]) -> Option<T>
where
    T: Num + NumCast + Copy + PartialOrd,
{
    quartiles(data).map(|(q1, _, q3)| q3 - q1)
}
pub fn variance<T>(data: &[T]) -> f64
where
T: Num + NumCast + Copy,
{
if data.is_empty() || data.len() < 2 {
return 0.0;
}
let mu = mu(data).to_f64().unwrap_or_default();
let dev_sum: f64 = data
.iter()
.map(|x| {
let x = x.to_f64().unwrap_or_default();
(x - mu).powi(2)
})
.sum();
dev_sum / (data.len() as f64 - 1.0)
}
/// Standard deviation of `data`: the square root of `variance(data)`.
///
/// Inherits every edge case from `variance`; in particular the result is
/// `0.0` whenever `variance` returns `0.0`.
pub fn stdev<T>(data: &[T]) -> f64
where
T: Num + NumCast + Copy,
{
variance(data).sqrt()
}
/// Standard score of a single value: `(datapoint - mu) / sigma`, in `f64`.
///
/// `mu` and `sigma` are converted with `Into<f64>`. Returns `None` only when
/// `datapoint` cannot be converted to `f64`. The division is plain `f64`
/// arithmetic, so a zero `sigma` produces an infinite or NaN result rather
/// than an error.
pub fn z_score<T, F>(datapoint: T, mu: F, sigma: F) -> Option<f64>
where
    T: Num + NumCast + Copy,
    F: Into<f64> + Copy,
{
    datapoint.to_f64().map(|value| {
        let centre: f64 = mu.into();
        let spread: f64 = sigma.into();
        (value - centre) / spread
    })
}
/// Standard score of every element of `data`, in input order.
///
/// The mean is computed in `f64` (so integer inputs with a fractional mean
/// are centred correctly) and the spread is `stdev(data)`. Each element is
/// then passed through `z_score`.
///
/// Returns `None` when `data` is empty or when any element cannot be
/// converted to `f64`. Because the division is plain `f64` arithmetic, input
/// whose standard deviation is zero (a single element, or all elements
/// equal) yields non-finite scores rather than `None`.
pub fn z_scores<T>(data: &[T]) -> Option<Vec<f64>>
where
    T: Num + NumCast + Copy,
{
    if data.is_empty() {
        return None;
    }
    // Sum in f64 rather than in `T`, so the mean is not subject to `T`'s
    // own division.
    let total = data
        .iter()
        .try_fold(0.0_f64, |acc, x| Some(acc + x.to_f64()?))?;
    let mean = total / data.len() as f64;
    let sigma = stdev(data);
    data.iter().map(|&x| z_score(x, mean, sigma)).collect()
}
/// Shannon entropy of a histogram of `data`, normalised by `ln(n_bins)`.
///
/// The span between the smallest and largest element is divided into
/// `n_bins` equal-width bins, every element is counted into its bin, and the
/// entropy `-Σ p·ln(p)` over the non-empty bins is divided by `ln(n_bins)`.
///
/// Returns:
/// * `None` when `data` is empty, when `n_bins` is `0`, or when an element
///   cannot be converted to `f64`;
/// * `Some(0.0)` when `n_bins` is `1` (every element shares the only bin, and
///   the normalising divisor `ln(1)` would otherwise give `0 / 0`);
/// * `Some(entropy / ln(n_bins))` otherwise.
pub fn normalized_entropy<T>(data: &[T], n_bins: u8) -> Option<f64>
where
    T: Num + NumCast + Copy + PartialOrd,
{
    if data.is_empty() || n_bins == 0 {
        return None;
    }
    if n_bins == 1 {
        return Some(0.0);
    }
    let bins = usize::from(n_bins);
    let x_min = min(data)?.to_f64()?;
    let x_max = max(data)?.to_f64()?;
    // Scale so that `x_max` lands just below `n_bins` (the `1e-11` keeps it in
    // the last bin); the `1e-60` keeps the divisor non-zero when all values
    // are equal.
    let factor = (f64::from(n_bins) - 1e-11) / ((x_max - x_min) + 1e-60);
    let mut bin_counts = vec![0usize; bins];
    for x in data {
        let k = (factor * (x.to_f64()? - x_min)) as usize;
        // Clamp defensively so an unexpected rounding result can never index
        // past the last bin.
        bin_counts[k.min(bins - 1)] += 1;
    }
    let total = data.len() as f64;
    let entropy_sum = bin_counts.iter().fold(0.0, |acc, &count| {
        if count == 0 {
            // Empty bins contribute nothing (and ln(0) must be avoided).
            acc
        } else {
            let probability = count as f64 / total;
            acc - probability * probability.ln()
        }
    });
    Some(entropy_sum / f64::from(n_bins).ln())
}
// Unit tests for the statistics helpers defined in the parent module.
// Tests are grouped by the function they exercise, in this order:
// mu, median, median_unsorted, stdev, variance, min, max, range, quartiles,
// interquartile_range, z_score, z_scores, normalized_entropy.
#[cfg(test)]
mod tests {
use super::*;
// Asserts that `a` and `b` differ by strictly less than `eps`.
// In the failure message `b` is reported as the expected value and `a` as
// the actual value.
fn assert_close(a: f64, b: f64, eps: f64) {
assert!(
(a - b).abs() < eps,
"Expected {:.6}, got {:.6}, diff = {:.6}",
b,
a,
(a - b).abs()
);
}
// ---- mu ----
#[test]
fn test_mu_basic() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert_eq!(mu(&data), 3.0);
}
#[test]
fn test_mu_single_element() {
let data = vec![42];
assert_eq!(mu(&data), 42);
}
// An empty slice yields zero rather than an error.
#[test]
fn test_mu_empty() {
let data: Vec<f64> = vec![];
let result = mu(&data);
assert_eq!(result, 0.0);
}
#[test]
fn test_mu_negative_numbers() {
let data = vec![-1.0, -2.0, -3.0];
assert_eq!(mu(&data), -2.0);
}
#[test]
fn test_mu_mixed_numbers() {
let data = vec![-2.0, 0.0, 2.0];
assert_eq!(mu(&data), 0.0);
}
#[test]
fn test_mu_bigger_sample_f64() {
let data: Vec<f64> = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0];
let result = mu(&data);
let expected = 5.5;
let epsilon = 1e-10;
assert!(
(result - expected).abs() < epsilon,
"Expected {}, got {}",
expected,
result
);
}
// Integer input: the elements sum to 110 over 10 values.
#[test]
fn test_mu_bigger_sample_integers() {
let data = vec![2, 4, 6, 8, 10, 12, 14, 16, 18, 20];
let result = mu(&data);
let expected = 11;
assert_eq!(result, expected);
}
// ---- median (input already sorted) ----
#[test]
fn test_median_sorted_odd_length() {
let data = [1, 2, 3];
let result = median(&data);
assert_eq!(result, 2);
}
// The expected value uses integer division: (2 + 3) / 2 == 2.
#[test]
fn test_median_sorted_even_length() {
let data = [1, 2, 3, 4];
let result = median(&data);
assert_eq!(result, (2 + 3) / 2);
}
#[test]
fn test_median_single_element() {
let data = [42];
let result = median(&data);
assert_eq!(result, 42);
}
// An empty slice yields zero rather than an error.
#[test]
fn test_median_empty_slice() {
let data: [i32; 0] = [];
let result = median(&data);
assert_eq!(result, 0);
}
#[test]
fn test_median_floats() {
let data = [1.0, 2.0, 3.0, 4.0];
let result = median(&data);
let expected = (2.0 + 3.0) / 2.0;
assert_close(result, expected, 1e-6);
}
// ---- median_unsorted (input in arbitrary order) ----
#[test]
fn test_median_unsorted_odd_length() {
let data = [2, 4, 3, 5, 1];
let result = median_unsorted(&data);
assert_eq!(result, 3);
}
// Sorted order is [1, 3, 5, 7]; the two middle values are 3 and 5.
#[test]
fn test_median_unsorted_even_length() {
let data = [7, 1, 5, 3];
let result = median_unsorted(&data);
assert_eq!(result, (3 + 5) / 2);
}
#[test]
fn test_median_unsorted_single_element() {
let data = [42];
let result = median_unsorted(&data);
assert_eq!(result, 42);
}
#[test]
fn test_median_unsorted_empty_slice() {
let data: [i32; 0] = [];
let result = median_unsorted(&data);
assert_eq!(result, 0);
}
#[test]
fn test_median_unsorted_floats() {
let data = [2.5, 3.5, 1.5, 4.5];
let result = median_unsorted(&data);
let expected = (2.5 + 3.5) / 2.0;
assert_close(result, expected, 1e-6);
}
#[test]
fn test_median_unsorted_duplicates() {
let data = [1, 2, 2, 2, 3];
let result = median_unsorted(&data);
assert_eq!(result, 2);
}
// ---- stdev (sample standard deviation, n - 1 divisor) ----
// Mean is 5 and the squared deviations sum to 32, so the expected value is
// sqrt(32 / 7).
#[test]
fn test_stdev_basic_floats() {
let data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
let expected = 2.138089935;
let result = stdev(&data);
assert_close(result, expected, 1e-6);
}
// Expected value is sqrt(2.5).
#[test]
fn test_stdev_integers() {
let data = [1, 2, 3, 4, 5];
let expected = 1.58113883;
let result = stdev(&data);
assert_close(result, expected, 1e-6);
}
#[test]
fn test_stdev_identical_values() {
let data = [42.0, 42.0, 42.0];
let result = stdev(&data);
assert_eq!(result, 0.0);
}
// Fewer than two elements yields 0.0.
#[test]
fn test_stdev_single_element() {
let data = [99.0];
let result = stdev(&data);
assert_eq!(result, 0.0);
}
#[test]
fn test_stdev_empty() {
let data: [f64; 0] = [];
let result = stdev(&data);
assert_eq!(result, 0.0);
}
// ---- variance (sample variance, n - 1 divisor) ----
// Squared deviations from the mean 3 sum to 10; 10 / 4 == 2.5.
#[test]
fn test_variance_basic_integers() {
let data = [1, 2, 3, 4, 5];
let var = variance(&data);
assert_close(var, 2.5, 1e-6);
}
#[test]
fn test_variance_basic_floats() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0];
let var = variance(&data);
assert_close(var, 2.5, 1e-6);
}
#[test]
fn test_variance_single_element() {
let data = [42];
let var = variance(&data);
assert_eq!(var, 0.0);
}
#[test]
fn test_variance_empty() {
let data: [f64; 0] = [];
let var = variance(&data);
assert_eq!(var, 0.0);
}
#[test]
fn test_variance_duplicates() {
let data = [3, 3, 3, 3];
let var = variance(&data);
assert_eq!(var, 0.0);
}
#[test]
fn test_variance_negative_numbers() {
let data = [-1, -2, -3, -4, -5];
let var = variance(&data);
assert_close(var, 2.5, 1e-6);
}
// ---- min ----
#[test]
fn test_min_with_integers() {
let data = [4, 2, 7, 1, 9];
assert_eq!(min(&data), Some(1));
}
#[test]
fn test_min_with_floats() {
let data = [3.5, 2.2, 5.1, 0.1, -4.7];
assert_eq!(min(&data), Some(-4.7));
}
#[test]
fn test_min_with_one_element() {
let data = [42];
assert_eq!(min(&data), Some(42));
}
// Unlike mu/median, min reports an empty slice as None.
#[test]
fn test_min_with_empty_slice() {
let data: [i32; 0] = [];
assert_eq!(min(&data), None);
}
#[test]
fn test_min_with_duplicates() {
let data = [5, 5, 5, 5];
assert_eq!(min(&data), Some(5));
}
#[test]
fn test_min_with_negatives() {
let data = [-10, -20, -5, -30];
assert_eq!(min(&data), Some(-30));
}
// ---- max ----
#[test]
fn test_max_with_integers() {
let data = [4, 2, 7, 1, 9];
assert_eq!(max(&data), Some(9));
}
#[test]
fn test_max_with_floats() {
let data = [3.5, 2.2, 5.1, 0.1, -4.7];
assert_eq!(max(&data), Some(5.1));
}
#[test]
fn test_max_with_one_element() {
let data = [42];
assert_eq!(max(&data), Some(42));
}
#[test]
fn test_max_with_empty_slice() {
let data: [i32; 0] = [];
assert_eq!(max(&data), None);
}
#[test]
fn test_max_with_duplicates() {
let data = [5, 5, 5, 5];
assert_eq!(max(&data), Some(5));
}
#[test]
fn test_max_with_negatives() {
let data = [-10, -20, -5, -30];
assert_eq!(max(&data), Some(-5));
}
// ---- range (max - min) ----
#[test]
fn test_range_with_integers() {
let data = [4, 2, 7, 1, 9];
assert_eq!(range(&data), Some(8));
}
// 5.1 - (-4.7) is compared with a tolerance because of float rounding.
#[test]
fn test_range_with_floats() {
let data = [3.5, 2.2, 5.1, 0.1, -4.7];
let result = range(&data).unwrap();
assert_close(result, 9.8, 1e-10);
}
#[test]
fn test_range_with_one_element() {
let data = [42];
assert_eq!(range(&data), Some(0));
}
#[test]
fn test_range_with_empty_slice() {
let data: [i32; 0] = [];
assert_eq!(range(&data), None);
}
#[test]
fn test_range_with_duplicates() {
let data = [5, 5, 5, 5];
assert_eq!(range(&data), Some(0));
}
#[test]
fn test_range_with_negatives() {
let data = [-10, -20, -5, -30];
assert_eq!(range(&data), Some(25));
}
// ---- quartiles (linear interpolation at position p * (n - 1)) ----
// n = 6: positions 1.25, 2.5 and 3.75 fall between elements.
#[test]
fn test_quartiles_even_sized_data() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
let (q1, q2, q3) = quartiles(&data).unwrap();
assert_close(q1, 2.25, 1e-10);
assert_close(q2, 3.5, 1e-10);
assert_close(q3, 4.75, 1e-10);
}
// n = 5: positions 1, 2 and 3 land exactly on elements.
#[test]
fn test_quartiles_odd_sized_data() {
let data = [10.0, 20.0, 30.0, 40.0, 50.0];
let (q1, q2, q3) = quartiles(&data).unwrap();
assert_eq!(q1, 20.0);
assert_eq!(q2, 30.0);
assert_eq!(q3, 40.0);
}
#[test]
fn test_quartiles_empty_input() {
let data: [f64; 0] = [];
assert_eq!(quartiles(&data), None);
}
// Any NaN in the input makes the whole result None.
#[test]
fn test_quartiles_nan_in_input() {
let data = [1.0, 2.0, f64::NAN, 4.0];
assert_eq!(quartiles(&data), None);
}
#[test]
fn test_quartiles_all_same_values() {
let data = [42.0, 42.0, 42.0, 42.0, 42.0];
let (q1, q2, q3) = quartiles(&data).unwrap();
assert_eq!(q1, 42.0);
assert_eq!(q2, 42.0);
assert_eq!(q3, 42.0);
}
// The input is the values 1..=10 shuffled; quartiles sorts internally.
#[test]
fn test_quartiles_unsorted_input() {
let data = [9.0, 3.0, 1.0, 10.0, 5.0, 6.0, 2.0, 4.0, 8.0, 7.0];
let (q1, q2, q3) = quartiles(&data).unwrap();
assert_close(q1, 3.25, 1e-10);
assert_close(q2, 5.5, 1e-10);
assert_close(q3, 7.75, 1e-10);
}
// ---- interquartile_range (q3 - q1) ----
#[test]
fn test_iqr_empty() {
let data: [f64; 0] = [];
assert_eq!(interquartile_range(&data), None);
}
#[test]
fn test_iqr_single_element() {
let data = [42.0];
assert_eq!(interquartile_range(&data), Some(0.0));
}
// q1 = 1.75 and q3 = 3.25.
#[test]
fn test_iqr_even_length() {
let data = [1.0, 2.0, 3.0, 4.0];
let result = interquartile_range(&data);
assert_eq!(result, Some(1.5));
}
// n = 11: q1 = 37.5 (between 36 and 39), q3 = 45.0 (between 43 and 47).
#[test]
fn test_iqr_odd_length() {
let data = [
7.0, 15.0, 36.0, 39.0, 40.0, 41.0, 42.0, 43.0, 47.0, 49.0, 50.0,
];
let result = interquartile_range(&data);
assert_eq!(result, Some(7.5));
}
#[test]
fn test_iqr_unsorted() {
let data = [5.0, 1.0, 3.0, 2.0, 4.0];
let result = interquartile_range(&data);
assert_eq!(result, Some(2.0));
}
#[test]
fn test_iqr_integers() {
let data = [10, 20, 30, 40, 50];
let result = interquartile_range(&data);
assert_eq!(result, Some(20));
}
// ---- z_score ((x - mu) / sigma) ----
#[test]
fn test_z_score_f64_inputs() {
let z = z_score(10.0f64, 5.0f64, 2.0f64);
assert_eq!(z, Some(2.5));
}
#[test]
fn test_z_score_f32_inputs() {
let z = z_score(10.0f32, 5.0f32, 2.0f32);
assert_eq!(z, Some(2.5));
}
// The datapoint type may differ from the mu/sigma type.
#[test]
fn test_z_score_integer_datapoint() {
let z = z_score(10i32, 5.0f64, 2.0f64);
assert_eq!(z, Some(2.5));
}
// A zero sigma is not rejected: 5.0 / 0.0 is infinite in f64 arithmetic.
#[test]
fn test_z_score_zero_sigma() {
let z = z_score(10.0f64, 5.0f64, 0.0f64);
assert!(z.unwrap().is_infinite());
}
// ---- z_scores ----
// Mean is 3 and the sample variance is 2.5, so every score is
// (x - 3) / sqrt(2.5).
#[test]
fn test_z_scores_precise() {
let data = [1.0, 2.0, 3.0, 4.0, 5.0];
let stdev = 2.5f64.sqrt();
let expected = [
(1.0 - 3.0) / stdev,
(2.0 - 3.0) / stdev,
(3.0 - 3.0) / stdev,
(4.0 - 3.0) / stdev,
(5.0 - 3.0) / stdev,
];
let result = z_scores(&data).unwrap();
for (actual, expected) in result.into_iter().zip(expected.into_iter()) {
assert_close(actual, expected, 1e-10);
}
}
// Data centred on zero: each score is simply x / sqrt(2.5).
#[test]
fn test_z_scores_symmetric_centered() {
let data = [-2.0, -1.0, 0.0, 1.0, 2.0];
let stdev = 2.5f64.sqrt();
let expected = [-2.0 / stdev, -1.0 / stdev, 0.0, 1.0 / stdev, 2.0 / stdev];
let result = z_scores(&data).unwrap();
for (actual, expected) in result.into_iter().zip(expected.into_iter()) {
assert_close(actual, expected, 1e-10);
}
}
// ---- normalized_entropy ----
// All values fall into a single bin, so the entropy is zero.
#[test]
fn test_entropy_all_same() {
let data = [1.0; 100];
let entropy = normalized_entropy(&data, 10);
assert_eq!(entropy, Some(0.0));
}
// 100 evenly spaced values over 10 bins should be close to the maximum of 1.
#[test]
fn test_entropy_uniform_distribution() {
let data: Vec<f64> = (0..100).map(|x| x as f64).collect();
let entropy = normalized_entropy(&data, 10);
assert!(entropy.unwrap() > 0.95);
}
// Only a loose band is asserted here, not an exact value.
#[test]
fn test_entropy_random_distribution() {
let data = [1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 1.0, 4.0, 3.0, 2.0];
let entropy = normalized_entropy(&data, 5);
assert!(entropy.unwrap() > 0.5 && entropy.unwrap() < 1.0);
}
#[test]
fn test_entropy_single_value() {
let data = [42.0];
let entropy = normalized_entropy(&data, 5);
assert_eq!(entropy, Some(0.0));
}
#[test]
fn test_entropy_empty_slice() {
let data: [f64; 0] = [];
let entropy = normalized_entropy(&data, 5);
assert_eq!(entropy, None);
}
// One value per bin gives the maximum normalised entropy of 1.
#[test]
fn test_entropy_maximum_when_even_bins() {
let data = [0.0, 1.0, 2.0, 3.0];
let entropy = normalized_entropy(&data, 4);
assert!((entropy.unwrap() - 1.0).abs() < 1e-6);
}
}