#![allow(dead_code)]
/// Returns only the finite values from `data`, dropping NaN and ±infinity
/// while preserving the original order.
pub fn filter_finite(data: &[f64]) -> Vec<f64> {
    let mut kept = Vec::with_capacity(data.len());
    for &value in data {
        if value.is_finite() {
            kept.push(value);
        }
    }
    kept
}
/// Linearly interpolated percentile of an ascending-sorted slice.
///
/// `p` is expressed in [0, 100]. An empty slice yields the 0.0 sentinel
/// (kept for backward compatibility with existing callers, which all
/// guard against the empty case themselves).
pub fn percentile(sorted: &[f64], p: f64) -> f64 {
    let n = sorted.len();
    if n == 0 {
        return 0.0;
    }
    // Fractional rank into the sorted slice.
    let rank = (p / 100.0) * (n - 1) as f64;
    let lo = rank.floor() as usize;
    let hi = rank.ceil() as usize;
    if hi >= n {
        // p at (or beyond) 100 resolves to the largest element.
        return sorted[n - 1];
    }
    let frac = rank - lo as f64;
    sorted[lo] * (1.0 - frac) + sorted[hi] * frac
}
/// Arithmetic mean of the finite values in `data`; `None` when no finite
/// values are present.
pub fn mean(data: &[f64]) -> Option<f64> {
    let finite = filter_finite(data);
    match finite.len() {
        0 => None,
        n => Some(finite.iter().sum::<f64>() / n as f64),
    }
}
/// Median of the finite values in `data`; `None` when no finite values are
/// present. Even-length inputs return the average of the two middle values.
pub fn median(data: &[f64]) -> Option<f64> {
    let mut vals = filter_finite(data);
    if vals.is_empty() {
        return None;
    }
    // Finite floats always compare, so the Equal fallback never fires.
    vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let n = vals.len();
    let middle = n / 2;
    let result = if n % 2 == 0 {
        (vals[middle - 1] + vals[middle]) / 2.0
    } else {
        vals[middle]
    };
    Some(result)
}
/// First quartile, median, and third quartile of the finite values in
/// `data`; `None` when no finite values are present.
pub fn quartiles(data: &[f64]) -> Option<(f64, f64, f64)> {
    let mut vals = filter_finite(data);
    if vals.is_empty() {
        return None;
    }
    vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    // All three cuts come from the same sorted slice.
    let cut = |p: f64| percentile(&vals, p);
    Some((cut(25.0), cut(50.0), cut(75.0)))
}
/// Minimum and maximum of the finite values in `data`; `None` when no
/// finite values are present.
///
/// Computes both extremes in a single pass over the filtered values
/// (the previous implementation scanned the vector twice).
pub fn min_max(data: &[f64]) -> Option<(f64, f64)> {
    let valid = filter_finite(data);
    if valid.is_empty() {
        return None;
    }
    // Seed with (+inf, -inf) so the first element replaces both bounds;
    // `valid` is non-empty, so the seeds never leak into the result.
    let extremes = valid
        .iter()
        .fold((f64::INFINITY, f64::NEG_INFINITY), |(lo, hi), &x| {
            (lo.min(x), hi.max(x))
        });
    Some(extremes)
}
/// Interquartile range (Q3 - Q1) of the finite values in `data`; `None`
/// when no finite values are present.
pub fn iqr(data: &[f64]) -> Option<f64> {
    let (q1, _, q3) = quartiles(data)?;
    Some(q3 - q1)
}
/// Finite values lying outside the Tukey fences
/// `[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`, returned in ascending order.
/// Non-finite entries are ignored; an all-invalid input yields an empty vec.
pub fn outliers_iqr(data: &[f64]) -> Vec<f64> {
    let mut vals = filter_finite(data);
    if vals.is_empty() {
        return Vec::new();
    }
    // percentile() requires ascending order.
    vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let q1 = percentile(&vals, 25.0);
    let q3 = percentile(&vals, 75.0);
    let spread = q3 - q1;
    let lower_fence = q1 - 1.5 * spread;
    let upper_fence = q3 + 1.5 * spread;
    // Keep only the points beyond either fence.
    vals.retain(|&x| x < lower_fence || x > upper_fence);
    vals
}
/// Strategy for choosing histogram bin edges in `compute_bins`.
#[derive(Clone, Debug, Default)]
pub enum BinConfig {
    /// Sturges-style rule: `ceil(log2(n) + 1)` equal-width bins,
    /// clamped to [1, 100].
    #[default]
    Auto,
    /// Fixed number of equal-width bins (treated as at least 1).
    Count(usize),
    /// Fixed bin width (clamped to at least 0.001).
    Width(f64),
    /// Explicit caller-supplied bin edges; assumed ascending — TODO confirm,
    /// compute_bins does not validate ordering.
    Edges(Vec<f64>),
}
/// One histogram bucket produced by `compute_bins`.
#[derive(Clone, Debug)]
pub struct HistogramBin {
    /// Inclusive lower edge of the bin.
    pub start: f64,
    /// Upper edge; exclusive for all bins except the last, which is
    /// inclusive so the maximum sample is counted.
    pub end: f64,
    /// Number of samples that fell into this bin.
    pub count: usize,
    /// `count` divided by the total number of finite samples.
    pub frequency: f64,
    /// `frequency` divided by the bin width; 0.0 for zero-width bins.
    pub density: f64,
}
/// Builds histogram bins over the finite values of `data` according to
/// `config`.
///
/// Returns an empty vector when `data` contains no finite values. Every bin
/// is half-open `[start, end)` except the last, which also includes `end`
/// so the maximum sample is counted exactly once.
pub fn compute_bins(data: &[f64], config: &BinConfig) -> Vec<HistogramBin> {
    let valid_data = filter_finite(data);
    if valid_data.is_empty() {
        return Vec::new();
    }
    let min = valid_data.iter().cloned().fold(f64::INFINITY, f64::min);
    let max = valid_data.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
    // Use the true data span, falling back to 1.0 only in the degenerate
    // all-equal case. The previous `(max - min).max(1.0)` clamp silently
    // stretched any span narrower than 1.0 up to 1.0, producing bins far
    // wider than the data (e.g. values in [0, 0.2] got bins over [0, 1]).
    let range = if max > min { max - min } else { 1.0 };
    // Shared edge generator for the equal-width strategies.
    let equal_width_edges = |bin_count: usize| -> Vec<f64> {
        let bin_width = range / bin_count as f64;
        (0..=bin_count)
            .map(|i| min + i as f64 * bin_width)
            .collect()
    };
    let edges = match config {
        BinConfig::Auto => {
            // Sturges' rule, clamped to a sane bin count.
            let n = valid_data.len();
            let bin_count = (((n as f64).log2() + 1.0).ceil() as usize).clamp(1, 100);
            equal_width_edges(bin_count)
        }
        BinConfig::Count(n) => equal_width_edges((*n).max(1)),
        BinConfig::Width(w) => {
            // Guard against zero/negative widths, which would loop forever
            // or produce inverted bins.
            let bin_width = (*w).max(0.001);
            let bin_count = ((range / bin_width).ceil() as usize).max(1);
            (0..=bin_count)
                .map(|i| min + i as f64 * bin_width)
                .collect()
        }
        BinConfig::Edges(edges) => edges.clone(),
    };
    let total = valid_data.len();
    let last = edges.len().saturating_sub(1);
    let mut bins = Vec::with_capacity(last);
    for i in 0..last {
        let start = edges[i];
        let end = edges[i + 1];
        // Final bin is closed on both ends so `max` itself lands in a bin.
        let in_bin = |x: f64| x >= start && (x < end || (i + 1 == last && x <= end));
        let count = valid_data.iter().filter(|&&x| in_bin(x)).count();
        let frequency = count as f64 / total as f64;
        let bin_width = end - start;
        let density = if bin_width > 0.0 {
            frequency / bin_width
        } else {
            // Degenerate (zero-width or inverted) edges get zero density.
            0.0
        };
        bins.push(HistogramBin {
            start,
            end,
            count,
            frequency,
            density,
        });
    }
    bins
}