/// Summary statistics over a list of per-cluster sizes.
///
/// NOTE(review): the name suggests these describe the clusters of an IVF
/// (inverted-file) index — confirm against the callers.
#[derive(Debug, Clone, PartialEq)]
pub struct IvfClusterStats {
    /// Cluster count exactly as handed to the constructor. It is stored
    /// verbatim and is not cross-checked against `cluster_sizes.len()`.
    pub n_clusters: usize,
    /// Per-cluster sizes, kept exactly as passed in.
    pub cluster_sizes: Vec<usize>,
    /// Arithmetic mean of `cluster_sizes`; `0.0` when the list is empty.
    pub avg_size: f32,
    /// Population variance of `cluster_sizes` (divisor is the number of
    /// entries, not that number minus one); `0.0` when the list is empty.
    pub size_variance: f32,
}

impl IvfClusterStats {
    /// Builds the statistics for the given per-cluster sizes.
    ///
    /// `n_clusters` is recorded as given. The mean and variance are computed
    /// over the entries of `cluster_sizes` only, so their divisor is
    /// `cluster_sizes.len()` rather than `n_clusters`. An empty list yields a
    /// mean and a variance of `0.0`.
    #[must_use]
    pub(crate) fn from_sizes(n_clusters: usize, cluster_sizes: Vec<usize>) -> Self {
        let (avg_size, size_variance) = if cluster_sizes.is_empty() {
            // Nothing to average; also avoids a 0/0 division below.
            (0.0, 0.0)
        } else {
            // Accumulate in f64 and narrow to f32 only once, at the very end.
            let count = cluster_sizes.len() as f64;
            let mean = cluster_sizes.iter().map(|&size| size as f64).sum::<f64>() / count;
            let squared_deviations = cluster_sizes
                .iter()
                .map(|&size| {
                    let delta = size as f64 - mean;
                    delta * delta
                })
                .sum::<f64>();
            (mean as f32, (squared_deviations / count) as f32)
        };

        Self {
            n_clusters,
            cluster_sizes,
            avg_size,
            size_variance,
        }
    }
}
#[cfg(test)]
mod tests {
    // NOTE(review): none of the tests below calls `unwrap`; presumably this
    // allow is a project-wide convention for test modules — confirm before
    // removing it.
    #![allow(clippy::unwrap_used)]
    use super::*;

    /// Absolute tolerance used when comparing computed `f32` statistics.
    const TOLERANCE: f32 = 1e-6;

    /// An empty size list produces zeroed statistics while the supplied
    /// cluster count is kept as given.
    #[test]
    fn empty_sizes_yields_zero_avg_and_variance() {
        let IvfClusterStats {
            n_clusters,
            cluster_sizes,
            avg_size,
            size_variance,
        } = IvfClusterStats::from_sizes(4, Vec::new());

        assert_eq!(n_clusters, 4);
        assert!(cluster_sizes.is_empty());
        assert_eq!(avg_size, 0.0);
        assert_eq!(size_variance, 0.0);
    }

    /// Identical sizes give a mean equal to that size and no spread.
    #[test]
    fn uniform_sizes_have_zero_variance() {
        let IvfClusterStats {
            avg_size,
            size_variance,
            ..
        } = IvfClusterStats::from_sizes(4, vec![5; 4]);

        assert_eq!(avg_size, 5.0);
        assert_eq!(size_variance, 0.0);
    }

    /// Sizes 1, 3, 5, 7: mean 4, squared deviations 9 + 1 + 1 + 9 = 20,
    /// population variance 20 / 4 = 5.
    #[test]
    fn variance_matches_hand_computation() {
        let IvfClusterStats {
            avg_size,
            size_variance,
            ..
        } = IvfClusterStats::from_sizes(4, vec![1, 3, 5, 7]);

        let mean_error = (avg_size - 4.0).abs();
        let variance_error = (size_variance - 5.0).abs();
        assert!(mean_error < TOLERANCE);
        assert!(variance_error < TOLERANCE);
    }
}