use rust_decimal::Decimal;
pub const MIN_OBSERVATIONS: usize = 30;
#[derive(Debug)]
pub enum ClusterNode {
Leaf(usize),
Branch {
left: Box<ClusterNode>,
right: Box<ClusterNode>,
},
}
pub fn mean(data: &[f64]) -> f64 {
data.iter().sum::<f64>() / data.len() as f64
}
pub fn variance(data: &[f64], mean: f64) -> f64 {
data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64
}
pub fn correlation_matrix(series: &[&[f64]], means: &[f64], variances: &[f64]) -> Vec<Vec<f64>> {
let n = series.len();
let n_obs = series[0].len();
let mut corr = vec![vec![1.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let cov: f64 = (0..n_obs)
.map(|t| (series[i][t] - means[i]) * (series[j][t] - means[j]))
.sum::<f64>()
/ (n_obs - 1) as f64;
let r = cov / (variances[i].sqrt() * variances[j].sqrt());
let r = r.clamp(-1.0, 1.0);
corr[i][j] = r;
corr[j][i] = r;
}
}
corr
}
pub fn distance_matrix(corr: &[Vec<f64>]) -> Vec<Vec<f64>> {
let n = corr.len();
let mut dist = vec![vec![0.0; n]; n];
for i in 0..n {
for j in (i + 1)..n {
let d = ((1.0 - corr[i][j]) / 2.0).sqrt();
dist[i][j] = d;
dist[j][i] = d;
}
}
dist
}
pub fn build_dendrogram(dist: &[Vec<f64>], n: usize) -> ClusterNode {
if n == 1 {
return ClusterNode::Leaf(0);
}
let mut nodes: Vec<Option<ClusterNode>> = (0..n).map(|i| Some(ClusterNode::Leaf(i))).collect();
let mut work_dist: Vec<Vec<f64>> = dist.to_vec();
let mut sizes: Vec<usize> = vec![1; n];
for _ in 0..(n - 1) {
let (ci, cj) = find_closest_pair(&work_dist, &nodes);
let Some(left) = nodes[ci].take() else {
continue;
};
let Some(right) = nodes[cj].take() else {
nodes[ci] = Some(left); continue;
};
let merged = ClusterNode::Branch {
left: Box::new(left),
right: Box::new(right),
};
let size_merged = sizes[ci] + sizes[cj];
for k in 0..nodes.len() {
if k == ci || k == cj || nodes[k].is_none() {
continue;
}
let d_new = (work_dist[ci][k] * sizes[ci] as f64 + work_dist[cj][k] * sizes[cj] as f64)
/ size_merged as f64;
work_dist[ci][k] = d_new;
work_dist[k][ci] = d_new;
}
sizes[ci] = size_merged;
for val in &mut work_dist[cj] {
*val = f64::INFINITY;
}
for row in &mut work_dist {
row[cj] = f64::INFINITY;
}
nodes[ci] = Some(merged);
}
nodes
.into_iter()
.find_map(|n| n)
.unwrap_or(ClusterNode::Leaf(0))
}
pub fn find_closest_pair(dist: &[Vec<f64>], nodes: &[Option<ClusterNode>]) -> (usize, usize) {
let mut min_d = f64::INFINITY;
let mut best = (0, 1);
for i in 0..nodes.len() {
if nodes[i].is_none() {
continue;
}
for j in (i + 1)..nodes.len() {
if nodes[j].is_none() {
continue;
}
if dist[i][j] < min_d {
min_d = dist[i][j];
best = (i, j);
}
}
}
best
}
pub fn recursive_bisect(node: &ClusterNode, variances: &[f64], weight: f64, out: &mut [f64]) {
match node {
ClusterNode::Leaf(i) => {
out[*i] = weight;
}
ClusterNode::Branch { left, right } => {
let var_left = cluster_variance(left, variances);
let var_right = cluster_variance(right, variances);
let total = var_left + var_right;
if total < 1e-30 {
recursive_bisect(left, variances, weight * 0.5, out);
recursive_bisect(right, variances, weight * 0.5, out);
} else {
let alpha = 1.0 - var_left / total;
recursive_bisect(left, variances, weight * alpha, out);
recursive_bisect(right, variances, weight * (1.0 - alpha), out);
}
}
}
}
pub fn cluster_variance(node: &ClusterNode, variances: &[f64]) -> f64 {
let (sum, count) = cluster_variance_sum(node, variances);
sum / count as f64
}
pub fn cluster_variance_sum(node: &ClusterNode, variances: &[f64]) -> (f64, usize) {
match node {
ClusterNode::Leaf(i) => (variances[*i], 1),
ClusterNode::Branch { left, right } => {
let (sl, cl) = cluster_variance_sum(left, variances);
let (sr, cr) = cluster_variance_sum(right, variances);
(sl + sr, cl + cr)
}
}
}
pub fn decimal_from_f64(v: f64) -> Result<Decimal, String> {
if !v.is_finite() {
return Err(format!("non-finite weight: {v}"));
}
let scaled = (v * 1_000_000.0).round() / 1_000_000.0;
Decimal::try_from(scaled).map_err(|e| format!("decimal conversion failed for {v}: {e}"))
}
#[cfg(test)]
#[path = "hrp_tests.rs"]
mod tests;