1use rust_decimal::Decimal;
13
14pub const MIN_OBSERVATIONS: usize = 30;
17
18#[derive(Debug)]
20pub enum ClusterNode {
21 Leaf(usize),
23 Branch {
25 left: Box<ClusterNode>,
26 right: Box<ClusterNode>,
27 },
28}
29
30pub fn mean(data: &[f64]) -> f64 {
35 data.iter().sum::<f64>() / data.len() as f64
36}
37
38pub fn variance(data: &[f64], mean: f64) -> f64 {
39 data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64
40}
41
42pub fn correlation_matrix(series: &[&[f64]], means: &[f64], variances: &[f64]) -> Vec<Vec<f64>> {
48 let n = series.len();
49 let n_obs = series[0].len();
50 let mut corr = vec![vec![1.0; n]; n];
51
52 for i in 0..n {
53 for j in (i + 1)..n {
54 let cov: f64 = (0..n_obs)
55 .map(|t| (series[i][t] - means[i]) * (series[j][t] - means[j]))
56 .sum::<f64>()
57 / (n_obs - 1) as f64;
58 let r = cov / (variances[i].sqrt() * variances[j].sqrt());
59 let r = r.clamp(-1.0, 1.0);
60 corr[i][j] = r;
61 corr[j][i] = r;
62 }
63 }
64
65 corr
66}
67
68pub fn distance_matrix(corr: &[Vec<f64>]) -> Vec<Vec<f64>> {
70 let n = corr.len();
71 let mut dist = vec![vec![0.0; n]; n];
72 for i in 0..n {
73 for j in (i + 1)..n {
74 let d = ((1.0 - corr[i][j]) / 2.0).sqrt();
75 dist[i][j] = d;
76 dist[j][i] = d;
77 }
78 }
79 dist
80}
81
82pub fn build_dendrogram(dist: &[Vec<f64>], n: usize) -> ClusterNode {
88 if n == 1 {
89 return ClusterNode::Leaf(0);
90 }
91
92 let mut nodes: Vec<Option<ClusterNode>> = (0..n).map(|i| Some(ClusterNode::Leaf(i))).collect();
93
94 let mut work_dist: Vec<Vec<f64>> = dist.to_vec();
95 let mut sizes: Vec<usize> = vec![1; n];
96
97 for _ in 0..(n - 1) {
98 let (ci, cj) = find_closest_pair(&work_dist, &nodes);
99
100 let Some(left) = nodes[ci].take() else {
102 continue;
103 };
104 let Some(right) = nodes[cj].take() else {
105 nodes[ci] = Some(left); continue;
107 };
108
109 let merged = ClusterNode::Branch {
110 left: Box::new(left),
111 right: Box::new(right),
112 };
113
114 let size_merged = sizes[ci] + sizes[cj];
115
116 for k in 0..nodes.len() {
118 if k == ci || k == cj || nodes[k].is_none() {
119 continue;
120 }
121 let d_new = (work_dist[ci][k] * sizes[ci] as f64 + work_dist[cj][k] * sizes[cj] as f64)
122 / size_merged as f64;
123 work_dist[ci][k] = d_new;
124 work_dist[k][ci] = d_new;
125 }
126
127 sizes[ci] = size_merged;
128
129 for val in &mut work_dist[cj] {
131 *val = f64::INFINITY;
132 }
133 for row in &mut work_dist {
135 row[cj] = f64::INFINITY;
136 }
137
138 nodes[ci] = Some(merged);
139 }
140
141 nodes
143 .into_iter()
144 .find_map(|n| n)
145 .unwrap_or(ClusterNode::Leaf(0))
146}
147
148pub fn find_closest_pair(dist: &[Vec<f64>], nodes: &[Option<ClusterNode>]) -> (usize, usize) {
149 let mut min_d = f64::INFINITY;
150 let mut best = (0, 1);
151
152 for i in 0..nodes.len() {
153 if nodes[i].is_none() {
154 continue;
155 }
156 for j in (i + 1)..nodes.len() {
157 if nodes[j].is_none() {
158 continue;
159 }
160 if dist[i][j] < min_d {
161 min_d = dist[i][j];
162 best = (i, j);
163 }
164 }
165 }
166
167 best
168}
169
170pub fn recursive_bisect(node: &ClusterNode, variances: &[f64], weight: f64, out: &mut [f64]) {
177 match node {
178 ClusterNode::Leaf(i) => {
179 out[*i] = weight;
180 }
181 ClusterNode::Branch { left, right } => {
182 let var_left = cluster_variance(left, variances);
183 let var_right = cluster_variance(right, variances);
184 let total = var_left + var_right;
185
186 if total < 1e-30 {
187 recursive_bisect(left, variances, weight * 0.5, out);
188 recursive_bisect(right, variances, weight * 0.5, out);
189 } else {
190 let alpha = 1.0 - var_left / total;
192 recursive_bisect(left, variances, weight * alpha, out);
193 recursive_bisect(right, variances, weight * (1.0 - alpha), out);
194 }
195 }
196 }
197}
198
199pub fn cluster_variance(node: &ClusterNode, variances: &[f64]) -> f64 {
206 let (sum, count) = cluster_variance_sum(node, variances);
207 sum / count as f64
208}
209
210pub fn cluster_variance_sum(node: &ClusterNode, variances: &[f64]) -> (f64, usize) {
211 match node {
212 ClusterNode::Leaf(i) => (variances[*i], 1),
213 ClusterNode::Branch { left, right } => {
214 let (sl, cl) = cluster_variance_sum(left, variances);
215 let (sr, cr) = cluster_variance_sum(right, variances);
216 (sl + sr, cl + cr)
217 }
218 }
219}
220
221pub fn decimal_from_f64(v: f64) -> Result<Decimal, String> {
226 if !v.is_finite() {
227 return Err(format!("non-finite weight: {v}"));
228 }
229 let scaled = (v * 1_000_000.0).round() / 1_000_000.0;
230 Decimal::try_from(scaled).map_err(|e| format!("decimal conversion failed for {v}: {e}"))
231}
232
233#[cfg(test)]
234#[path = "hrp_tests.rs"]
235mod tests;