Skip to main content

quant_indicators/
hrp.rs

1//! Hierarchical Risk Parity (HRP) — pure math functions.
2//!
3//! Three-phase algorithm:
4//! 1. Compute correlation → distance matrix → agglomerative clustering (average linkage)
5//! 2. Build dendrogram (binary tree of clusters)
6//! 3. Recursive bisection following dendrogram with inverse-variance weighting
7//!
8//! All functions are pure math: `&[f64]` in, `f64` out. No domain types,
9//! no I/O. The adapter wraps domain types (e.g. `LegReturns`) into raw slices
10//! before calling these functions.
11
12use rust_decimal::Decimal;
13
14/// Minimum number of return observations required to compute
15/// a meaningful correlation matrix.
16pub const MIN_OBSERVATIONS: usize = 30;
17
18/// Binary tree node representing hierarchical clustering.
19#[derive(Debug)]
20pub enum ClusterNode {
21    /// Leaf node: a single asset (original index).
22    Leaf(usize),
23    /// Branch node: two child clusters.
24    Branch {
25        left: Box<ClusterNode>,
26        right: Box<ClusterNode>,
27    },
28}
29
30// =============================================================================
31// Linear algebra helpers
32// =============================================================================
33
34pub fn mean(data: &[f64]) -> f64 {
35    data.iter().sum::<f64>() / data.len() as f64
36}
37
38pub fn variance(data: &[f64], mean: f64) -> f64 {
39    data.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (data.len() - 1) as f64
40}
41
42/// Compute the Pearson correlation matrix from return series.
43///
44/// `series[i]` is the return series for asset `i`. All series must have the
45/// same length. `means[i]` and `variances[i]` are the pre-computed mean and
46/// sample variance for series `i`.
47pub fn correlation_matrix(series: &[&[f64]], means: &[f64], variances: &[f64]) -> Vec<Vec<f64>> {
48    let n = series.len();
49    let n_obs = series[0].len();
50    let mut corr = vec![vec![1.0; n]; n];
51
52    for i in 0..n {
53        for j in (i + 1)..n {
54            let cov: f64 = (0..n_obs)
55                .map(|t| (series[i][t] - means[i]) * (series[j][t] - means[j]))
56                .sum::<f64>()
57                / (n_obs - 1) as f64;
58            let r = cov / (variances[i].sqrt() * variances[j].sqrt());
59            let r = r.clamp(-1.0, 1.0);
60            corr[i][j] = r;
61            corr[j][i] = r;
62        }
63    }
64
65    corr
66}
67
68/// Convert correlation to distance: d(i,j) = sqrt((1 - corr(i,j)) / 2)
69pub fn distance_matrix(corr: &[Vec<f64>]) -> Vec<Vec<f64>> {
70    let n = corr.len();
71    let mut dist = vec![vec![0.0; n]; n];
72    for i in 0..n {
73        for j in (i + 1)..n {
74            let d = ((1.0 - corr[i][j]) / 2.0).sqrt();
75            dist[i][j] = d;
76            dist[j][i] = d;
77        }
78    }
79    dist
80}
81
82// =============================================================================
83// Agglomerative clustering → dendrogram
84// =============================================================================
85
86/// Build a dendrogram using agglomerative clustering with weighted average linkage.
87pub fn build_dendrogram(dist: &[Vec<f64>], n: usize) -> ClusterNode {
88    if n == 1 {
89        return ClusterNode::Leaf(0);
90    }
91
92    let mut nodes: Vec<Option<ClusterNode>> = (0..n).map(|i| Some(ClusterNode::Leaf(i))).collect();
93
94    let mut work_dist: Vec<Vec<f64>> = dist.to_vec();
95    let mut sizes: Vec<usize> = vec![1; n];
96
97    for _ in 0..(n - 1) {
98        let (ci, cj) = find_closest_pair(&work_dist, &nodes);
99
100        // Safety: find_closest_pair only returns indices of active (Some) nodes
101        let Some(left) = nodes[ci].take() else {
102            continue;
103        };
104        let Some(right) = nodes[cj].take() else {
105            nodes[ci] = Some(left); // restore if cj was unexpectedly None
106            continue;
107        };
108
109        let merged = ClusterNode::Branch {
110            left: Box::new(left),
111            right: Box::new(right),
112        };
113
114        let size_merged = sizes[ci] + sizes[cj];
115
116        // Update distances using UPGMA (size-weighted average linkage)
117        for k in 0..nodes.len() {
118            if k == ci || k == cj || nodes[k].is_none() {
119                continue;
120            }
121            let d_new = (work_dist[ci][k] * sizes[ci] as f64 + work_dist[cj][k] * sizes[cj] as f64)
122                / size_merged as f64;
123            work_dist[ci][k] = d_new;
124            work_dist[k][ci] = d_new;
125        }
126
127        sizes[ci] = size_merged;
128
129        // Invalidate merged slot's row
130        for val in &mut work_dist[cj] {
131            *val = f64::INFINITY;
132        }
133        // Invalidate merged slot's column
134        for row in &mut work_dist {
135            row[cj] = f64::INFINITY;
136        }
137
138        nodes[ci] = Some(merged);
139    }
140
141    // After n-1 merges, exactly one node remains
142    nodes
143        .into_iter()
144        .find_map(|n| n)
145        .unwrap_or(ClusterNode::Leaf(0))
146}
147
148pub fn find_closest_pair(dist: &[Vec<f64>], nodes: &[Option<ClusterNode>]) -> (usize, usize) {
149    let mut min_d = f64::INFINITY;
150    let mut best = (0, 1);
151
152    for i in 0..nodes.len() {
153        if nodes[i].is_none() {
154            continue;
155        }
156        for j in (i + 1)..nodes.len() {
157            if nodes[j].is_none() {
158                continue;
159            }
160            if dist[i][j] < min_d {
161                min_d = dist[i][j];
162                best = (i, j);
163            }
164        }
165    }
166
167    best
168}
169
170// =============================================================================
171// Recursive bisection (follows dendrogram structure)
172// =============================================================================
173
174/// Walk the dendrogram tree, splitting weight between left and right
175/// subtrees using inverse-variance allocation at each branch.
176pub fn recursive_bisect(node: &ClusterNode, variances: &[f64], weight: f64, out: &mut [f64]) {
177    match node {
178        ClusterNode::Leaf(i) => {
179            out[*i] = weight;
180        }
181        ClusterNode::Branch { left, right } => {
182            let var_left = cluster_variance(left, variances);
183            let var_right = cluster_variance(right, variances);
184            let total = var_left + var_right;
185
186            if total < 1e-30 {
187                recursive_bisect(left, variances, weight * 0.5, out);
188                recursive_bisect(right, variances, weight * 0.5, out);
189            } else {
190                // Inverse-variance: lower variance gets more weight
191                let alpha = 1.0 - var_left / total;
192                recursive_bisect(left, variances, weight * alpha, out);
193                recursive_bisect(right, variances, weight * (1.0 - alpha), out);
194            }
195        }
196    }
197}
198
199/// Compute average variance for a cluster (sum of leaf variances / leaf count).
200///
201/// Bug fix (#1238): previously returned the raw SUM, which scaled linearly
202/// with cluster size. A 17-leaf cluster had ~17x the "variance" of a 1-leaf
203/// cluster, causing inverse-variance bisection to push 89-99% weight into
204/// the smaller cluster. Dividing by leaf count normalizes for cluster size.
205pub fn cluster_variance(node: &ClusterNode, variances: &[f64]) -> f64 {
206    let (sum, count) = cluster_variance_sum(node, variances);
207    sum / count as f64
208}
209
210pub fn cluster_variance_sum(node: &ClusterNode, variances: &[f64]) -> (f64, usize) {
211    match node {
212        ClusterNode::Leaf(i) => (variances[*i], 1),
213        ClusterNode::Branch { left, right } => {
214            let (sl, cl) = cluster_variance_sum(left, variances);
215            let (sr, cr) = cluster_variance_sum(right, variances);
216            (sl + sr, cl + cr)
217        }
218    }
219}
220
221/// Convert f64 to Decimal with 6 decimal places precision.
222///
223/// Returns an error message if the value is NaN, infinite, or
224/// cannot be represented as a Decimal.
225pub fn decimal_from_f64(v: f64) -> Result<Decimal, String> {
226    if !v.is_finite() {
227        return Err(format!("non-finite weight: {v}"));
228    }
229    let scaled = (v * 1_000_000.0).round() / 1_000_000.0;
230    Decimal::try_from(scaled).map_err(|e| format!("decimal conversion failed for {v}: {e}"))
231}
232
233#[cfg(test)]
234#[path = "hrp_tests.rs"]
235mod tests;