organizational_intelligence_plugin/
correlation.rs

1//! Correlation Matrix Computation
2//!
3//! Implements Section 5.2: Correlation Matrix Computation
4//! Phase 1: SIMD via trueno (GPU in Phase 2)
5//!
6//! OPT-001: Added LruCache for correlation result caching
7
8use crate::perf::{LruCache, PerfStats};
9use anyhow::Result;
10use std::time::{Duration, Instant};
11use trueno::{Matrix, Vector};
12
13/// Correlation matrix result
14#[derive(Debug)]
15pub struct CorrelationMatrix {
16    /// Number of categories
17    pub categories: usize,
18
19    /// Correlation values (categories × categories)
20    pub values: Matrix<f32>,
21}
22
23/// Compute Pearson correlation between two vectors
24///
25/// r = cov(X,Y) / (std(X) * std(Y))
26pub fn pearson_correlation(x: &Vector<f32>, y: &Vector<f32>) -> Result<f32> {
27    if x.len() != y.len() {
28        anyhow::bail!("Vectors must have same length");
29    }
30
31    let n = x.len() as f32;
32
33    // Mean values (trueno SIMD-accelerated)
34    let mean_x = x.sum()? / n;
35    let mean_y = y.sum()? / n;
36
37    // Compute covariance and variances
38    let mut cov = 0.0_f32;
39    let mut var_x = 0.0_f32;
40    let mut var_y = 0.0_f32;
41
42    let x_slice = x.as_slice();
43    let y_slice = y.as_slice();
44
45    for i in 0..x.len() {
46        let dx = x_slice[i] - mean_x;
47        let dy = y_slice[i] - mean_y;
48
49        cov += dx * dy;
50        var_x += dx * dx;
51        var_y += dy * dy;
52    }
53
54    // Handle zero variance
55    if var_x == 0.0 || var_y == 0.0 {
56        return Ok(0.0);
57    }
58
59    // Pearson correlation: r = cov / sqrt(var_x * var_y)
60    let r = cov / (var_x * var_y).sqrt();
61
62    Ok(r)
63}
64
65/// Cached correlation computer with performance tracking
66///
67/// OPT-001: Caches correlation results to avoid redundant computation
68pub struct CachedCorrelation {
69    cache: LruCache<(usize, usize), f32>,
70    stats: PerfStats,
71    cache_hits: u64,
72    cache_misses: u64,
73}
74
75impl CachedCorrelation {
76    /// Create cached correlation with default capacity (1000 entries, 5 min TTL)
77    pub fn new() -> Self {
78        Self::with_capacity(1000)
79    }
80
81    /// Create cached correlation with custom capacity
82    pub fn with_capacity(capacity: usize) -> Self {
83        Self {
84            cache: LruCache::with_ttl(capacity, Duration::from_secs(300)),
85            stats: PerfStats::new(),
86            cache_hits: 0,
87            cache_misses: 0,
88        }
89    }
90
91    /// Compute correlation with caching
92    ///
93    /// Cache key is based on vector indices (assumes stable vector ordering)
94    pub fn correlate(
95        &mut self,
96        x: &Vector<f32>,
97        y: &Vector<f32>,
98        x_idx: usize,
99        y_idx: usize,
100    ) -> Result<f32> {
101        let key = if x_idx <= y_idx {
102            (x_idx, y_idx)
103        } else {
104            (y_idx, x_idx)
105        };
106
107        // Check cache
108        if let Some(cached) = self.cache.get(&key) {
109            self.cache_hits += 1;
110            return Ok(cached);
111        }
112
113        self.cache_misses += 1;
114
115        // Compute correlation
116        let start = Instant::now();
117        let result = pearson_correlation(x, y)?;
118        let duration_ns = start.elapsed().as_nanos() as u64;
119        self.stats.record(duration_ns);
120
121        // Cache result
122        self.cache.insert(key, result);
123
124        Ok(result)
125    }
126
127    /// Compute correlation matrix for multiple vectors
128    pub fn correlation_matrix(&mut self, vectors: &[Vector<f32>]) -> Result<Vec<Vec<f32>>> {
129        let n = vectors.len();
130        let mut matrix = vec![vec![0.0; n]; n];
131
132        for i in 0..n {
133            matrix[i][i] = 1.0; // Self-correlation
134            for j in (i + 1)..n {
135                let corr = self.correlate(&vectors[i], &vectors[j], i, j)?;
136                matrix[i][j] = corr;
137                matrix[j][i] = corr; // Symmetric
138            }
139        }
140
141        Ok(matrix)
142    }
143
144    /// Get performance statistics
145    pub fn stats(&self) -> &PerfStats {
146        &self.stats
147    }
148
149    /// Get cache hit rate
150    pub fn cache_hit_rate(&self) -> f64 {
151        let total = self.cache_hits + self.cache_misses;
152        if total == 0 {
153            0.0
154        } else {
155            self.cache_hits as f64 / total as f64
156        }
157    }
158
159    /// Get cache statistics
160    pub fn cache_stats(&self) -> (u64, u64) {
161        (self.cache_hits, self.cache_misses)
162    }
163
164    /// Clear cache
165    pub fn clear_cache(&mut self) {
166        self.cache.clear();
167        self.cache_hits = 0;
168        self.cache_misses = 0;
169    }
170}
171
172impl Default for CachedCorrelation {
173    fn default() -> Self {
174        Self::new()
175    }
176}
177
178#[cfg(test)]
179mod tests {
180    use super::*;
181
182    #[test]
183    fn test_pearson_perfect_positive() {
184        let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
185        let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
186
187        let r = pearson_correlation(&x, &y).unwrap();
188
189        assert!((r - 1.0).abs() < 1e-6, "Expected r=1.0, got {}", r);
190    }
191
192    #[test]
193    fn test_pearson_perfect_negative() {
194        let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
195        let y = Vector::from_slice(&[5.0, 4.0, 3.0, 2.0, 1.0]);
196
197        let r = pearson_correlation(&x, &y).unwrap();
198
199        assert!((r + 1.0).abs() < 1e-6, "Expected r=-1.0, got {}", r);
200    }
201
202    #[test]
203    fn test_pearson_zero_variance() {
204        let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
205        let y = Vector::from_slice(&[1.0, 1.0, 1.0, 1.0]);
206
207        let r = pearson_correlation(&x, &y).unwrap();
208
209        assert_eq!(r, 0.0, "Zero variance should return 0.0");
210    }
211
212    #[test]
213    fn test_pearson_different_lengths() {
214        let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
215        let y = Vector::from_slice(&[1.0, 2.0]);
216
217        let result = pearson_correlation(&x, &y);
218
219        assert!(result.is_err(), "Should error on different lengths");
220    }
221
222    #[test]
223    fn test_cached_correlation_creation() {
224        let cached = CachedCorrelation::new();
225        assert_eq!(cached.cache_hit_rate(), 0.0);
226    }
227
228    #[test]
229    fn test_cached_correlation_compute() {
230        let mut cached = CachedCorrelation::new();
231
232        let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
233        let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
234
235        let r = cached.correlate(&x, &y, 0, 1).unwrap();
236        assert!((r - 1.0).abs() < 1e-6);
237
238        let (hits, misses) = cached.cache_stats();
239        assert_eq!(hits, 0);
240        assert_eq!(misses, 1);
241    }
242
243    #[test]
244    fn test_cached_correlation_cache_hit() {
245        let mut cached = CachedCorrelation::new();
246
247        let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
248        let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
249
250        // First call - cache miss
251        cached.correlate(&x, &y, 0, 1).unwrap();
252
253        // Second call - cache hit
254        let r = cached.correlate(&x, &y, 0, 1).unwrap();
255        assert!((r - 1.0).abs() < 1e-6);
256
257        let (hits, misses) = cached.cache_stats();
258        assert_eq!(hits, 1);
259        assert_eq!(misses, 1);
260        assert!((cached.cache_hit_rate() - 0.5).abs() < 0.01);
261    }
262
263    #[test]
264    fn test_cached_correlation_symmetric_key() {
265        let mut cached = CachedCorrelation::new();
266
267        let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
268        let y = Vector::from_slice(&[3.0, 2.0, 1.0]);
269
270        // Call with (0, 1)
271        cached.correlate(&x, &y, 0, 1).unwrap();
272
273        // Call with (1, 0) - should hit cache due to symmetric key
274        cached.correlate(&y, &x, 1, 0).unwrap();
275
276        let (hits, _) = cached.cache_stats();
277        assert_eq!(hits, 1);
278    }
279
280    #[test]
281    fn test_cached_correlation_matrix() {
282        let mut cached = CachedCorrelation::new();
283
284        let vectors = vec![
285            Vector::from_slice(&[1.0, 2.0, 3.0]),
286            Vector::from_slice(&[2.0, 4.0, 6.0]),
287            Vector::from_slice(&[3.0, 2.0, 1.0]),
288        ];
289
290        let matrix = cached.correlation_matrix(&vectors).unwrap();
291
292        assert_eq!(matrix.len(), 3);
293        assert_eq!(matrix[0].len(), 3);
294
295        // Diagonal should be 1.0
296        assert!((matrix[0][0] - 1.0).abs() < 1e-6);
297        assert!((matrix[1][1] - 1.0).abs() < 1e-6);
298        assert!((matrix[2][2] - 1.0).abs() < 1e-6);
299
300        // Should be symmetric
301        assert!((matrix[0][1] - matrix[1][0]).abs() < 1e-6);
302    }
303
304    #[test]
305    fn test_cached_correlation_clear() {
306        let mut cached = CachedCorrelation::new();
307
308        let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
309        let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
310
311        cached.correlate(&x, &y, 0, 1).unwrap();
312        cached.clear_cache();
313
314        let (hits, misses) = cached.cache_stats();
315        assert_eq!(hits, 0);
316        assert_eq!(misses, 0);
317    }
318}