organizational_intelligence_plugin/
correlation.rs1use crate::perf::{LruCache, PerfStats};
9use anyhow::Result;
10use std::time::{Duration, Instant};
11use trueno::{Matrix, Vector};
12
13#[derive(Debug)]
15pub struct CorrelationMatrix {
16 pub categories: usize,
18
19 pub values: Matrix<f32>,
21}
22
23pub fn pearson_correlation(x: &Vector<f32>, y: &Vector<f32>) -> Result<f32> {
27 if x.len() != y.len() {
28 anyhow::bail!("Vectors must have same length");
29 }
30
31 let n = x.len() as f32;
32
33 let mean_x = x.sum()? / n;
35 let mean_y = y.sum()? / n;
36
37 let mut cov = 0.0_f32;
39 let mut var_x = 0.0_f32;
40 let mut var_y = 0.0_f32;
41
42 let x_slice = x.as_slice();
43 let y_slice = y.as_slice();
44
45 for i in 0..x.len() {
46 let dx = x_slice[i] - mean_x;
47 let dy = y_slice[i] - mean_y;
48
49 cov += dx * dy;
50 var_x += dx * dx;
51 var_y += dy * dy;
52 }
53
54 if var_x == 0.0 || var_y == 0.0 {
56 return Ok(0.0);
57 }
58
59 let r = cov / (var_x * var_y).sqrt();
61
62 Ok(r)
63}
64
65pub struct CachedCorrelation {
69 cache: LruCache<(usize, usize), f32>,
70 stats: PerfStats,
71 cache_hits: u64,
72 cache_misses: u64,
73}
74
75impl CachedCorrelation {
76 pub fn new() -> Self {
78 Self::with_capacity(1000)
79 }
80
81 pub fn with_capacity(capacity: usize) -> Self {
83 Self {
84 cache: LruCache::with_ttl(capacity, Duration::from_secs(300)),
85 stats: PerfStats::new(),
86 cache_hits: 0,
87 cache_misses: 0,
88 }
89 }
90
91 pub fn correlate(
95 &mut self,
96 x: &Vector<f32>,
97 y: &Vector<f32>,
98 x_idx: usize,
99 y_idx: usize,
100 ) -> Result<f32> {
101 let key = if x_idx <= y_idx {
102 (x_idx, y_idx)
103 } else {
104 (y_idx, x_idx)
105 };
106
107 if let Some(cached) = self.cache.get(&key) {
109 self.cache_hits += 1;
110 return Ok(cached);
111 }
112
113 self.cache_misses += 1;
114
115 let start = Instant::now();
117 let result = pearson_correlation(x, y)?;
118 let duration_ns = start.elapsed().as_nanos() as u64;
119 self.stats.record(duration_ns);
120
121 self.cache.insert(key, result);
123
124 Ok(result)
125 }
126
127 pub fn correlation_matrix(&mut self, vectors: &[Vector<f32>]) -> Result<Vec<Vec<f32>>> {
129 let n = vectors.len();
130 let mut matrix = vec![vec![0.0; n]; n];
131
132 for i in 0..n {
133 matrix[i][i] = 1.0; for j in (i + 1)..n {
135 let corr = self.correlate(&vectors[i], &vectors[j], i, j)?;
136 matrix[i][j] = corr;
137 matrix[j][i] = corr; }
139 }
140
141 Ok(matrix)
142 }
143
144 pub fn stats(&self) -> &PerfStats {
146 &self.stats
147 }
148
149 pub fn cache_hit_rate(&self) -> f64 {
151 let total = self.cache_hits + self.cache_misses;
152 if total == 0 {
153 0.0
154 } else {
155 self.cache_hits as f64 / total as f64
156 }
157 }
158
159 pub fn cache_stats(&self) -> (u64, u64) {
161 (self.cache_hits, self.cache_misses)
162 }
163
164 pub fn clear_cache(&mut self) {
166 self.cache.clear();
167 self.cache_hits = 0;
168 self.cache_misses = 0;
169 }
170}
171
172impl Default for CachedCorrelation {
173 fn default() -> Self {
174 Self::new()
175 }
176}
177
178#[cfg(test)]
179mod tests {
180 use super::*;
181
182 #[test]
183 fn test_pearson_perfect_positive() {
184 let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
185 let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
186
187 let r = pearson_correlation(&x, &y).unwrap();
188
189 assert!((r - 1.0).abs() < 1e-6, "Expected r=1.0, got {}", r);
190 }
191
192 #[test]
193 fn test_pearson_perfect_negative() {
194 let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
195 let y = Vector::from_slice(&[5.0, 4.0, 3.0, 2.0, 1.0]);
196
197 let r = pearson_correlation(&x, &y).unwrap();
198
199 assert!((r + 1.0).abs() < 1e-6, "Expected r=-1.0, got {}", r);
200 }
201
202 #[test]
203 fn test_pearson_zero_variance() {
204 let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
205 let y = Vector::from_slice(&[1.0, 1.0, 1.0, 1.0]);
206
207 let r = pearson_correlation(&x, &y).unwrap();
208
209 assert_eq!(r, 0.0, "Zero variance should return 0.0");
210 }
211
212 #[test]
213 fn test_pearson_different_lengths() {
214 let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
215 let y = Vector::from_slice(&[1.0, 2.0]);
216
217 let result = pearson_correlation(&x, &y);
218
219 assert!(result.is_err(), "Should error on different lengths");
220 }
221
222 #[test]
223 fn test_cached_correlation_creation() {
224 let cached = CachedCorrelation::new();
225 assert_eq!(cached.cache_hit_rate(), 0.0);
226 }
227
228 #[test]
229 fn test_cached_correlation_compute() {
230 let mut cached = CachedCorrelation::new();
231
232 let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
233 let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
234
235 let r = cached.correlate(&x, &y, 0, 1).unwrap();
236 assert!((r - 1.0).abs() < 1e-6);
237
238 let (hits, misses) = cached.cache_stats();
239 assert_eq!(hits, 0);
240 assert_eq!(misses, 1);
241 }
242
243 #[test]
244 fn test_cached_correlation_cache_hit() {
245 let mut cached = CachedCorrelation::new();
246
247 let x = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
248 let y = Vector::from_slice(&[2.0, 4.0, 6.0, 8.0, 10.0]);
249
250 cached.correlate(&x, &y, 0, 1).unwrap();
252
253 let r = cached.correlate(&x, &y, 0, 1).unwrap();
255 assert!((r - 1.0).abs() < 1e-6);
256
257 let (hits, misses) = cached.cache_stats();
258 assert_eq!(hits, 1);
259 assert_eq!(misses, 1);
260 assert!((cached.cache_hit_rate() - 0.5).abs() < 0.01);
261 }
262
263 #[test]
264 fn test_cached_correlation_symmetric_key() {
265 let mut cached = CachedCorrelation::new();
266
267 let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
268 let y = Vector::from_slice(&[3.0, 2.0, 1.0]);
269
270 cached.correlate(&x, &y, 0, 1).unwrap();
272
273 cached.correlate(&y, &x, 1, 0).unwrap();
275
276 let (hits, _) = cached.cache_stats();
277 assert_eq!(hits, 1);
278 }
279
280 #[test]
281 fn test_cached_correlation_matrix() {
282 let mut cached = CachedCorrelation::new();
283
284 let vectors = vec![
285 Vector::from_slice(&[1.0, 2.0, 3.0]),
286 Vector::from_slice(&[2.0, 4.0, 6.0]),
287 Vector::from_slice(&[3.0, 2.0, 1.0]),
288 ];
289
290 let matrix = cached.correlation_matrix(&vectors).unwrap();
291
292 assert_eq!(matrix.len(), 3);
293 assert_eq!(matrix[0].len(), 3);
294
295 assert!((matrix[0][0] - 1.0).abs() < 1e-6);
297 assert!((matrix[1][1] - 1.0).abs() < 1e-6);
298 assert!((matrix[2][2] - 1.0).abs() < 1e-6);
299
300 assert!((matrix[0][1] - matrix[1][0]).abs() < 1e-6);
302 }
303
304 #[test]
305 fn test_cached_correlation_clear() {
306 let mut cached = CachedCorrelation::new();
307
308 let x = Vector::from_slice(&[1.0, 2.0, 3.0]);
309 let y = Vector::from_slice(&[2.0, 4.0, 6.0]);
310
311 cached.correlate(&x, &y, 0, 1).unwrap();
312 cached.clear_cache();
313
314 let (hits, misses) = cached.cache_stats();
315 assert_eq!(hits, 0);
316 assert_eq!(misses, 0);
317 }
318}