organizational_intelligence_plugin/
sliding_window.rs

1//! Sliding Window Correlation for Concept Drift Detection
2//!
3//! Implements PHASE2-003: Time-windowed correlation matrices
4//! Detects changing defect patterns over time (concept drift)
5
6use crate::correlation::pearson_correlation;
7use crate::features::CommitFeatures;
8use crate::storage::FeatureStore;
9use anyhow::Result;
10use trueno::Vector;
11
12/// Time window duration in seconds (6 months ≈ 15,768,000 seconds)
13pub const SIX_MONTHS_SECONDS: f64 = 6.0 * 30.0 * 24.0 * 3600.0;
14
15/// Time window for correlation analysis
16#[derive(Debug, Clone)]
17pub struct TimeWindow {
18    pub start_time: f64, // Unix epoch
19    pub end_time: f64,   // Unix epoch
20}
21
22impl TimeWindow {
23    /// Create time window
24    pub fn new(start_time: f64, end_time: f64) -> Self {
25        Self {
26            start_time,
27            end_time,
28        }
29    }
30
31    /// Create 6-month window starting at given time
32    pub fn six_months_from(start_time: f64) -> Self {
33        Self {
34            start_time,
35            end_time: start_time + SIX_MONTHS_SECONDS,
36        }
37    }
38
39    /// Check if window contains timestamp
40    pub fn contains(&self, timestamp: f64) -> bool {
41        timestamp >= self.start_time && timestamp < self.end_time
42    }
43
44    /// Get window duration in seconds
45    pub fn duration(&self) -> f64 {
46        self.end_time - self.start_time
47    }
48}
49
50/// Correlation matrix for a time window
51#[derive(Debug, Clone)]
52pub struct WindowedCorrelationMatrix {
53    pub window: TimeWindow,
54    pub matrix: Vec<Vec<f32>>, // n×n correlation matrix
55    pub feature_count: usize,  // Number of features in window
56}
57
58/// Sliding window correlation analyzer
59pub struct SlidingWindowAnalyzer {
60    window_size: f64, // Window duration in seconds
61    stride: f64,      // Window stride in seconds (for overlap)
62}
63
64impl SlidingWindowAnalyzer {
65    /// Create analyzer with 6-month windows, 3-month stride (50% overlap)
66    pub fn new_six_month() -> Self {
67        Self {
68            window_size: SIX_MONTHS_SECONDS,
69            stride: SIX_MONTHS_SECONDS / 2.0,
70        }
71    }
72
73    /// Create analyzer with custom window size and stride
74    pub fn new(window_size: f64, stride: f64) -> Self {
75        Self {
76            window_size,
77            stride,
78        }
79    }
80
81    /// Generate time windows for given data range
82    pub fn generate_windows(&self, start_time: f64, end_time: f64) -> Vec<TimeWindow> {
83        let mut windows = Vec::new();
84        let mut current_start = start_time;
85
86        while current_start + self.window_size <= end_time {
87            windows.push(TimeWindow::new(
88                current_start,
89                current_start + self.window_size,
90            ));
91            current_start += self.stride;
92        }
93
94        windows
95    }
96
97    /// Compute correlation matrix for features in a time window
98    ///
99    /// Returns correlation matrix between all feature dimensions
100    pub fn compute_window_correlation(
101        &self,
102        store: &FeatureStore,
103        window: &TimeWindow,
104    ) -> Result<WindowedCorrelationMatrix> {
105        // Query features in window
106        let features = store.query_by_time_range(window.start_time, window.end_time)?;
107
108        if features.is_empty() {
109            anyhow::bail!(
110                "No features in window [{}, {})",
111                window.start_time,
112                window.end_time
113            );
114        }
115
116        // Convert to vectors (8 dimensions per feature)
117        let vectors: Vec<Vec<f32>> = features.iter().map(|f| f.to_vector()).collect();
118        let n_samples = vectors.len();
119        let n_dims = CommitFeatures::DIMENSION;
120
121        // Build dimension-wise arrays
122        let mut dim_arrays: Vec<Vec<f32>> = vec![Vec::new(); n_dims];
123        for v in &vectors {
124            for (dim_idx, &value) in v.iter().enumerate() {
125                dim_arrays[dim_idx].push(value);
126            }
127        }
128
129        // Compute correlation matrix (n_dims × n_dims)
130        let mut matrix = vec![vec![0.0; n_dims]; n_dims];
131        for i in 0..n_dims {
132            for j in 0..n_dims {
133                if i == j {
134                    matrix[i][j] = 1.0; // Self-correlation is always 1
135                } else {
136                    let vec_i = Vector::from_slice(&dim_arrays[i]);
137                    let vec_j = Vector::from_slice(&dim_arrays[j]);
138                    matrix[i][j] = pearson_correlation(&vec_i, &vec_j)?;
139                }
140            }
141        }
142
143        Ok(WindowedCorrelationMatrix {
144            window: window.clone(),
145            matrix,
146            feature_count: n_samples,
147        })
148    }
149
150    /// Compute correlation matrices for all windows
151    pub fn compute_all_windows(
152        &self,
153        store: &FeatureStore,
154    ) -> Result<Vec<WindowedCorrelationMatrix>> {
155        // Find time range in data
156        let all_features = store.all_features();
157        if all_features.is_empty() {
158            anyhow::bail!("No features in store");
159        }
160
161        let start_time = all_features
162            .iter()
163            .map(|f| f.timestamp)
164            .fold(f64::INFINITY, f64::min);
165        let end_time = all_features
166            .iter()
167            .map(|f| f.timestamp)
168            .fold(f64::NEG_INFINITY, f64::max);
169
170        // Generate windows
171        let windows = self.generate_windows(start_time, end_time);
172
173        // Compute correlation for each window
174        let mut results = Vec::new();
175        for window in windows {
176            match self.compute_window_correlation(store, &window) {
177                Ok(wcm) => results.push(wcm),
178                Err(_) => continue, // Skip windows with no data
179            }
180        }
181
182        Ok(results)
183    }
184}
185
186/// Concept drift detection
187#[derive(Debug, Clone)]
188pub struct ConceptDrift {
189    pub window1_idx: usize,
190    pub window2_idx: usize,
191    pub matrix_diff: f32,     // Frobenius norm of difference
192    pub is_significant: bool, // Above threshold
193}
194
195/// Detect concept drift between consecutive windows
196///
197/// Uses Frobenius norm to measure matrix difference
198pub fn detect_drift(
199    matrices: &[WindowedCorrelationMatrix],
200    threshold: f32,
201) -> Result<Vec<ConceptDrift>> {
202    if matrices.len() < 2 {
203        return Ok(Vec::new());
204    }
205
206    let mut drifts = Vec::new();
207
208    for i in 0..matrices.len() - 1 {
209        let mat1 = &matrices[i].matrix;
210        let mat2 = &matrices[i + 1].matrix;
211
212        // Compute Frobenius norm: sqrt(sum of squared differences)
213        let mut sum_sq_diff = 0.0;
214        for (row1, row2) in mat1.iter().zip(mat2.iter()) {
215            for (&val1, &val2) in row1.iter().zip(row2.iter()) {
216                let diff = val1 - val2;
217                sum_sq_diff += diff * diff;
218            }
219        }
220        let frobenius_norm = sum_sq_diff.sqrt();
221
222        drifts.push(ConceptDrift {
223            window1_idx: i,
224            window2_idx: i + 1,
225            matrix_diff: frobenius_norm,
226            is_significant: frobenius_norm > threshold,
227        });
228    }
229
230    Ok(drifts)
231}
232
233#[cfg(test)]
234mod tests {
235    use super::*;
236
237    #[test]
238    fn test_time_window_creation() {
239        let window = TimeWindow::new(1000.0, 2000.0);
240        assert_eq!(window.duration(), 1000.0);
241        assert!(window.contains(1500.0));
242        assert!(!window.contains(2500.0));
243    }
244
245    #[test]
246    fn test_six_month_window() {
247        let window = TimeWindow::six_months_from(0.0);
248        assert_eq!(window.duration(), SIX_MONTHS_SECONDS);
249    }
250
251    #[test]
252    fn test_generate_windows() {
253        let analyzer = SlidingWindowAnalyzer::new_six_month();
254        let windows = analyzer.generate_windows(0.0, SIX_MONTHS_SECONDS * 3.0);
255
256        // With 50% overlap: windows at 0, 0.5×6mo, 1×6mo, 1.5×6mo, 2×6mo
257        assert_eq!(windows.len(), 5);
258    }
259
260    #[test]
261    fn test_window_correlation_computation() {
262        let mut store = FeatureStore::new().unwrap();
263
264        // Create test features with different timestamps
265        for i in 0..10 {
266            let f = CommitFeatures {
267                defect_category: 1,
268                files_changed: (i + 1) as f32,
269                lines_added: (i * 10) as f32,
270                lines_deleted: (i * 5) as f32,
271                complexity_delta: (i as f32) * 0.5,
272                timestamp: (i * 1000) as f64,
273                hour_of_day: 10,
274                day_of_week: 1,
275                ..Default::default()
276            };
277            store.insert(f).unwrap();
278        }
279
280        let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
281        let window = TimeWindow::new(0.0, 5000.0);
282
283        let result = analyzer
284            .compute_window_correlation(&store, &window)
285            .unwrap();
286
287        // Should have 8×8 correlation matrix
288        assert_eq!(result.matrix.len(), CommitFeatures::DIMENSION);
289        assert_eq!(result.matrix[0].len(), CommitFeatures::DIMENSION);
290
291        // Diagonal should be 1.0
292        for i in 0..CommitFeatures::DIMENSION {
293            assert!((result.matrix[i][i] - 1.0).abs() < 1e-6);
294        }
295    }
296
297    #[test]
298    fn test_window_contains_boundaries() {
299        let window = TimeWindow::new(1000.0, 2000.0);
300
301        // Start boundary (inclusive)
302        assert!(window.contains(1000.0));
303
304        // End boundary (exclusive)
305        assert!(!window.contains(2000.0));
306
307        // Before window
308        assert!(!window.contains(999.9));
309
310        // After window
311        assert!(!window.contains(2000.1));
312    }
313
314    #[test]
315    fn test_empty_store_compute_all_windows() {
316        let store = FeatureStore::new().unwrap();
317        let analyzer = SlidingWindowAnalyzer::new_six_month();
318
319        let result = analyzer.compute_all_windows(&store);
320        assert!(result.is_err());
321        assert!(result
322            .unwrap_err()
323            .to_string()
324            .contains("No features in store"));
325    }
326
327    #[test]
328    fn test_window_with_no_features() {
329        let mut store = FeatureStore::new().unwrap();
330
331        // Add features outside the window
332        let f = CommitFeatures {
333            defect_category: 1,
334            files_changed: 5.0,
335            lines_added: 50.0,
336            lines_deleted: 20.0,
337            complexity_delta: 0.5,
338            timestamp: 10000.0, // Way outside window
339            hour_of_day: 10,
340            day_of_week: 1,
341            ..Default::default()
342        };
343        store.insert(f).unwrap();
344
345        let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
346        let window = TimeWindow::new(0.0, 5000.0); // Features are at 10000.0
347
348        let result = analyzer.compute_window_correlation(&store, &window);
349        assert!(result.is_err());
350        assert!(result
351            .unwrap_err()
352            .to_string()
353            .contains("No features in window"));
354    }
355
356    #[test]
357    fn test_detect_drift_with_no_matrices() {
358        let matrices = Vec::new();
359        let drifts = detect_drift(&matrices, 0.5).unwrap();
360        assert_eq!(drifts.len(), 0);
361    }
362
363    #[test]
364    fn test_detect_drift_with_one_matrix() {
365        let matrix = WindowedCorrelationMatrix {
366            window: TimeWindow::new(0.0, 1000.0),
367            matrix: vec![vec![1.0; 8]; 8],
368            feature_count: 10,
369        };
370
371        let drifts = detect_drift(&[matrix], 0.5).unwrap();
372        assert_eq!(drifts.len(), 0);
373    }
374
375    #[test]
376    fn test_detect_drift_identical_matrices() {
377        let matrix1 = WindowedCorrelationMatrix {
378            window: TimeWindow::new(0.0, 1000.0),
379            matrix: vec![vec![1.0; 8]; 8],
380            feature_count: 10,
381        };
382
383        let matrix2 = WindowedCorrelationMatrix {
384            window: TimeWindow::new(1000.0, 2000.0),
385            matrix: vec![vec![1.0; 8]; 8],
386            feature_count: 10,
387        };
388
389        let drifts = detect_drift(&[matrix1, matrix2], 0.5).unwrap();
390        assert_eq!(drifts.len(), 1);
391        assert!(!drifts[0].is_significant); // No difference
392        assert_eq!(drifts[0].matrix_diff, 0.0);
393    }
394
395    #[test]
396    fn test_detect_drift_different_matrices() {
397        let mut matrix1_data = vec![vec![1.0; 8]; 8];
398        matrix1_data[0][1] = 0.5; // Change one value
399
400        let matrix1 = WindowedCorrelationMatrix {
401            window: TimeWindow::new(0.0, 1000.0),
402            matrix: matrix1_data,
403            feature_count: 10,
404        };
405
406        let matrix2 = WindowedCorrelationMatrix {
407            window: TimeWindow::new(1000.0, 2000.0),
408            matrix: vec![vec![1.0; 8]; 8],
409            feature_count: 10,
410        };
411
412        let drifts = detect_drift(&[matrix1, matrix2], 0.01).unwrap();
413        assert_eq!(drifts.len(), 1);
414        assert!(drifts[0].is_significant); // Difference above threshold
415        assert!(drifts[0].matrix_diff > 0.0);
416    }
417
418    #[test]
419    fn test_detect_drift_multiple_windows() {
420        let mat1 = WindowedCorrelationMatrix {
421            window: TimeWindow::new(0.0, 1000.0),
422            matrix: vec![vec![1.0; 8]; 8],
423            feature_count: 10,
424        };
425
426        let mat2 = WindowedCorrelationMatrix {
427            window: TimeWindow::new(1000.0, 2000.0),
428            matrix: vec![vec![0.9; 8]; 8],
429            feature_count: 10,
430        };
431
432        let mat3 = WindowedCorrelationMatrix {
433            window: TimeWindow::new(2000.0, 3000.0),
434            matrix: vec![vec![0.8; 8]; 8],
435            feature_count: 10,
436        };
437
438        let drifts = detect_drift(&[mat1, mat2, mat3], 0.1).unwrap();
439        assert_eq!(drifts.len(), 2); // Two transitions
440        assert_eq!(drifts[0].window1_idx, 0);
441        assert_eq!(drifts[0].window2_idx, 1);
442        assert_eq!(drifts[1].window1_idx, 1);
443        assert_eq!(drifts[1].window2_idx, 2);
444    }
445
446    #[test]
447    fn test_custom_analyzer_creation() {
448        let analyzer = SlidingWindowAnalyzer::new(1000.0, 500.0);
449        let windows = analyzer.generate_windows(0.0, 3000.0);
450
451        // Windows: [0-1000], [500-1500], [1000-2000], [1500-2500], [2000-3000]
452        assert_eq!(windows.len(), 5);
453        assert_eq!(windows[0].start_time, 0.0);
454        assert_eq!(windows[0].end_time, 1000.0);
455        assert_eq!(windows[1].start_time, 500.0);
456    }
457
458    #[test]
459    fn test_generate_windows_no_full_window_at_end() {
460        let analyzer = SlidingWindowAnalyzer::new(1000.0, 500.0);
461        let windows = analyzer.generate_windows(0.0, 1500.0);
462
463        // Windows: [0-1000], [500-1500]
464        // No [1000-2000] because end_time is only 1500
465        assert_eq!(windows.len(), 2);
466    }
467
468    #[test]
469    fn test_compute_all_windows_skips_empty_windows() {
470        let mut store = FeatureStore::new().unwrap();
471
472        // Add features only in first half of time range
473        for i in 0..5 {
474            let f = CommitFeatures {
475                defect_category: 1,
476                files_changed: (i + 1) as f32,
477                lines_added: (i * 10) as f32,
478                lines_deleted: (i * 5) as f32,
479                complexity_delta: (i as f32) * 0.5,
480                timestamp: (i * 1000) as f64, // 0-4000
481                hour_of_day: 10,
482                day_of_week: 1,
483                ..Default::default()
484            };
485            store.insert(f).unwrap();
486        }
487
488        let analyzer = SlidingWindowAnalyzer::new(3000.0, 1500.0);
489        let results = analyzer.compute_all_windows(&store).unwrap();
490
491        // Only windows with data should be in results
492        assert!(!results.is_empty());
493        assert!(results.len() <= 3); // Won't have empty windows
494    }
495
496    #[test]
497    fn test_concept_drift_structure() {
498        let drift = ConceptDrift {
499            window1_idx: 0,
500            window2_idx: 1,
501            matrix_diff: 0.75,
502            is_significant: true,
503        };
504
505        assert_eq!(drift.window1_idx, 0);
506        assert_eq!(drift.window2_idx, 1);
507        assert_eq!(drift.matrix_diff, 0.75);
508        assert!(drift.is_significant);
509    }
510
511    #[test]
512    fn test_windowed_correlation_matrix_structure() {
513        let wcm = WindowedCorrelationMatrix {
514            window: TimeWindow::new(0.0, 1000.0),
515            matrix: vec![vec![1.0; 8]; 8],
516            feature_count: 42,
517        };
518
519        assert_eq!(wcm.window.start_time, 0.0);
520        assert_eq!(wcm.window.end_time, 1000.0);
521        assert_eq!(wcm.matrix.len(), 8);
522        assert_eq!(wcm.feature_count, 42);
523    }
524
525    #[test]
526    fn test_six_months_constant() {
527        // 6 months * 30 days * 24 hours * 3600 seconds
528        let expected = 6.0 * 30.0 * 24.0 * 3600.0;
529        assert_eq!(SIX_MONTHS_SECONDS, expected);
530        assert_eq!(SIX_MONTHS_SECONDS, 15_552_000.0);
531    }
532}