1use crate::correlation::pearson_correlation;
7use crate::features::CommitFeatures;
8use crate::storage::FeatureStore;
9use anyhow::Result;
10use trueno::Vector;
11
12pub const SIX_MONTHS_SECONDS: f64 = 6.0 * 30.0 * 24.0 * 3600.0;
14
15#[derive(Debug, Clone)]
17pub struct TimeWindow {
18 pub start_time: f64, pub end_time: f64, }
21
22impl TimeWindow {
23 pub fn new(start_time: f64, end_time: f64) -> Self {
25 Self {
26 start_time,
27 end_time,
28 }
29 }
30
31 pub fn six_months_from(start_time: f64) -> Self {
33 Self {
34 start_time,
35 end_time: start_time + SIX_MONTHS_SECONDS,
36 }
37 }
38
39 pub fn contains(&self, timestamp: f64) -> bool {
41 timestamp >= self.start_time && timestamp < self.end_time
42 }
43
44 pub fn duration(&self) -> f64 {
46 self.end_time - self.start_time
47 }
48}
49
50#[derive(Debug, Clone)]
52pub struct WindowedCorrelationMatrix {
53 pub window: TimeWindow,
54 pub matrix: Vec<Vec<f32>>, pub feature_count: usize, }
57
58pub struct SlidingWindowAnalyzer {
60 window_size: f64, stride: f64, }
63
64impl SlidingWindowAnalyzer {
65 pub fn new_six_month() -> Self {
67 Self {
68 window_size: SIX_MONTHS_SECONDS,
69 stride: SIX_MONTHS_SECONDS / 2.0,
70 }
71 }
72
73 pub fn new(window_size: f64, stride: f64) -> Self {
75 Self {
76 window_size,
77 stride,
78 }
79 }
80
81 pub fn generate_windows(&self, start_time: f64, end_time: f64) -> Vec<TimeWindow> {
83 let mut windows = Vec::new();
84 let mut current_start = start_time;
85
86 while current_start + self.window_size <= end_time {
87 windows.push(TimeWindow::new(
88 current_start,
89 current_start + self.window_size,
90 ));
91 current_start += self.stride;
92 }
93
94 windows
95 }
96
97 pub fn compute_window_correlation(
101 &self,
102 store: &FeatureStore,
103 window: &TimeWindow,
104 ) -> Result<WindowedCorrelationMatrix> {
105 let features = store.query_by_time_range(window.start_time, window.end_time)?;
107
108 if features.is_empty() {
109 anyhow::bail!(
110 "No features in window [{}, {})",
111 window.start_time,
112 window.end_time
113 );
114 }
115
116 let vectors: Vec<Vec<f32>> = features.iter().map(|f| f.to_vector()).collect();
118 let n_samples = vectors.len();
119 let n_dims = CommitFeatures::DIMENSION;
120
121 let mut dim_arrays: Vec<Vec<f32>> = vec![Vec::new(); n_dims];
123 for v in &vectors {
124 for (dim_idx, &value) in v.iter().enumerate() {
125 dim_arrays[dim_idx].push(value);
126 }
127 }
128
129 let mut matrix = vec![vec![0.0; n_dims]; n_dims];
131 for i in 0..n_dims {
132 for j in 0..n_dims {
133 if i == j {
134 matrix[i][j] = 1.0; } else {
136 let vec_i = Vector::from_slice(&dim_arrays[i]);
137 let vec_j = Vector::from_slice(&dim_arrays[j]);
138 matrix[i][j] = pearson_correlation(&vec_i, &vec_j)?;
139 }
140 }
141 }
142
143 Ok(WindowedCorrelationMatrix {
144 window: window.clone(),
145 matrix,
146 feature_count: n_samples,
147 })
148 }
149
150 pub fn compute_all_windows(
152 &self,
153 store: &FeatureStore,
154 ) -> Result<Vec<WindowedCorrelationMatrix>> {
155 let all_features = store.all_features();
157 if all_features.is_empty() {
158 anyhow::bail!("No features in store");
159 }
160
161 let start_time = all_features
162 .iter()
163 .map(|f| f.timestamp)
164 .fold(f64::INFINITY, f64::min);
165 let end_time = all_features
166 .iter()
167 .map(|f| f.timestamp)
168 .fold(f64::NEG_INFINITY, f64::max);
169
170 let windows = self.generate_windows(start_time, end_time);
172
173 let mut results = Vec::new();
175 for window in windows {
176 match self.compute_window_correlation(store, &window) {
177 Ok(wcm) => results.push(wcm),
178 Err(_) => continue, }
180 }
181
182 Ok(results)
183 }
184}
185
186#[derive(Debug, Clone)]
188pub struct ConceptDrift {
189 pub window1_idx: usize,
190 pub window2_idx: usize,
191 pub matrix_diff: f32, pub is_significant: bool, }
194
195pub fn detect_drift(
199 matrices: &[WindowedCorrelationMatrix],
200 threshold: f32,
201) -> Result<Vec<ConceptDrift>> {
202 if matrices.len() < 2 {
203 return Ok(Vec::new());
204 }
205
206 let mut drifts = Vec::new();
207
208 for i in 0..matrices.len() - 1 {
209 let mat1 = &matrices[i].matrix;
210 let mat2 = &matrices[i + 1].matrix;
211
212 let mut sum_sq_diff = 0.0;
214 for (row1, row2) in mat1.iter().zip(mat2.iter()) {
215 for (&val1, &val2) in row1.iter().zip(row2.iter()) {
216 let diff = val1 - val2;
217 sum_sq_diff += diff * diff;
218 }
219 }
220 let frobenius_norm = sum_sq_diff.sqrt();
221
222 drifts.push(ConceptDrift {
223 window1_idx: i,
224 window2_idx: i + 1,
225 matrix_diff: frobenius_norm,
226 is_significant: frobenius_norm > threshold,
227 });
228 }
229
230 Ok(drifts)
231}
232
233#[cfg(test)]
234mod tests {
235 use super::*;
236
237 #[test]
238 fn test_time_window_creation() {
239 let window = TimeWindow::new(1000.0, 2000.0);
240 assert_eq!(window.duration(), 1000.0);
241 assert!(window.contains(1500.0));
242 assert!(!window.contains(2500.0));
243 }
244
245 #[test]
246 fn test_six_month_window() {
247 let window = TimeWindow::six_months_from(0.0);
248 assert_eq!(window.duration(), SIX_MONTHS_SECONDS);
249 }
250
251 #[test]
252 fn test_generate_windows() {
253 let analyzer = SlidingWindowAnalyzer::new_six_month();
254 let windows = analyzer.generate_windows(0.0, SIX_MONTHS_SECONDS * 3.0);
255
256 assert_eq!(windows.len(), 5);
258 }
259
260 #[test]
261 fn test_window_correlation_computation() {
262 let mut store = FeatureStore::new().unwrap();
263
264 for i in 0..10 {
266 let f = CommitFeatures {
267 defect_category: 1,
268 files_changed: (i + 1) as f32,
269 lines_added: (i * 10) as f32,
270 lines_deleted: (i * 5) as f32,
271 complexity_delta: (i as f32) * 0.5,
272 timestamp: (i * 1000) as f64,
273 hour_of_day: 10,
274 day_of_week: 1,
275 ..Default::default()
276 };
277 store.insert(f).unwrap();
278 }
279
280 let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
281 let window = TimeWindow::new(0.0, 5000.0);
282
283 let result = analyzer
284 .compute_window_correlation(&store, &window)
285 .unwrap();
286
287 assert_eq!(result.matrix.len(), CommitFeatures::DIMENSION);
289 assert_eq!(result.matrix[0].len(), CommitFeatures::DIMENSION);
290
291 for i in 0..CommitFeatures::DIMENSION {
293 assert!((result.matrix[i][i] - 1.0).abs() < 1e-6);
294 }
295 }
296
297 #[test]
298 fn test_window_contains_boundaries() {
299 let window = TimeWindow::new(1000.0, 2000.0);
300
301 assert!(window.contains(1000.0));
303
304 assert!(!window.contains(2000.0));
306
307 assert!(!window.contains(999.9));
309
310 assert!(!window.contains(2000.1));
312 }
313
314 #[test]
315 fn test_empty_store_compute_all_windows() {
316 let store = FeatureStore::new().unwrap();
317 let analyzer = SlidingWindowAnalyzer::new_six_month();
318
319 let result = analyzer.compute_all_windows(&store);
320 assert!(result.is_err());
321 assert!(result
322 .unwrap_err()
323 .to_string()
324 .contains("No features in store"));
325 }
326
327 #[test]
328 fn test_window_with_no_features() {
329 let mut store = FeatureStore::new().unwrap();
330
331 let f = CommitFeatures {
333 defect_category: 1,
334 files_changed: 5.0,
335 lines_added: 50.0,
336 lines_deleted: 20.0,
337 complexity_delta: 0.5,
338 timestamp: 10000.0, hour_of_day: 10,
340 day_of_week: 1,
341 ..Default::default()
342 };
343 store.insert(f).unwrap();
344
345 let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
346 let window = TimeWindow::new(0.0, 5000.0); let result = analyzer.compute_window_correlation(&store, &window);
349 assert!(result.is_err());
350 assert!(result
351 .unwrap_err()
352 .to_string()
353 .contains("No features in window"));
354 }
355
356 #[test]
357 fn test_detect_drift_with_no_matrices() {
358 let matrices = Vec::new();
359 let drifts = detect_drift(&matrices, 0.5).unwrap();
360 assert_eq!(drifts.len(), 0);
361 }
362
363 #[test]
364 fn test_detect_drift_with_one_matrix() {
365 let matrix = WindowedCorrelationMatrix {
366 window: TimeWindow::new(0.0, 1000.0),
367 matrix: vec![vec![1.0; 8]; 8],
368 feature_count: 10,
369 };
370
371 let drifts = detect_drift(&[matrix], 0.5).unwrap();
372 assert_eq!(drifts.len(), 0);
373 }
374
375 #[test]
376 fn test_detect_drift_identical_matrices() {
377 let matrix1 = WindowedCorrelationMatrix {
378 window: TimeWindow::new(0.0, 1000.0),
379 matrix: vec![vec![1.0; 8]; 8],
380 feature_count: 10,
381 };
382
383 let matrix2 = WindowedCorrelationMatrix {
384 window: TimeWindow::new(1000.0, 2000.0),
385 matrix: vec![vec![1.0; 8]; 8],
386 feature_count: 10,
387 };
388
389 let drifts = detect_drift(&[matrix1, matrix2], 0.5).unwrap();
390 assert_eq!(drifts.len(), 1);
391 assert!(!drifts[0].is_significant); assert_eq!(drifts[0].matrix_diff, 0.0);
393 }
394
395 #[test]
396 fn test_detect_drift_different_matrices() {
397 let mut matrix1_data = vec![vec![1.0; 8]; 8];
398 matrix1_data[0][1] = 0.5; let matrix1 = WindowedCorrelationMatrix {
401 window: TimeWindow::new(0.0, 1000.0),
402 matrix: matrix1_data,
403 feature_count: 10,
404 };
405
406 let matrix2 = WindowedCorrelationMatrix {
407 window: TimeWindow::new(1000.0, 2000.0),
408 matrix: vec![vec![1.0; 8]; 8],
409 feature_count: 10,
410 };
411
412 let drifts = detect_drift(&[matrix1, matrix2], 0.01).unwrap();
413 assert_eq!(drifts.len(), 1);
414 assert!(drifts[0].is_significant); assert!(drifts[0].matrix_diff > 0.0);
416 }
417
418 #[test]
419 fn test_detect_drift_multiple_windows() {
420 let mat1 = WindowedCorrelationMatrix {
421 window: TimeWindow::new(0.0, 1000.0),
422 matrix: vec![vec![1.0; 8]; 8],
423 feature_count: 10,
424 };
425
426 let mat2 = WindowedCorrelationMatrix {
427 window: TimeWindow::new(1000.0, 2000.0),
428 matrix: vec![vec![0.9; 8]; 8],
429 feature_count: 10,
430 };
431
432 let mat3 = WindowedCorrelationMatrix {
433 window: TimeWindow::new(2000.0, 3000.0),
434 matrix: vec![vec![0.8; 8]; 8],
435 feature_count: 10,
436 };
437
438 let drifts = detect_drift(&[mat1, mat2, mat3], 0.1).unwrap();
439 assert_eq!(drifts.len(), 2); assert_eq!(drifts[0].window1_idx, 0);
441 assert_eq!(drifts[0].window2_idx, 1);
442 assert_eq!(drifts[1].window1_idx, 1);
443 assert_eq!(drifts[1].window2_idx, 2);
444 }
445
446 #[test]
447 fn test_custom_analyzer_creation() {
448 let analyzer = SlidingWindowAnalyzer::new(1000.0, 500.0);
449 let windows = analyzer.generate_windows(0.0, 3000.0);
450
451 assert_eq!(windows.len(), 5);
453 assert_eq!(windows[0].start_time, 0.0);
454 assert_eq!(windows[0].end_time, 1000.0);
455 assert_eq!(windows[1].start_time, 500.0);
456 }
457
458 #[test]
459 fn test_generate_windows_no_full_window_at_end() {
460 let analyzer = SlidingWindowAnalyzer::new(1000.0, 500.0);
461 let windows = analyzer.generate_windows(0.0, 1500.0);
462
463 assert_eq!(windows.len(), 2);
466 }
467
468 #[test]
469 fn test_compute_all_windows_skips_empty_windows() {
470 let mut store = FeatureStore::new().unwrap();
471
472 for i in 0..5 {
474 let f = CommitFeatures {
475 defect_category: 1,
476 files_changed: (i + 1) as f32,
477 lines_added: (i * 10) as f32,
478 lines_deleted: (i * 5) as f32,
479 complexity_delta: (i as f32) * 0.5,
480 timestamp: (i * 1000) as f64, hour_of_day: 10,
482 day_of_week: 1,
483 ..Default::default()
484 };
485 store.insert(f).unwrap();
486 }
487
488 let analyzer = SlidingWindowAnalyzer::new(3000.0, 1500.0);
489 let results = analyzer.compute_all_windows(&store).unwrap();
490
491 assert!(!results.is_empty());
493 assert!(results.len() <= 3); }
495
496 #[test]
497 fn test_concept_drift_structure() {
498 let drift = ConceptDrift {
499 window1_idx: 0,
500 window2_idx: 1,
501 matrix_diff: 0.75,
502 is_significant: true,
503 };
504
505 assert_eq!(drift.window1_idx, 0);
506 assert_eq!(drift.window2_idx, 1);
507 assert_eq!(drift.matrix_diff, 0.75);
508 assert!(drift.is_significant);
509 }
510
511 #[test]
512 fn test_windowed_correlation_matrix_structure() {
513 let wcm = WindowedCorrelationMatrix {
514 window: TimeWindow::new(0.0, 1000.0),
515 matrix: vec![vec![1.0; 8]; 8],
516 feature_count: 42,
517 };
518
519 assert_eq!(wcm.window.start_time, 0.0);
520 assert_eq!(wcm.window.end_time, 1000.0);
521 assert_eq!(wcm.matrix.len(), 8);
522 assert_eq!(wcm.feature_count, 42);
523 }
524
525 #[test]
526 fn test_six_months_constant() {
527 let expected = 6.0 * 30.0 * 24.0 * 3600.0;
529 assert_eq!(SIX_MONTHS_SECONDS, expected);
530 assert_eq!(SIX_MONTHS_SECONDS, 15_552_000.0);
531 }
532}