use crate::correlation::pearson_correlation;
use crate::features::CommitFeatures;
use crate::storage::FeatureStore;
use anyhow::Result;
use trueno::Vector;
/// Length of a nominal six-month span in seconds, counting every month as 30 days.
pub const SIX_MONTHS_SECONDS: f64 = 6.0 * 30.0 * 24.0 * 3600.0;

/// A half-open time interval `[start_time, end_time)`.
#[derive(Debug, Clone)]
pub struct TimeWindow {
    /// Inclusive lower bound of the interval.
    pub start_time: f64,
    /// Exclusive upper bound of the interval.
    pub end_time: f64,
}

impl TimeWindow {
    /// Builds a window from explicit bounds. The bounds are stored as given;
    /// no ordering check is performed.
    pub fn new(start_time: f64, end_time: f64) -> Self {
        TimeWindow {
            start_time,
            end_time,
        }
    }

    /// Builds a window that begins at `start_time` and spans
    /// [`SIX_MONTHS_SECONDS`].
    pub fn six_months_from(start_time: f64) -> Self {
        Self::new(start_time, start_time + SIX_MONTHS_SECONDS)
    }

    /// Reports whether `timestamp` lies inside the window: the start bound is
    /// included, the end bound is not.
    pub fn contains(&self, timestamp: f64) -> bool {
        (self.start_time..self.end_time).contains(&timestamp)
    }

    /// Width of the window, i.e. `end_time - start_time`.
    pub fn duration(&self) -> f64 {
        let Self {
            start_time,
            end_time,
        } = self;
        end_time - start_time
    }
}
/// A feature correlation matrix together with the time window it describes.
#[derive(Debug, Clone)]
pub struct WindowedCorrelationMatrix {
    /// The time window the matrix belongs to.
    pub window: TimeWindow,
    /// Correlation coefficients stored as rows of columns: `matrix[i][j]` is
    /// the entry for feature dimensions `i` and `j`.
    pub matrix: Vec<Vec<f32>>,
    /// Number of feature samples that contributed to the matrix.
    pub feature_count: usize,
}
/// Produces fixed-size sliding time windows and computes a feature
/// correlation matrix for each of them.
#[derive(Debug, Clone)]
pub struct SlidingWindowAnalyzer {
    /// Length of every generated window, in the same unit as the timestamps.
    window_size: f64,
    /// Distance between the start times of two consecutive windows.
    stride: f64,
}

impl SlidingWindowAnalyzer {
    /// Creates an analyzer whose windows span [`SIX_MONTHS_SECONDS`] and
    /// advance by half that amount, so consecutive windows overlap by 50%.
    pub fn new_six_month() -> Self {
        Self {
            window_size: SIX_MONTHS_SECONDS,
            stride: SIX_MONTHS_SECONDS / 2.0,
        }
    }

    /// Creates an analyzer with a custom window size and stride.
    ///
    /// The values are stored unchecked. [`Self::generate_windows`] returns no
    /// windows for a configuration that could never make progress (for
    /// example a zero or negative stride).
    pub fn new(window_size: f64, stride: f64) -> Self {
        Self {
            window_size,
            stride,
        }
    }

    /// Generates every window of `window_size` that fits completely inside
    /// `[start_time, end_time]`, starting at `start_time` and advancing by
    /// `stride`. A trailing partial window is not emitted.
    ///
    /// Returns an empty vector when the stride is zero, negative or NaN, or
    /// when `start_time`, `end_time` or the window size is not finite. Such
    /// inputs would otherwise keep the loop running forever or give it no
    /// well-defined range.
    pub fn generate_windows(&self, start_time: f64, end_time: f64) -> Vec<TimeWindow> {
        let mut windows = Vec::new();

        // `stride > 0.0` is false for zero, negative and NaN strides alike.
        let stride_advances = self.stride > 0.0;
        if !stride_advances
            || !self.window_size.is_finite()
            || !start_time.is_finite()
            || !end_time.is_finite()
        {
            return windows;
        }

        let mut current_start = start_time;
        while current_start + self.window_size <= end_time {
            windows.push(TimeWindow::new(
                current_start,
                current_start + self.window_size,
            ));

            let next_start = current_start + self.stride;
            // A stride that is tiny relative to `current_start` can be lost to
            // floating-point rounding; stop instead of emitting the same
            // window forever.
            if next_start <= current_start {
                break;
            }
            current_start = next_start;
        }
        windows
    }

    /// Computes the pairwise Pearson correlation matrix of all feature
    /// dimensions for the features that `store` returns for `window`.
    ///
    /// The diagonal is set to `1.0` directly; every off-diagonal entry is
    /// obtained from `pearson_correlation`.
    ///
    /// # Errors
    ///
    /// Fails when the store query fails, when the query returns no features
    /// for the window, or when `pearson_correlation` reports an error.
    pub fn compute_window_correlation(
        &self,
        store: &FeatureStore,
        window: &TimeWindow,
    ) -> Result<WindowedCorrelationMatrix> {
        let features = store.query_by_time_range(window.start_time, window.end_time)?;
        if features.is_empty() {
            anyhow::bail!(
                "No features in window [{}, {})",
                window.start_time,
                window.end_time
            );
        }

        let vectors: Vec<Vec<f32>> = features.iter().map(|f| f.to_vector()).collect();
        let n_samples = vectors.len();
        let n_dims = CommitFeatures::DIMENSION;

        // Transpose sample-major vectors into one array per feature dimension.
        let mut dim_arrays: Vec<Vec<f32>> = (0..n_dims)
            .map(|_| Vec::with_capacity(n_samples))
            .collect();
        for v in &vectors {
            for (dim_idx, &value) in v.iter().enumerate() {
                dim_arrays[dim_idx].push(value);
            }
        }

        // Build each dimension's vector once instead of once per matrix cell.
        let columns: Vec<_> = dim_arrays
            .iter()
            .map(|column| Vector::from_slice(column.as_slice()))
            .collect();

        let mut matrix = vec![vec![0.0; n_dims]; n_dims];
        for (i, row) in matrix.iter_mut().enumerate() {
            for (j, cell) in row.iter_mut().enumerate() {
                *cell = if i == j {
                    1.0
                } else {
                    pearson_correlation(&columns[i], &columns[j])?
                };
            }
        }

        Ok(WindowedCorrelationMatrix {
            window: window.clone(),
            matrix,
            feature_count: n_samples,
        })
    }

    /// Computes a correlation matrix for every sliding window between the
    /// smallest and the largest timestamp found in `store`.
    ///
    /// Windows whose correlation cannot be computed are skipped, so the
    /// result may contain fewer entries than there are windows (possibly
    /// none).
    ///
    /// # Errors
    ///
    /// Fails only when the store contains no features at all.
    pub fn compute_all_windows(
        &self,
        store: &FeatureStore,
    ) -> Result<Vec<WindowedCorrelationMatrix>> {
        let all_features = store.all_features();
        if all_features.is_empty() {
            anyhow::bail!("No features in store");
        }

        // Single pass over the timestamps to find the overall time range.
        let (start_time, end_time) = all_features.iter().map(|f| f.timestamp).fold(
            (f64::INFINITY, f64::NEG_INFINITY),
            |(earliest, latest), t| (earliest.min(t), latest.max(t)),
        );

        let mut results = Vec::new();
        for window in self.generate_windows(start_time, end_time) {
            match self.compute_window_correlation(store, &window) {
                Ok(wcm) => results.push(wcm),
                // NOTE(review): this skips the window on *any* error, not only
                // on "No features in window"; query or correlation failures
                // are dropped silently as well.
                Err(_) => continue,
            }
        }
        Ok(results)
    }
}
/// Result of comparing the correlation matrices of two windows.
#[derive(Debug, Clone)]
pub struct ConceptDrift {
    /// Index of the first window of the compared pair.
    pub window1_idx: usize,
    /// Index of the second window of the compared pair.
    pub window2_idx: usize,
    /// Magnitude of the difference between the two correlation matrices.
    pub matrix_diff: f32,
    /// Whether the difference is considered significant.
    pub is_significant: bool,
}
pub fn detect_drift(
matrices: &[WindowedCorrelationMatrix],
threshold: f32,
) -> Result<Vec<ConceptDrift>> {
if matrices.len() < 2 {
return Ok(Vec::new());
}
let mut drifts = Vec::new();
for i in 0..matrices.len() - 1 {
let mat1 = &matrices[i].matrix;
let mat2 = &matrices[i + 1].matrix;
let mut sum_sq_diff = 0.0;
for (row1, row2) in mat1.iter().zip(mat2.iter()) {
for (&val1, &val2) in row1.iter().zip(row2.iter()) {
let diff = val1 - val2;
sum_sq_diff += diff * diff;
}
}
let frobenius_norm = sum_sq_diff.sqrt();
drifts.push(ConceptDrift {
window1_idx: i,
window2_idx: i + 1,
matrix_diff: frobenius_norm,
is_significant: frobenius_norm > threshold,
});
}
Ok(drifts)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `i`-th synthetic commit; its numeric features and its
    /// timestamp (`i * 1000`) all grow linearly with `i`.
    fn synthetic_commit(i: i32) -> CommitFeatures {
        CommitFeatures {
            defect_category: 1,
            files_changed: (i + 1) as f32,
            lines_added: (i * 10) as f32,
            lines_deleted: (i * 5) as f32,
            complexity_delta: (i as f32) * 0.5,
            timestamp: (i * 1000) as f64,
            hour_of_day: 10,
            day_of_week: 1,
            ..Default::default()
        }
    }

    /// Creates a store holding synthetic commits `0..count`.
    fn store_with_commits(count: i32) -> FeatureStore {
        let mut store = FeatureStore::new().unwrap();
        for i in 0..count {
            store.insert(synthetic_commit(i)).unwrap();
        }
        store
    }

    /// Creates an 8x8 matrix filled with `fill` for the window `[start, end)`
    /// and a feature count of 10.
    fn uniform_matrix(start: f64, end: f64, fill: f32) -> WindowedCorrelationMatrix {
        WindowedCorrelationMatrix {
            window: TimeWindow::new(start, end),
            matrix: vec![vec![fill; 8]; 8],
            feature_count: 10,
        }
    }

    #[test]
    fn test_time_window_creation() {
        let w = TimeWindow::new(1000.0, 2000.0);
        assert_eq!(w.duration(), 1000.0);
        assert!(w.contains(1500.0));
        assert!(!w.contains(2500.0));
    }

    #[test]
    fn test_six_month_window() {
        let w = TimeWindow::six_months_from(0.0);
        assert_eq!(w.duration(), SIX_MONTHS_SECONDS);
    }

    #[test]
    fn test_generate_windows() {
        let generated =
            SlidingWindowAnalyzer::new_six_month().generate_windows(0.0, SIX_MONTHS_SECONDS * 3.0);
        assert_eq!(generated.len(), 5);
    }

    #[test]
    fn test_window_correlation_computation() {
        let store = store_with_commits(10);
        let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
        let window = TimeWindow::new(0.0, 5000.0);

        let wcm = analyzer
            .compute_window_correlation(&store, &window)
            .unwrap();

        assert_eq!(wcm.matrix.len(), CommitFeatures::DIMENSION);
        assert_eq!(wcm.matrix[0].len(), CommitFeatures::DIMENSION);
        // Every feature must correlate perfectly with itself.
        for (i, row) in wcm.matrix.iter().enumerate() {
            assert!((row[i] - 1.0).abs() < 1e-6);
        }
    }

    #[test]
    fn test_window_contains_boundaries() {
        let w = TimeWindow::new(1000.0, 2000.0);
        assert!(w.contains(1000.0));
        assert!(!w.contains(2000.0));
        assert!(!w.contains(999.9));
        assert!(!w.contains(2000.1));
    }

    #[test]
    fn test_empty_store_compute_all_windows() {
        let store = FeatureStore::new().unwrap();
        let outcome = SlidingWindowAnalyzer::new_six_month().compute_all_windows(&store);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No features in store"));
    }

    #[test]
    fn test_window_with_no_features() {
        let mut store = FeatureStore::new().unwrap();
        // The only commit lies outside the queried window.
        store
            .insert(CommitFeatures {
                defect_category: 1,
                files_changed: 5.0,
                lines_added: 50.0,
                lines_deleted: 20.0,
                complexity_delta: 0.5,
                timestamp: 10000.0,
                hour_of_day: 10,
                day_of_week: 1,
                ..Default::default()
            })
            .unwrap();

        let analyzer = SlidingWindowAnalyzer::new(5000.0, 2500.0);
        let window = TimeWindow::new(0.0, 5000.0);
        let outcome = analyzer.compute_window_correlation(&store, &window);

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("No features in window"));
    }

    #[test]
    fn test_detect_drift_with_no_matrices() {
        let drifts = detect_drift(&[], 0.5).unwrap();
        assert_eq!(drifts.len(), 0);
    }

    #[test]
    fn test_detect_drift_with_one_matrix() {
        let only = uniform_matrix(0.0, 1000.0, 1.0);
        let drifts = detect_drift(&[only], 0.5).unwrap();
        assert_eq!(drifts.len(), 0);
    }

    #[test]
    fn test_detect_drift_identical_matrices() {
        let pair = [
            uniform_matrix(0.0, 1000.0, 1.0),
            uniform_matrix(1000.0, 2000.0, 1.0),
        ];
        let drifts = detect_drift(&pair, 0.5).unwrap();
        assert_eq!(drifts.len(), 1);
        assert!(!drifts[0].is_significant);
        assert_eq!(drifts[0].matrix_diff, 0.0);
    }

    #[test]
    fn test_detect_drift_different_matrices() {
        let mut first = uniform_matrix(0.0, 1000.0, 1.0);
        first.matrix[0][1] = 0.5;
        let second = uniform_matrix(1000.0, 2000.0, 1.0);

        let drifts = detect_drift(&[first, second], 0.01).unwrap();
        assert_eq!(drifts.len(), 1);
        assert!(drifts[0].is_significant);
        assert!(drifts[0].matrix_diff > 0.0);
    }

    #[test]
    fn test_detect_drift_multiple_windows() {
        let sequence = [
            uniform_matrix(0.0, 1000.0, 1.0),
            uniform_matrix(1000.0, 2000.0, 0.9),
            uniform_matrix(2000.0, 3000.0, 0.8),
        ];
        let drifts = detect_drift(&sequence, 0.1).unwrap();
        assert_eq!(drifts.len(), 2);
        assert_eq!(drifts[0].window1_idx, 0);
        assert_eq!(drifts[0].window2_idx, 1);
        assert_eq!(drifts[1].window1_idx, 1);
        assert_eq!(drifts[1].window2_idx, 2);
    }

    #[test]
    fn test_custom_analyzer_creation() {
        let generated = SlidingWindowAnalyzer::new(1000.0, 500.0).generate_windows(0.0, 3000.0);
        assert_eq!(generated.len(), 5);
        assert_eq!(generated[0].start_time, 0.0);
        assert_eq!(generated[0].end_time, 1000.0);
        assert_eq!(generated[1].start_time, 500.0);
    }

    #[test]
    fn test_generate_windows_no_full_window_at_end() {
        let generated = SlidingWindowAnalyzer::new(1000.0, 500.0).generate_windows(0.0, 1500.0);
        assert_eq!(generated.len(), 2);
    }

    #[test]
    fn test_compute_all_windows_skips_empty_windows() {
        let store = store_with_commits(5);
        let analyzer = SlidingWindowAnalyzer::new(3000.0, 1500.0);
        let results = analyzer.compute_all_windows(&store).unwrap();
        assert!(!results.is_empty());
        assert!(results.len() <= 3);
    }

    #[test]
    fn test_concept_drift_structure() {
        let drift = ConceptDrift {
            window1_idx: 0,
            window2_idx: 1,
            matrix_diff: 0.75,
            is_significant: true,
        };
        assert_eq!(drift.window1_idx, 0);
        assert_eq!(drift.window2_idx, 1);
        assert_eq!(drift.matrix_diff, 0.75);
        assert!(drift.is_significant);
    }

    #[test]
    fn test_windowed_correlation_matrix_structure() {
        let wcm = WindowedCorrelationMatrix {
            window: TimeWindow::new(0.0, 1000.0),
            matrix: vec![vec![1.0; 8]; 8],
            feature_count: 42,
        };
        assert_eq!(wcm.window.start_time, 0.0);
        assert_eq!(wcm.window.end_time, 1000.0);
        assert_eq!(wcm.matrix.len(), 8);
        assert_eq!(wcm.feature_count, 42);
    }

    #[test]
    fn test_six_months_constant() {
        let by_formula = 6.0 * 30.0 * 24.0 * 3600.0;
        assert_eq!(SIX_MONTHS_SECONDS, by_formula);
        assert_eq!(SIX_MONTHS_SECONDS, 15_552_000.0);
    }
}