// batuta/stack/diagnostics_ml.rs
1//! ML-based Anomaly Detection and Forecasting
2//!
3//! This module provides machine learning components for stack diagnostics:
4//! - Isolation Forest for anomaly detection
5//! - Error Forecaster for time series prediction
6//!
7//! ## Toyota Way Principles
8//!
9//! - **Jidoka**: ML anomaly detection surfaces issues automatically
10//! - **Genchi Genbutsu**: Evidence-based diagnosis from actual data
11
12use super::diagnostics::{
13    Anomaly, AnomalyCategory, ComponentMetrics, ComponentNode, StackDiagnostics,
14};
15use serde::{Deserialize, Serialize};
16
17// ============================================================================
18// Feature extraction
19// ============================================================================
20
/// Feature indices into the 6-element vector built by `extract_features`
/// (layout: `[demo_score, coverage, mutation_score, complexity_avg, satd_count, dead_code_pct]`).
///
/// Only the indices referenced by `CATEGORY_RULES` get named constants;
/// slots 2 (mutation_score) and 4 (satd_count) are extracted but have no
/// rule keyed on them in this module.
const FEAT_DEMO_SCORE: usize = 0;
const FEAT_COVERAGE: usize = 1;
const FEAT_COMPLEXITY: usize = 3;
const FEAT_DEAD_CODE: usize = 5;
26
27/// Extract a 6-element feature vector from component metrics.
28///
29/// Layout: `[demo_score, coverage, mutation_score, complexity_avg, satd_count, dead_code_pct]`
30fn extract_features(metrics: &ComponentMetrics) -> Vec<f64> {
31    vec![
32        metrics.demo_score,
33        metrics.coverage,
34        metrics.mutation_score,
35        metrics.complexity_avg,
36        metrics.satd_count as f64,
37        metrics.dead_code_pct,
38    ]
39}
40
41// ============================================================================
42// Anomaly category dispatch table
43// ============================================================================
44
/// Rule for categorizing an anomaly based on a single feature threshold.
///
/// Evaluated against the 6-element vector from `extract_features`; see the
/// ordered `CATEGORY_RULES` table for all instances.
struct CategoryRule {
    /// Index into the feature vector this rule inspects (one of the `FEAT_*` constants).
    feature_index: usize,
    /// Boundary value the feature is compared against.
    threshold: f64,
    /// true = trigger when feature < threshold; false = trigger when feature > threshold
    below: bool,
    /// Category assigned when this rule fires.
    category: AnomalyCategory,
    /// Human-readable text; the literal `{val:.1}` is substituted by `render_description`.
    description_template: &'static str,
    /// Actionable remediation surfaced alongside the anomaly.
    recommendation: &'static str,
}
55
/// Ordered table of anomaly rules. First matching rule wins, so ordering
/// encodes priority: quality regression outranks coverage, which outranks
/// complexity and dead code.
const CATEGORY_RULES: &[CategoryRule] = &[
    // Low overall demo/quality score → quality regression.
    CategoryRule {
        feature_index: FEAT_DEMO_SCORE,
        threshold: 70.0,
        below: true,
        category: AnomalyCategory::QualityRegression,
        description_template: "Quality score {val:.1} is significantly below healthy threshold",
        recommendation: "Review recent changes for quality regressions",
    },
    // Low test coverage → coverage drop.
    CategoryRule {
        feature_index: FEAT_COVERAGE,
        threshold: 50.0,
        below: true,
        category: AnomalyCategory::CoverageDrop,
        description_template: "Test coverage {val:.1}% is dangerously low",
        recommendation: "Run `cargo tarpaulin` and add tests for uncovered paths",
    },
    // High average cyclomatic complexity → maintainability risk.
    CategoryRule {
        feature_index: FEAT_COMPLEXITY,
        threshold: 15.0,
        below: false,
        category: AnomalyCategory::ComplexityIncrease,
        description_template: "Average complexity {val:.1} indicates maintainability risk",
        recommendation: "Consider refactoring complex functions (>10 cyclomatic complexity)",
    },
    // High dead-code percentage → technical-debt / dependency risk.
    CategoryRule {
        feature_index: FEAT_DEAD_CODE,
        threshold: 10.0,
        below: false,
        category: AnomalyCategory::DependencyRisk,
        description_template: "Dead code {val:.1}% suggests technical debt accumulation",
        recommendation: "Run `cargo udeps` to identify and remove dead code",
    },
];
91
92/// Check whether a rule matches a given feature vector.
93fn rule_matches(rule: &CategoryRule, features: &[f64]) -> bool {
94    let val = features[rule.feature_index];
95    if rule.below {
96        val < rule.threshold
97    } else {
98        val > rule.threshold
99    }
100}
101
102/// Find the first matching rule for a feature vector.
103fn find_matching_rule(features: &[f64]) -> Option<&'static CategoryRule> {
104    CATEGORY_RULES.iter().find(|r| rule_matches(r, features))
105}
106
/// Render a description template, substituting the literal placeholder
/// `{val:.1}` with the selected feature value formatted to one decimal place.
///
/// Panics (like slice indexing) if `feature_index` is out of bounds.
fn render_description(template: &str, features: &[f64], feature_index: usize) -> String {
    let rendered = format!("{:.1}", features[feature_index]);
    template.replace("{val:.1}", &rendered)
}
112
113// ============================================================================
114// Simple PRNG (for reproducible isolation forest without external deps)
115// ============================================================================
116
/// Simple Linear Congruential Generator for reproducible randomness.
///
/// Not cryptographically secure; it exists only so the isolation forest is
/// deterministic for a given seed without external dependencies.
#[derive(Debug, Clone)]
struct SimpleRng {
    state: u64,
}

impl SimpleRng {
    /// Create a generator from a seed (XOR-scrambled so that seed 0 still
    /// yields a non-trivial starting state).
    fn seed_from_u64(seed: u64) -> Self {
        Self { state: seed ^ 0x5DEECE66D }
    }

    /// Advance the LCG and return the next 64-bit state.
    ///
    /// LCG parameters from Numerical Recipes. The low-order bits of a
    /// power-of-two-modulus LCG have very short periods (the lowest bit
    /// strictly alternates), so consumers must derive values from the
    /// high bits — see `gen_range` / `gen_range_f64`.
    fn next_u64(&mut self) -> u64 {
        self.state = self.state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407);
        self.state
    }

    /// Uniform integer in `[range.start, range.end)`; returns `range.start`
    /// for an empty range.
    ///
    /// Fixed: the previous `next_u64() % len` reduction was driven by the
    /// LCG's weak low-order bits, producing badly non-uniform (for len=2,
    /// strictly alternating) draws. This uses Lemire's multiply-shift
    /// reduction, which is driven by the statistically strong high bits
    /// and also avoids modulo bias.
    fn gen_range(&mut self, range: std::ops::Range<usize>) -> usize {
        if range.is_empty() {
            return range.start;
        }
        let len = (range.end - range.start) as u128;
        // Map the 64-bit draw into [0, len) via a 128-bit multiply.
        let offset = ((self.next_u64() as u128 * len) >> 64) as usize;
        range.start + offset
    }

    /// Uniform float in `[range.start, range.end)`.
    ///
    /// Uses the top 53 bits of the draw (the f64 mantissa width), so
    /// `t` lies in `[0, 1)` and `range.end` itself is never returned.
    fn gen_range_f64(&mut self, range: std::ops::Range<f64>) -> f64 {
        let t = (self.next_u64() >> 11) as f64 / (1u64 << 53) as f64;
        range.start + t * (range.end - range.start)
    }
}
147
148// ============================================================================
149// Isolation Forest (ML Anomaly Detection)
150// ============================================================================
151
/// Isolation Forest for anomaly detection
/// Implements a simplified version of the algorithm from Liu et al. (2008)
///
/// Build with `new`/`default_forest`, train with `fit`, then score with
/// `score`, `predict`, or `detect_anomalies`.
#[derive(Debug)]
pub struct IsolationForest {
    /// Number of trees in the forest
    n_trees: usize,
    /// Subsample size for each tree (also used to normalize anomaly scores)
    sample_size: usize,
    /// Random seed for reproducibility
    seed: u64,
    /// Trained isolation trees (empty until `fit` is called)
    trees: Vec<IsolationTree>,
    /// Feature names for interpretation
    /// (set via `with_feature_names`; not read by any method in this module)
    feature_names: Vec<String>,
}
167
impl IsolationForest {
    /// Create a new Isolation Forest
    ///
    /// The forest is untrained until `fit` is called; scoring an untrained
    /// forest returns all zeros.
    pub fn new(n_trees: usize, sample_size: usize, seed: u64) -> Self {
        Self { n_trees, sample_size, seed, trees: Vec::new(), feature_names: Vec::new() }
    }

    /// Default forest configuration (100 trees, subsample 256, seed 42)
    pub fn default_forest() -> Self {
        Self::new(100, 256, 42)
    }

    /// Set feature names for interpretability (builder-style)
    ///
    /// Note: the names are stored but not currently read elsewhere in this
    /// module.
    pub fn with_feature_names(mut self, names: Vec<String>) -> Self {
        self.feature_names = names;
        self
    }

    /// Fit the forest on data points
    /// Each row is a data point, each column is a feature
    ///
    /// Re-fitting clears previously built trees; empty `data` is a no-op.
    pub fn fit(&mut self, data: &[Vec<f64>]) {
        if data.is_empty() {
            return;
        }

        let mut rng = SimpleRng::seed_from_u64(self.seed);
        let n_samples = data.len();
        // Trees deeper than log2(sample_size) add little isolating power,
        // so growth is capped there (per Liu et al. 2008).
        let max_depth = (self.sample_size as f64).log2().ceil() as usize;

        self.trees.clear();

        for _ in 0..self.n_trees {
            // Sample data points (with replacement) for this tree.
            let sample: Vec<Vec<f64>> = (0..self.sample_size.min(n_samples))
                .map(|_| {
                    let idx = rng.gen_range(0..n_samples);
                    data[idx].clone()
                })
                .collect();

            // Build tree
            let tree = IsolationTree::build(&sample, max_depth, &mut rng);
            self.trees.push(tree);
        }
    }

    /// Compute anomaly scores for data points
    /// Returns scores in [0, 1] where higher = more anomalous
    ///
    /// Returns all zeros when the forest is unfitted or `data` is empty.
    ///
    /// NOTE(review): normalization uses `self.sample_size`, but `fit` may
    /// have sampled fewer rows (`sample_size.min(n_samples)`); on datasets
    /// smaller than `sample_size` the effective c(ψ) differs — confirm the
    /// intended normalization against Liu et al. (2008).
    pub fn score(&self, data: &[Vec<f64>]) -> Vec<f64> {
        if self.trees.is_empty() || data.is_empty() {
            return vec![0.0; data.len()];
        }

        let n = self.sample_size as f64;
        let c_n = average_path_length(n);

        data.iter()
            .map(|point| {
                // Mean path length of this point across all trees.
                let avg_path_length: f64 =
                    self.trees.iter().map(|tree| tree.path_length(point, 0) as f64).sum::<f64>()
                        / self.trees.len() as f64;

                // Anomaly score: 2^(-avg_path_length / c(n))
                // Higher score = more anomalous
                2.0_f64.powf(-avg_path_length / c_n)
            })
            .collect()
    }

    /// Predict anomalies with threshold (true = score strictly above threshold)
    pub fn predict(&self, data: &[Vec<f64>], threshold: f64) -> Vec<bool> {
        self.score(data).into_iter().map(|s| s > threshold).collect()
    }

    /// Detect anomalies in component metrics and return Anomaly objects
    ///
    /// Extracts one feature vector per component, scores it with the fitted
    /// forest, and turns each component scoring above `threshold` into an
    /// `Anomaly` (categorized via `CATEGORY_RULES`), sorted by descending
    /// score. Requires a prior `fit`; otherwise every score is 0 and the
    /// result is empty for any positive threshold.
    pub fn detect_anomalies(&self, diagnostics: &StackDiagnostics, threshold: f64) -> Vec<Anomaly> {
        let components: Vec<_> = diagnostics.components().collect();
        if components.is_empty() {
            return Vec::new();
        }

        // Extract feature vectors
        let data: Vec<Vec<f64>> = components.iter().map(|c| extract_features(&c.metrics)).collect();

        let scores = self.score(&data);
        let mut anomalies = Vec::new();

        for (i, (component, score)) in components.iter().zip(scores.iter()).enumerate() {
            if *score > threshold {
                let category = self.categorize_anomaly(&data[i]);
                let description = self.describe_anomaly(&data[i], &category);

                let mut anomaly =
                    Anomaly::new(component.name.clone(), *score, category, description);

                // Add evidence
                anomaly = anomaly
                    .with_evidence(format!("Isolation score: {:.3}", score))
                    .with_evidence(format!("Demo score: {:.1}", component.metrics.demo_score))
                    .with_evidence(format!("Coverage: {:.1}%", component.metrics.coverage));

                // Add recommendation
                let rec = self.recommend_action(&category, &data[i]);
                anomaly = anomaly.with_recommendation(rec);

                anomalies.push(anomaly);
            }
        }

        // Sort by score descending (NaN scores compare Equal, keeping order stable)
        anomalies
            .sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
        anomalies
    }

    /// Categorize the anomaly based on which features are most deviant
    ///
    /// Falls back to `Other` when the vector is shorter than the expected
    /// 6 features or when no rule fires.
    fn categorize_anomaly(&self, features: &[f64]) -> AnomalyCategory {
        if features.len() < 6 {
            return AnomalyCategory::Other;
        }
        find_matching_rule(features).map(|r| r.category).unwrap_or(AnomalyCategory::Other)
    }

    /// Generate human-readable description
    ///
    /// Re-runs rule matching and uses the rule's template only when its
    /// category agrees with `category`, so description and category stay
    /// consistent; otherwise returns a generic message.
    fn describe_anomaly(&self, features: &[f64], category: &AnomalyCategory) -> String {
        if features.len() < 6 {
            return "Unusual metric combination detected".to_string();
        }
        find_matching_rule(features)
            .filter(|r| r.category == *category)
            .map(|r| render_description(r.description_template, features, r.feature_index))
            .unwrap_or_else(|| "Unusual metric combination detected".to_string())
    }

    /// Generate actionable recommendation
    ///
    /// Falls back to the matching rule's recommendation (when its category
    /// agrees), or a generic review prompt.
    fn recommend_action(&self, category: &AnomalyCategory, features: &[f64]) -> String {
        // Special sub-rule: QualityRegression with low coverage gets a coverage-specific tip
        if *category == AnomalyCategory::QualityRegression
            && features.len() >= 6
            && features[FEAT_COVERAGE] < 80.0
        {
            return "Add tests to improve coverage above 80%".to_string();
        }

        find_matching_rule(features)
            .filter(|r| r.category == *category)
            .map(|r| r.recommendation.to_string())
            .unwrap_or_else(|| "Review component metrics for unusual patterns".to_string())
    }
}
317
/// A single isolation tree node
///
/// Built by `IsolationTree::build` from random axis-aligned splits; shorter
/// root-to-leaf paths mean a point was easier to isolate (more anomalous).
#[derive(Debug)]
enum IsolationTree {
    /// Internal node with split
    Internal {
        /// Feature index this node splits on.
        split_feature: usize,
        /// Points with feature value < split_value go left; others go right.
        split_value: f64,
        left: Box<IsolationTree>,
        right: Box<IsolationTree>,
    },
    /// External (leaf) node; `size` is the number of training points it absorbed.
    External { size: usize },
}
331
332impl IsolationTree {
333    /// Build an isolation tree from data
334    fn build(data: &[Vec<f64>], max_depth: usize, rng: &mut SimpleRng) -> Self {
335        if data.is_empty() {
336            return IsolationTree::External { size: 0 };
337        }
338
339        if max_depth == 0 || data.len() <= 1 {
340            return IsolationTree::External { size: data.len() };
341        }
342
343        let n_features = data[0].len();
344        if n_features == 0 {
345            return IsolationTree::External { size: data.len() };
346        }
347
348        // Random feature
349        let feature = rng.gen_range(0..n_features);
350
351        // Find min/max for this feature
352        let values: Vec<f64> = data.iter().filter_map(|row| row.get(feature).copied()).collect();
353        if values.is_empty() {
354            return IsolationTree::External { size: data.len() };
355        }
356
357        let min_val = values.iter().copied().fold(f64::INFINITY, f64::min);
358        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
359
360        if (max_val - min_val).abs() < f64::EPSILON {
361            return IsolationTree::External { size: data.len() };
362        }
363
364        // Random split value
365        let split_value = rng.gen_range_f64(min_val..max_val);
366
367        // Partition data
368        let (left_data, right_data): (Vec<_>, Vec<_>) = data
369            .iter()
370            .cloned()
371            .partition(|row| row.get(feature).is_some_and(|&v| v < split_value));
372
373        // Handle edge case where all data goes to one side
374        if left_data.is_empty() || right_data.is_empty() {
375            return IsolationTree::External { size: data.len() };
376        }
377
378        IsolationTree::Internal {
379            split_feature: feature,
380            split_value,
381            left: Box::new(IsolationTree::build(&left_data, max_depth - 1, rng)),
382            right: Box::new(IsolationTree::build(&right_data, max_depth - 1, rng)),
383        }
384    }
385
386    /// Compute path length for a point
387    fn path_length(&self, point: &[f64], current_depth: usize) -> usize {
388        match self {
389            IsolationTree::External { size } => {
390                current_depth + average_path_length(*size as f64) as usize
391            }
392            IsolationTree::Internal { split_feature, split_value, left, right } => {
393                let value = point.get(*split_feature).copied().unwrap_or(0.0);
394                if value < *split_value {
395                    left.path_length(point, current_depth + 1)
396                } else {
397                    right.path_length(point, current_depth + 1)
398                }
399            }
400        }
401    }
402}
403
/// Expected path length c(n) of an unsuccessful search in a binary search
/// tree with `n` nodes (Liu et al. 2008), used to normalize isolation-tree
/// path lengths. Returns 0 for n <= 1, where no search depth is accrued.
fn average_path_length(n: f64) -> f64 {
    if n > 1.0 {
        // Harmonic-number approximation via ln(n) + Euler–Mascheroni constant.
        let harmonic = n.ln() + 0.5772156649;
        2.0 * harmonic - 2.0 * (n - 1.0) / n
    } else {
        0.0
    }
}
411
412// ============================================================================
413// Time Series Forecasting (Error Prediction)
414// ============================================================================
415
/// Simple exponential smoothing for time series forecasting
///
/// Maintains a smoothed "level" over observed values; forecasts are flat at
/// the current level (no trend or seasonality terms).
#[derive(Debug, Clone)]
pub struct ErrorForecaster {
    /// Smoothing parameter alpha (0-1); larger values weight recent observations more
    alpha: f64,
    /// Historical observations, oldest first
    history: Vec<f64>,
    /// Current smoothed value (seeded by the first observation)
    level: f64,
}
426
427impl ErrorForecaster {
428    /// Create a new error forecaster
429    pub fn new(alpha: f64) -> Self {
430        Self { alpha: alpha.clamp(0.0, 1.0), history: Vec::new(), level: 0.0 }
431    }
432
433    /// Default forecaster with alpha=0.3
434    pub fn default_forecaster() -> Self {
435        Self::new(0.3)
436    }
437
438    /// Add an observation
439    pub fn observe(&mut self, value: f64) {
440        if self.history.is_empty() {
441            self.level = value;
442        } else {
443            // Exponential smoothing: L_t = alpha * Y_t + (1 - alpha) * L_{t-1}
444            self.level = self.alpha * value + (1.0 - self.alpha) * self.level;
445        }
446        self.history.push(value);
447    }
448
449    /// Forecast next n values
450    pub fn forecast(&self, n: usize) -> Vec<f64> {
451        // Simple exponential smoothing forecasts are constant
452        vec![self.level; n]
453    }
454
455    /// Compute forecast error metrics
456    pub fn error_metrics(&self) -> ForecastMetrics {
457        if self.history.len() < 2 {
458            return ForecastMetrics::default();
459        }
460
461        // Compute in-sample errors
462        let mut errors = Vec::new();
463        let mut level = self.history[0];
464
465        for &actual in self.history.iter().skip(1) {
466            let forecast = level;
467            errors.push(actual - forecast);
468            level = self.alpha * actual + (1.0 - self.alpha) * level;
469        }
470
471        let n = errors.len() as f64;
472        let mae = errors.iter().map(|e| e.abs()).sum::<f64>() / n;
473        let mse = errors.iter().map(|e| e * e).sum::<f64>() / n;
474        let rmse = mse.sqrt();
475
476        // MAPE (avoid division by zero)
477        let mape = if self.history.iter().skip(1).all(|&v| v.abs() > f64::EPSILON) {
478            let sum: f64 =
479                errors.iter().zip(self.history.iter().skip(1)).map(|(e, a)| (e / a).abs()).sum();
480            sum / n * 100.0
481        } else {
482            f64::NAN
483        };
484
485        ForecastMetrics { mae, mse, rmse, mape }
486    }
487
488    /// Get historical observations
489    pub fn history(&self) -> &[f64] {
490        &self.history
491    }
492
493    /// Get current level
494    pub fn current_level(&self) -> f64 {
495        self.level
496    }
497}
498
/// Forecast error metrics
///
/// Produced by `ErrorForecaster::error_metrics`; all fields are 0.0 (via
/// `Default`) when fewer than two observations exist.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ForecastMetrics {
    /// Mean Absolute Error
    pub mae: f64,
    /// Mean Squared Error
    pub mse: f64,
    /// Root Mean Squared Error
    pub rmse: f64,
    /// Mean Absolute Percentage Error (NaN when any actual value is near
    /// zero, to avoid division by zero)
    pub mape: f64,
}
511
512// ============================================================================
513// Tests
514// ============================================================================
515
516#[cfg(test)]
517#[path = "diagnostics_ml_tests.rs"]
518mod tests;