organizational_intelligence_plugin/ensemble_predictor.rs

//! Weighted Ensemble Risk Score and Calibrated Defect Prediction
//!
//! This module implements Phase 6 (Weighted Ensemble) and Phase 7 (Calibrated Probability)
//! from the Tarantula specification. It combines multiple defect signals using weak supervision
//! and provides calibrated probability predictions with confidence intervals.
//!
//! # Toyota Way Alignment
//! - **Jidoka**: Learned weights are interpretable, so developers can see why a file is flagged
//! - **Kaizen**: The model improves as more defect history accumulates
//! - **Genchi Genbutsu**: Weights are derived from actual codebase patterns
//! - **Heijunka**: Batch training amortizes cost across many predictions
//! - **Muri**: Low-confidence predictions are flagged for human judgment
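//!
//! # Example
//!
//! A minimal usage sketch (illustrative only; the file path and scores below are
//! made up):
//!
//! ```ignore
//! use std::path::PathBuf;
//!
//! let features = FileFeatures::new(PathBuf::from("src/parser.rs"))
//!     .with_sbfl(0.8)
//!     .with_churn(0.6);
//!
//! let mut model = WeightedEnsembleModel::new();
//! model.fit(&[features.clone()]).expect("fit");
//! let risk = model.predict(&features); // weighted vote share in [0.0, 1.0]
//! ```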

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// Features extracted for each file for defect prediction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileFeatures {
    /// File path
    pub path: PathBuf,
    /// SBFL suspiciousness score (0.0-1.0)
    pub sbfl_score: f32,
    /// Technical Debt Grade score (0.0-1.0, inverted so higher = worse)
    pub tdg_score: f32,
    /// Normalized commit frequency (0.0-1.0)
    pub churn_score: f32,
    /// Normalized cyclomatic complexity (0.0-1.0)
    pub complexity_score: f32,
    /// RAG similarity to historical bugs (0.0-1.0)
    pub rag_similarity: f32,
}

impl FileFeatures {
    /// Create new FileFeatures
    pub fn new(path: PathBuf) -> Self {
        Self {
            path,
            sbfl_score: 0.0,
            tdg_score: 0.0,
            churn_score: 0.0,
            complexity_score: 0.0,
            rag_similarity: 0.0,
        }
    }

    /// Builder method to set SBFL score
    pub fn with_sbfl(mut self, score: f32) -> Self {
        self.sbfl_score = score.clamp(0.0, 1.0);
        self
    }

    /// Builder method to set TDG score
    pub fn with_tdg(mut self, score: f32) -> Self {
        self.tdg_score = score.clamp(0.0, 1.0);
        self
    }

    /// Builder method to set churn score
    pub fn with_churn(mut self, score: f32) -> Self {
        self.churn_score = score.clamp(0.0, 1.0);
        self
    }

    /// Builder method to set complexity score
    pub fn with_complexity(mut self, score: f32) -> Self {
        self.complexity_score = score.clamp(0.0, 1.0);
        self
    }

    /// Builder method to set RAG similarity
    pub fn with_rag_similarity(mut self, score: f32) -> Self {
        self.rag_similarity = score.clamp(0.0, 1.0);
        self
    }

    /// Convert to feature vector for ML models
    pub fn to_vector(&self) -> Vec<f32> {
        vec![
            self.sbfl_score,
            self.tdg_score,
            self.churn_score,
            self.complexity_score,
            self.rag_similarity,
        ]
    }
}

/// Output of a labeling function
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LabelOutput {
    /// Positive label (likely defect)
    Positive,
    /// Negative label (likely clean)
    Negative,
    /// Abstain (uncertain)
    Abstain,
}

/// Trait for labeling functions that emit noisy labels
pub trait LabelingFunction: Send + Sync {
    /// Apply the labeling function to features
    fn apply(&self, features: &FileFeatures) -> LabelOutput;

    /// Get the name of this labeling function
    fn name(&self) -> &str;
}

/// SBFL-based labeling function
#[derive(Debug, Clone)]
pub struct SbflLabelingFunction {
    /// Threshold for positive label
    pub positive_threshold: f32,
    /// Threshold for negative label
    pub negative_threshold: f32,
}

impl SbflLabelingFunction {
    pub fn new(positive_threshold: f32, negative_threshold: f32) -> Self {
        Self {
            positive_threshold,
            negative_threshold,
        }
    }
}

impl LabelingFunction for SbflLabelingFunction {
    fn apply(&self, features: &FileFeatures) -> LabelOutput {
        if features.sbfl_score > self.positive_threshold {
            LabelOutput::Positive
        } else if features.sbfl_score < self.negative_threshold {
            LabelOutput::Negative
        } else {
            LabelOutput::Abstain
        }
    }

    fn name(&self) -> &str {
        "SBFL"
    }
}

/// TDG-based labeling function (high debt score, i.e. low TDG grade = likely defect)
#[derive(Debug, Clone)]
pub struct TdgLabelingFunction {
    /// Debt-score threshold for a positive label (defect likely if tdg_score > this)
    pub max_grade: f32,
    /// Debt-score threshold for a negative label (clean likely if tdg_score < this)
    pub min_grade: f32,
}

impl TdgLabelingFunction {
    pub fn new(max_grade: f32, min_grade: f32) -> Self {
        Self {
            max_grade,
            min_grade,
        }
    }
}

impl LabelingFunction for TdgLabelingFunction {
    fn apply(&self, features: &FileFeatures) -> LabelOutput {
        // Note: tdg_score is inverted (higher = worse debt = lower grade)
        if features.tdg_score > self.max_grade {
            LabelOutput::Positive // High debt = likely defect
        } else if features.tdg_score < self.min_grade {
            LabelOutput::Negative // Low debt = likely clean
        } else {
            LabelOutput::Abstain
        }
    }

    fn name(&self) -> &str {
        "TDG"
    }
}

/// Churn-based labeling function
#[derive(Debug, Clone)]
pub struct ChurnLabelingFunction {
    /// Percentile threshold for high churn
    pub high_percentile: f32,
    /// Percentile threshold for low churn
    pub low_percentile: f32,
}

impl ChurnLabelingFunction {
    pub fn new(high_percentile: f32, low_percentile: f32) -> Self {
        Self {
            high_percentile,
            low_percentile,
        }
    }
}

impl LabelingFunction for ChurnLabelingFunction {
    fn apply(&self, features: &FileFeatures) -> LabelOutput {
        if features.churn_score > self.high_percentile {
            LabelOutput::Positive
        } else if features.churn_score < self.low_percentile {
            LabelOutput::Negative
        } else {
            LabelOutput::Abstain
        }
    }

    fn name(&self) -> &str {
        "Churn"
    }
}

/// Complexity-based labeling function
#[derive(Debug, Clone)]
pub struct ComplexityLabelingFunction {
    /// Max complexity threshold (above = likely defect)
    pub max_complexity: f32,
    /// Min complexity threshold (below = likely clean)
    pub min_complexity: f32,
}

impl ComplexityLabelingFunction {
    pub fn new(max_complexity: f32, min_complexity: f32) -> Self {
        Self {
            max_complexity,
            min_complexity,
        }
    }
}

impl LabelingFunction for ComplexityLabelingFunction {
    fn apply(&self, features: &FileFeatures) -> LabelOutput {
        if features.complexity_score > self.max_complexity {
            LabelOutput::Positive
        } else if features.complexity_score < self.min_complexity {
            LabelOutput::Negative
        } else {
            LabelOutput::Abstain
        }
    }

    fn name(&self) -> &str {
        "Complexity"
    }
}

/// RAG similarity-based labeling function
#[derive(Debug, Clone)]
pub struct RagSimilarityLabelingFunction {
    /// Threshold for similar to historical bugs
    pub threshold: f32,
}

impl RagSimilarityLabelingFunction {
    pub fn new(threshold: f32) -> Self {
        Self { threshold }
    }
}

impl LabelingFunction for RagSimilarityLabelingFunction {
    fn apply(&self, features: &FileFeatures) -> LabelOutput {
        if features.rag_similarity > self.threshold {
            LabelOutput::Positive
        } else {
            LabelOutput::Abstain // RAG only provides positive signal
        }
    }

    fn name(&self) -> &str {
        "RAG_Similarity"
    }
}

/// Learned weights for combining labeling functions
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabelModelWeights {
    /// Weights for each labeling function
    pub weights: Vec<f32>,
    /// Names of labeling functions
    pub names: Vec<String>,
    /// Number of training iterations
    pub n_iterations: usize,
    /// Final log-likelihood
    pub log_likelihood: f64,
}

impl LabelModelWeights {
    /// Get weight by name
    pub fn get_weight(&self, name: &str) -> Option<f32> {
        self.names
            .iter()
            .position(|n| n == name)
            .map(|idx| self.weights[idx])
    }

    /// Get weights as HashMap for easy access
    pub fn to_hashmap(&self) -> HashMap<String, f32> {
        self.names
            .iter()
            .cloned()
            .zip(self.weights.iter().copied())
            .collect()
    }
}

/// Weighted Ensemble Model using weak supervision
///
/// Phase 6: Combines multiple noisy signals (SBFL, TDG, Churn, Complexity, RAG)
/// to learn optimal weights for defect prediction.
pub struct WeightedEnsembleModel {
    /// Labeling functions
    labeling_functions: Vec<Box<dyn LabelingFunction>>,
    /// Learned weights (after fitting)
    weights: Option<LabelModelWeights>,
    /// Number of EM iterations
    n_iterations: usize,
    /// Convergence threshold
    convergence_threshold: f64,
}

impl Default for WeightedEnsembleModel {
    fn default() -> Self {
        Self::new()
    }
}

impl WeightedEnsembleModel {
    /// Create a new ensemble model with default labeling functions
    pub fn new() -> Self {
        let lfs: Vec<Box<dyn LabelingFunction>> = vec![
            Box::new(SbflLabelingFunction::new(0.7, 0.2)),
            Box::new(TdgLabelingFunction::new(0.5, 0.2)),
            Box::new(ChurnLabelingFunction::new(0.9, 0.3)),
            Box::new(ComplexityLabelingFunction::new(0.7, 0.3)),
            Box::new(RagSimilarityLabelingFunction::new(0.8)),
        ];

        Self {
            labeling_functions: lfs,
            weights: None,
            n_iterations: 100,
            convergence_threshold: 1e-6,
        }
    }

    /// Create with custom labeling functions
    pub fn with_labeling_functions(lfs: Vec<Box<dyn LabelingFunction>>) -> Self {
        Self {
            labeling_functions: lfs,
            weights: None,
            n_iterations: 100,
            convergence_threshold: 1e-6,
        }
    }

    /// Set number of EM iterations
    pub fn with_iterations(mut self, n: usize) -> Self {
        self.n_iterations = n;
        self
    }

    /// Fit the model using the EM algorithm on unlabeled data
    ///
    /// This learns optimal weights for each labeling function by
    /// maximizing the likelihood of the observed label matrix.
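    ///
    /// Concretely (as implemented below): each E-step turns the current weights
    /// into a soft label per file (the weighted share of positive votes among
    /// non-abstaining functions), and each M-step resets a function's weight to
    /// its average agreement with those soft labels; weights are normalized to
    /// sum to 1 at the end.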
    pub fn fit(&mut self, files: &[FileFeatures]) -> anyhow::Result<()> {
        if files.is_empty() {
            anyhow::bail!("Cannot fit on empty data");
        }

        let n_lfs = self.labeling_functions.len();
        if n_lfs == 0 {
            anyhow::bail!("No labeling functions provided");
        }

        // Generate label matrix: rows = files, cols = LFs
        let label_matrix: Vec<Vec<LabelOutput>> = files
            .iter()
            .map(|f| {
                self.labeling_functions
                    .iter()
                    .map(|lf| lf.apply(f))
                    .collect()
            })
            .collect();

        // EM Algorithm for Label Model
        // Initialize weights uniformly
        let mut weights: Vec<f64> = vec![1.0 / n_lfs as f64; n_lfs];
        let mut prev_ll = f64::NEG_INFINITY;

        for _iter in 0..self.n_iterations {
            // E-step: Estimate latent labels
            let mut expected_labels: Vec<f64> = Vec::with_capacity(files.len());
            for row in &label_matrix {
                let mut pos_score = 0.0;
                let mut neg_score = 0.0;

                for (j, &output) in row.iter().enumerate() {
                    match output {
                        LabelOutput::Positive => pos_score += weights[j],
                        LabelOutput::Negative => neg_score += weights[j],
                        LabelOutput::Abstain => {}
                    }
                }

                // Probability from the normalized weighted vote (0.5 if every function abstains)
                let total = pos_score + neg_score;
                let prob = if total > 0.0 { pos_score / total } else { 0.5 };
                expected_labels.push(prob);
            }

            // M-step: Update weights based on expected labels
            let mut new_weights = vec![0.0; n_lfs];
            let mut counts = vec![0.0; n_lfs];

            for (i, row) in label_matrix.iter().enumerate() {
                let y = expected_labels[i];
                for (j, &output) in row.iter().enumerate() {
                    match output {
                        LabelOutput::Positive => {
                            new_weights[j] += y;
                            counts[j] += 1.0;
                        }
                        LabelOutput::Negative => {
                            new_weights[j] += 1.0 - y;
                            counts[j] += 1.0;
                        }
                        LabelOutput::Abstain => {}
                    }
                }
            }

            // Normalize weights
            for j in 0..n_lfs {
                if counts[j] > 0.0 {
                    new_weights[j] /= counts[j];
                } else {
                    new_weights[j] = 0.5; // Default weight if no labels
                }
            }

            // Expected log-likelihood of the soft labels (used only as a convergence proxy)
            let ll: f64 = expected_labels
                .iter()
                .map(|&p| {
                    let p_clamped = p.clamp(1e-10, 1.0 - 1e-10);
                    p_clamped * p_clamped.ln() + (1.0 - p_clamped) * (1.0 - p_clamped).ln()
                })
                .sum();

            // Check convergence
            if (ll - prev_ll).abs() < self.convergence_threshold {
                break;
            }

            weights = new_weights;
            prev_ll = ll;
        }

        // Normalize weights to sum to 1
        let sum: f64 = weights.iter().sum();
        if sum > 0.0 {
            for w in &mut weights {
                *w /= sum;
            }
        }

        let names: Vec<String> = self
            .labeling_functions
            .iter()
            .map(|lf| lf.name().to_string())
            .collect();

        self.weights = Some(LabelModelWeights {
            weights: weights.iter().map(|&w| w as f32).collect(),
            names,
            n_iterations: self.n_iterations,
            log_likelihood: prev_ll,
        });

        Ok(())
    }

    /// Predict defect probability for a file
    pub fn predict(&self, features: &FileFeatures) -> f32 {
        let weights = match &self.weights {
            Some(w) => &w.weights,
            None => return 0.5, // Untrained model returns neutral
        };

        let mut pos_score = 0.0f32;
        let mut neg_score = 0.0f32;

        for (lf, &weight) in self.labeling_functions.iter().zip(weights.iter()) {
            match lf.apply(features) {
                LabelOutput::Positive => pos_score += weight,
                LabelOutput::Negative => neg_score += weight,
                LabelOutput::Abstain => {}
            }
        }

        let total = pos_score + neg_score;
        if total > 0.0 {
            pos_score / total
        } else {
            0.5
        }
    }

    /// Get learned weights for interpretability
    pub fn get_weights(&self) -> Option<&LabelModelWeights> {
        self.weights.as_ref()
    }

    /// Check if model is fitted
    pub fn is_fitted(&self) -> bool {
        self.weights.is_some()
    }

    /// Save model weights to file
    pub fn save(&self, path: &std::path::Path) -> anyhow::Result<()> {
        let weights = self
            .weights
            .as_ref()
            .ok_or_else(|| anyhow::anyhow!("Model not fitted"))?;
        let json = serde_json::to_string_pretty(weights)?;
        std::fs::write(path, json)?;
        Ok(())
    }

    /// Load model weights from file
    pub fn load(&mut self, path: &std::path::Path) -> anyhow::Result<()> {
        let json = std::fs::read_to_string(path)?;
        let weights: LabelModelWeights = serde_json::from_str(&json)?;
        self.weights = Some(weights);
        Ok(())
    }
}

// ============================================================================
// Phase 7: Calibrated Defect Probability
// ============================================================================

/// Confidence level based on CI width
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ConfidenceLevel {
    /// CI width < 0.15
    High,
    /// CI width 0.15-0.30
    Medium,
    /// CI width > 0.30
    Low,
}

impl std::fmt::Display for ConfidenceLevel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ConfidenceLevel::High => write!(f, "HIGH"),
            ConfidenceLevel::Medium => write!(f, "MEDIUM"),
            ConfidenceLevel::Low => write!(f, "LOW"),
        }
    }
}

/// Contribution of each factor to the prediction
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorContribution {
    /// Name of the factor
    pub factor_name: String,
    /// Contribution percentage (0-100)
    pub contribution_pct: f32,
    /// Raw value of the factor
    pub raw_value: f32,
}

/// Prediction with uncertainty quantification
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalibratedPrediction {
    /// File path
    pub file: PathBuf,
    /// Line number (optional, for statement-level)
    pub line: Option<usize>,
    /// Calibrated probability of defect
    pub probability: f32,
    /// 95% confidence interval (low, high)
    pub confidence_interval: (f32, f32),
    /// Confidence level based on CI width
    pub confidence_level: ConfidenceLevel,
    /// Factor contributions for explainability
    pub contributing_factors: Vec<FactorContribution>,
}

/// Calibration metrics for model validation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CalibrationMetrics {
    /// Expected Calibration Error
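    /// (bin-weighted average of |mean predicted probability - observed defect rate|)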
    pub ece: f32,
    /// Maximum Calibration Error
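    /// (largest single-bin gap between mean predicted probability and observed defect rate)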
    pub mce: f32,
    /// Brier Score (lower is better)
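    /// (mean squared difference between the predicted probability and the 0/1 outcome)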
    pub brier_score: f32,
    /// Coverage of confidence intervals
    pub coverage: f32,
}

/// Isotonic regression for calibration
#[derive(Debug, Clone, Serialize, Deserialize)]
struct IsotonicCalibrator {
    /// X values (raw probabilities)
    x_values: Vec<f32>,
    /// Y values (calibrated probabilities)
    y_values: Vec<f32>,
}

impl IsotonicCalibrator {
    fn new() -> Self {
        Self {
            x_values: Vec::new(),
            y_values: Vec::new(),
        }
    }

    /// Fit isotonic regression using the Pool Adjacent Violators Algorithm (PAVA)
    fn fit(&mut self, raw_probs: &[f32], actuals: &[bool]) -> anyhow::Result<()> {
        if raw_probs.len() != actuals.len() {
            anyhow::bail!("Mismatched lengths");
        }
        if raw_probs.is_empty() {
            anyhow::bail!("Empty data");
        }

        // Sort by raw probabilities
        let mut pairs: Vec<(f32, f32)> = raw_probs
            .iter()
            .zip(actuals.iter())
            .map(|(&p, &a)| (p, if a { 1.0 } else { 0.0 }))
            .collect();
        pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));

        // Pool Adjacent Violators: maintain blocks of (mean, weight, sample count),
        // merging backwards whenever monotonicity is violated
        let mut blocks: Vec<(f32, f32, usize)> = Vec::with_capacity(pairs.len());
        for &(_, target) in &pairs {
            blocks.push((target, 1.0, 1));
            while blocks.len() > 1 {
                let last = blocks[blocks.len() - 1];
                let prev = blocks[blocks.len() - 2];
                if prev.0 > last.0 {
                    // Pool adjacent violators into a single weighted block
                    let weight = prev.1 + last.1;
                    let mean = (prev.0 * prev.1 + last.0 * last.1) / weight;
                    let count = prev.2 + last.2;
                    blocks.truncate(blocks.len() - 2);
                    blocks.push((mean, weight, count));
                } else {
                    break;
                }
            }
        }

        // Expand each block's mean back to one calibrated value per sorted sample,
        // so x_values and y_values stay aligned
        self.x_values = pairs.iter().map(|(x, _)| *x).collect();
        self.y_values = Vec::with_capacity(pairs.len());
        for &(mean, _, count) in &blocks {
            self.y_values.extend(std::iter::repeat(mean).take(count));
        }

        Ok(())
    }

    /// Transform raw probability to calibrated probability
    fn transform(&self, raw_prob: f32) -> f32 {
        if self.x_values.is_empty() {
            return raw_prob;
        }

        // Binary search for where raw_prob falls among the known x values
        let idx = self
            .x_values
            .binary_search_by(|x| {
                x.partial_cmp(&raw_prob)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .unwrap_or_else(|i| i);

        // Linear interpolation (flat extension beyond the fitted range)
        if idx == 0 {
            self.y_values[0]
        } else if idx >= self.x_values.len() {
            *self.y_values.last().unwrap_or(&raw_prob)
        } else {
            let x0 = self.x_values[idx - 1];
            let x1 = self.x_values[idx];
            let y0 = self.y_values[idx - 1];
            let y1 = self.y_values[idx];

            if (x1 - x0).abs() < 1e-10 {
                y0
            } else {
                let t = (raw_prob - x0) / (x1 - x0);
                y0 + t * (y1 - y0)
            }
        }
    }
}

/// Calibrated Defect Predictor using a Bayesian-style uncertainty approximation + isotonic calibration
///
/// Phase 7: Provides calibrated probabilities with confidence intervals
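///
/// # Example
///
/// A minimal sketch, assuming a labeled defect history is available; the
/// `features` and `labels` slices here are placeholders.
///
/// ```ignore
/// let mut predictor = CalibratedDefectPredictor::new();
/// predictor.fit(&features, &labels).expect("fit"); // 80/20 train/calibration split
/// let pred = predictor.predict(&features[0]);
/// println!(
///     "{}: p = {:.2} [{:.2}, {:.2}] ({})",
///     pred.file.display(),
///     pred.probability,
///     pred.confidence_interval.0,
///     pred.confidence_interval.1,
///     pred.confidence_level
/// );
/// ```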
pub struct CalibratedDefectPredictor {
    /// Ensemble model for base predictions
    ensemble: WeightedEnsembleModel,
    /// Isotonic calibrator
    calibrator: IsotonicCalibrator,
    /// Feature names for explainability
    feature_names: Vec<String>,
    /// Prior variance for Bayesian inference
    prior_variance: f32,
    /// Is calibrator fitted
    calibrator_fitted: bool,
}

impl Default for CalibratedDefectPredictor {
    fn default() -> Self {
        Self::new()
    }
}

impl CalibratedDefectPredictor {
    /// Create new calibrated predictor
    pub fn new() -> Self {
        Self {
            ensemble: WeightedEnsembleModel::new(),
            calibrator: IsotonicCalibrator::new(),
            feature_names: vec![
                "SBFL".into(),
                "TDG".into(),
                "Churn".into(),
                "Complexity".into(),
                "RAG_Similarity".into(),
            ],
            prior_variance: 1.0,
            calibrator_fitted: false,
        }
    }

    /// Set prior variance for uncertainty estimation
    pub fn with_prior_variance(mut self, variance: f32) -> Self {
        self.prior_variance = variance;
        self
    }

    /// Fit the predictor on labeled data
    ///
    /// Splits data into training (for ensemble) and calibration sets.
    pub fn fit(&mut self, files: &[FileFeatures], labels: &[bool]) -> anyhow::Result<()> {
        if files.len() != labels.len() {
            anyhow::bail!(
                "Mismatched lengths: {} files, {} labels",
                files.len(),
                labels.len()
            );
        }
        if files.len() < 10 {
            anyhow::bail!("Need at least 10 samples for calibration");
        }

        // Split: 80% training, 20% calibration
        let split_idx = (files.len() as f32 * 0.8) as usize;
        let train_files = &files[..split_idx];
        let cal_files = &files[split_idx..];
        let cal_labels = &labels[split_idx..];

        // Fit ensemble on training data (unsupervised weak supervision)
        self.ensemble.fit(train_files)?;

        // Get raw predictions on calibration set
        let raw_probs: Vec<f32> = cal_files.iter().map(|f| self.ensemble.predict(f)).collect();

        // Fit isotonic calibrator
        self.calibrator.fit(&raw_probs, cal_labels)?;
        self.calibrator_fitted = true;

        Ok(())
    }

    /// Predict with uncertainty quantification
    pub fn predict(&self, features: &FileFeatures) -> CalibratedPrediction {
        // Get raw ensemble prediction
        let raw_prob = self.ensemble.predict(features);

        // Calibrate
        let calibrated_prob = if self.calibrator_fitted {
            self.calibrator.transform(raw_prob)
        } else {
            raw_prob
        };

        // Estimate uncertainty with a Bernoulli-style variance approximation:
        // variance is largest for predictions near 0.5, scaled by the prior variance
        let base_variance = self.prior_variance * calibrated_prob * (1.0 - calibrated_prob);
        let std_dev = base_variance.sqrt();

        // 95% confidence interval
        let z_95 = 1.96f32;
        let ci_low = (calibrated_prob - z_95 * std_dev).max(0.0);
        let ci_high = (calibrated_prob + z_95 * std_dev).min(1.0);

        // Confidence level based on CI width
        let ci_width = ci_high - ci_low;
        let confidence_level = if ci_width < 0.15 {
            ConfidenceLevel::High
        } else if ci_width < 0.30 {
            ConfidenceLevel::Medium
        } else {
            ConfidenceLevel::Low
        };

        // Compute factor contributions
        let contributing_factors = self.compute_contributions(features);

        CalibratedPrediction {
            file: features.path.clone(),
            line: None,
            probability: calibrated_prob,
            confidence_interval: (ci_low, ci_high),
            confidence_level,
            contributing_factors,
        }
    }

    /// Compute factor contributions for explainability
    fn compute_contributions(&self, features: &FileFeatures) -> Vec<FactorContribution> {
        let weights = match self.ensemble.get_weights() {
            Some(w) => w.weights.clone(),
            None => vec![0.2; 5], // Equal weights if not fitted
        };

        let feature_values = features.to_vector();

        // Weighted contribution of each feature
        let weighted: Vec<f32> = feature_values
            .iter()
            .zip(weights.iter())
            .map(|(f, w)| (f * w).abs())
            .collect();

        let total: f32 = weighted.iter().sum();

        self.feature_names
            .iter()
            .zip(feature_values.iter())
            .zip(weighted.iter())
            .map(|((name, &raw_value), &w)| FactorContribution {
                factor_name: name.clone(),
                contribution_pct: if total > 0.0 { w / total * 100.0 } else { 20.0 },
                raw_value,
            })
            .collect()
    }

    /// Evaluate calibration quality on test set
    pub fn evaluate(
        &self,
        test_files: &[FileFeatures],
        test_labels: &[bool],
    ) -> CalibrationMetrics {
        if test_files.len() != test_labels.len() || test_files.is_empty() {
            return CalibrationMetrics {
                ece: 1.0,
                mce: 1.0,
                brier_score: 1.0,
                coverage: 0.0,
            };
        }

        let predictions: Vec<CalibratedPrediction> =
            test_files.iter().map(|f| self.predict(f)).collect();

        // Brier Score
        let brier_score: f32 = predictions
            .iter()
            .zip(test_labels.iter())
            .map(|(pred, &actual)| {
                let target = if actual { 1.0 } else { 0.0 };
                (pred.probability - target).powi(2)
            })
            .sum::<f32>()
            / predictions.len() as f32;

        // Expected Calibration Error (binned)
        let n_bins = 10;
        let mut bins: Vec<(f32, f32, usize)> = vec![(0.0, 0.0, 0); n_bins];

        for (pred, &actual) in predictions.iter().zip(test_labels.iter()) {
            let bin_idx = ((pred.probability * n_bins as f32) as usize).min(n_bins - 1);
            bins[bin_idx].0 += pred.probability; // sum of predictions
            bins[bin_idx].1 += if actual { 1.0 } else { 0.0 }; // sum of actuals
            bins[bin_idx].2 += 1; // count
        }

        let mut ece = 0.0f32;
        let mut mce = 0.0f32;

        for (sum_pred, sum_actual, count) in &bins {
            if *count > 0 {
                let avg_pred = sum_pred / *count as f32;
                let avg_actual = sum_actual / *count as f32;
                let bin_error = (avg_pred - avg_actual).abs();
                let weight = *count as f32 / predictions.len() as f32;
                ece += weight * bin_error;
                mce = mce.max(bin_error);
            }
        }

        // Coverage: % of true labels within confidence interval
        let covered = predictions
            .iter()
            .zip(test_labels.iter())
            .filter(|(pred, &actual)| {
                let target = if actual { 1.0 } else { 0.0 };
                target >= pred.confidence_interval.0 && target <= pred.confidence_interval.1
            })
            .count();
        let coverage = covered as f32 / predictions.len() as f32;

        CalibrationMetrics {
            ece,
            mce,
            brier_score,
            coverage,
        }
    }

    /// Check if model is fitted
    pub fn is_fitted(&self) -> bool {
        self.ensemble.is_fitted() && self.calibrator_fitted
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    // -------------------------------------------------------------------------
    // FileFeatures Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_file_features_new() {
        let features = FileFeatures::new(PathBuf::from("src/main.rs"));
        assert_eq!(features.path, PathBuf::from("src/main.rs"));
        assert_eq!(features.sbfl_score, 0.0);
        assert_eq!(features.tdg_score, 0.0);
        assert_eq!(features.churn_score, 0.0);
        assert_eq!(features.complexity_score, 0.0);
        assert_eq!(features.rag_similarity, 0.0);
    }

    #[test]
    fn test_file_features_builder() {
        let features = FileFeatures::new(PathBuf::from("src/lib.rs"))
            .with_sbfl(0.85)
            .with_tdg(0.4)
            .with_churn(0.95)
            .with_complexity(0.6)
            .with_rag_similarity(0.75);

        assert_eq!(features.sbfl_score, 0.85);
        assert_eq!(features.tdg_score, 0.4);
        assert_eq!(features.churn_score, 0.95);
        assert_eq!(features.complexity_score, 0.6);
        assert_eq!(features.rag_similarity, 0.75);
    }

    #[test]
    fn test_file_features_clamping() {
        let features = FileFeatures::new(PathBuf::from("test.rs"))
            .with_sbfl(1.5) // Should clamp to 1.0
            .with_tdg(-0.5); // Should clamp to 0.0

        assert_eq!(features.sbfl_score, 1.0);
        assert_eq!(features.tdg_score, 0.0);
    }

    #[test]
    fn test_file_features_to_vector() {
        let features = FileFeatures::new(PathBuf::from("test.rs"))
            .with_sbfl(0.9)
            .with_tdg(0.3)
            .with_churn(0.8)
            .with_complexity(0.5)
            .with_rag_similarity(0.7);

        let vec = features.to_vector();
        assert_eq!(vec, vec![0.9, 0.3, 0.8, 0.5, 0.7]);
    }

    // -------------------------------------------------------------------------
    // Labeling Function Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_sbfl_labeling_function_positive() {
        let lf = SbflLabelingFunction::new(0.7, 0.2);
        let features = FileFeatures::new(PathBuf::from("test.rs")).with_sbfl(0.9);
        assert_eq!(lf.apply(&features), LabelOutput::Positive);
    }

    #[test]
    fn test_sbfl_labeling_function_negative() {
        let lf = SbflLabelingFunction::new(0.7, 0.2);
        let features = FileFeatures::new(PathBuf::from("test.rs")).with_sbfl(0.1);
        assert_eq!(lf.apply(&features), LabelOutput::Negative);
    }

    #[test]
    fn test_sbfl_labeling_function_abstain() {
        let lf = SbflLabelingFunction::new(0.7, 0.2);
        let features = FileFeatures::new(PathBuf::from("test.rs")).with_sbfl(0.5);
        assert_eq!(lf.apply(&features), LabelOutput::Abstain);
    }

    #[test]
    fn test_tdg_labeling_function() {
        let lf = TdgLabelingFunction::new(0.5, 0.2);

        // High debt (bad TDG) = positive (likely defect)
        let high_debt = FileFeatures::new(PathBuf::from("test.rs")).with_tdg(0.7);
        assert_eq!(lf.apply(&high_debt), LabelOutput::Positive);

        // Low debt (good TDG) = negative (likely clean)
        let low_debt = FileFeatures::new(PathBuf::from("test.rs")).with_tdg(0.1);
        assert_eq!(lf.apply(&low_debt), LabelOutput::Negative);

        // Medium debt = abstain
        let medium_debt = FileFeatures::new(PathBuf::from("test.rs")).with_tdg(0.35);
        assert_eq!(lf.apply(&medium_debt), LabelOutput::Abstain);
    }

    #[test]
    fn test_churn_labeling_function() {
        let lf = ChurnLabelingFunction::new(0.9, 0.3);

        let high_churn = FileFeatures::new(PathBuf::from("test.rs")).with_churn(0.95);
        assert_eq!(lf.apply(&high_churn), LabelOutput::Positive);

        let low_churn = FileFeatures::new(PathBuf::from("test.rs")).with_churn(0.1);
        assert_eq!(lf.apply(&low_churn), LabelOutput::Negative);
    }

    #[test]
    fn test_complexity_labeling_function() {
        let lf = ComplexityLabelingFunction::new(0.7, 0.3);

        let high_complexity = FileFeatures::new(PathBuf::from("test.rs")).with_complexity(0.9);
        assert_eq!(lf.apply(&high_complexity), LabelOutput::Positive);

        let low_complexity = FileFeatures::new(PathBuf::from("test.rs")).with_complexity(0.1);
        assert_eq!(lf.apply(&low_complexity), LabelOutput::Negative);
    }

    #[test]
    fn test_rag_similarity_labeling_function() {
        let lf = RagSimilarityLabelingFunction::new(0.8);

        // Only provides positive or abstain (no negative signal)
        let similar = FileFeatures::new(PathBuf::from("test.rs")).with_rag_similarity(0.9);
        assert_eq!(lf.apply(&similar), LabelOutput::Positive);

        let not_similar = FileFeatures::new(PathBuf::from("test.rs")).with_rag_similarity(0.5);
        assert_eq!(lf.apply(&not_similar), LabelOutput::Abstain);
    }

    #[test]
    fn test_labeling_function_names() {
        assert_eq!(SbflLabelingFunction::new(0.7, 0.2).name(), "SBFL");
        assert_eq!(TdgLabelingFunction::new(0.5, 0.2).name(), "TDG");
        assert_eq!(ChurnLabelingFunction::new(0.9, 0.3).name(), "Churn");
        assert_eq!(
            ComplexityLabelingFunction::new(0.7, 0.3).name(),
            "Complexity"
        );
        assert_eq!(
            RagSimilarityLabelingFunction::new(0.8).name(),
            "RAG_Similarity"
        );
    }

    // -------------------------------------------------------------------------
    // WeightedEnsembleModel Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_ensemble_model_new() {
        let model = WeightedEnsembleModel::new();
        assert!(!model.is_fitted());
        assert!(model.get_weights().is_none());
    }

    #[test]
    fn test_ensemble_model_predict_unfitted() {
        let model = WeightedEnsembleModel::new();
        let features = FileFeatures::new(PathBuf::from("test.rs")).with_sbfl(0.9);
        // Unfitted model returns 0.5 (neutral)
        assert_eq!(model.predict(&features), 0.5);
    }

    #[test]
    fn test_ensemble_model_fit_empty_data() {
        let mut model = WeightedEnsembleModel::new();
        let result = model.fit(&[]);
        assert!(result.is_err());
    }

    #[test]
    fn test_ensemble_model_fit_and_predict() {
        let mut model = WeightedEnsembleModel::new();

        // Create synthetic training data
        let files: Vec<FileFeatures> = (0..100)
            .map(|i| {
                let is_defect = i % 3 == 0;
                FileFeatures::new(PathBuf::from(format!("file_{}.rs", i)))
                    .with_sbfl(if is_defect { 0.8 } else { 0.2 })
                    .with_tdg(if is_defect { 0.7 } else { 0.2 })
                    .with_churn(if is_defect { 0.95 } else { 0.3 })
                    .with_complexity(if is_defect { 0.8 } else { 0.3 })
                    .with_rag_similarity(if is_defect { 0.85 } else { 0.1 })
            })
            .collect();

        let result = model.fit(&files);
        assert!(result.is_ok());
        assert!(model.is_fitted());

        // Test prediction on high-risk file
        let high_risk = FileFeatures::new(PathBuf::from("risky.rs"))
            .with_sbfl(0.9)
            .with_tdg(0.8)
            .with_churn(0.95)
            .with_complexity(0.9)
            .with_rag_similarity(0.9);
        let prob = model.predict(&high_risk);
        assert!(
            prob > 0.5,
            "High risk file should have prob > 0.5, got {}",
            prob
        );

        // Test prediction on low-risk file
        let low_risk = FileFeatures::new(PathBuf::from("safe.rs"))
            .with_sbfl(0.1)
            .with_tdg(0.1)
            .with_churn(0.1)
            .with_complexity(0.1)
            .with_rag_similarity(0.1);
        let prob = model.predict(&low_risk);
        assert!(
            prob < 0.5,
            "Low risk file should have prob < 0.5, got {}",
            prob
        );
    }

    #[test]
    fn test_ensemble_model_weights_interpretability() {
        let mut model = WeightedEnsembleModel::new();

        let files: Vec<FileFeatures> = (0..50)
            .map(|i| {
                FileFeatures::new(PathBuf::from(format!("file_{}.rs", i)))
                    .with_sbfl(0.5 + (i as f32 % 10.0) / 20.0)
                    .with_tdg(0.3 + (i as f32 % 5.0) / 10.0)
                    .with_churn(0.4 + (i as f32 % 7.0) / 15.0)
                    .with_complexity(0.35 + (i as f32 % 8.0) / 20.0)
                    .with_rag_similarity(0.2 + (i as f32 % 6.0) / 12.0)
            })
            .collect();

        model.fit(&files).unwrap();

        let weights = model.get_weights().unwrap();
        assert_eq!(weights.names.len(), 5);
        assert_eq!(weights.weights.len(), 5);

        // Weights should sum to approximately 1
        let sum: f32 = weights.weights.iter().sum();
        assert!(
            (sum - 1.0).abs() < 0.01,
            "Weights should sum to 1, got {}",
            sum
        );

        // Test hashmap conversion
        let weight_map = weights.to_hashmap();
        assert!(weight_map.contains_key("SBFL"));
        assert!(weight_map.contains_key("TDG"));
    }

    // -------------------------------------------------------------------------
    // CalibratedDefectPredictor Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_calibrated_predictor_new() {
        let predictor = CalibratedDefectPredictor::new();
        assert!(!predictor.is_fitted());
    }

    #[test]
    fn test_calibrated_predictor_fit_insufficient_data() {
        let mut predictor = CalibratedDefectPredictor::new();
        let files: Vec<FileFeatures> = (0..5)
            .map(|i| FileFeatures::new(PathBuf::from(format!("file_{}.rs", i))))
            .collect();
        let labels = vec![true, false, true, false, true];

        let result = predictor.fit(&files, &labels);
        assert!(result.is_err()); // Need at least 10 samples
    }

    #[test]
    fn test_calibrated_predictor_fit_and_predict() {
        let mut predictor = CalibratedDefectPredictor::new();

        // Create synthetic labeled data
        let files: Vec<FileFeatures> = (0..100)
            .map(|i| {
                let is_defect = i % 3 == 0;
                FileFeatures::new(PathBuf::from(format!("file_{}.rs", i)))
                    .with_sbfl(if is_defect {
                        0.8 + (i as f32 % 10.0) / 50.0
                    } else {
                        0.2 + (i as f32 % 10.0) / 50.0
                    })
                    .with_tdg(if is_defect { 0.7 } else { 0.2 })
                    .with_churn(if is_defect { 0.9 } else { 0.3 })
                    .with_complexity(if is_defect { 0.8 } else { 0.3 })
                    .with_rag_similarity(if is_defect { 0.85 } else { 0.1 })
            })
            .collect();

        let labels: Vec<bool> = (0..100).map(|i| i % 3 == 0).collect();

        let result = predictor.fit(&files, &labels);
        assert!(result.is_ok());
        assert!(predictor.is_fitted());

        // Predict on new file
        let test_features = FileFeatures::new(PathBuf::from("test.rs"))
            .with_sbfl(0.85)
            .with_tdg(0.6)
            .with_churn(0.9)
            .with_complexity(0.75)
            .with_rag_similarity(0.8);

        let prediction = predictor.predict(&test_features);
        assert!(prediction.probability >= 0.0 && prediction.probability <= 1.0);
        assert!(prediction.confidence_interval.0 <= prediction.probability);
        assert!(prediction.confidence_interval.1 >= prediction.probability);
        assert!(!prediction.contributing_factors.is_empty());
    }

    #[test]
    fn test_calibrated_prediction_confidence_levels() {
        // Test confidence level classification
        let mut predictor = CalibratedDefectPredictor::new().with_prior_variance(0.1);

        let files: Vec<FileFeatures> = (0..50)
            .map(|i| {
                FileFeatures::new(PathBuf::from(format!("file_{}.rs", i)))
                    .with_sbfl(0.9)
                    .with_tdg(0.7)
                    .with_churn(0.95)
                    .with_complexity(0.8)
                    .with_rag_similarity(0.85)
            })
            .collect();
        let labels: Vec<bool> = vec![true; 50];

        let _ = predictor.fit(&files, &labels);

        // High confidence prediction (low variance)
        let high_conf_features = FileFeatures::new(PathBuf::from("high.rs"))
            .with_sbfl(0.95)
            .with_tdg(0.9)
            .with_churn(0.98)
            .with_complexity(0.9)
            .with_rag_similarity(0.95);

        let pred = predictor.predict(&high_conf_features);
        // Prediction near 1.0 should have narrower CI (higher confidence)
        let ci_width = pred.confidence_interval.1 - pred.confidence_interval.0;
        assert!(ci_width < 0.5, "CI width {} should be reasonable", ci_width);
    }

    #[test]
    fn test_calibration_metrics_evaluation() {
        let mut predictor = CalibratedDefectPredictor::new();

        // Create training data
        let train_files: Vec<FileFeatures> = (0..80)
            .map(|i| {
                let is_defect = i % 4 == 0;
                FileFeatures::new(PathBuf::from(format!("train_{}.rs", i)))
                    .with_sbfl(if is_defect { 0.85 } else { 0.15 })
                    .with_tdg(if is_defect { 0.75 } else { 0.25 })
                    .with_churn(if is_defect { 0.9 } else { 0.2 })
                    .with_complexity(if is_defect { 0.8 } else { 0.2 })
                    .with_rag_similarity(if is_defect { 0.8 } else { 0.1 })
            })
            .collect();
        let train_labels: Vec<bool> = (0..80).map(|i| i % 4 == 0).collect();

        predictor.fit(&train_files, &train_labels).unwrap();

        // Create test data
        let test_files: Vec<FileFeatures> = (0..20)
            .map(|i| {
                let is_defect = i % 4 == 0;
                FileFeatures::new(PathBuf::from(format!("test_{}.rs", i)))
                    .with_sbfl(if is_defect { 0.85 } else { 0.15 })
                    .with_tdg(if is_defect { 0.75 } else { 0.25 })
                    .with_churn(if is_defect { 0.9 } else { 0.2 })
                    .with_complexity(if is_defect { 0.8 } else { 0.2 })
                    .with_rag_similarity(if is_defect { 0.8 } else { 0.1 })
            })
            .collect();
        let test_labels: Vec<bool> = (0..20).map(|i| i % 4 == 0).collect();

        let metrics = predictor.evaluate(&test_files, &test_labels);

        // Metrics should be in valid ranges
        assert!(metrics.ece >= 0.0 && metrics.ece <= 1.0);
        assert!(metrics.mce >= 0.0 && metrics.mce <= 1.0);
        assert!(metrics.brier_score >= 0.0 && metrics.brier_score <= 1.0);
        assert!(metrics.coverage >= 0.0 && metrics.coverage <= 1.0);
    }

    #[test]
    fn test_factor_contributions() {
        let mut predictor = CalibratedDefectPredictor::new();

        let files: Vec<FileFeatures> = (0..50)
            .map(|i| {
                FileFeatures::new(PathBuf::from(format!("file_{}.rs", i)))
                    .with_sbfl(0.5 + (i as f32) / 100.0)
                    .with_tdg(0.4)
                    .with_churn(0.6)
                    .with_complexity(0.5)
                    .with_rag_similarity(0.3)
            })
            .collect();
        let labels: Vec<bool> = (0..50).map(|i| i > 25).collect();

        predictor.fit(&files, &labels).unwrap();

        let features = FileFeatures::new(PathBuf::from("test.rs"))
            .with_sbfl(0.9)
            .with_tdg(0.1)
            .with_churn(0.5)
            .with_complexity(0.3)
            .with_rag_similarity(0.2);

        let prediction = predictor.predict(&features);

        // Should have 5 factor contributions
        assert_eq!(prediction.contributing_factors.len(), 5);

        // Contributions should sum to approximately 100%
        let total: f32 = prediction
            .contributing_factors
            .iter()
            .map(|f| f.contribution_pct)
            .sum();
        assert!(
            (total - 100.0).abs() < 1.0,
            "Contributions should sum to 100%, got {}",
            total
        );

        // Each factor should have a name and valid percentage
        for factor in &prediction.contributing_factors {
            assert!(!factor.factor_name.is_empty());
            assert!(factor.contribution_pct >= 0.0);
            assert!(factor.raw_value >= 0.0 && factor.raw_value <= 1.0);
        }
    }

    // -------------------------------------------------------------------------
    // Isotonic Calibrator Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_isotonic_calibrator_basic() {
        let mut calibrator = IsotonicCalibrator::new();

        // Perfect calibration data
        let raw_probs = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
        let actuals = vec![false, false, false, false, true, true, true, true, true];

        calibrator.fit(&raw_probs, &actuals).unwrap();

        // Transform should produce monotonic output
        let t1 = calibrator.transform(0.2);
        let t2 = calibrator.transform(0.5);
        let t3 = calibrator.transform(0.8);

        assert!(t1 <= t2, "Isotonic: {} should be <= {}", t1, t2);
        assert!(t2 <= t3, "Isotonic: {} should be <= {}", t2, t3);
    }

    #[test]
    fn test_isotonic_calibrator_empty() {
        let mut calibrator = IsotonicCalibrator::new();
        let result = calibrator.fit(&[], &[]);
        assert!(result.is_err());
    }

    #[test]
    fn test_isotonic_calibrator_mismatched_lengths() {
        let mut calibrator = IsotonicCalibrator::new();
        let result = calibrator.fit(&[0.5, 0.6], &[true]);
        assert!(result.is_err());
    }

    // -------------------------------------------------------------------------
    // LabelModelWeights Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_label_model_weights_get_weight() {
        let weights = LabelModelWeights {
            weights: vec![0.3, 0.2, 0.25, 0.15, 0.1],
            names: vec![
                "SBFL".into(),
                "TDG".into(),
                "Churn".into(),
                "Complexity".into(),
                "RAG_Similarity".into(),
            ],
            n_iterations: 100,
            log_likelihood: -50.0,
        };

        assert_eq!(weights.get_weight("SBFL"), Some(0.3));
        assert_eq!(weights.get_weight("TDG"), Some(0.2));
        assert_eq!(weights.get_weight("Unknown"), None);
    }

    #[test]
    fn test_confidence_level_display() {
        assert_eq!(format!("{}", ConfidenceLevel::High), "HIGH");
        assert_eq!(format!("{}", ConfidenceLevel::Medium), "MEDIUM");
        assert_eq!(format!("{}", ConfidenceLevel::Low), "LOW");
    }

    // -------------------------------------------------------------------------
    // Integration Tests
    // -------------------------------------------------------------------------

    #[test]
    fn test_end_to_end_defect_prediction() {
        // Test ensemble model directly (without calibration) for clearer signal
        let mut ensemble = WeightedEnsembleModel::new();

        // Training data with clear patterns
        let mut files = Vec::new();

        // Pattern 1: High SBFL + High Churn = Defect indicators
        for i in 0..40 {
            files.push(
                FileFeatures::new(PathBuf::from(format!("high_risk_{}.rs", i)))
                    .with_sbfl(0.85 + (i as f32 % 5.0) / 100.0)
                    .with_tdg(0.7)
                    .with_churn(0.95)
                    .with_complexity(0.8)
                    .with_rag_similarity(0.85),
            );
        }

        // Pattern 2: Low all signals = Clean indicators
        for i in 0..60 {
            files.push(
                FileFeatures::new(PathBuf::from(format!("low_risk_{}.rs", i)))
                    .with_sbfl(0.1 + (i as f32 % 5.0) / 100.0)
                    .with_tdg(0.1)
                    .with_churn(0.15)
                    .with_complexity(0.2)
                    .with_rag_similarity(0.05),
            );
        }

        ensemble.fit(&files).unwrap();

        // Test predictions on clearly different risk profiles
        let high_risk = FileFeatures::new(PathBuf::from("new_risky.rs"))
            .with_sbfl(0.95)
            .with_tdg(0.8)
            .with_churn(0.98)
            .with_complexity(0.9)
            .with_rag_similarity(0.9);

        let low_risk = FileFeatures::new(PathBuf::from("new_safe.rs"))
            .with_sbfl(0.05)
            .with_tdg(0.05)
            .with_churn(0.05)
            .with_complexity(0.1)
            .with_rag_similarity(0.0);

        let high_pred = ensemble.predict(&high_risk);
        let low_pred = ensemble.predict(&low_risk);

        // High risk should have higher probability than low risk
        assert!(
            high_pred >= low_pred,
            "High risk ({}) should have >= prob than low risk ({})",
            high_pred,
            low_pred
        );

        // Both should be in valid range
        assert!((0.0..=1.0).contains(&high_pred));
        assert!((0.0..=1.0).contains(&low_pred));
    }

    #[test]
    fn test_serialization_roundtrip() {
        let weights = LabelModelWeights {
            weights: vec![0.25, 0.20, 0.20, 0.20, 0.15],
            names: vec![
                "SBFL".into(),
                "TDG".into(),
                "Churn".into(),
                "Complexity".into(),
                "RAG_Similarity".into(),
            ],
            n_iterations: 50,
            log_likelihood: -45.5,
        };

        let json = serde_json::to_string(&weights).unwrap();
        let parsed: LabelModelWeights = serde_json::from_str(&json).unwrap();

        assert_eq!(parsed.weights, weights.weights);
        assert_eq!(parsed.names, weights.names);
        assert_eq!(parsed.n_iterations, weights.n_iterations);
    }
1581}