decy_ownership/
active_learning.rs

1//! Active learning for uncertain sample collection (DECY-ML-016).
2//!
3//! Implements uncertainty sampling to identify low-confidence predictions
4//! that would benefit from human labeling to improve the model.
5//!
6//! # Active Learning Strategy
7//!
8//! ```text
9//! ┌─────────────────────────────────────────────────────────────────┐
10//! │                   ACTIVE LEARNING PIPELINE                      │
11//! │                                                                 │
12//! │  1. Classify samples with ML model                              │
13//! │     ├─ High confidence → Use prediction                         │
14//! │     └─ Low confidence  → Queue for labeling                     │
15//! │                                                                 │
16//! │  2. Selection strategies:                                       │
17//! │     ├─ Uncertainty sampling (lowest confidence)                 │
18//! │     ├─ Margin sampling (smallest margin between top 2)          │
19//! │     └─ Entropy sampling (highest prediction entropy)            │
20//! │                                                                 │
21//! │  3. Human labels uncertain samples                              │
22//! │                                                                 │
23//! │  4. Retrain model with expanded dataset                         │
24//! │                                                                 │
25//! │  5. Repeat (continuous improvement cycle)                       │
26//! └─────────────────────────────────────────────────────────────────┘
27//! ```
28//!
29//! # Toyota Way: Kaizen (Continuous Improvement)
30//!
31//! Active learning embodies Kaizen by:
32//! - Focusing labeling effort on most valuable samples
33//! - Incrementally improving model accuracy
34//! - Learning from failures (uncertain predictions)
35
36use crate::ml_features::{InferredOwnership, OwnershipFeatures, OwnershipPrediction};
37use serde::{Deserialize, Serialize};
38use std::collections::VecDeque;
39
40/// Strategy for selecting uncertain samples.
41#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
42pub enum SelectionStrategy {
43    /// Select samples with lowest confidence
44    UncertaintySampling,
45    /// Select samples where top-2 predictions are close
46    MarginSampling,
47    /// Select samples with highest prediction entropy
48    EntropySampling,
49    /// Random sampling (baseline)
50    Random,
51}
52
53impl std::fmt::Display for SelectionStrategy {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        match self {
56            SelectionStrategy::UncertaintySampling => write!(f, "uncertainty"),
57            SelectionStrategy::MarginSampling => write!(f, "margin"),
58            SelectionStrategy::EntropySampling => write!(f, "entropy"),
59            SelectionStrategy::Random => write!(f, "random"),
60        }
61    }
62}
63
64/// An uncertain sample queued for labeling.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct UncertainSample {
67    /// Unique identifier
68    pub id: u64,
69    /// Variable name being classified
70    pub variable: String,
71    /// Source file (if known)
72    pub source_file: Option<String>,
73    /// Source line (if known)
74    pub source_line: Option<u32>,
75    /// Feature vector
76    pub features: OwnershipFeatures,
77    /// ML prediction
78    pub prediction: OwnershipPrediction,
79    /// Uncertainty score (0.0 = certain, 1.0 = uncertain)
80    pub uncertainty_score: f64,
81    /// Selection strategy that chose this sample
82    pub strategy: SelectionStrategy,
83    /// Human-provided label (None if unlabeled)
84    pub label: Option<InferredOwnership>,
85    /// Timestamp when queued
86    pub queued_at: u64,
87    /// Timestamp when labeled (None if unlabeled)
88    pub labeled_at: Option<u64>,
89}
90
91impl UncertainSample {
92    /// Create a new uncertain sample.
93    pub fn new(
94        id: u64,
95        variable: impl Into<String>,
96        features: OwnershipFeatures,
97        prediction: OwnershipPrediction,
98        uncertainty_score: f64,
99        strategy: SelectionStrategy,
100    ) -> Self {
101        let now = std::time::SystemTime::now()
102            .duration_since(std::time::UNIX_EPOCH)
103            .unwrap_or_default()
104            .as_millis() as u64;
105
106        Self {
107            id,
108            variable: variable.into(),
109            source_file: None,
110            source_line: None,
111            features,
112            prediction,
113            uncertainty_score: uncertainty_score.clamp(0.0, 1.0),
114            strategy,
115            label: None,
116            queued_at: now,
117            labeled_at: None,
118        }
119    }
120
121    /// Set source location.
122    pub fn with_source(mut self, file: impl Into<String>, line: u32) -> Self {
123        self.source_file = Some(file.into());
124        self.source_line = Some(line);
125        self
126    }
127
128    /// Check if sample is labeled.
129    pub fn is_labeled(&self) -> bool {
130        self.label.is_some()
131    }
132
133    /// Apply a label.
134    pub fn apply_label(&mut self, label: InferredOwnership) {
135        let now = std::time::SystemTime::now()
136            .duration_since(std::time::UNIX_EPOCH)
137            .unwrap_or_default()
138            .as_millis() as u64;
139
140        self.label = Some(label);
141        self.labeled_at = Some(now);
142    }
143
144    /// Check if prediction matches label (for accuracy calculation).
145    pub fn prediction_correct(&self) -> Option<bool> {
146        self.label.map(|l| l == self.prediction.kind)
147    }
148}
149
/// Turns a prediction into an uncertainty score.
#[derive(Debug, Clone)]
pub struct UncertaintyCalculator {
    /// Predictions whose confidence falls below this value count as uncertain.
    pub(crate) confidence_threshold: f64,
}
156
157impl Default for UncertaintyCalculator {
158    fn default() -> Self {
159        Self::new()
160    }
161}
162
163impl UncertaintyCalculator {
164    /// Create with default thresholds.
165    pub fn new() -> Self {
166        Self {
167            confidence_threshold: 0.65,
168        }
169    }
170
171    /// Create with custom confidence threshold.
172    pub fn with_confidence_threshold(threshold: f64) -> Self {
173        Self {
174            confidence_threshold: threshold.clamp(0.0, 1.0),
175        }
176    }
177
178    /// Calculate uncertainty score using specified strategy.
179    pub fn calculate(&self, prediction: &OwnershipPrediction, strategy: SelectionStrategy) -> f64 {
180        match strategy {
181            SelectionStrategy::UncertaintySampling => self.uncertainty_sampling(prediction),
182            SelectionStrategy::MarginSampling => self.margin_sampling(prediction),
183            SelectionStrategy::EntropySampling => self.entropy_sampling(prediction),
184            SelectionStrategy::Random => rand_like_score(prediction),
185        }
186    }
187
188    /// Uncertainty = 1 - confidence
189    fn uncertainty_sampling(&self, prediction: &OwnershipPrediction) -> f64 {
190        1.0 - (prediction.confidence as f64)
191    }
192
193    /// Margin between top prediction and fallback (if available)
194    fn margin_sampling(&self, prediction: &OwnershipPrediction) -> f64 {
195        // If fallback exists, use margin; otherwise use uncertainty
196        if prediction.fallback.is_some() {
197            // Assume fallback has slightly lower confidence
198            let primary_conf = prediction.confidence as f64;
199            let secondary_conf = primary_conf * 0.8; // Approximation
200            let margin = primary_conf - secondary_conf;
201            // Lower margin = higher uncertainty
202            1.0 - margin.min(1.0)
203        } else {
204            self.uncertainty_sampling(prediction)
205        }
206    }
207
208    /// Entropy-based uncertainty (simplified binary entropy)
209    fn entropy_sampling(&self, prediction: &OwnershipPrediction) -> f64 {
210        let p = prediction.confidence as f64;
211        if p <= 0.0 || p >= 1.0 {
212            return 0.0;
213        }
214
215        // Binary entropy: -p*log2(p) - (1-p)*log2(1-p)
216        let entropy = -p * p.log2() - (1.0 - p) * (1.0 - p).log2();
217        // Normalize to 0-1 (max entropy is 1.0 at p=0.5)
218        entropy.min(1.0)
219    }
220
221    /// Check if prediction is uncertain.
222    pub fn is_uncertain(&self, prediction: &OwnershipPrediction) -> bool {
223        (prediction.confidence as f64) < self.confidence_threshold
224    }
225}
226
227/// Simple deterministic "random" based on prediction properties.
228fn rand_like_score(prediction: &OwnershipPrediction) -> f64 {
229    // Use confidence and kind to generate pseudo-random score
230    let kind_hash = match prediction.kind {
231        InferredOwnership::Owned => 0.1,
232        InferredOwnership::Borrowed => 0.2,
233        InferredOwnership::BorrowedMut => 0.3,
234        InferredOwnership::Shared => 0.4,
235        InferredOwnership::RawPointer => 0.5,
236        InferredOwnership::Vec => 0.6,
237        InferredOwnership::Slice => 0.7,
238        InferredOwnership::SliceMut => 0.8,
239    };
240    let conf_part = prediction.confidence as f64 * 0.3;
241    ((kind_hash + conf_part) * 7.0) % 1.0
242}
243
244/// Active learning sample queue.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct SampleQueue {
247    /// Queued samples awaiting labeling
248    pending: VecDeque<UncertainSample>,
249    /// Labeled samples ready for training
250    labeled: Vec<UncertainSample>,
251    /// Selection strategy
252    strategy: SelectionStrategy,
253    /// Maximum pending queue size
254    max_pending: usize,
255    /// Next sample ID
256    next_id: u64,
257    /// Total samples processed
258    total_processed: u64,
259}
260
261impl Default for SampleQueue {
262    fn default() -> Self {
263        Self::new(SelectionStrategy::UncertaintySampling)
264    }
265}
266
267impl SampleQueue {
268    /// Create a new sample queue.
269    pub fn new(strategy: SelectionStrategy) -> Self {
270        Self {
271            pending: VecDeque::new(),
272            labeled: Vec::new(),
273            strategy,
274            max_pending: 1000,
275            next_id: 1,
276            total_processed: 0,
277        }
278    }
279
280    /// Set maximum pending queue size.
281    pub fn with_max_pending(mut self, max: usize) -> Self {
282        self.max_pending = max;
283        self
284    }
285
286    /// Get selection strategy.
287    pub fn strategy(&self) -> SelectionStrategy {
288        self.strategy
289    }
290
291    /// Get pending count.
292    pub fn pending_count(&self) -> usize {
293        self.pending.len()
294    }
295
296    /// Get labeled count.
297    pub fn labeled_count(&self) -> usize {
298        self.labeled.len()
299    }
300
301    /// Get total processed.
302    pub fn total_processed(&self) -> u64 {
303        self.total_processed
304    }
305
306    /// Add an uncertain sample to the queue.
307    pub fn enqueue(&mut self, mut sample: UncertainSample) -> u64 {
308        sample.id = self.next_id;
309        self.next_id += 1;
310        self.total_processed += 1;
311
312        // Maintain priority order (highest uncertainty first)
313        let insert_pos = self
314            .pending
315            .iter()
316            .position(|s| s.uncertainty_score < sample.uncertainty_score)
317            .unwrap_or(self.pending.len());
318
319        if insert_pos < self.max_pending {
320            self.pending.insert(insert_pos, sample);
321
322            // Remove lowest priority if over capacity
323            if self.pending.len() > self.max_pending {
324                self.pending.pop_back();
325            }
326        }
327
328        self.next_id - 1
329    }
330
331    /// Get next sample for labeling (highest uncertainty).
332    pub fn next_for_labeling(&mut self) -> Option<UncertainSample> {
333        self.pending.pop_front()
334    }
335
336    /// Peek at next sample without removing.
337    pub fn peek_next(&self) -> Option<&UncertainSample> {
338        self.pending.front()
339    }
340
341    /// Get top N samples for batch labeling.
342    pub fn batch_for_labeling(&mut self, n: usize) -> Vec<UncertainSample> {
343        let mut batch = Vec::with_capacity(n);
344        for _ in 0..n {
345            if let Some(sample) = self.pending.pop_front() {
346                batch.push(sample);
347            } else {
348                break;
349            }
350        }
351        batch
352    }
353
354    /// Submit a labeled sample.
355    pub fn submit_labeled(&mut self, sample: UncertainSample) {
356        if sample.is_labeled() {
357            self.labeled.push(sample);
358        }
359    }
360
361    /// Get all labeled samples for training.
362    pub fn get_labeled_samples(&self) -> &[UncertainSample] {
363        &self.labeled
364    }
365
366    /// Take labeled samples (moves them out).
367    pub fn take_labeled_samples(&mut self) -> Vec<UncertainSample> {
368        std::mem::take(&mut self.labeled)
369    }
370
371    /// Clear all pending samples.
372    pub fn clear_pending(&mut self) {
373        self.pending.clear();
374    }
375
376    /// Get statistics.
377    pub fn stats(&self) -> QueueStats {
378        let labeled_correct = self
379            .labeled
380            .iter()
381            .filter_map(|s| s.prediction_correct())
382            .filter(|&c| c)
383            .count();
384        let labeled_total = self
385            .labeled
386            .iter()
387            .filter(|s| s.prediction_correct().is_some())
388            .count();
389
390        QueueStats {
391            pending: self.pending.len(),
392            labeled: self.labeled.len(),
393            total_processed: self.total_processed,
394            avg_uncertainty: if self.pending.is_empty() {
395                0.0
396            } else {
397                self.pending
398                    .iter()
399                    .map(|s| s.uncertainty_score)
400                    .sum::<f64>()
401                    / self.pending.len() as f64
402            },
403            prediction_accuracy: if labeled_total > 0 {
404                labeled_correct as f64 / labeled_total as f64
405            } else {
406                0.0
407            },
408        }
409    }
410}
411
412/// Statistics for the sample queue.
413#[derive(Debug, Clone, Serialize, Deserialize)]
414pub struct QueueStats {
415    /// Pending samples count
416    pub pending: usize,
417    /// Labeled samples count
418    pub labeled: usize,
419    /// Total samples processed
420    pub total_processed: u64,
421    /// Average uncertainty of pending samples
422    pub avg_uncertainty: f64,
423    /// Accuracy of predictions on labeled samples
424    pub prediction_accuracy: f64,
425}
426
427/// Active learning manager.
428#[derive(Debug)]
429pub struct ActiveLearner {
430    /// Uncertainty calculator
431    calculator: UncertaintyCalculator,
432    /// Sample queue
433    queue: SampleQueue,
434    /// Minimum uncertainty to queue
435    min_uncertainty: f64,
436}
437
438impl Default for ActiveLearner {
439    fn default() -> Self {
440        Self::new()
441    }
442}
443
444impl ActiveLearner {
445    /// Create a new active learner.
446    pub fn new() -> Self {
447        Self {
448            calculator: UncertaintyCalculator::new(),
449            queue: SampleQueue::new(SelectionStrategy::UncertaintySampling),
450            min_uncertainty: 0.35,
451        }
452    }
453
454    /// Create with custom strategy.
455    pub fn with_strategy(strategy: SelectionStrategy) -> Self {
456        Self {
457            calculator: UncertaintyCalculator::new(),
458            queue: SampleQueue::new(strategy),
459            min_uncertainty: 0.35,
460        }
461    }
462
463    /// Set minimum uncertainty threshold.
464    pub fn with_min_uncertainty(mut self, threshold: f64) -> Self {
465        self.min_uncertainty = threshold.clamp(0.0, 1.0);
466        self
467    }
468
469    /// Set confidence threshold.
470    pub fn with_confidence_threshold(mut self, threshold: f64) -> Self {
471        self.calculator = UncertaintyCalculator::with_confidence_threshold(threshold);
472        self
473    }
474
475    /// Process a prediction and optionally queue for labeling.
476    ///
477    /// Returns the uncertainty score and whether it was queued.
478    pub fn process_prediction(
479        &mut self,
480        variable: impl Into<String>,
481        features: OwnershipFeatures,
482        prediction: OwnershipPrediction,
483    ) -> (f64, bool) {
484        let strategy = self.queue.strategy();
485        let uncertainty = self.calculator.calculate(&prediction, strategy);
486
487        let queued = if uncertainty >= self.min_uncertainty {
488            let sample =
489                UncertainSample::new(0, variable, features, prediction, uncertainty, strategy);
490            self.queue.enqueue(sample);
491            true
492        } else {
493            false
494        };
495
496        (uncertainty, queued)
497    }
498
499    /// Get next sample for labeling.
500    pub fn next_for_labeling(&mut self) -> Option<UncertainSample> {
501        self.queue.next_for_labeling()
502    }
503
504    /// Get batch for labeling.
505    pub fn batch_for_labeling(&mut self, n: usize) -> Vec<UncertainSample> {
506        self.queue.batch_for_labeling(n)
507    }
508
509    /// Submit a labeled sample.
510    pub fn submit_labeled(&mut self, sample: UncertainSample) {
511        self.queue.submit_labeled(sample);
512    }
513
514    /// Get labeled samples for training.
515    pub fn get_training_samples(&self) -> &[UncertainSample] {
516        self.queue.get_labeled_samples()
517    }
518
519    /// Take labeled samples for training.
520    pub fn take_training_samples(&mut self) -> Vec<UncertainSample> {
521        self.queue.take_labeled_samples()
522    }
523
524    /// Get queue statistics.
525    pub fn stats(&self) -> QueueStats {
526        self.queue.stats()
527    }
528
529    /// Check if prediction is uncertain.
530    pub fn is_uncertain(&self, prediction: &OwnershipPrediction) -> bool {
531        self.calculator.is_uncertain(prediction)
532    }
533
534    /// Generate markdown report.
535    pub fn to_markdown(&self) -> String {
536        let stats = self.stats();
537
538        format!(
539            r#"## Active Learning Report
540
541### Queue Status
542
543| Metric | Value |
544|--------|-------|
545| Strategy | {} |
546| Pending Samples | {} |
547| Labeled Samples | {} |
548| Total Processed | {} |
549| Avg Uncertainty | {:.2} |
550| Prediction Accuracy | {:.1}% |
551
552### Configuration
553
554| Parameter | Value |
555|-----------|-------|
556| Min Uncertainty | {:.2} |
557| Confidence Threshold | {:.2} |
558"#,
559            self.queue.strategy(),
560            stats.pending,
561            stats.labeled,
562            stats.total_processed,
563            stats.avg_uncertainty,
564            stats.prediction_accuracy * 100.0,
565            self.min_uncertainty,
566            self.calculator.confidence_threshold,
567        )
568    }
569}
570
571#[cfg(test)]
572mod tests {
573    use super::*;
574
575    fn make_prediction(kind: InferredOwnership, confidence: f32) -> OwnershipPrediction {
576        OwnershipPrediction {
577            kind,
578            confidence,
579            fallback: None,
580        }
581    }
582
583    // ========================================================================
584    // SelectionStrategy tests
585    // ========================================================================
586
587    #[test]
588    fn selection_strategy_display() {
589        assert_eq!(
590            SelectionStrategy::UncertaintySampling.to_string(),
591            "uncertainty"
592        );
593        assert_eq!(SelectionStrategy::MarginSampling.to_string(), "margin");
594        assert_eq!(SelectionStrategy::EntropySampling.to_string(), "entropy");
595        assert_eq!(SelectionStrategy::Random.to_string(), "random");
596    }
597
598    // ========================================================================
599    // UncertaintyCalculator tests
600    // ========================================================================
601
602    #[test]
603    fn uncertainty_calculator_default() {
604        let calc = UncertaintyCalculator::new();
605        assert!((calc.confidence_threshold - 0.65).abs() < 0.001);
606    }
607
608    #[test]
609    fn uncertainty_sampling_high_confidence() {
610        let calc = UncertaintyCalculator::new();
611        let pred = make_prediction(InferredOwnership::Owned, 0.95);
612
613        let score = calc.calculate(&pred, SelectionStrategy::UncertaintySampling);
614        assert!((score - 0.05).abs() < 0.001); // 1 - 0.95 = 0.05
615    }
616
617    #[test]
618    fn uncertainty_sampling_low_confidence() {
619        let calc = UncertaintyCalculator::new();
620        let pred = make_prediction(InferredOwnership::Owned, 0.3);
621
622        let score = calc.calculate(&pred, SelectionStrategy::UncertaintySampling);
623        assert!((score - 0.7).abs() < 0.001); // 1 - 0.3 = 0.7
624    }
625
626    #[test]
627    fn entropy_sampling_mid_confidence() {
628        let calc = UncertaintyCalculator::new();
629        let pred = make_prediction(InferredOwnership::Owned, 0.5);
630
631        let score = calc.calculate(&pred, SelectionStrategy::EntropySampling);
632        // At p=0.5, entropy is maximum (1.0)
633        assert!((score - 1.0).abs() < 0.001);
634    }
635
636    #[test]
637    fn entropy_sampling_high_confidence() {
638        let calc = UncertaintyCalculator::new();
639        let pred = make_prediction(InferredOwnership::Owned, 0.95);
640
641        let score = calc.calculate(&pred, SelectionStrategy::EntropySampling);
642        // High confidence = low entropy
643        assert!(score < 0.5);
644    }
645
646    #[test]
647    fn is_uncertain_below_threshold() {
648        let calc = UncertaintyCalculator::new(); // threshold = 0.65
649        let pred = make_prediction(InferredOwnership::Owned, 0.5);
650        assert!(calc.is_uncertain(&pred));
651    }
652
653    #[test]
654    fn is_uncertain_above_threshold() {
655        let calc = UncertaintyCalculator::new();
656        let pred = make_prediction(InferredOwnership::Owned, 0.8);
657        assert!(!calc.is_uncertain(&pred));
658    }
659
660    // ========================================================================
661    // UncertainSample tests
662    // ========================================================================
663
664    #[test]
665    fn uncertain_sample_new() {
666        let features = OwnershipFeatures::default();
667        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
668        let sample = UncertainSample::new(
669            1,
670            "ptr",
671            features,
672            pred,
673            0.6,
674            SelectionStrategy::UncertaintySampling,
675        );
676
677        assert_eq!(sample.id, 1);
678        assert_eq!(sample.variable, "ptr");
679        assert!((sample.uncertainty_score - 0.6).abs() < 0.001);
680        assert!(!sample.is_labeled());
681    }
682
683    #[test]
684    fn uncertain_sample_apply_label() {
685        let features = OwnershipFeatures::default();
686        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
687        let mut sample = UncertainSample::new(
688            1,
689            "ptr",
690            features,
691            pred,
692            0.6,
693            SelectionStrategy::UncertaintySampling,
694        );
695
696        sample.apply_label(InferredOwnership::Owned);
697
698        assert!(sample.is_labeled());
699        assert_eq!(sample.label, Some(InferredOwnership::Owned));
700        assert!(sample.labeled_at.is_some());
701    }
702
703    #[test]
704    fn uncertain_sample_prediction_correct() {
705        let features = OwnershipFeatures::default();
706        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
707        let mut sample = UncertainSample::new(
708            1,
709            "ptr",
710            features,
711            pred,
712            0.6,
713            SelectionStrategy::UncertaintySampling,
714        );
715
716        // Before labeling
717        assert!(sample.prediction_correct().is_none());
718
719        // Label matches prediction
720        sample.apply_label(InferredOwnership::Borrowed);
721        assert_eq!(sample.prediction_correct(), Some(true));
722    }
723
724    #[test]
725    fn uncertain_sample_prediction_incorrect() {
726        let features = OwnershipFeatures::default();
727        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
728        let mut sample = UncertainSample::new(
729            1,
730            "ptr",
731            features,
732            pred,
733            0.6,
734            SelectionStrategy::UncertaintySampling,
735        );
736
737        // Label doesn't match prediction
738        sample.apply_label(InferredOwnership::Owned);
739        assert_eq!(sample.prediction_correct(), Some(false));
740    }
741
742    // ========================================================================
743    // SampleQueue tests
744    // ========================================================================
745
746    #[test]
747    fn sample_queue_new() {
748        let queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
749        assert_eq!(queue.pending_count(), 0);
750        assert_eq!(queue.labeled_count(), 0);
751    }
752
753    #[test]
754    fn sample_queue_enqueue() {
755        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
756
757        let sample = UncertainSample::new(
758            0,
759            "ptr",
760            OwnershipFeatures::default(),
761            make_prediction(InferredOwnership::Borrowed, 0.4),
762            0.6,
763            SelectionStrategy::UncertaintySampling,
764        );
765
766        let id = queue.enqueue(sample);
767        assert_eq!(id, 1);
768        assert_eq!(queue.pending_count(), 1);
769    }
770
771    #[test]
772    fn sample_queue_priority_order() {
773        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
774
775        // Enqueue in wrong order
776        for uncertainty in [0.3, 0.9, 0.5] {
777            let sample = UncertainSample::new(
778                0,
779                "ptr",
780                OwnershipFeatures::default(),
781                make_prediction(InferredOwnership::Borrowed, 0.4),
782                uncertainty,
783                SelectionStrategy::UncertaintySampling,
784            );
785            queue.enqueue(sample);
786        }
787
788        // Should get highest uncertainty first
789        let first = queue.next_for_labeling().unwrap();
790        assert!((first.uncertainty_score - 0.9).abs() < 0.001);
791
792        let second = queue.next_for_labeling().unwrap();
793        assert!((second.uncertainty_score - 0.5).abs() < 0.001);
794    }
795
796    #[test]
797    fn sample_queue_max_pending() {
798        let mut queue =
799            SampleQueue::new(SelectionStrategy::UncertaintySampling).with_max_pending(3);
800
801        // Enqueue 5 samples
802        for i in 0..5 {
803            let sample = UncertainSample::new(
804                0,
805                format!("ptr{}", i),
806                OwnershipFeatures::default(),
807                make_prediction(InferredOwnership::Borrowed, 0.4),
808                (i as f64 + 1.0) * 0.1,
809                SelectionStrategy::UncertaintySampling,
810            );
811            queue.enqueue(sample);
812        }
813
814        // Should only have top 3 by uncertainty
815        assert_eq!(queue.pending_count(), 3);
816    }
817
818    #[test]
819    fn sample_queue_batch_labeling() {
820        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
821
822        for i in 0..5 {
823            let sample = UncertainSample::new(
824                0,
825                format!("ptr{}", i),
826                OwnershipFeatures::default(),
827                make_prediction(InferredOwnership::Borrowed, 0.4),
828                0.5,
829                SelectionStrategy::UncertaintySampling,
830            );
831            queue.enqueue(sample);
832        }
833
834        let batch = queue.batch_for_labeling(3);
835        assert_eq!(batch.len(), 3);
836        assert_eq!(queue.pending_count(), 2);
837    }
838
839    #[test]
840    fn sample_queue_submit_labeled() {
841        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
842
843        let mut sample = UncertainSample::new(
844            1,
845            "ptr",
846            OwnershipFeatures::default(),
847            make_prediction(InferredOwnership::Borrowed, 0.4),
848            0.6,
849            SelectionStrategy::UncertaintySampling,
850        );
851        sample.apply_label(InferredOwnership::Owned);
852
853        queue.submit_labeled(sample);
854        assert_eq!(queue.labeled_count(), 1);
855    }
856
857    #[test]
858    fn sample_queue_stats() {
859        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
860
861        // Add pending
862        let sample1 = UncertainSample::new(
863            0,
864            "ptr1",
865            OwnershipFeatures::default(),
866            make_prediction(InferredOwnership::Borrowed, 0.4),
867            0.6,
868            SelectionStrategy::UncertaintySampling,
869        );
870        queue.enqueue(sample1);
871
872        // Add labeled (correct prediction)
873        let mut sample2 = UncertainSample::new(
874            0,
875            "ptr2",
876            OwnershipFeatures::default(),
877            make_prediction(InferredOwnership::Borrowed, 0.4),
878            0.5,
879            SelectionStrategy::UncertaintySampling,
880        );
881        sample2.apply_label(InferredOwnership::Borrowed);
882        queue.submit_labeled(sample2);
883
884        let stats = queue.stats();
885        assert_eq!(stats.pending, 1);
886        assert_eq!(stats.labeled, 1);
887        assert!((stats.prediction_accuracy - 1.0).abs() < 0.001); // 100% accurate
888    }
889
890    // ========================================================================
891    // ActiveLearner tests
892    // ========================================================================
893
894    #[test]
895    fn active_learner_new() {
896        let learner = ActiveLearner::new();
897        let stats = learner.stats();
898        assert_eq!(stats.pending, 0);
899        assert_eq!(stats.labeled, 0);
900    }
901
902    #[test]
903    fn active_learner_process_uncertain() {
904        let mut learner = ActiveLearner::new().with_min_uncertainty(0.3);
905
906        let features = OwnershipFeatures::default();
907        let pred = make_prediction(InferredOwnership::Borrowed, 0.4); // uncertainty = 0.6
908
909        let (uncertainty, queued) = learner.process_prediction("ptr", features, pred);
910
911        assert!((uncertainty - 0.6).abs() < 0.001);
912        assert!(queued);
913        assert_eq!(learner.stats().pending, 1);
914    }
915
916    #[test]
917    fn active_learner_process_certain() {
918        let mut learner = ActiveLearner::new().with_min_uncertainty(0.3);
919
920        let features = OwnershipFeatures::default();
921        let pred = make_prediction(InferredOwnership::Owned, 0.95); // uncertainty = 0.05
922
923        let (uncertainty, queued) = learner.process_prediction("ptr", features, pred);
924
925        assert!((uncertainty - 0.05).abs() < 0.001);
926        assert!(!queued); // Not uncertain enough
927        assert_eq!(learner.stats().pending, 0);
928    }
929
930    #[test]
931    fn active_learner_labeling_workflow() {
932        let mut learner = ActiveLearner::new().with_min_uncertainty(0.3);
933
934        // Process uncertain prediction
935        let features = OwnershipFeatures::default();
936        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
937        learner.process_prediction("ptr", features, pred);
938
939        // Get for labeling
940        let mut sample = learner.next_for_labeling().unwrap();
941        assert_eq!(sample.variable, "ptr");
942
943        // Apply label
944        sample.apply_label(InferredOwnership::Owned);
945        learner.submit_labeled(sample);
946
947        // Check training samples
948        let training = learner.get_training_samples();
949        assert_eq!(training.len(), 1);
950        assert_eq!(training[0].label, Some(InferredOwnership::Owned));
951    }
952
953    #[test]
954    fn active_learner_to_markdown() {
955        let learner = ActiveLearner::new();
956        let md = learner.to_markdown();
957
958        assert!(md.contains("Active Learning Report"));
959        assert!(md.contains("Queue Status"));
960        assert!(md.contains("Strategy"));
961    }
962
963    #[test]
964    fn active_learner_with_strategy() {
965        let learner = ActiveLearner::with_strategy(SelectionStrategy::EntropySampling);
966        assert_eq!(learner.queue.strategy(), SelectionStrategy::EntropySampling);
967    }
968
969    #[test]
970    fn active_learner_batch_labeling() {
971        let mut learner = ActiveLearner::new().with_min_uncertainty(0.2);
972
973        // Process multiple uncertain predictions
974        for i in 0..5 {
975            let features = OwnershipFeatures::default();
976            let pred = make_prediction(InferredOwnership::Borrowed, 0.3 + (i as f32 * 0.05));
977            learner.process_prediction(format!("ptr{}", i), features, pred);
978        }
979
980        let batch = learner.batch_for_labeling(3);
981        assert_eq!(batch.len(), 3);
982    }
983
984    // ========================================================================
985    // Additional coverage: margin sampling with fallback
986    // ========================================================================
987
988    #[test]
989    fn margin_sampling_with_fallback() {
990        let calc = UncertaintyCalculator::new();
991        let pred = OwnershipPrediction {
992            kind: InferredOwnership::Borrowed,
993            confidence: 0.6,
994            fallback: Some(InferredOwnership::Owned),
995        };
996
997        let score = calc.calculate(&pred, SelectionStrategy::MarginSampling);
998        // With fallback: margin = 0.6 - 0.6*0.8 = 0.12, score = 1 - 0.12 = 0.88
999        assert!(score > 0.5);
1000    }
1001
1002    #[test]
1003    fn margin_sampling_without_fallback() {
1004        let calc = UncertaintyCalculator::new();
1005        let pred = make_prediction(InferredOwnership::Borrowed, 0.6);
1006
1007        let score = calc.calculate(&pred, SelectionStrategy::MarginSampling);
1008        // Without fallback: falls through to uncertainty sampling: 1 - 0.6 = 0.4
1009        assert!((score - 0.4).abs() < 0.001);
1010    }
1011
1012    // ========================================================================
1013    // Additional coverage: entropy boundary values
1014    // ========================================================================
1015
1016    #[test]
1017    fn entropy_at_zero_confidence() {
1018        let calc = UncertaintyCalculator::new();
1019        let pred = make_prediction(InferredOwnership::Owned, 0.0);
1020
1021        let score = calc.calculate(&pred, SelectionStrategy::EntropySampling);
1022        assert!((score - 0.0).abs() < 0.001); // p <= 0 returns 0
1023    }
1024
1025    #[test]
1026    fn entropy_at_one_confidence() {
1027        let calc = UncertaintyCalculator::new();
1028        let pred = make_prediction(InferredOwnership::Owned, 1.0);
1029
1030        let score = calc.calculate(&pred, SelectionStrategy::EntropySampling);
1031        assert!((score - 0.0).abs() < 0.001); // p >= 1 returns 0
1032    }
1033
1034    // ========================================================================
1035    // Additional coverage: random sampling all InferredOwnership variants
1036    // ========================================================================
1037
1038    #[test]
1039    fn random_sampling_all_ownership_kinds() {
1040        let calc = UncertaintyCalculator::new();
1041        let kinds = vec![
1042            InferredOwnership::Owned,
1043            InferredOwnership::Borrowed,
1044            InferredOwnership::BorrowedMut,
1045            InferredOwnership::Shared,
1046            InferredOwnership::RawPointer,
1047            InferredOwnership::Vec,
1048            InferredOwnership::Slice,
1049            InferredOwnership::SliceMut,
1050        ];
1051
1052        for kind in kinds {
1053            let pred = make_prediction(kind, 0.5);
1054            let score = calc.calculate(&pred, SelectionStrategy::Random);
1055            assert!(score >= 0.0 && score <= 1.0);
1056        }
1057    }
1058
1059    // ========================================================================
1060    // Additional coverage: UncertainSample with_source
1061    // ========================================================================
1062
1063    #[test]
1064    fn uncertain_sample_with_source() {
1065        let features = OwnershipFeatures::default();
1066        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
1067        let sample = UncertainSample::new(
1068            1,
1069            "ptr",
1070            features,
1071            pred,
1072            0.6,
1073            SelectionStrategy::UncertaintySampling,
1074        )
1075        .with_source("test.c", 42);
1076
1077        assert_eq!(sample.source_file, Some("test.c".to_string()));
1078        assert_eq!(sample.source_line, Some(42));
1079    }
1080
1081    // ========================================================================
1082    // Additional coverage: SampleQueue edge cases
1083    // ========================================================================
1084
1085    #[test]
1086    fn sample_queue_peek_next() {
1087        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1088
1089        assert!(queue.peek_next().is_none());
1090
1091        let sample = UncertainSample::new(
1092            0,
1093            "ptr",
1094            OwnershipFeatures::default(),
1095            make_prediction(InferredOwnership::Borrowed, 0.4),
1096            0.6,
1097            SelectionStrategy::UncertaintySampling,
1098        );
1099        queue.enqueue(sample);
1100
1101        assert!(queue.peek_next().is_some());
1102        assert_eq!(queue.pending_count(), 1); // Not consumed
1103    }
1104
1105    #[test]
1106    fn sample_queue_clear_pending() {
1107        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1108
1109        for i in 0..3 {
1110            let sample = UncertainSample::new(
1111                0,
1112                format!("ptr{}", i),
1113                OwnershipFeatures::default(),
1114                make_prediction(InferredOwnership::Borrowed, 0.4),
1115                0.5,
1116                SelectionStrategy::UncertaintySampling,
1117            );
1118            queue.enqueue(sample);
1119        }
1120
1121        assert_eq!(queue.pending_count(), 3);
1122        queue.clear_pending();
1123        assert_eq!(queue.pending_count(), 0);
1124    }
1125
1126    #[test]
1127    fn sample_queue_take_labeled() {
1128        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1129
1130        let mut sample = UncertainSample::new(
1131            1,
1132            "ptr",
1133            OwnershipFeatures::default(),
1134            make_prediction(InferredOwnership::Borrowed, 0.4),
1135            0.6,
1136            SelectionStrategy::UncertaintySampling,
1137        );
1138        sample.apply_label(InferredOwnership::Owned);
1139        queue.submit_labeled(sample);
1140
1141        assert_eq!(queue.labeled_count(), 1);
1142        let taken = queue.take_labeled_samples();
1143        assert_eq!(taken.len(), 1);
1144        assert_eq!(queue.labeled_count(), 0);
1145    }
1146
1147    #[test]
1148    fn sample_queue_submit_unlabeled_rejected() {
1149        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1150
1151        let sample = UncertainSample::new(
1152            1,
1153            "ptr",
1154            OwnershipFeatures::default(),
1155            make_prediction(InferredOwnership::Borrowed, 0.4),
1156            0.6,
1157            SelectionStrategy::UncertaintySampling,
1158        );
1159        // Don't label it
1160        queue.submit_labeled(sample);
1161        assert_eq!(queue.labeled_count(), 0); // Rejected because unlabeled
1162    }
1163
1164    #[test]
1165    fn sample_queue_batch_from_empty() {
1166        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1167        let batch = queue.batch_for_labeling(5);
1168        assert!(batch.is_empty());
1169    }
1170
1171    #[test]
1172    fn sample_queue_batch_partial() {
1173        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1174
1175        for i in 0..2 {
1176            let sample = UncertainSample::new(
1177                0,
1178                format!("ptr{}", i),
1179                OwnershipFeatures::default(),
1180                make_prediction(InferredOwnership::Borrowed, 0.4),
1181                0.5,
1182                SelectionStrategy::UncertaintySampling,
1183            );
1184            queue.enqueue(sample);
1185        }
1186
1187        let batch = queue.batch_for_labeling(5);
1188        assert_eq!(batch.len(), 2); // Only 2 available
1189    }
1190
1191    #[test]
1192    fn sample_queue_total_processed() {
1193        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1194        assert_eq!(queue.total_processed(), 0);
1195
1196        for i in 0..3 {
1197            let sample = UncertainSample::new(
1198                0,
1199                format!("ptr{}", i),
1200                OwnershipFeatures::default(),
1201                make_prediction(InferredOwnership::Borrowed, 0.4),
1202                0.5,
1203                SelectionStrategy::UncertaintySampling,
1204            );
1205            queue.enqueue(sample);
1206        }
1207        assert_eq!(queue.total_processed(), 3);
1208    }
1209
1210    #[test]
1211    fn sample_queue_get_labeled_samples() {
1212        let queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1213        assert!(queue.get_labeled_samples().is_empty());
1214    }
1215
1216    // ========================================================================
1217    // Additional coverage: SampleQueue overflow (insert_pos >= max_pending)
1218    // ========================================================================
1219
1220    #[test]
1221    fn sample_queue_overflow_low_priority_dropped() {
1222        let mut queue =
1223            SampleQueue::new(SelectionStrategy::UncertaintySampling).with_max_pending(2);
1224
1225        // Add two high uncertainty samples
1226        for u in [0.9, 0.8] {
1227            let sample = UncertainSample::new(
1228                0,
1229                "ptr",
1230                OwnershipFeatures::default(),
1231                make_prediction(InferredOwnership::Borrowed, 0.4),
1232                u,
1233                SelectionStrategy::UncertaintySampling,
1234            );
1235            queue.enqueue(sample);
1236        }
1237
1238        // Add very low uncertainty sample - should not be inserted
1239        let low = UncertainSample::new(
1240            0,
1241            "low",
1242            OwnershipFeatures::default(),
1243            make_prediction(InferredOwnership::Borrowed, 0.4),
1244            0.01,
1245            SelectionStrategy::UncertaintySampling,
1246        );
1247        queue.enqueue(low);
1248
1249        assert_eq!(queue.pending_count(), 2);
1250        // The low priority sample should NOT be in the queue
1251        let first = queue.peek_next().unwrap();
1252        assert!((first.uncertainty_score - 0.9).abs() < 0.01);
1253    }
1254
1255    // ========================================================================
1256    // Additional coverage: ActiveLearner with_confidence_threshold
1257    // ========================================================================
1258
1259    #[test]
1260    fn active_learner_with_confidence_threshold() {
1261        let learner = ActiveLearner::new().with_confidence_threshold(0.8);
1262        let high = make_prediction(InferredOwnership::Owned, 0.75);
1263        assert!(learner.is_uncertain(&high)); // Below 0.8
1264
1265        let very_high = make_prediction(InferredOwnership::Owned, 0.85);
1266        assert!(!learner.is_uncertain(&very_high)); // Above 0.8
1267    }
1268
1269    #[test]
1270    fn active_learner_take_training_samples() {
1271        let mut learner = ActiveLearner::new().with_min_uncertainty(0.2);
1272
1273        let features = OwnershipFeatures::default();
1274        let pred = make_prediction(InferredOwnership::Borrowed, 0.4);
1275        learner.process_prediction("ptr", features, pred);
1276
1277        let mut sample = learner.next_for_labeling().unwrap();
1278        sample.apply_label(InferredOwnership::Owned);
1279        learner.submit_labeled(sample);
1280
1281        let training = learner.take_training_samples();
1282        assert_eq!(training.len(), 1);
1283        assert!(learner.get_training_samples().is_empty()); // Moved out
1284    }
1285
1286    // ========================================================================
1287    // Additional coverage: SampleQueue stats with no labeled
1288    // ========================================================================
1289
1290    #[test]
1291    fn sample_queue_stats_empty() {
1292        let queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1293        let stats = queue.stats();
1294        assert_eq!(stats.pending, 0);
1295        assert_eq!(stats.labeled, 0);
1296        assert_eq!(stats.total_processed, 0);
1297        assert!((stats.avg_uncertainty - 0.0).abs() < 0.001);
1298        assert!((stats.prediction_accuracy - 0.0).abs() < 0.001);
1299    }
1300
1301    #[test]
1302    fn sample_queue_stats_with_incorrect_prediction() {
1303        let mut queue = SampleQueue::new(SelectionStrategy::UncertaintySampling);
1304
1305        let mut sample = UncertainSample::new(
1306            0,
1307            "ptr",
1308            OwnershipFeatures::default(),
1309            make_prediction(InferredOwnership::Borrowed, 0.4),
1310            0.6,
1311            SelectionStrategy::UncertaintySampling,
1312        );
1313        sample.apply_label(InferredOwnership::Owned); // Different from prediction
1314        queue.submit_labeled(sample);
1315
1316        let stats = queue.stats();
1317        assert!((stats.prediction_accuracy - 0.0).abs() < 0.001); // 0% accurate
1318    }
1319
1320    // ========================================================================
1321    // Additional coverage: UncertaintyCalculator custom threshold
1322    // ========================================================================
1323
1324    #[test]
1325    fn uncertainty_calculator_custom_threshold_clamp() {
1326        let calc = UncertaintyCalculator::with_confidence_threshold(2.0);
1327        assert!((calc.confidence_threshold - 1.0).abs() < 0.001);
1328
1329        let calc2 = UncertaintyCalculator::with_confidence_threshold(-1.0);
1330        assert!((calc2.confidence_threshold - 0.0).abs() < 0.001);
1331    }
1332
1333    // ========================================================================
1334    // Additional coverage: SampleQueue default
1335    // ========================================================================
1336
1337    #[test]
1338    fn sample_queue_default() {
1339        let queue = SampleQueue::default();
1340        assert_eq!(queue.strategy(), SelectionStrategy::UncertaintySampling);
1341    }
1342
1343    #[test]
1344    fn active_learner_default() {
1345        let learner = ActiveLearner::default();
1346        let stats = learner.stats();
1347        assert_eq!(stats.pending, 0);
1348    }
1349
1350    // ========================================================================
1351    // Additional coverage: uncertain sample uncertainty clamp
1352    // ========================================================================
1353
1354    #[test]
1355    fn uncertain_sample_clamp_high() {
1356        let sample = UncertainSample::new(
1357            1,
1358            "ptr",
1359            OwnershipFeatures::default(),
1360            make_prediction(InferredOwnership::Borrowed, 0.4),
1361            1.5, // Over 1.0
1362            SelectionStrategy::UncertaintySampling,
1363        );
1364        assert!((sample.uncertainty_score - 1.0).abs() < 0.001);
1365    }
1366
1367    #[test]
1368    fn uncertain_sample_clamp_low() {
1369        let sample = UncertainSample::new(
1370            1,
1371            "ptr",
1372            OwnershipFeatures::default(),
1373            make_prediction(InferredOwnership::Borrowed, 0.4),
1374            -0.5, // Below 0.0
1375            SelectionStrategy::UncertaintySampling,
1376        );
1377        assert!((sample.uncertainty_score - 0.0).abs() < 0.001);
1378    }
1379
1380    // ========================================================================
1381    // Additional coverage: ActiveLearner with_min_uncertainty clamp
1382    // ========================================================================
1383
1384    #[test]
1385    fn active_learner_min_uncertainty_clamp() {
1386        let learner = ActiveLearner::new().with_min_uncertainty(2.0);
1387        // Should be clamped to 1.0
1388        let features = OwnershipFeatures::default();
1389        let pred = make_prediction(InferredOwnership::Borrowed, 0.01);
1390        let mut learner2 = learner;
1391        let (_, queued) = learner2.process_prediction("ptr", features, pred);
1392        // 1.0 - 0.01 = 0.99 < 1.0 (min_uncertainty), so NOT queued
1393        assert!(!queued);
1394    }
1395
1396    // ========================================================================
1397    // Additional coverage: process_prediction with MarginSampling strategy
1398    // ========================================================================
1399
1400    #[test]
1401    fn active_learner_process_with_margin_strategy() {
1402        let mut learner =
1403            ActiveLearner::with_strategy(SelectionStrategy::MarginSampling).with_min_uncertainty(0.2);
1404
1405        let features = OwnershipFeatures::default();
1406        let pred = OwnershipPrediction {
1407            kind: InferredOwnership::Borrowed,
1408            confidence: 0.4,
1409            fallback: Some(InferredOwnership::Owned),
1410        };
1411        let (_, queued) = learner.process_prediction("ptr", features, pred);
1412        assert!(queued);
1413    }
1414
1415    #[test]
1416    fn active_learner_process_with_entropy_strategy() {
1417        let mut learner =
1418            ActiveLearner::with_strategy(SelectionStrategy::EntropySampling).with_min_uncertainty(0.2);
1419
1420        let features = OwnershipFeatures::default();
1421        let pred = make_prediction(InferredOwnership::Borrowed, 0.5);
1422        let (uncertainty, queued) = learner.process_prediction("ptr", features, pred);
1423        assert!((uncertainty - 1.0).abs() < 0.001); // max entropy at 0.5
1424        assert!(queued);
1425    }
1426
1427    #[test]
1428    fn active_learner_process_with_random_strategy() {
1429        let mut learner =
1430            ActiveLearner::with_strategy(SelectionStrategy::Random).with_min_uncertainty(0.0);
1431
1432        let features = OwnershipFeatures::default();
1433        let pred = make_prediction(InferredOwnership::Borrowed, 0.5);
1434        let (uncertainty, queued) = learner.process_prediction("ptr", features, pred);
1435        assert!(uncertainty >= 0.0 && uncertainty <= 1.0);
1436        assert!(queued);
1437    }
1438}
decy_ownership/active_learning.rs

decy_ownership/
active_learning.rs