entrenar/integrity/behavioral/
metrics.rs

1//! Behavioral integrity metrics
2//!
3//! Core metrics tracking for model promotion gates including
4//! equivalence scoring, syscall matching, timing variance, and semantic equivalence.
5
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8
9use super::assessment::IntegrityAssessment;
10use super::counts::ViolationCounts;
11use super::violation::{MetamorphicRelationType, MetamorphicViolation};
12
/// Behavioral integrity metrics for model promotion gates
///
/// Tracks multiple dimensions of behavioral consistency to determine
/// if a model is ready for promotion to production.
///
/// The four score fields are clamped to `0.0..=1.0` by [`BehavioralIntegrity::new`].
/// All fields are public, so values assigned directly after construction are
/// not clamped.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BehavioralIntegrity {
    /// Overall equivalence score (0.0 - 1.0, higher = better)
    /// Measures how well the model's behavior matches expected patterns.
    /// Contributes with weight 0.3 to the composite score.
    pub equivalence_score: f64,

    /// Syscall pattern match score (0.0 - 1.0, higher = better)
    /// Measures consistency of system call patterns during inference.
    /// Contributes with weight 0.2 to the composite score.
    pub syscall_match: f64,

    /// Timing variance score (0.0 - 1.0, lower = more consistent)
    /// Measures consistency of inference timing.
    /// Inverted (`1.0 - timing_variance`) before being weighted 0.2 in the
    /// composite score; the promotion gate additionally requires it to be < 0.2.
    pub timing_variance: f64,

    /// Semantic equivalence score (0.0 - 1.0, higher = better)
    /// Measures semantic consistency of model outputs.
    /// Contributes with weight 0.3 to the composite score.
    pub semantic_equiv: f64,

    /// List of metamorphic violations detected
    /// Starts empty in `new`; entries are appended with `add_violation`.
    pub violations: Vec<MetamorphicViolation>,

    /// Timestamp when metrics were collected
    /// Set to the current UTC time when the value is built with `new`.
    pub timestamp: DateTime<Utc>,

    /// Number of test cases evaluated
    /// Initialized to 0 by `new`; set with `with_test_count`.
    pub test_count: u32,

    /// Model version or identifier being evaluated
    pub model_id: String,
}
47
48impl BehavioralIntegrity {
49    /// Create new behavioral integrity metrics
50    pub fn new(
51        equivalence_score: f64,
52        syscall_match: f64,
53        timing_variance: f64,
54        semantic_equiv: f64,
55        model_id: impl Into<String>,
56    ) -> Self {
57        Self {
58            equivalence_score: equivalence_score.clamp(0.0, 1.0),
59            syscall_match: syscall_match.clamp(0.0, 1.0),
60            timing_variance: timing_variance.clamp(0.0, 1.0),
61            semantic_equiv: semantic_equiv.clamp(0.0, 1.0),
62            violations: Vec::new(),
63            timestamp: Utc::now(),
64            test_count: 0,
65            model_id: model_id.into(),
66        }
67    }
68
69    /// Create perfect behavioral integrity (all scores = 1.0, variance = 0.0)
70    pub fn perfect(model_id: impl Into<String>) -> Self {
71        Self::new(1.0, 1.0, 0.0, 1.0, model_id)
72    }
73
74    /// Add a metamorphic violation
75    pub fn add_violation(&mut self, violation: MetamorphicViolation) {
76        self.violations.push(violation);
77    }
78
79    /// Set the test count
80    pub fn with_test_count(mut self, count: u32) -> Self {
81        self.test_count = count;
82        self
83    }
84
85    /// Calculate the composite integrity score
86    ///
87    /// Weighted average of all metrics (timing variance is inverted)
88    pub fn composite_score(&self) -> f64 {
89        // Weights for each metric
90        const W_EQUIV: f64 = 0.3;
91        const W_SYSCALL: f64 = 0.2;
92        const W_TIMING: f64 = 0.2;
93        const W_SEMANTIC: f64 = 0.3;
94
95        let timing_score = 1.0 - self.timing_variance; // Invert: lower variance = better
96
97        W_EQUIV * self.equivalence_score
98            + W_SYSCALL * self.syscall_match
99            + W_TIMING * timing_score
100            + W_SEMANTIC * self.semantic_equiv
101    }
102
103    /// Check if the model passes promotion gate
104    ///
105    /// Requires:
106    /// - Composite score >= threshold (default 0.9)
107    /// - No critical violations
108    /// - Timing variance < 0.2
109    pub fn passes_gate(&self, threshold: f64) -> bool {
110        self.composite_score() >= threshold
111            && !self.has_critical_violations()
112            && self.timing_variance < 0.2
113    }
114
115    /// Check if there are any critical violations
116    pub fn has_critical_violations(&self) -> bool {
117        self.violations.iter().any(MetamorphicViolation::is_critical)
118    }
119
120    /// Get count of violations by severity level
121    pub fn violation_counts(&self) -> ViolationCounts {
122        let critical = self.violations.iter().filter(|v| v.is_critical()).count() as u32;
123        let warnings =
124            self.violations.iter().filter(|v| v.is_warning() && !v.is_critical()).count() as u32;
125        let minor = self.violations.iter().filter(|v| !v.is_warning()).count() as u32;
126
127        ViolationCounts { critical, warnings, minor, total: self.violations.len() as u32 }
128    }
129
130    /// Get violations grouped by relation type
131    pub fn violations_by_type(
132        &self,
133    ) -> std::collections::HashMap<MetamorphicRelationType, Vec<&MetamorphicViolation>> {
134        let mut map = std::collections::HashMap::new();
135        for v in &self.violations {
136            map.entry(v.relation_type).or_insert_with(Vec::new).push(v);
137        }
138        map
139    }
140
141    /// Get the most severe violation, if any
142    pub fn most_severe_violation(&self) -> Option<&MetamorphicViolation> {
143        self.violations
144            .iter()
145            .max_by(|a, b| a.severity.partial_cmp(&b.severity).unwrap_or(std::cmp::Ordering::Equal))
146    }
147
148    /// Get a human-readable assessment
149    pub fn assessment(&self) -> IntegrityAssessment {
150        let score = self.composite_score();
151        let counts = self.violation_counts();
152
153        if counts.critical > 0 {
154            IntegrityAssessment::Critical
155        } else if score < 0.5 {
156            IntegrityAssessment::Poor
157        } else if score < 0.7 {
158            IntegrityAssessment::Fair
159        } else if score < 0.9 {
160            IntegrityAssessment::Good
161        } else {
162            IntegrityAssessment::Excellent
163        }
164    }
165
166    /// Generate a summary report
167    pub fn summary(&self) -> String {
168        let counts = self.violation_counts();
169        format!(
170            "Model: {}\n\
171             Composite Score: {:.1}%\n\
172             Assessment: {}\n\
173             Violations: {} critical, {} warnings, {} minor\n\
174             Tests Run: {}\n\
175             Gate Status: {}",
176            self.model_id,
177            self.composite_score() * 100.0,
178            self.assessment(),
179            counts.critical,
180            counts.warnings,
181            counts.minor,
182            self.test_count,
183            if self.passes_gate(0.9) { "PASS" } else { "FAIL" }
184        )
185    }
186}
entrenar/integrity/behavioral/metrics.rs

entrenar/integrity/behavioral/
metrics.rs