aprender/qa/
mod.rs

//! Model Quality Assurance Module (`aprender::qa`)
//!
//! Provides a 100-point adversarial QA checklist for production model validation.
//! Separates *model quality* (aprender) from *code quality* (certeza).
//!
//! # Toyota Way Alignment
//! - **Jidoka**: `Severity::Blocker` stops the deployment line
//! - **Poka-yoke**: Type-safe category enums prevent misconfiguration
//!
//! # Example
//! ```
//! use aprender::qa::{QaChecklist, QaCategory, Severity};
//!
//! let checklist = QaChecklist::default();
//! assert_eq!(QaChecklist::max_score(), 100);
//! ```
17
18pub mod adversarial;
19pub mod docs;
20pub mod fairness;
21pub mod robustness;
22pub mod security;
23pub mod velocity;
24
25use serde::{Deserialize, Serialize};
26use std::collections::HashMap;
27use std::path::PathBuf;
28use std::time::Duration;
29
/// Configuration for a 100-point model QA run.
///
/// Bundles the model location with the optional inputs and budgets that the
/// individual checks consult.
#[derive(Debug, Clone)]
pub struct QaChecklist {
    /// Path of the model under test.
    pub model_path: PathBuf,
    /// Path of the test dataset, when one is supplied (most checks need it).
    pub test_data: Option<PathBuf>,
    /// Protected attributes considered during fairness testing.
    pub protected_attrs: Vec<String>,
    /// Latency SLA applied during performance testing.
    pub latency_sla: Duration,
    /// Memory budget applied during resource testing.
    pub memory_budget: usize,
    /// Number of turns allowed before a multi-turn eval counts as failed.
    pub max_turns: u32,
}
46
47impl Default for QaChecklist {
48    fn default() -> Self {
49        Self {
50            model_path: PathBuf::new(),
51            test_data: None,
52            protected_attrs: Vec::new(),
53            latency_sla: Duration::from_millis(100),
54            memory_budget: 512 * 1024 * 1024, // 512 MB
55            max_turns: 5,
56        }
57    }
58}
59
60impl QaChecklist {
61    /// Create a new QA checklist for a model
62    #[must_use]
63    pub fn new(model_path: PathBuf) -> Self {
64        Self {
65            model_path,
66            ..Default::default()
67        }
68    }
69
70    /// Set test data path
71    #[must_use]
72    pub fn with_test_data(mut self, path: PathBuf) -> Self {
73        self.test_data = Some(path);
74        self
75    }
76
77    /// Set protected attributes for fairness testing
78    #[must_use]
79    pub fn with_protected_attrs(mut self, attrs: Vec<String>) -> Self {
80        self.protected_attrs = attrs;
81        self
82    }
83
84    /// Set latency SLA
85    #[must_use]
86    pub fn with_latency_sla(mut self, sla: Duration) -> Self {
87        self.latency_sla = sla;
88        self
89    }
90
91    /// Set memory budget
92    #[must_use]
93    pub fn with_memory_budget(mut self, budget: usize) -> Self {
94        self.memory_budget = budget;
95        self
96    }
97
98    /// Maximum possible score (always 100)
99    #[must_use]
100    pub const fn max_score() -> u8 {
101        100
102    }
103
104    /// Get points allocation per category
105    #[must_use]
106    pub fn category_points() -> HashMap<QaCategory, u8> {
107        let mut points = HashMap::new();
108        points.insert(QaCategory::Robustness, 20);
109        points.insert(QaCategory::EdgeCases, 15);
110        points.insert(QaCategory::DistributionShift, 15);
111        points.insert(QaCategory::Fairness, 15);
112        points.insert(QaCategory::Privacy, 10);
113        points.insert(QaCategory::Latency, 10);
114        points.insert(QaCategory::Memory, 10);
115        points.insert(QaCategory::Reproducibility, 5);
116        points
117    }
118}
119
120/// QA report with 100-point scoring
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct QaReport {
123    /// Model identifier
124    pub model_id: String,
125    /// Individual category scores
126    pub categories: HashMap<QaCategory, CategoryScore>,
127    /// Total score (0-100)
128    pub total_score: u8,
129    /// Pass/fail determination
130    pub passed: bool,
131    /// Blocking issues (must fix)
132    pub blockers: Vec<QaIssue>,
133    /// Warnings (should fix)
134    pub warnings: Vec<QaIssue>,
135}
136
137impl QaReport {
138    /// Create a new empty QA report
139    #[must_use]
140    pub fn new(model_id: String) -> Self {
141        Self {
142            model_id,
143            categories: HashMap::new(),
144            total_score: 0,
145            passed: false,
146            blockers: Vec::new(),
147            warnings: Vec::new(),
148        }
149    }
150
151    /// Add a category score
152    pub fn add_category(&mut self, category: QaCategory, score: CategoryScore) {
153        self.categories.insert(category, score);
154        self.recalculate_total();
155    }
156
157    /// Add a blocking issue
158    pub fn add_blocker(&mut self, issue: QaIssue) {
159        self.blockers.push(issue);
160        self.passed = false;
161    }
162
163    /// Add a warning
164    pub fn add_warning(&mut self, issue: QaIssue) {
165        self.warnings.push(issue);
166    }
167
168    /// Recalculate total score from categories
169    fn recalculate_total(&mut self) {
170        let earned: u16 = self
171            .categories
172            .values()
173            .map(|s| u16::from(s.points_earned))
174            .sum();
175        let possible: u16 = self
176            .categories
177            .values()
178            .map(|s| u16::from(s.points_possible))
179            .sum();
180
181        self.total_score = if possible > 0 {
182            ((earned * 100) / possible).min(100) as u8
183        } else {
184            0
185        };
186
187        // Pass if score >= 80 and no blockers
188        self.passed = self.total_score >= 80 && self.blockers.is_empty();
189    }
190
191    /// Check if the model is production-ready
192    #[must_use]
193    pub fn is_production_ready(&self) -> bool {
194        self.passed && self.total_score >= 90
195    }
196}
197
198/// QA category enumeration
199#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
200pub enum QaCategory {
201    /// Adversarial robustness (FGSM, PGD, noise)
202    Robustness,
203    /// Edge cases (NaN, Inf, empty, max-size)
204    EdgeCases,
205    /// Out-of-distribution detection
206    DistributionShift,
207    /// Fairness metrics (disparate impact, EOD)
208    Fairness,
209    /// Privacy (membership inference)
210    Privacy,
211    /// Latency (P50, P95, P99)
212    Latency,
213    /// Memory (peak, leaks)
214    Memory,
215    /// Reproducibility (determinism)
216    Reproducibility,
217}
218
219impl QaCategory {
220    /// Get all categories
221    #[must_use]
222    pub fn all() -> Vec<Self> {
223        vec![
224            Self::Robustness,
225            Self::EdgeCases,
226            Self::DistributionShift,
227            Self::Fairness,
228            Self::Privacy,
229            Self::Latency,
230            Self::Memory,
231            Self::Reproducibility,
232        ]
233    }
234
235    /// Get display name
236    #[must_use]
237    pub const fn name(&self) -> &'static str {
238        match self {
239            Self::Robustness => "Robustness",
240            Self::EdgeCases => "Edge Cases",
241            Self::DistributionShift => "Distribution Shift",
242            Self::Fairness => "Fairness",
243            Self::Privacy => "Privacy",
244            Self::Latency => "Latency",
245            Self::Memory => "Memory",
246            Self::Reproducibility => "Reproducibility",
247        }
248    }
249}
250
251/// Score for a single category
252#[derive(Debug, Clone, Serialize, Deserialize)]
253pub struct CategoryScore {
254    /// Points earned
255    pub points_earned: u8,
256    /// Points possible
257    pub points_possible: u8,
258    /// Tests passed
259    pub tests_passed: u32,
260    /// Tests failed
261    pub tests_failed: u32,
262    /// Detailed test results
263    pub details: Vec<TestResult>,
264}
265
266impl CategoryScore {
267    /// Create a new category score
268    #[must_use]
269    pub fn new(points_possible: u8) -> Self {
270        Self {
271            points_earned: 0,
272            points_possible,
273            tests_passed: 0,
274            tests_failed: 0,
275            details: Vec::new(),
276        }
277    }
278
279    /// Add a test result
280    pub fn add_result(&mut self, result: TestResult) {
281        if result.passed {
282            self.tests_passed += 1;
283        } else {
284            self.tests_failed += 1;
285        }
286        self.details.push(result);
287    }
288
289    /// Calculate earned points based on pass rate
290    pub fn finalize(&mut self) {
291        let total = self.tests_passed + self.tests_failed;
292        if total > 0 {
293            let pass_rate = f64::from(self.tests_passed) / f64::from(total);
294            self.points_earned = (f64::from(self.points_possible) * pass_rate).round() as u8;
295        }
296    }
297
298    /// Get pass rate as percentage
299    #[must_use]
300    pub fn pass_rate(&self) -> f64 {
301        let total = self.tests_passed + self.tests_failed;
302        if total > 0 {
303            f64::from(self.tests_passed) / f64::from(total) * 100.0
304        } else {
305            0.0
306        }
307    }
308}
309
310/// Individual test result
311#[derive(Debug, Clone, Serialize, Deserialize)]
312pub struct TestResult {
313    /// Test name
314    pub name: String,
315    /// Pass/fail
316    pub passed: bool,
317    /// Optional message
318    pub message: Option<String>,
319    /// Duration
320    pub duration: Duration,
321}
322
323impl TestResult {
324    /// Create a passing test result
325    #[must_use]
326    pub fn pass(name: impl Into<String>, duration: Duration) -> Self {
327        Self {
328            name: name.into(),
329            passed: true,
330            message: None,
331            duration,
332        }
333    }
334
335    /// Create a failing test result
336    #[must_use]
337    pub fn fail(name: impl Into<String>, message: impl Into<String>, duration: Duration) -> Self {
338        Self {
339            name: name.into(),
340            passed: false,
341            message: Some(message.into()),
342            duration,
343        }
344    }
345}
346
347/// QA issue (blocker or warning)
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct QaIssue {
350    /// Category
351    pub category: QaCategory,
352    /// Severity level
353    pub severity: Severity,
354    /// Issue message
355    pub message: String,
356    /// Remediation suggestion
357    pub remediation: String,
358}
359
360impl QaIssue {
361    /// Create a new QA issue
362    #[must_use]
363    pub fn new(
364        category: QaCategory,
365        severity: Severity,
366        message: impl Into<String>,
367        remediation: impl Into<String>,
368    ) -> Self {
369        Self {
370            category,
371            severity,
372            message: message.into(),
373            remediation: remediation.into(),
374        }
375    }
376
377    /// Create a blocker issue
378    #[must_use]
379    pub fn blocker(
380        category: QaCategory,
381        message: impl Into<String>,
382        remediation: impl Into<String>,
383    ) -> Self {
384        Self::new(category, Severity::Blocker, message, remediation)
385    }
386
387    /// Create a warning issue
388    #[must_use]
389    pub fn warning(
390        category: QaCategory,
391        message: impl Into<String>,
392        remediation: impl Into<String>,
393    ) -> Self {
394        Self::new(category, Severity::Warning, message, remediation)
395    }
396}
397
398/// Issue severity (Toyota Way: Jidoka - stop the line)
399#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
400pub enum Severity {
401    /// Blocks production deployment (Andon cord)
402    Blocker,
403    /// Should fix before production
404    Critical,
405    /// Recommended improvement
406    Warning,
407    /// Informational only
408    Info,
409}
410
411impl Severity {
412    /// Check if this severity should block deployment
413    #[must_use]
414    pub const fn is_blocking(&self) -> bool {
415        matches!(self, Self::Blocker)
416    }
417
418    /// Check if this severity requires human review
419    #[must_use]
420    pub const fn requires_review(&self) -> bool {
421        matches!(self, Self::Blocker | Self::Critical)
422    }
423}
424
/// Reasons the loading pipeline stops (Jidoka enforcement points).
#[derive(Debug, Clone)]
pub enum JidokaStop {
    /// Header magic or version mismatch - stop immediately.
    InvalidHeader,
    /// Signature verification failed - stop and alert security.
    SignatureFailed,
    /// Checksum mismatch - stop, the data is corrupted.
    ChecksumFailed,
    /// WCET budget exceeded - stop, unsafe for deployment.
    WcetViolation,
    /// Fairness threshold breached - stop, ethical concern.
    FairnessViolation,
    /// Model score below the threshold - stop at the quality gate.
    QualityGateFailed {
        /// Score the model actually achieved.
        score: u8,
        /// Threshold the score had to reach.
        threshold: u8,
    },
}
446
447impl JidokaStop {
448    /// All stops are non-recoverable without human intervention (Andon cord)
449    #[must_use]
450    pub const fn requires_human_review(&self) -> bool {
451        true
452    }
453
454    /// Get description of the stop
455    #[must_use]
456    pub fn description(&self) -> String {
457        match self {
458            Self::InvalidHeader => "Invalid file header".to_string(),
459            Self::SignatureFailed => "Signature verification failed".to_string(),
460            Self::ChecksumFailed => "Checksum mismatch - data corrupted".to_string(),
461            Self::WcetViolation => "WCET budget exceeded".to_string(),
462            Self::FairnessViolation => "Fairness threshold breached".to_string(),
463            Self::QualityGateFailed { score, threshold } => {
464                format!("Quality gate failed: {score}/100 < {threshold}/100")
465            }
466        }
467    }
468}
469
470#[cfg(test)]
471#[path = "qa_tests.rs"]
472mod tests;
aprender/qa/mod.rs

aprender/qa/
mod.rs