use apr_qa_runner::{Evidence, EvidenceCollector, Outcome};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use crate::error::Result;
/// Outcome of a single gateway check.
///
/// A gateway is an all-or-nothing precondition: `MqsCalculator::calculate`
/// zeroes the whole score when any gateway result has `passed == false`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GatewayResult {
/// Gateway identifier (the calculator in this file emits "G0" through "G4").
pub id: String,
/// `true` when the gateway was satisfied.
pub passed: bool,
/// Human-readable statement of what the gateway verifies.
pub description: String,
/// Why the gateway failed; `None` when it passed.
pub failure_reason: Option<String>,
}
impl GatewayResult {
    /// Creates the result for a gateway that was satisfied.
    ///
    /// `failure_reason` is left as `None`.
    #[must_use]
    pub fn passed(id: impl Into<String>, description: impl Into<String>) -> Self {
        let id = id.into();
        let description = description.into();
        Self {
            id,
            passed: true,
            description,
            failure_reason: None,
        }
    }

    /// Creates the result for a gateway that was violated, recording `reason`
    /// as the failure explanation.
    #[must_use]
    pub fn failed(
        id: impl Into<String>,
        description: impl Into<String>,
        reason: impl Into<String>,
    ) -> Self {
        // Start from the passing shape and flip the two fields that differ.
        let mut result = Self::passed(id, description);
        result.passed = false;
        result.failure_reason = Some(reason.into());
        result
    }
}
/// Points earned in each of the six scoring categories.
///
/// The per-category ceilings are the `MAX_*` associated constants; the
/// `Default` value has every category at zero.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CategoryScores {
/// Points for the "QUAL" category (ceiling `MAX_QUAL`).
pub qual: u32,
/// Points for the "PERF" category (ceiling `MAX_PERF`).
pub perf: u32,
/// Points for the "STAB" category (ceiling `MAX_STAB`).
pub stab: u32,
/// Points for the "COMP" category (ceiling `MAX_COMP`).
pub comp: u32,
/// Points for the "EDGE" category (ceiling `MAX_EDGE`).
pub edge: u32,
/// Points for the "REGR" category (ceiling `MAX_REGR`).
pub regr: u32,
}
impl CategoryScores {
    /// Ceiling for the QUAL category.
    pub const MAX_QUAL: u32 = 200;
    /// Ceiling for the PERF category.
    pub const MAX_PERF: u32 = 150;
    /// Ceiling for the STAB category.
    pub const MAX_STAB: u32 = 200;
    /// Ceiling for the COMP category.
    pub const MAX_COMP: u32 = 150;
    /// Ceiling for the EDGE category.
    pub const MAX_EDGE: u32 = 150;
    /// Ceiling for the REGR category.
    pub const MAX_REGR: u32 = 150;
    /// Sum of all six category ceilings.
    pub const MAX_TOTAL: u32 = 1000;

    /// Returns the sum of the six category scores.
    #[must_use]
    pub fn total(&self) -> u32 {
        let Self {
            qual,
            perf,
            stab,
            comp,
            edge,
            regr,
        } = self;
        qual + perf + stab + comp + edge + regr
    }

    /// Returns a map from category label ("QUAL", "PERF", ...) to
    /// `(earned, ceiling)` for that category.
    #[must_use]
    pub fn breakdown(&self) -> HashMap<String, (u32, u32)> {
        let rows = [
            ("QUAL", self.qual, Self::MAX_QUAL),
            ("PERF", self.perf, Self::MAX_PERF),
            ("STAB", self.stab, Self::MAX_STAB),
            ("COMP", self.comp, Self::MAX_COMP),
            ("EDGE", self.edge, Self::MAX_EDGE),
            ("REGR", self.regr, Self::MAX_REGR),
        ];
        rows.iter()
            .map(|&(label, earned, ceiling)| (label.to_string(), (earned, ceiling)))
            .collect()
    }
}
/// Full scoring report for one model, as produced by `MqsCalculator::calculate`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MqsScore {
/// Identifier of the scored model, copied from the `calculate` argument.
pub model_id: String,
/// Category total minus penalties (saturating at zero); 0 when a gateway failed.
pub raw_score: u32,
/// Score mapped onto the 0.0..=100.0 range; 0.0 when a gateway failed.
pub normalized_score: f64,
/// Letter grade ("A+" down to "F") derived from `normalized_score`.
pub grade: String,
/// One entry per gateway check, in the order the checks were run.
pub gateways: Vec<GatewayResult>,
/// `true` only when every entry in `gateways` passed.
pub gateways_passed: bool,
/// Per-category points; all zero when a gateway failed.
pub categories: CategoryScores,
/// Number of evidence items that were scored.
pub total_tests: usize,
/// Pass count as reported by the evidence collector.
pub tests_passed: usize,
/// Fail count as reported by the evidence collector.
pub tests_failed: usize,
/// Individual deductions that were applied.
pub penalties: Vec<Penalty>,
/// Total points deducted across `penalties`.
pub total_penalty: u32,
}
impl MqsScore {
    /// Returns `true` when all gateways passed and the normalized score is at
    /// least 70.
    #[must_use]
    pub fn qualifies(&self) -> bool {
        if !self.gateways_passed {
            return false;
        }
        self.normalized_score >= 70.0
    }

    /// Returns `true` when all gateways passed and the normalized score is at
    /// least 90.
    #[must_use]
    pub fn is_production_ready(&self) -> bool {
        if !self.gateways_passed {
            return false;
        }
        self.normalized_score >= 90.0
    }
}
/// A single deduction applied to the raw score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Penalty {
/// Short machine-readable tag; the calculator in this file emits
/// "GATEWAY", "CRASH" and "TIMEOUT".
pub code: String,
/// Human-readable explanation of the deduction.
pub description: String,
/// Number of points deducted.
pub points: u32,
}
/// Computes an `MqsScore` from collected evidence.
#[derive(Debug)]
pub struct MqsCalculator {
// Set by `new` (1.5) and `with_failure_multiplier`.
// NOTE(review): no scoring code visible in this file reads this value;
// only the unit tests inspect it — confirm whether it is still needed.
failure_multiplier: f64,
// Initialised to 10 in `new` but never read in this file, hence the
// `dead_code` allowance below.
#[allow(dead_code)]
min_tests_per_category: usize,
}
impl Default for MqsCalculator {
fn default() -> Self {
Self::new()
}
}
impl MqsCalculator {
/// Creates a calculator with the default settings: failure multiplier 1.5
/// and a minimum of 10 tests per category.
#[must_use]
pub fn new() -> Self {
    const DEFAULT_FAILURE_MULTIPLIER: f64 = 1.5;
    const DEFAULT_MIN_TESTS_PER_CATEGORY: usize = 10;

    Self {
        failure_multiplier: DEFAULT_FAILURE_MULTIPLIER,
        min_tests_per_category: DEFAULT_MIN_TESTS_PER_CATEGORY,
    }
}
#[must_use]
pub fn with_failure_multiplier(mut self, multiplier: f64) -> Self {
self.failure_multiplier = multiplier;
self
}
pub fn calculate(&self, model_id: &str, evidence: &EvidenceCollector) -> Result<MqsScore> {
let all_evidence = evidence.all();
let gateways = self.check_gateways(all_evidence);
let gateways_passed = gateways.iter().all(|g| g.passed);
if !gateways_passed {
return Ok(MqsScore {
model_id: model_id.to_string(),
raw_score: 0,
normalized_score: 0.0,
grade: "F".to_string(),
gateways,
gateways_passed: false,
categories: CategoryScores::default(),
total_tests: all_evidence.len(),
tests_passed: evidence.pass_count(),
tests_failed: evidence.fail_count(),
penalties: vec![Penalty {
code: "GATEWAY".to_string(),
description: "Gateway check failed - score zeroed".to_string(),
points: 1000,
}],
total_penalty: 1000,
});
}
let categories = self.calculate_categories(all_evidence);
let mut penalties = Vec::new();
let mut total_penalty: u32 = 0;
let crash_count = all_evidence
.iter()
.filter(|e| e.outcome == Outcome::Crashed)
.count();
if crash_count > 0 {
let penalty = (crash_count as u32) * 20;
penalties.push(Penalty {
code: "CRASH".to_string(),
description: format!("{crash_count} crash(es) detected"),
points: penalty,
});
total_penalty += penalty;
}
let timeout_count = all_evidence
.iter()
.filter(|e| e.outcome == Outcome::Timeout)
.count();
if timeout_count > 0 {
let penalty = (timeout_count as u32) * 10;
penalties.push(Penalty {
code: "TIMEOUT".to_string(),
description: format!("{timeout_count} timeout(s) detected"),
points: penalty,
});
total_penalty += penalty;
}
let raw_score = categories.total().saturating_sub(total_penalty);
let normalized = self.normalize_score(raw_score, categories.total());
let grade = Self::calculate_grade(normalized);
Ok(MqsScore {
model_id: model_id.to_string(),
raw_score,
normalized_score: normalized,
grade,
gateways,
gateways_passed: true,
categories,
total_tests: all_evidence.len(),
tests_passed: evidence.pass_count(),
tests_failed: evidence.fail_count(),
penalties,
total_penalty,
})
}
/// Runs the five gateway checks and returns their results in the fixed
/// order G0, G1, G2, G3, G4.
///
/// * G0 fails when any failing evidence has a gate id starting with
///   "G0-INTEGRITY"; the failure reason lists every such item's reason.
/// * G1 / G2 fail when any failing evidence has a gate id containing "G1" /
///   "G2".
/// * G3 fails when any evidence has a crashed outcome.
/// * G4 fails when the number of failing evidence items whose gate id
///   contains "G4" exceeds a quarter (integer division) of all evidence.
///
/// NOTE(review): G1, G2 and G4 match the tag anywhere in the gate id
/// (substring match), unlike G0 which matches a prefix — confirm this is
/// intended for the gate-id scheme in use.
fn check_gateways(&self, evidence: &[Evidence]) -> Vec<GatewayResult> {
    // True when at least one failing item carries `tag` in its gate id.
    let any_failure_tagged = |tag: &str| {
        evidence
            .iter()
            .any(|e| e.gate_id.contains(tag) && e.outcome.is_fail())
    };

    // G0: model integrity.
    let integrity_reasons: Vec<&str> = evidence
        .iter()
        .filter(|e| e.gate_id.starts_with("G0-INTEGRITY") && e.outcome.is_fail())
        .map(|e| e.reason.as_str())
        .collect();
    let g0 = if integrity_reasons.is_empty() {
        GatewayResult::passed("G0", "Model integrity (config/tensor match)")
    } else {
        GatewayResult::failed(
            "G0",
            "Model integrity (config/tensor match)",
            format!(
                "{} integrity check(s) failed: {}",
                integrity_reasons.len(),
                integrity_reasons.join("; ")
            ),
        )
    };

    // G1: model load.
    let g1 = if any_failure_tagged("G1") {
        GatewayResult::failed("G1", "Model loads successfully", "Model failed to load")
    } else {
        GatewayResult::passed("G1", "Model loads successfully")
    };

    // G2: basic inference.
    let g2 = if any_failure_tagged("G2") {
        GatewayResult::failed("G2", "Basic inference works", "Inference failed")
    } else {
        GatewayResult::passed("G2", "Basic inference works")
    };

    // G3: no crashes at all, regardless of gate id.
    let crash_count = evidence
        .iter()
        .filter(|e| e.outcome == Outcome::Crashed)
        .count();
    let g3 = match crash_count {
        0 => GatewayResult::passed("G3", "No crashes"),
        _ => GatewayResult::failed(
            "G3",
            "No crashes",
            format!("{crash_count} crash(es) detected"),
        ),
    };

    // G4: tolerate garbage output up to a quarter of all evidence.
    let garbage_failures = evidence
        .iter()
        .filter(|e| e.gate_id.contains("G4") && e.outcome.is_fail())
        .count();
    let garbage_limit = evidence.len() / 4;
    let g4 = if garbage_failures > garbage_limit {
        GatewayResult::failed(
            "G4",
            "Output is not garbage",
            format!("{garbage_failures} garbage outputs detected"),
        )
    } else {
        GatewayResult::passed("G4", "Output is not garbage")
    };

    vec![g0, g1, g2, g3, g4]
}
fn calculate_categories(&self, evidence: &[Evidence]) -> CategoryScores {
let mut scores = CategoryScores::default();
let mut qual_pass = 0;
let mut qual_total = 0;
let mut perf_pass = 0;
let mut perf_total = 0;
let mut stab_pass = 0;
let mut stab_total = 0;
let mut comp_pass = 0;
let mut comp_total = 0;
let mut edge_pass = 0;
let mut edge_total = 0;
let mut regr_pass = 0;
let mut regr_total = 0;
for e in evidence {
let category = Self::extract_category(&e.gate_id);
let passed = e.outcome.is_pass();
match category.as_str() {
"QUAL" => {
qual_total += 1;
if passed {
qual_pass += 1;
}
}
"PERF" => {
perf_total += 1;
if passed {
perf_pass += 1;
}
}
"STAB" => {
stab_total += 1;
if passed {
stab_pass += 1;
}
}
"COMP" => {
comp_total += 1;
if passed {
comp_pass += 1;
}
}
"EDGE" => {
edge_total += 1;
if passed {
edge_pass += 1;
}
}
"REGR" => {
regr_total += 1;
if passed {
regr_pass += 1;
}
}
_ => {
qual_total += 1;
if passed {
qual_pass += 1;
}
}
}
}
scores.qual = Self::proportional_score(qual_pass, qual_total, CategoryScores::MAX_QUAL);
scores.perf = Self::proportional_score(perf_pass, perf_total, CategoryScores::MAX_PERF);
scores.stab = Self::proportional_score(stab_pass, stab_total, CategoryScores::MAX_STAB);
scores.comp = Self::proportional_score(comp_pass, comp_total, CategoryScores::MAX_COMP);
scores.edge = Self::proportional_score(edge_pass, edge_total, CategoryScores::MAX_EDGE);
scores.regr = Self::proportional_score(regr_pass, regr_total, CategoryScores::MAX_REGR);
scores
}
/// Returns the second dash-separated segment of `gate_id` (for example
/// "QUAL" for "F-QUAL-001"), or "QUAL" when the id contains no dash.
///
/// The segment is returned as-is, so it may be empty (e.g. for "F-").
fn extract_category(gate_id: &str) -> String {
    let mut segments = gate_id.split('-');
    // The first segment is the id prefix; only the one after it matters.
    let _prefix = segments.next();
    match segments.next() {
        Some(category) => category.to_owned(),
        None => String::from("QUAL"),
    }
}
/// Scales the pass ratio `passed / total` to `max` points, rounded to the
/// nearest integer. Returns 0 when `total` is zero.
fn proportional_score(passed: usize, total: usize, max: u32) -> u32 {
    match total {
        0 => 0,
        _ => {
            let pass_fraction = passed as f64 / total as f64;
            let points = pass_fraction * f64::from(max);
            points.round() as u32
        }
    }
}
/// Maps a raw score onto 0.0..=100.0 using a base-10 logarithmic curve:
/// `100 * log10(1 + 9 * raw / MAX_TOTAL)`, clamped to the range.
///
/// Returns 0.0 when `pre_penalty` (the category total before deductions)
/// is zero.
fn normalize_score(&self, raw: u32, pre_penalty: u32) -> f64 {
    match pre_penalty {
        0 => 0.0,
        _ => {
            let fraction = f64::from(raw) / f64::from(CategoryScores::MAX_TOTAL);
            // log10 expressed as ln(x) / ln(10).
            let log_scaled = 100.0 * (1.0 + 9.0 * fraction).ln() / 10_f64.ln();
            log_scaled.clamp(0.0, 100.0)
        }
    }
}
/// Converts a normalized score into a letter grade.
///
/// Each grade applies from its lower bound (inclusive) up to the next
/// grade's bound; anything below 60 — including NaN — is "F".
fn calculate_grade(score: f64) -> String {
    // (inclusive lower bound, grade), highest first so the first hit wins.
    const BANDS: [(f64, &str); 12] = [
        (97.0, "A+"),
        (93.0, "A"),
        (90.0, "A-"),
        (87.0, "B+"),
        (83.0, "B"),
        (80.0, "B-"),
        (77.0, "C+"),
        (73.0, "C"),
        (70.0, "C-"),
        (67.0, "D+"),
        (63.0, "D"),
        (60.0, "D-"),
    ];

    BANDS
        .iter()
        .find(|&&(lower_bound, _)| score >= lower_bound)
        .map_or("F", |&(_, letter)| letter)
        .to_string()
}
}
// Unit tests for the gateway, category and grading logic in this module.
#[cfg(test)]
mod tests {
use super::*;
use apr_qa_gen::{Backend, Format, Modality, ModelId, QaScenario};
// Shared scenario fixture attached to every piece of test evidence.
fn test_scenario() -> QaScenario {
QaScenario::new(
ModelId::new("test", "model"),
Modality::Run,
Backend::Cpu,
Format::Gguf,
"2+2=".to_string(),
42,
)
}
// Evidence built with `Evidence::corroborated` (used as a passing result) for `gate_id`.
fn test_evidence_passed(gate_id: &str) -> Evidence {
Evidence::corroborated(gate_id, test_scenario(), "4", 100)
}
// Evidence built with `Evidence::falsified` (used as a failing result) for `gate_id`.
fn test_evidence_failed(gate_id: &str) -> Evidence {
Evidence::falsified(gate_id, test_scenario(), "Wrong answer", "5", 100)
}
// `GatewayResult::passed` sets the flag and leaves no failure reason.
#[test]
fn test_gateway_result_passed() {
let result = GatewayResult::passed("G1", "Model loads");
assert!(result.passed);
assert!(result.failure_reason.is_none());
}
// `GatewayResult::failed` clears the flag and records the reason.
#[test]
fn test_gateway_result_failed() {
let result = GatewayResult::failed("G1", "Model loads", "OOM");
assert!(!result.passed);
assert_eq!(result.failure_reason, Some("OOM".to_string()));
}
// `total` is the plain sum of the six categories.
#[test]
fn test_category_scores_total() {
let scores = CategoryScores {
qual: 150,
perf: 100,
stab: 150,
comp: 100,
edge: 100,
regr: 100,
};
assert_eq!(scores.total(), 700);
}
// Pins the overall ceiling at 1000.
#[test]
fn test_category_scores_max() {
assert_eq!(CategoryScores::MAX_TOTAL, 1000);
}
// Ten passing items in every category yield the maximum score and grade.
#[test]
fn test_mqs_calculator_all_pass() {
let calculator = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
for i in 0..10 {
collector.add(test_evidence_passed(&format!("F-QUAL-{i:03}")));
collector.add(test_evidence_passed(&format!("F-PERF-{i:03}")));
collector.add(test_evidence_passed(&format!("F-STAB-{i:03}")));
collector.add(test_evidence_passed(&format!("F-COMP-{i:03}")));
collector.add(test_evidence_passed(&format!("F-EDGE-{i:03}")));
collector.add(test_evidence_passed(&format!("F-REGR-{i:03}")));
}
let score = calculator
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(score.gateways_passed);
assert_eq!(score.raw_score, 1000);
assert!(score.normalized_score > 99.0);
assert_eq!(score.grade, "A+");
}
// A single crashed item trips a gateway and zeroes the score.
#[test]
fn test_mqs_calculator_gateway_failure() {
let calculator = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(Evidence::crashed(
"F-QUAL-001",
test_scenario(),
"SIGSEGV",
-11,
0,
));
let score = calculator
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(!score.gateways_passed);
assert_eq!(score.raw_score, 0);
assert_eq!(score.normalized_score, 0.0);
assert_eq!(score.grade, "F");
}
// Timeouts produce a "TIMEOUT" penalty entry and a non-zero total penalty.
#[test]
fn test_mqs_calculator_with_penalties() {
let calculator = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
for i in 0..50 {
collector.add(test_evidence_passed(&format!("F-QUAL-{i:03}")));
}
for i in 0..5 {
collector.add(Evidence::timeout(
&format!("F-PERF-{i:03}"),
test_scenario(),
30000,
));
}
let score = calculator
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(score.total_penalty > 0);
assert!(score.penalties.iter().any(|p| p.code == "TIMEOUT"));
}
// Category is the second dash-separated segment; ids without a dash default to QUAL.
#[test]
fn test_extract_category() {
assert_eq!(MqsCalculator::extract_category("F-QUAL-001"), "QUAL");
assert_eq!(MqsCalculator::extract_category("F-PERF-042"), "PERF");
assert_eq!(MqsCalculator::extract_category("UNKNOWN"), "QUAL");
}
// Pass ratio scales linearly to the ceiling; zero total yields zero.
#[test]
fn test_proportional_score() {
assert_eq!(MqsCalculator::proportional_score(10, 10, 200), 200);
assert_eq!(MqsCalculator::proportional_score(5, 10, 200), 100);
assert_eq!(MqsCalculator::proportional_score(0, 10, 200), 0);
assert_eq!(MqsCalculator::proportional_score(0, 0, 200), 0);
}
// Grade boundaries are inclusive at their lower bound.
#[test]
fn test_grade_calculation() {
assert_eq!(MqsCalculator::calculate_grade(100.0), "A+");
assert_eq!(MqsCalculator::calculate_grade(97.0), "A+");
assert_eq!(MqsCalculator::calculate_grade(93.0), "A");
assert_eq!(MqsCalculator::calculate_grade(90.0), "A-");
assert_eq!(MqsCalculator::calculate_grade(83.0), "B");
assert_eq!(MqsCalculator::calculate_grade(73.0), "C");
assert_eq!(MqsCalculator::calculate_grade(50.0), "F");
}
// A normalized score of 75 qualifies but is not production ready.
#[test]
fn test_mqs_score_qualifies() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 800,
normalized_score: 75.0,
grade: "C".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 80,
tests_failed: 20,
penalties: vec![],
total_penalty: 0,
};
assert!(score.qualifies());
assert!(!score.is_production_ready());
}
// Normalization is strictly increasing and reaches 100 at the maximum raw score.
#[test]
fn test_normalize_score_scaling() {
let calc = MqsCalculator::new();
let low = calc.normalize_score(200, 200);
let mid = calc.normalize_score(500, 500);
let high = calc.normalize_score(900, 900);
let perfect = calc.normalize_score(1000, 1000);
assert!(low < mid);
assert!(mid < high);
assert!(high < perfect);
assert!((perfect - 100.0).abs() < 0.01);
}
// One representative score inside every grade band.
#[test]
fn test_grade_all_levels() {
assert_eq!(MqsCalculator::calculate_grade(98.0), "A+");
assert_eq!(MqsCalculator::calculate_grade(95.0), "A");
assert_eq!(MqsCalculator::calculate_grade(91.0), "A-");
assert_eq!(MqsCalculator::calculate_grade(88.0), "B+");
assert_eq!(MqsCalculator::calculate_grade(85.0), "B");
assert_eq!(MqsCalculator::calculate_grade(81.0), "B-");
assert_eq!(MqsCalculator::calculate_grade(78.0), "C+");
assert_eq!(MqsCalculator::calculate_grade(75.0), "C");
assert_eq!(MqsCalculator::calculate_grade(71.0), "C-");
assert_eq!(MqsCalculator::calculate_grade(68.0), "D+");
assert_eq!(MqsCalculator::calculate_grade(65.0), "D");
assert_eq!(MqsCalculator::calculate_grade(61.0), "D-");
assert_eq!(MqsCalculator::calculate_grade(55.0), "F");
}
// A normalized score of 95 with gateways passed is production ready.
#[test]
fn test_mqs_score_is_production_ready() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 950,
normalized_score: 95.0,
grade: "A".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 95,
tests_failed: 5,
penalties: vec![],
total_penalty: 0,
};
assert!(score.is_production_ready());
}
// A normalized score below 70 does not qualify even with gateways passed.
#[test]
fn test_mqs_score_not_qualifies() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 500,
normalized_score: 50.0,
grade: "F".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 50,
tests_failed: 50,
penalties: vec![],
total_penalty: 0,
};
assert!(!score.qualifies());
}
// A failed gateway blocks qualification regardless of the score.
#[test]
fn test_mqs_score_gateway_failed_not_qualifies() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 900,
normalized_score: 90.0,
grade: "A-".to_string(),
gateways: vec![],
gateways_passed: false,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 90,
tests_failed: 10,
penalties: vec![],
total_penalty: 0,
};
assert!(!score.qualifies());
}
// Default category scores sum to zero.
#[test]
fn test_category_scores_default() {
let scores = CategoryScores::default();
assert_eq!(scores.total(), 0);
}
// `breakdown` pairs every category's score with its ceiling.
#[test]
fn test_category_scores_breakdown() {
let scores = CategoryScores {
qual: 180,
perf: 150,
stab: 160,
comp: 140,
edge: 130,
regr: 120,
};
let breakdown = scores.breakdown();
assert_eq!(breakdown.get("QUAL"), Some(&(180, 200)));
assert_eq!(breakdown.get("PERF"), Some(&(150, 150)));
assert_eq!(breakdown.get("STAB"), Some(&(160, 200)));
assert_eq!(breakdown.get("COMP"), Some(&(140, 150)));
assert_eq!(breakdown.get("EDGE"), Some(&(130, 150)));
assert_eq!(breakdown.get("REGR"), Some(&(120, 150)));
}
// `Penalty` clones field-for-field.
#[test]
fn test_penalty_clone() {
let penalty = Penalty {
code: "TEST".to_string(),
description: "Test penalty".to_string(),
points: 10,
};
let cloned = penalty.clone();
assert_eq!(cloned.code, penalty.code);
assert_eq!(cloned.points, penalty.points);
}
// `GatewayResult` clones field-for-field.
#[test]
fn test_gateway_result_clone() {
let result = GatewayResult::passed("G1", "Test");
let cloned = result.clone();
assert_eq!(cloned.id, result.id);
assert_eq!(cloned.passed, result.passed);
}
// `MqsScore` serializes to JSON containing its field values.
#[test]
fn test_mqs_score_serialize() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 800,
normalized_score: 80.0,
grade: "B".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 80,
tests_failed: 20,
penalties: vec![],
total_penalty: 0,
};
let json = serde_json::to_string(&score).expect("serialize");
assert!(json.contains("test"));
assert!(json.contains("800"));
}
// Category extraction for a STAB gate id.
#[test]
fn test_extract_category_stab() {
assert_eq!(MqsCalculator::extract_category("F-STAB-001"), "STAB");
}
// Category extraction for a COMP gate id.
#[test]
fn test_extract_category_comp() {
assert_eq!(MqsCalculator::extract_category("F-COMP-001"), "COMP");
}
// Category extraction for an EDGE gate id.
#[test]
fn test_extract_category_edge() {
assert_eq!(MqsCalculator::extract_category("F-EDGE-001"), "EDGE");
}
// Category extraction for a REGR gate id.
#[test]
fn test_extract_category_regr() {
assert_eq!(MqsCalculator::extract_category("F-REGR-001"), "REGR");
}
// A zero pre-penalty total normalizes to exactly zero.
#[test]
fn test_normalize_score_zero() {
let calc = MqsCalculator::new();
let score = calc.normalize_score(0, 0);
assert_eq!(score, 0.0);
}
// Five gateway results are produced even with no evidence.
#[test]
fn test_mqs_calculator_check_gateways() {
let calc = MqsCalculator::new();
let collector = EvidenceCollector::new();
let gateways = calc.check_gateways(collector.all());
assert_eq!(gateways.len(), 5);
}
// The builder stores the supplied failure multiplier.
#[test]
fn test_mqs_calculator_with_failure_multiplier() {
let calc = MqsCalculator::new().with_failure_multiplier(2.0);
assert_eq!(calc.failure_multiplier, 2.0);
}
// `Default` yields the same multiplier as `new`.
#[test]
fn test_mqs_calculator_default() {
let calc = MqsCalculator::default();
assert_eq!(calc.failure_multiplier, 1.5);
}
// `Debug` output names the type.
#[test]
fn test_mqs_calculator_debug() {
let calc = MqsCalculator::new();
let debug_str = format!("{calc:?}");
assert!(debug_str.contains("MqsCalculator"));
}
// A failing "G1-LOAD" item fails gateway G1 and the overall gateway flag.
#[test]
fn test_gateway_g1_failure() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(Evidence::falsified(
"G1-LOAD",
test_scenario(),
"Model failed to load",
"",
100,
));
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(!score.gateways_passed);
let g1 = score.gateways.iter().find(|g| g.id == "G1").unwrap();
assert!(!g1.passed);
}
// A failing "G2-INFERENCE" item fails gateway G2.
#[test]
fn test_gateway_g2_failure() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(Evidence::falsified(
"G2-INFERENCE",
test_scenario(),
"Inference failed",
"",
100,
));
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
let g2 = score.gateways.iter().find(|g| g.id == "G2").unwrap();
assert!(!g2.passed);
}
// Ten failing G4 items out of ten exceed the quarter threshold and fail G4.
#[test]
fn test_gateway_g4_failure_garbage_output() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
for i in 0..10 {
collector.add(Evidence::falsified(
&format!("G4-GARBAGE-{i:03}"),
test_scenario(),
"Garbage output",
"###$$@@!!",
100,
));
}
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
let g4 = score.gateways.iter().find(|g| g.id == "G4").unwrap();
assert!(!g4.passed);
}
// NOTE(review): despite the name, no crash evidence is added here; the test
// only checks that all-passing QUAL evidence keeps the gateways green.
#[test]
fn test_mqs_with_crash_penalty() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
for i in 0..50 {
collector.add(test_evidence_passed(&format!("F-QUAL-{i:03}")));
}
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(score.gateways_passed);
}
// One passing item per category gives every category a positive score.
#[test]
fn test_calculate_categories_all_types() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(test_evidence_passed("F-QUAL-001"));
collector.add(test_evidence_passed("F-PERF-001"));
collector.add(test_evidence_passed("F-STAB-001"));
collector.add(test_evidence_passed("F-COMP-001"));
collector.add(test_evidence_passed("F-EDGE-001"));
collector.add(test_evidence_passed("F-REGR-001"));
let categories = calc.calculate_categories(collector.all());
assert!(categories.qual > 0);
assert!(categories.perf > 0);
assert!(categories.stab > 0);
assert!(categories.comp > 0);
assert!(categories.edge > 0);
assert!(categories.regr > 0);
}
// Two of three QUAL items passing lands strictly between 100 and 200 points.
#[test]
fn test_calculate_categories_with_failures() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(test_evidence_passed("F-QUAL-001"));
collector.add(test_evidence_failed("F-QUAL-002"));
collector.add(test_evidence_passed("F-QUAL-003"));
let categories = calc.calculate_categories(collector.all());
assert!(categories.qual > 100);
assert!(categories.qual < 200);
}
// Evidence with an unrecognised gate id is counted under QUAL.
#[test]
fn test_calculate_categories_unknown_category() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(test_evidence_passed("UNKNOWN"));
let categories = calc.calculate_categories(collector.all());
assert!(categories.qual > 0);
}
// `Debug` output of `GatewayResult` names the type.
#[test]
fn test_gateway_result_debug() {
let result = GatewayResult::passed("G1", "Test");
let debug_str = format!("{result:?}");
assert!(debug_str.contains("GatewayResult"));
}
// `Debug` output of `CategoryScores` names the type.
#[test]
fn test_category_scores_debug() {
let scores = CategoryScores::default();
let debug_str = format!("{scores:?}");
assert!(debug_str.contains("CategoryScores"));
}
// `Debug` output of `Penalty` names the type.
#[test]
fn test_penalty_debug() {
let penalty = Penalty {
code: "TEST".to_string(),
description: "Test".to_string(),
points: 10,
};
let debug_str = format!("{penalty:?}");
assert!(debug_str.contains("Penalty"));
}
// `Debug` output of `MqsScore` names the type.
#[test]
fn test_mqs_score_debug() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 800,
normalized_score: 80.0,
grade: "B".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 80,
tests_failed: 20,
penalties: vec![],
total_penalty: 0,
};
let debug_str = format!("{score:?}");
assert!(debug_str.contains("MqsScore"));
}
// `MqsScore` clones field-for-field.
#[test]
fn test_mqs_score_clone() {
let score = MqsScore {
model_id: "test".to_string(),
raw_score: 800,
normalized_score: 80.0,
grade: "B".to_string(),
gateways: vec![],
gateways_passed: true,
categories: CategoryScores::default(),
total_tests: 100,
tests_passed: 80,
tests_failed: 20,
penalties: vec![],
total_penalty: 0,
};
let cloned = score.clone();
assert_eq!(cloned.model_id, score.model_id);
assert_eq!(cloned.raw_score, score.raw_score);
}
// `GatewayResult` serializes to JSON containing its id.
#[test]
fn test_gateway_result_serialize() {
let result = GatewayResult::passed("G1", "Test");
let json = serde_json::to_string(&result).expect("serialize");
assert!(json.contains("G1"));
}
// `CategoryScores` serializes to JSON containing its values.
#[test]
fn test_category_scores_serialize() {
let scores = CategoryScores {
qual: 100,
perf: 50,
stab: 75,
comp: 60,
edge: 40,
regr: 30,
};
let json = serde_json::to_string(&scores).expect("serialize");
assert!(json.contains("100"));
}
// `Penalty` serializes to JSON containing its code.
#[test]
fn test_penalty_serialize() {
let penalty = Penalty {
code: "CRASH".to_string(),
description: "Crash detected".to_string(),
points: 20,
};
let json = serde_json::to_string(&penalty).expect("serialize");
assert!(json.contains("CRASH"));
}
// With no evidence at all, every gateway passes and zero tests are reported.
#[test]
fn test_mqs_calculator_calculate_empty() {
let calc = MqsCalculator::new();
let collector = EvidenceCollector::new();
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(score.gateways_passed);
assert_eq!(score.total_tests, 0);
}
// `CategoryScores` clones field-for-field.
#[test]
fn test_category_scores_clone() {
let scores = CategoryScores {
qual: 100,
perf: 50,
stab: 75,
comp: 60,
edge: 40,
regr: 30,
};
let cloned = scores.clone();
assert_eq!(cloned.qual, scores.qual);
assert_eq!(cloned.total(), scores.total());
}
// `MqsScore` deserializes from a hand-written JSON document.
#[test]
fn test_mqs_score_deserialize() {
let json = r#"{
"model_id": "test",
"raw_score": 800,
"normalized_score": 80.0,
"grade": "B",
"gateways": [],
"gateways_passed": true,
"categories": {"qual": 0, "perf": 0, "stab": 0, "comp": 0, "edge": 0, "regr": 0},
"total_tests": 100,
"tests_passed": 80,
"tests_failed": 20,
"penalties": [],
"total_penalty": 0
}"#;
let score: MqsScore = serde_json::from_str(json).expect("deserialize");
assert_eq!(score.model_id, "test");
assert_eq!(score.raw_score, 800);
}
// A single failing G0-INTEGRITY item fails G0 and zeroes the score.
#[test]
fn test_gateway_g0_integrity_failure() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(Evidence::falsified(
"G0-INTEGRITY-LAYERS",
test_scenario(),
"config says 14 layers but tensors have 24",
"",
100,
));
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(!score.gateways_passed);
assert_eq!(score.raw_score, 0);
assert_eq!(score.normalized_score, 0.0);
let g0 = score.gateways.iter().find(|g| g.id == "G0").unwrap();
assert!(!g0.passed);
assert!(g0.failure_reason.as_ref().unwrap().contains("integrity"));
}
// Several failing G0-INTEGRITY items are counted in the failure reason.
#[test]
fn test_gateway_g0_integrity_multiple_failures() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(Evidence::falsified(
"G0-INTEGRITY-LAYERS",
test_scenario(),
"config says 14 layers but tensors have 24",
"",
100,
));
collector.add(Evidence::falsified(
"G0-INTEGRITY-HIDDEN",
test_scenario(),
"config says hidden_size=4096 but embedding has 896",
"",
100,
));
collector.add(Evidence::falsified(
"G0-INTEGRITY-VOCAB",
test_scenario(),
"config says vocab_size=896 but embedding has 151936",
"",
100,
));
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(!score.gateways_passed);
assert_eq!(score.raw_score, 0);
let g0 = score.gateways.iter().find(|g| g.id == "G0").unwrap();
assert!(!g0.passed);
assert!(g0.failure_reason.as_ref().unwrap().contains("3 integrity"));
}
// G0 passes when no integrity evidence is present.
#[test]
fn test_gateway_g0_passes_when_no_integrity_failures() {
let calc = MqsCalculator::new();
let mut collector = EvidenceCollector::new();
collector.add(test_evidence_passed("F-QUAL-001"));
collector.add(test_evidence_passed("F-PERF-001"));
let score = calc
.calculate("test/model", &collector)
.expect("Calculation failed");
assert!(score.gateways_passed);
let g0 = score.gateways.iter().find(|g| g.id == "G0").unwrap();
assert!(g0.passed);
}
// Gateway results are emitted in the fixed order G0 through G4.
#[test]
fn test_gateway_order_g0_first() {
let calc = MqsCalculator::new();
let collector = EvidenceCollector::new();
let gateways = calc.check_gateways(collector.all());
assert_eq!(gateways[0].id, "G0");
assert_eq!(gateways[1].id, "G1");
assert_eq!(gateways[2].id, "G2");
assert_eq!(gateways[3].id, "G3");
assert_eq!(gateways[4].id, "G4");
}
}