/// Cosine-similarity bound used by [`verdict_from_quant_equivalence`].
/// The bound is exclusive: a similarity equal to this value fails.
pub const AC_QAMETA_COSINE_MIN: f64 = 0.95;
/// Presumably the `k` of the top-k comparison. It is not read by any of the
/// verdict functions visible here; only its value is pinned by a test.
pub const AC_QAMETA_TOP_K: u32 = 5;
/// Minimum `top_k_overlap_count` accepted by [`verdict_from_quant_equivalence`]
/// (inclusive: a count equal to this value is accepted).
pub const AC_QAMETA_TOP_K_OVERLAP: u32 = 3;
/// Upper bound on `max_l2_drift` used by [`verdict_from_roundtrip`].
/// The bound is exclusive: a drift equal to this value fails.
pub const AC_QAMETA_ROUNDTRIP_TOLERANCE: f64 = 0.01;
/// Names of the five architectures. [`verdict_from_multi_arch_smoke`] does not
/// consult this list; it only counts the entries it is given.
pub const AC_QAMETA_ARCHITECTURES: [&str; 5] =
["qwen2", "llama", "phi", "gemma", "mistral"];
/// Minimum number of coherent entries for [`verdict_from_multi_arch_smoke`]
/// to return `Pass` (inclusive).
pub const AC_QAMETA_MIN_COHERENT_ARCHS: u32 = 3;
/// Result type returned by [`verdict_from_quant_equivalence`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantEquivalenceVerdict {
/// Neither the similarity check nor the overlap check rejected the input.
Pass,
/// The similarity check or the overlap check rejected the input.
Fail,
}
/// Verdict for the quantization-equivalence check.
///
/// Returns [`QuantEquivalenceVerdict::Pass`] only when both conditions hold:
///
/// * `cosine_similarity` is strictly greater than [`AC_QAMETA_COSINE_MIN`];
/// * `top_k_overlap_count` is at least [`AC_QAMETA_TOP_K_OVERLAP`].
///
/// Any other input, including a NaN `cosine_similarity`, returns
/// [`QuantEquivalenceVerdict::Fail`].
#[must_use]
pub fn verdict_from_quant_equivalence(
    cosine_similarity: f64,
    top_k_overlap_count: u32,
) -> QuantEquivalenceVerdict {
    // Written as a positive acceptance test on purpose: every comparison with
    // NaN is false, so a NaN similarity is rejected here. A "reject if <= min"
    // guard would let NaN slip through as a pass.
    let similarity_ok = cosine_similarity > AC_QAMETA_COSINE_MIN;
    let overlap_ok = top_k_overlap_count >= AC_QAMETA_TOP_K_OVERLAP;

    if similarity_ok && overlap_ok {
        QuantEquivalenceVerdict::Pass
    } else {
        QuantEquivalenceVerdict::Fail
    }
}
/// Result type returned by [`verdict_from_roundtrip`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RoundtripVerdict {
/// None of the roundtrip checks rejected the input.
Pass,
/// At least one roundtrip check rejected the input.
Fail,
}
/// Verdict for the roundtrip check.
///
/// Returns [`RoundtripVerdict::Pass`] only when all of the following hold:
///
/// * `tensor_count_before` equals `tensor_count_after`;
/// * `has_nan` is `false`;
/// * `max_l2_drift` is strictly less than [`AC_QAMETA_ROUNDTRIP_TOLERANCE`].
///
/// Any other input, including a NaN `max_l2_drift`, returns
/// [`RoundtripVerdict::Fail`].
#[must_use]
pub fn verdict_from_roundtrip(
    tensor_count_before: u32,
    tensor_count_after: u32,
    has_nan: bool,
    max_l2_drift: f64,
) -> RoundtripVerdict {
    let counts_match = tensor_count_before == tensor_count_after;
    // Positive acceptance test: `NaN < tolerance` is false, so a NaN drift
    // fails even when the caller left `has_nan` unset. A "reject if >= tol"
    // guard would have let NaN through as a pass.
    let drift_within_tolerance = max_l2_drift < AC_QAMETA_ROUNDTRIP_TOLERANCE;

    if counts_match && !has_nan && drift_within_tolerance {
        RoundtripVerdict::Pass
    } else {
        RoundtripVerdict::Fail
    }
}
/// Result type returned by [`verdict_from_multi_arch_smoke`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MultiArchSmokeVerdict {
/// Enough entries were coherent (non-empty and not broken).
Pass,
/// Too few entries were coherent.
Fail,
}
/// Verdict for the multi-architecture smoke check.
///
/// Each element of `arch_results` is `(arch, nonempty, broken)`. An entry is
/// counted as coherent when `nonempty` is `true` and `broken` is `false`.
/// Returns [`MultiArchSmokeVerdict::Pass`] when the number of coherent entries
/// reaches [`AC_QAMETA_MIN_COHERENT_ARCHS`], otherwise `Fail`.
///
/// The architecture name is ignored: entries are neither validated nor
/// de-duplicated, so repeated names each count.
#[must_use]
pub fn verdict_from_multi_arch_smoke(
    arch_results: &[(&str, bool, bool)],
) -> MultiArchSmokeVerdict {
    // Map every entry to 1 (coherent) or 0 and add them up.
    let coherent_total: u32 = arch_results
        .iter()
        .map(|&(_, nonempty, broken)| u32::from(nonempty && !broken))
        .sum();

    if coherent_total < AC_QAMETA_MIN_COHERENT_ARCHS {
        return MultiArchSmokeVerdict::Fail;
    }
    MultiArchSmokeVerdict::Pass
}
/// Result type returned by [`verdict_from_prompt_invariance`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PromptInvarianceVerdict {
/// Both outputs contained the answer.
Pass,
/// At least one output did not contain the answer.
Fail,
}
/// Verdict for the prompt-invariance check.
///
/// Returns [`PromptInvarianceVerdict::Pass`] only when both
/// `output_a_contains_answer` and `output_b_contains_answer` are `true`;
/// every other combination returns `Fail`.
#[must_use]
pub fn verdict_from_prompt_invariance(
    output_a_contains_answer: bool,
    output_b_contains_answer: bool,
) -> PromptInvarianceVerdict {
    match (output_a_contains_answer, output_b_contains_answer) {
        (true, true) => PromptInvarianceVerdict::Pass,
        _ => PromptInvarianceVerdict::Fail,
    }
}
/// Result type returned by [`verdict_from_temp_zero_determinism`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TemperatureZeroDeterminismVerdict {
/// Exactly one unique output was reported.
Pass,
/// Zero, or more than one, unique outputs were reported.
Fail,
}
/// Verdict for the temperature-zero determinism check.
///
/// Returns [`TemperatureZeroDeterminismVerdict::Pass`] only when
/// `unique_outputs` is exactly `1`. Any other count, including `0`,
/// returns `Fail`.
#[must_use]
pub fn verdict_from_temp_zero_determinism(
    unique_outputs: u32,
) -> TemperatureZeroDeterminismVerdict {
    match unique_outputs {
        1 => TemperatureZeroDeterminismVerdict::Pass,
        _ => TemperatureZeroDeterminismVerdict::Fail,
    }
}
// Unit tests for the constants and verdict functions above. The `provenance_*`
// tests pin the constant values, the `fm00N_*` groups exercise one verdict
// function each, and the `realistic_*` tests run all five functions together.
#[cfg(test)]
mod tests {
use super::*;
// --- provenance: constant values must not change silently ---
#[test]
fn provenance_cosine_threshold_095() {
assert!((AC_QAMETA_COSINE_MIN - 0.95).abs() < f64::EPSILON);
}
#[test]
fn provenance_top_k_5() {
assert_eq!(AC_QAMETA_TOP_K, 5);
}
#[test]
fn provenance_top_k_overlap_3() {
assert_eq!(AC_QAMETA_TOP_K_OVERLAP, 3);
}
#[test]
fn provenance_roundtrip_tolerance_001() {
assert!((AC_QAMETA_ROUNDTRIP_TOLERANCE - 0.01).abs() < f64::EPSILON);
}
#[test]
fn provenance_architectures_count_5() {
assert_eq!(AC_QAMETA_ARCHITECTURES.len(), 5);
}
#[test]
fn provenance_min_coherent_archs_3() {
assert_eq!(AC_QAMETA_MIN_COHERENT_ARCHS, 3);
}
// --- fm001: verdict_from_quant_equivalence ---
#[test]
fn fm001_pass_high_similarity_full_overlap() {
assert_eq!(verdict_from_quant_equivalence(0.99, 5), QuantEquivalenceVerdict::Pass);
}
// An overlap equal to AC_QAMETA_TOP_K_OVERLAP is accepted (inclusive bound).
#[test]
fn fm001_pass_at_overlap_threshold() {
assert_eq!(verdict_from_quant_equivalence(0.96, 3), QuantEquivalenceVerdict::Pass);
}
// 0.95 equals AC_QAMETA_COSINE_MIN, so this pins the similarity bound as exclusive.
#[test]
fn fm001_fail_low_similarity() {
assert_eq!(verdict_from_quant_equivalence(0.95, 5), QuantEquivalenceVerdict::Fail);
}
#[test]
fn fm001_fail_low_overlap() {
assert_eq!(verdict_from_quant_equivalence(0.99, 2), QuantEquivalenceVerdict::Fail);
}
// --- fm002: verdict_from_roundtrip ---
#[test]
fn fm002_pass_clean_roundtrip() {
assert_eq!(verdict_from_roundtrip(339, 339, false, 0.005), RoundtripVerdict::Pass);
}
#[test]
fn fm002_fail_tensor_count_mismatch() {
assert_eq!(verdict_from_roundtrip(339, 338, false, 0.005), RoundtripVerdict::Fail);
}
// `has_nan` alone is enough to fail, even with zero drift.
#[test]
fn fm002_fail_nan_introduced() {
assert_eq!(verdict_from_roundtrip(339, 339, true, 0.0), RoundtripVerdict::Fail);
}
// A drift equal to AC_QAMETA_ROUNDTRIP_TOLERANCE fails: the bound is exclusive.
#[test]
fn fm002_fail_l2_drift_at_threshold() {
assert_eq!(verdict_from_roundtrip(339, 339, false, 0.01), RoundtripVerdict::Fail);
}
#[test]
fn fm002_pass_l2_drift_just_under() {
assert_eq!(verdict_from_roundtrip(339, 339, false, 0.0099), RoundtripVerdict::Pass);
}
// --- fm003: verdict_from_multi_arch_smoke ---
// Tuples are (arch, nonempty, broken); an entry counts when nonempty && !broken.
#[test]
fn fm003_pass_all_5_coherent() {
let r = [
("qwen2", true, false),
("llama", true, false),
("phi", true, false),
("gemma", true, false),
("mistral", true, false),
];
assert_eq!(verdict_from_multi_arch_smoke(&r), MultiArchSmokeVerdict::Pass);
}
// gemma is empty and mistral is broken, leaving exactly three coherent entries.
#[test]
fn fm003_pass_3_of_5() {
let r = [
("qwen2", true, false),
("llama", true, false),
("phi", true, false),
("gemma", false, false),
("mistral", true, true),
];
assert_eq!(verdict_from_multi_arch_smoke(&r), MultiArchSmokeVerdict::Pass);
}
#[test]
fn fm003_fail_only_2_coherent() {
let r = [
("qwen2", true, false),
("llama", true, false),
("phi", false, false),
("gemma", false, true),
("mistral", true, true),
];
assert_eq!(verdict_from_multi_arch_smoke(&r), MultiArchSmokeVerdict::Fail);
}
// No entries means zero coherent entries, which is below the minimum.
#[test]
fn fm003_fail_empty_results() {
assert_eq!(verdict_from_multi_arch_smoke(&[]), MultiArchSmokeVerdict::Fail);
}
// --- fm004: verdict_from_prompt_invariance (full truth table) ---
#[test]
fn fm004_pass_both_contain_answer() {
assert_eq!(verdict_from_prompt_invariance(true, true), PromptInvarianceVerdict::Pass);
}
#[test]
fn fm004_fail_only_a_has_answer() {
assert_eq!(verdict_from_prompt_invariance(true, false), PromptInvarianceVerdict::Fail);
}
#[test]
fn fm004_fail_only_b_has_answer() {
assert_eq!(verdict_from_prompt_invariance(false, true), PromptInvarianceVerdict::Fail);
}
#[test]
fn fm004_fail_both_miss_answer() {
assert_eq!(verdict_from_prompt_invariance(false, false), PromptInvarianceVerdict::Fail);
}
// --- fm005: verdict_from_temp_zero_determinism ---
#[test]
fn fm005_pass_one_unique_output() {
assert_eq!(
verdict_from_temp_zero_determinism(1),
TemperatureZeroDeterminismVerdict::Pass
);
}
#[test]
fn fm005_fail_two_unique_outputs() {
assert_eq!(
verdict_from_temp_zero_determinism(2),
TemperatureZeroDeterminismVerdict::Fail
);
}
// Zero unique outputs is also a failure: only exactly one passes.
#[test]
fn fm005_fail_zero_unique() {
assert_eq!(
verdict_from_temp_zero_determinism(0),
TemperatureZeroDeterminismVerdict::Fail
);
}
// --- combined scenarios: every verdict function in one run ---
#[test]
fn realistic_healthy_run_passes_all_5() {
assert_eq!(verdict_from_quant_equivalence(0.98, 5), QuantEquivalenceVerdict::Pass);
assert_eq!(verdict_from_roundtrip(339, 339, false, 0.003), RoundtripVerdict::Pass);
let r = [
("qwen2", true, false),
("llama", true, false),
("phi", true, false),
("gemma", true, false),
("mistral", true, false),
];
assert_eq!(verdict_from_multi_arch_smoke(&r), MultiArchSmokeVerdict::Pass);
assert_eq!(verdict_from_prompt_invariance(true, true), PromptInvarianceVerdict::Pass);
assert_eq!(verdict_from_temp_zero_determinism(1), TemperatureZeroDeterminismVerdict::Pass);
}
#[test]
fn realistic_pre_fix_all_5_failures() {
assert_eq!(verdict_from_quant_equivalence(0.50, 1), QuantEquivalenceVerdict::Fail);
assert_eq!(verdict_from_roundtrip(339, 339, true, 0.05), RoundtripVerdict::Fail);
let r = [
("qwen2", false, false),
("llama", false, false),
("phi", false, false),
("gemma", false, false),
("mistral", false, false),
];
assert_eq!(verdict_from_multi_arch_smoke(&r), MultiArchSmokeVerdict::Fail);
assert_eq!(verdict_from_prompt_invariance(false, false), PromptInvarianceVerdict::Fail);
assert_eq!(verdict_from_temp_zero_determinism(3), TemperatureZeroDeterminismVerdict::Fail);
}
}