/// Largest relative error accepted by [`verdict_from_dot_product_parity`].
pub const AC_AQ_001_RELATIVE_TOLERANCE: f32 = 1.0e-3;

/// Outcome of the AC-AQ-001 dot-product parity check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Aq001Verdict {
    /// The two dot products agree within the relative tolerance.
    Pass,
    /// The inputs were non-finite or the relative error exceeded the tolerance.
    Fail,
}

/// Checks that `dot_q8k` matches the reference `dot_f32` within
/// [`AC_AQ_001_RELATIVE_TOLERANCE`].
///
/// The relative error is `|dot_q8k - dot_f32| / max(|dot_f32|, 1e-6)`.
///
/// Returns [`Aq001Verdict::Fail`] when either input is NaN or infinite, when
/// the computed relative error is not finite (for example because the
/// difference overflowed), or when it is strictly greater than the tolerance.
/// Otherwise returns [`Aq001Verdict::Pass`].
#[must_use]
pub fn verdict_from_dot_product_parity(dot_q8k: f32, dot_f32: f32) -> Aq001Verdict {
    let inputs_are_finite = dot_q8k.is_finite() && dot_f32.is_finite();
    if !inputs_are_finite {
        return Aq001Verdict::Fail;
    }

    // Floor the reference magnitude so the division stays well-defined when
    // `dot_f32` is zero or very small.
    let reference_magnitude = dot_f32.abs().max(1.0e-6_f32);
    let relative_error = (dot_q8k - dot_f32).abs() / reference_magnitude;

    if relative_error.is_finite() && relative_error <= AC_AQ_001_RELATIVE_TOLERANCE {
        Aq001Verdict::Pass
    } else {
        Aq001Verdict::Fail
    }
}
/// Smallest `apr_cpu_tps / llama_cpp_cpu_tps` ratio accepted by
/// [`verdict_from_throughput_target`].
pub const AC_AQ_002_MIN_RATIO: f32 = 0.85;

/// Smallest absolute `apr_cpu_tps` accepted by
/// [`verdict_from_throughput_target`].
pub const AC_AQ_002_TARGET_TPS: f32 = 60.0;

/// Outcome of the AC-AQ-002 throughput check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Aq002Verdict {
    /// Both the absolute target and the ratio floor were met.
    Pass,
    /// An input was invalid, or at least one of the two thresholds was missed.
    Fail,
}

/// Decides whether `apr_cpu_tps` is fast enough, both in absolute terms and
/// relative to `llama_cpp_cpu_tps`.
///
/// Passing requires all of the following:
/// - both inputs are finite and strictly positive;
/// - `apr_cpu_tps >= AC_AQ_002_TARGET_TPS`;
/// - `apr_cpu_tps / llama_cpp_cpu_tps` is finite and at least
///   [`AC_AQ_002_MIN_RATIO`].
///
/// Anything else yields [`Aq002Verdict::Fail`].
#[must_use]
pub fn verdict_from_throughput_target(
    apr_cpu_tps: f32,
    llama_cpp_cpu_tps: f32,
) -> Aq002Verdict {
    let both_finite = apr_cpu_tps.is_finite() && llama_cpp_cpu_tps.is_finite();
    let both_positive = apr_cpu_tps > 0.0 && llama_cpp_cpu_tps > 0.0;
    if !(both_finite && both_positive) {
        return Aq002Verdict::Fail;
    }

    let meets_absolute_target = apr_cpu_tps >= AC_AQ_002_TARGET_TPS;

    // The quotient can still overflow to infinity for extreme inputs, which
    // is treated as a failure rather than as an arbitrarily good ratio.
    let speed_ratio = apr_cpu_tps / llama_cpp_cpu_tps;
    let meets_ratio_floor = speed_ratio.is_finite() && speed_ratio >= AC_AQ_002_MIN_RATIO;

    if meets_absolute_target && meets_ratio_floor {
        Aq002Verdict::Pass
    } else {
        Aq002Verdict::Fail
    }
}
/// Outcome of the AC-AQ-003 quantization-call-count check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Aq003Verdict {
    /// Between one and `num_matmuls` quantize calls were observed.
    Pass,
    /// There were no matmuls, no quantize calls, or more quantize calls than
    /// matmuls.
    Fail,
}

/// Checks that the number of quantize calls is non-zero and does not exceed
/// the number of matmuls.
///
/// Returns [`Aq003Verdict::Pass`] exactly when
/// `1 <= quantize_call_count <= num_matmuls`; every other combination
/// (including `num_matmuls == 0`) returns [`Aq003Verdict::Fail`].
#[must_use]
pub const fn verdict_from_amortized_quantization(
    quantize_call_count: u64,
    num_matmuls: u64,
) -> Aq003Verdict {
    let ran_any_matmul = num_matmuls != 0;
    let quantized_at_least_once = quantize_call_count != 0;
    let at_most_one_per_matmul = quantize_call_count <= num_matmuls;

    if ran_any_matmul && quantized_at_least_once && at_most_one_per_matmul {
        Aq003Verdict::Pass
    } else {
        Aq003Verdict::Fail
    }
}
/// Outcome of the AC-AQ-004 per-step argmax comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Aq004Verdict {
    /// Both sequences are non-empty and identical element for element.
    Pass,
    /// A sequence was empty, the lengths differed, or some step differed.
    Fail,
}

/// Compares two per-step argmax sequences for exact equality.
///
/// Returns [`Aq004Verdict::Pass`] only when both slices are non-empty, have
/// the same length, and hold the same value at every index. An empty slice on
/// either side is always a [`Aq004Verdict::Fail`], even if both are empty.
#[must_use]
pub fn verdict_from_output_quality_parity(
    q8k_argmax_per_step: &[u32],
    f32_argmax_per_step: &[u32],
) -> Aq004Verdict {
    match (q8k_argmax_per_step, f32_argmax_per_step) {
        // No steps recorded on at least one side: nothing was compared.
        ([], _) | (_, []) => Aq004Verdict::Fail,
        // Slice equality covers both the length check and the per-step check.
        (quantized, reference) if quantized == reference => Aq004Verdict::Pass,
        _ => Aq004Verdict::Fail,
    }
}
/// Bandwidth speedup factor (4.0).
///
/// NOTE(review): presumably the expected memory-bandwidth gain of the
/// quantized path over the f32 path; the baseline is not shown in this file,
/// so confirm against the spec. No function in the visible code reads it.
pub const AQ_BANDWIDTH_SPEEDUP: f32 = 4.0;
/// Lower end of the compute speedup range (3.0).
///
/// NOTE(review): presumably pairs with [`AQ_COMPUTE_SPEEDUP_MAX`] to describe
/// an expected 3x-4x range; confirm against the spec.
pub const AQ_COMPUTE_SPEEDUP_MIN: f32 = 3.0;
/// Upper end of the compute speedup range (4.0).
///
/// NOTE(review): presumably pairs with [`AQ_COMPUTE_SPEEDUP_MIN`]; confirm
/// against the spec.
pub const AQ_COMPUTE_SPEEDUP_MAX: f32 = 4.0;
#[cfg(test)]
mod tests {
    use super::*;

    // ---- AC-AQ-001: dot-product parity ----

    #[test]
    fn aq001_pass_identical() {
        let verdict = verdict_from_dot_product_parity(1234.5, 1234.5);
        assert_eq!(verdict, Aq001Verdict::Pass);
    }

    #[test]
    fn aq001_pass_within_tolerance() {
        // Relative error is 0.5 / 1000 = 5e-4, below the 1e-3 tolerance.
        let verdict = verdict_from_dot_product_parity(1000.5, 1000.0);
        assert_eq!(verdict, Aq001Verdict::Pass);
    }

    #[test]
    fn aq001_fail_above_tolerance() {
        // Relative error is 5 / 1000 = 5e-3, above the 1e-3 tolerance.
        let verdict = verdict_from_dot_product_parity(1005.0, 1000.0);
        assert_eq!(verdict, Aq001Verdict::Fail);
    }

    #[test]
    fn aq001_pass_near_zero() {
        let verdict = verdict_from_dot_product_parity(1e-7, 1e-7);
        assert_eq!(verdict, Aq001Verdict::Pass);
    }

    #[test]
    fn aq001_fail_nan() {
        let verdict = verdict_from_dot_product_parity(f32::NAN, 1000.0);
        assert_eq!(verdict, Aq001Verdict::Fail);
    }

    #[test]
    fn aq001_fail_inf() {
        let verdict = verdict_from_dot_product_parity(f32::INFINITY, 1000.0);
        assert_eq!(verdict, Aq001Verdict::Fail);
    }

    // ---- AC-AQ-002: throughput target ----

    #[test]
    fn aq002_pass_canonical() {
        let verdict = verdict_from_throughput_target(65.0, 74.0);
        assert_eq!(verdict, Aq002Verdict::Pass);
    }

    #[test]
    fn aq002_fail_below_60_target() {
        let verdict = verdict_from_throughput_target(50.0, 55.0);
        assert_eq!(verdict, Aq002Verdict::Fail);
    }

    #[test]
    fn aq002_fail_below_ratio() {
        // Meets the absolute target, but 60 / 100 = 0.6 is under the 0.85 floor.
        let verdict = verdict_from_throughput_target(60.0, 100.0);
        assert_eq!(verdict, Aq002Verdict::Fail);
    }

    #[test]
    fn aq002_fail_pre_fix_baseline() {
        let verdict = verdict_from_throughput_target(9.5, 74.0);
        assert_eq!(verdict, Aq002Verdict::Fail);
    }

    #[test]
    fn aq002_fail_zero_llama() {
        let verdict = verdict_from_throughput_target(60.0, 0.0);
        assert_eq!(verdict, Aq002Verdict::Fail);
    }

    #[test]
    fn aq002_fail_nan() {
        let verdict = verdict_from_throughput_target(f32::NAN, 74.0);
        assert_eq!(verdict, Aq002Verdict::Fail);
    }

    // ---- AC-AQ-003: amortized quantization ----

    #[test]
    fn aq003_pass_one_per_matmul() {
        let verdict = verdict_from_amortized_quantization(196, 196);
        assert_eq!(verdict, Aq003Verdict::Pass);
    }

    #[test]
    fn aq003_pass_fewer_quants() {
        let verdict = verdict_from_amortized_quantization(50, 196);
        assert_eq!(verdict, Aq003Verdict::Pass);
    }

    #[test]
    fn aq003_fail_per_dot_regression() {
        // Far more quantize calls than matmuls.
        let verdict = verdict_from_amortized_quantization(802_816, 196);
        assert_eq!(verdict, Aq003Verdict::Fail);
    }

    #[test]
    fn aq003_fail_zero_matmuls() {
        let verdict = verdict_from_amortized_quantization(0, 0);
        assert_eq!(verdict, Aq003Verdict::Fail);
    }

    #[test]
    fn aq003_fail_zero_quants() {
        let verdict = verdict_from_amortized_quantization(0, 196);
        assert_eq!(verdict, Aq003Verdict::Fail);
    }

    // ---- AC-AQ-004: output quality parity ----

    #[test]
    fn aq004_pass_canonical() {
        let quantized_steps = [42_u32, 100, 7, 99];
        let reference_steps = [42_u32, 100, 7, 99];
        let verdict = verdict_from_output_quality_parity(&quantized_steps, &reference_steps);
        assert_eq!(verdict, Aq004Verdict::Pass);
    }

    #[test]
    fn aq004_fail_step_divergence() {
        // The sequences differ only at index 2 (7 vs 8).
        let quantized_steps = [42_u32, 100, 7, 99];
        let reference_steps = [42_u32, 100, 8, 99];
        let verdict = verdict_from_output_quality_parity(&quantized_steps, &reference_steps);
        assert_eq!(verdict, Aq004Verdict::Fail);
    }

    #[test]
    fn aq004_fail_length_mismatch() {
        let quantized_steps = [42_u32];
        let reference_steps = [42_u32, 100];
        let verdict = verdict_from_output_quality_parity(&quantized_steps, &reference_steps);
        assert_eq!(verdict, Aq004Verdict::Fail);
    }

    #[test]
    fn aq004_fail_empty() {
        let verdict = verdict_from_output_quality_parity(&[], &[]);
        assert_eq!(verdict, Aq004Verdict::Fail);
    }

    // ---- Constant values ----

    #[test]
    fn provenance_constants() {
        // Guards against accidental edits to the threshold values.
        let is_close = |actual: f32, expected: f32| (actual - expected).abs() < 1e-9;
        assert!(is_close(AC_AQ_001_RELATIVE_TOLERANCE, 1e-3));
        assert!(is_close(AC_AQ_002_MIN_RATIO, 0.85));
        assert!(is_close(AC_AQ_002_TARGET_TPS, 60.0));
        assert!(is_close(AQ_BANDWIDTH_SPEEDUP, 4.0));
    }
}