/// Largest absolute per-element difference that
/// [`verdict_from_tc_gemm_equivalence`] still accepts.
pub const AC_NFTC_001_TOLERANCE: f32 = 1.0e-3;

/// Verdict returned by [`verdict_from_tc_gemm_equivalence`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Nftc001Verdict {
    /// Every check succeeded.
    Pass,
    /// At least one check was violated.
    Fail,
}

/// Compares `tc_out` with `naive_out` element by element.
///
/// Returns [`Nftc001Verdict::Pass`] only when all of the following hold:
///
/// * neither slice is empty,
/// * both slices have the same length,
/// * every element of both slices is finite (no NaN, no infinity),
/// * every pair differs by at most [`AC_NFTC_001_TOLERANCE`] in absolute value.
///
/// Any other input yields [`Nftc001Verdict::Fail`].
#[must_use]
pub fn verdict_from_tc_gemm_equivalence(tc_out: &[f32], naive_out: &[f32]) -> Nftc001Verdict {
    // A non-empty left side together with equal lengths implies that the
    // right side is non-empty as well.
    let comparable = !tc_out.is_empty() && tc_out.len() == naive_out.len();

    // Finiteness is tested first, so the difference below is never NaN and
    // `<=` is the exact negation of the "exceeds tolerance" condition.
    let equivalent = comparable
        && tc_out.iter().zip(naive_out).all(|(&lhs, &rhs)| {
            lhs.is_finite() && rhs.is_finite() && (lhs - rhs).abs() <= AC_NFTC_001_TOLERANCE
        });

    if equivalent {
        Nftc001Verdict::Pass
    } else {
        Nftc001Verdict::Fail
    }
}
/// Smallest ratio `tc_tps / naive_tps` that [`verdict_from_tc_speedup`]
/// accepts (the bound is inclusive).
pub const AC_NFTC_002_MIN_SPEEDUP: f32 = 5.0;

/// Verdict returned by [`verdict_from_tc_speedup`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Nftc002Verdict {
    /// The speedup ratio reached the required minimum.
    Pass,
    /// An input was rejected or the ratio was too small.
    Fail,
}

/// Judges the ratio `tc_tps / naive_tps` against [`AC_NFTC_002_MIN_SPEEDUP`].
///
/// Returns [`Nftc002Verdict::Fail`] when either input is non-finite, zero, or
/// negative, when the quotient itself is not finite, or when the quotient is
/// below the minimum. Otherwise returns [`Nftc002Verdict::Pass`].
#[must_use]
pub fn verdict_from_tc_speedup(tc_tps: f32, naive_tps: f32) -> Nftc002Verdict {
    let usable = |rate: f32| rate.is_finite() && rate > 0.0;
    if !(usable(tc_tps) && usable(naive_tps)) {
        return Nftc002Verdict::Fail;
    }

    // Two finite positive operands can still overflow to infinity, hence the
    // second finiteness check on the quotient.
    let ratio = tc_tps / naive_tps;
    if ratio.is_finite() && ratio >= AC_NFTC_002_MIN_SPEEDUP {
        Nftc002Verdict::Pass
    } else {
        Nftc002Verdict::Fail
    }
}
/// Largest absolute per-element difference that
/// [`verdict_from_dequant_match`] still accepts.
pub const AC_NFTC_003_TOLERANCE: f32 = 1.0e-3;

/// Verdict returned by [`verdict_from_dequant_match`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Nftc003Verdict {
    /// Every check succeeded.
    Pass,
    /// At least one check was violated.
    Fail,
}

/// Checks `gpu_shared_mem` against `cpu_reference` element by element.
///
/// The result is [`Nftc003Verdict::Fail`] if either slice is empty, if the
/// lengths differ, if any element on either side is not finite, or if any
/// pair differs by more than [`AC_NFTC_003_TOLERANCE`] in absolute value.
/// Only when none of these apply is the result [`Nftc003Verdict::Pass`].
#[must_use]
pub fn verdict_from_dequant_match(gpu_shared_mem: &[f32], cpu_reference: &[f32]) -> Nftc003Verdict {
    // With matching lengths, one emptiness test covers both slices.
    if gpu_shared_mem.is_empty() || gpu_shared_mem.len() != cpu_reference.len() {
        return Nftc003Verdict::Fail;
    }

    // Search for the first pair that breaks the contract instead of
    // confirming every pair individually.
    let violated = gpu_shared_mem
        .iter()
        .zip(cpu_reference)
        .any(|(gpu_val, cpu_val)| {
            let both_finite = gpu_val.is_finite() && cpu_val.is_finite();
            !both_finite || (gpu_val - cpu_val).abs() > AC_NFTC_003_TOLERANCE
        });

    if violated {
        Nftc003Verdict::Fail
    } else {
        Nftc003Verdict::Pass
    }
}
/// Largest absolute per-element difference that
/// [`verdict_from_backward_gradient_parity`] still accepts.
pub const AC_NFBTC_001_TOLERANCE: f32 = 1.0e-3;

/// Verdict returned by [`verdict_from_backward_gradient_parity`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Nfbtc001Verdict {
    /// Every check succeeded.
    Pass,
    /// At least one check was violated.
    Fail,
}

/// Compares `nf4_grad` with `fp32_grad` element by element.
///
/// Yields [`Nfbtc001Verdict::Pass`] when both slices are non-empty, have the
/// same length, contain only finite values, and no pair differs by more than
/// [`AC_NFBTC_001_TOLERANCE`] in absolute value. Every other input yields
/// [`Nfbtc001Verdict::Fail`].
#[must_use]
pub fn verdict_from_backward_gradient_parity(
    nf4_grad: &[f32],
    fp32_grad: &[f32],
) -> Nfbtc001Verdict {
    let comparable =
        !nf4_grad.is_empty() && !fp32_grad.is_empty() && nf4_grad.len() == fp32_grad.len();
    if !comparable {
        return Nfbtc001Verdict::Fail;
    }

    // Locate the first offending pair; having none means the gradients agree.
    let first_offender = nf4_grad
        .iter()
        .copied()
        .zip(fp32_grad.iter().copied())
        .position(|(quantized, reference)| {
            !quantized.is_finite()
                || !reference.is_finite()
                || (quantized - reference).abs() > AC_NFBTC_001_TOLERANCE
        });

    match first_offender {
        None => Nfbtc001Verdict::Pass,
        Some(_) => Nfbtc001Verdict::Fail,
    }
}
/// Exclusive upper bound on `nf4_peak_vram / fp16_peak_vram` accepted by
/// [`verdict_from_memory_saving`].
pub const AC_NFBTC_002_VRAM_RATIO_MAX: f32 = 0.5;

/// Verdict returned by [`verdict_from_memory_saving`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Nfbtc002Verdict {
    /// The ratio is strictly below the maximum.
    Pass,
    /// An input was zero or the ratio reached the maximum.
    Fail,
}

/// Judges the ratio `nf4_peak_vram / fp16_peak_vram` against
/// [`AC_NFBTC_002_VRAM_RATIO_MAX`].
///
/// Returns [`Nfbtc002Verdict::Fail`] when either argument is zero or when the
/// ratio is greater than or equal to the maximum; a ratio exactly at the
/// maximum fails. Otherwise returns [`Nfbtc002Verdict::Pass`].
///
/// The ratio is computed in `f64`, so `u64` inputs above 2^53 are rounded by
/// the conversion before the division.
#[must_use]
pub fn verdict_from_memory_saving(nf4_peak_vram: u64, fp16_peak_vram: u64) -> Nfbtc002Verdict {
    match (nf4_peak_vram, fp16_peak_vram) {
        // A zero on either side makes the ratio meaningless.
        (0, _) | (_, 0) => Nfbtc002Verdict::Fail,
        (nf4_bytes, fp16_bytes) => {
            let fraction = nf4_bytes as f64 / fp16_bytes as f64;
            let ceiling = f64::from(AC_NFBTC_002_VRAM_RATIO_MAX);
            if fraction.is_finite() && fraction < ceiling {
                Nfbtc002Verdict::Pass
            } else {
                Nfbtc002Verdict::Fail
            }
        }
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for the verdict functions of the parent module. Each group
    // covers the passing cases and every guard that forces a `Fail`.
    use super::*;

    // ---- verdict_from_tc_gemm_equivalence ----

    #[test]
    fn nftc001_pass_identical() {
        let a = [1.0_f32, 2.0, 3.0];
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &a), Nftc001Verdict::Pass);
    }

    #[test]
    fn nftc001_pass_within_tol() {
        let a = [1.0_f32];
        let b = [1.0_f32 + 5e-4];
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &b), Nftc001Verdict::Pass);
    }

    #[test]
    fn nftc001_fail_above_tol() {
        let a = [1.0_f32];
        let b = [1.01_f32];
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &b), Nftc001Verdict::Fail);
    }

    #[test]
    fn nftc001_fail_length() {
        let a = [1.0_f32];
        let b = [1.0_f32, 2.0];
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &b), Nftc001Verdict::Fail);
    }

    #[test]
    fn nftc001_fail_nan() {
        let a = [f32::NAN];
        let b = [1.0_f32];
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &b), Nftc001Verdict::Fail);
    }

    #[test]
    fn nftc001_fail_empty() {
        let empty: [f32; 0] = [];
        let a = [1.0_f32];
        assert_eq!(verdict_from_tc_gemm_equivalence(&empty, &empty), Nftc001Verdict::Fail);
        assert_eq!(verdict_from_tc_gemm_equivalence(&empty, &a), Nftc001Verdict::Fail);
        assert_eq!(verdict_from_tc_gemm_equivalence(&a, &empty), Nftc001Verdict::Fail);
    }

    // ---- verdict_from_tc_speedup ----

    #[test]
    fn nftc002_pass_5x() {
        assert_eq!(verdict_from_tc_speedup(5000.0, 1000.0), Nftc002Verdict::Pass);
    }

    #[test]
    fn nftc002_pass_higher_speedup() {
        assert_eq!(verdict_from_tc_speedup(40_000.0, 1000.0), Nftc002Verdict::Pass);
    }

    #[test]
    fn nftc002_fail_below_5x() {
        assert_eq!(verdict_from_tc_speedup(4000.0, 1000.0), Nftc002Verdict::Fail);
    }

    #[test]
    fn nftc002_fail_no_gain() {
        assert_eq!(verdict_from_tc_speedup(1000.0, 1000.0), Nftc002Verdict::Fail);
    }

    #[test]
    fn nftc002_fail_zero_naive() {
        assert_eq!(verdict_from_tc_speedup(5000.0, 0.0), Nftc002Verdict::Fail);
    }

    #[test]
    fn nftc002_fail_nan() {
        assert_eq!(verdict_from_tc_speedup(f32::NAN, 1000.0), Nftc002Verdict::Fail);
    }

    #[test]
    fn nftc002_fail_infinite() {
        assert_eq!(verdict_from_tc_speedup(f32::INFINITY, 1000.0), Nftc002Verdict::Fail);
        assert_eq!(verdict_from_tc_speedup(5000.0, f32::INFINITY), Nftc002Verdict::Fail);
    }

    #[test]
    fn nftc002_fail_negative() {
        // The quotient would be exactly 5.0, but non-positive inputs are rejected first.
        assert_eq!(verdict_from_tc_speedup(-5000.0, -1000.0), Nftc002Verdict::Fail);
    }

    // ---- verdict_from_dequant_match ----

    #[test]
    fn nftc003_pass_identical() {
        let a = [0.5_f32, -0.3, 0.0];
        assert_eq!(verdict_from_dequant_match(&a, &a), Nftc003Verdict::Pass);
    }

    #[test]
    fn nftc003_pass_within_tol() {
        let gpu = [0.5_f32 + 5e-4];
        let cpu = [0.5_f32];
        assert_eq!(verdict_from_dequant_match(&gpu, &cpu), Nftc003Verdict::Pass);
    }

    #[test]
    fn nftc003_fail_above_tol() {
        let gpu = [0.5_f32 + 0.01];
        let cpu = [0.5_f32];
        assert_eq!(verdict_from_dequant_match(&gpu, &cpu), Nftc003Verdict::Fail);
    }

    #[test]
    fn nftc003_fail_length() {
        let gpu = [0.5_f32];
        let cpu = [0.5_f32, 0.3];
        assert_eq!(verdict_from_dequant_match(&gpu, &cpu), Nftc003Verdict::Fail);
    }

    #[test]
    fn nftc003_fail_nan() {
        let gpu = [f32::NAN];
        let cpu = [0.5_f32];
        assert_eq!(verdict_from_dequant_match(&gpu, &cpu), Nftc003Verdict::Fail);
    }

    #[test]
    fn nftc003_fail_empty() {
        let empty: [f32; 0] = [];
        let cpu = [0.5_f32];
        assert_eq!(verdict_from_dequant_match(&empty, &empty), Nftc003Verdict::Fail);
        assert_eq!(verdict_from_dequant_match(&empty, &cpu), Nftc003Verdict::Fail);
    }

    // ---- verdict_from_backward_gradient_parity ----

    #[test]
    fn nfbtc001_pass_identical() {
        let a = [0.1_f32, -0.2, 0.3];
        assert_eq!(verdict_from_backward_gradient_parity(&a, &a), Nfbtc001Verdict::Pass);
    }

    #[test]
    fn nfbtc001_pass_within_tol() {
        let nf4 = [0.1_f32];
        let fp32 = [0.1_f32 + 5e-4];
        assert_eq!(
            verdict_from_backward_gradient_parity(&nf4, &fp32),
            Nfbtc001Verdict::Pass
        );
    }

    #[test]
    fn nfbtc001_fail_above_tol() {
        let nf4 = [0.1_f32];
        let fp32 = [0.5_f32];
        assert_eq!(
            verdict_from_backward_gradient_parity(&nf4, &fp32),
            Nfbtc001Verdict::Fail
        );
    }

    #[test]
    fn nfbtc001_fail_length() {
        let nf4 = [0.1_f32];
        let fp32 = [0.1_f32, 0.2];
        assert_eq!(
            verdict_from_backward_gradient_parity(&nf4, &fp32),
            Nfbtc001Verdict::Fail
        );
    }

    #[test]
    fn nfbtc001_fail_nan() {
        let nf4 = [f32::NAN];
        let fp32 = [0.1_f32];
        assert_eq!(
            verdict_from_backward_gradient_parity(&nf4, &fp32),
            Nfbtc001Verdict::Fail
        );
    }

    #[test]
    fn nfbtc001_fail_empty() {
        let empty: [f32; 0] = [];
        let fp32 = [0.1_f32];
        assert_eq!(
            verdict_from_backward_gradient_parity(&empty, &empty),
            Nfbtc001Verdict::Fail
        );
        assert_eq!(
            verdict_from_backward_gradient_parity(&empty, &fp32),
            Nfbtc001Verdict::Fail
        );
    }

    // ---- verdict_from_memory_saving ----

    #[test]
    fn nfbtc002_pass_canonical() {
        assert_eq!(
            verdict_from_memory_saving(4 * 1024 * 1024 * 1024, 10 * 1024 * 1024 * 1024),
            Nfbtc002Verdict::Pass
        );
    }

    #[test]
    fn nfbtc002_pass_25_percent() {
        assert_eq!(
            verdict_from_memory_saving(2_000_000, 8_000_000),
            Nfbtc002Verdict::Pass
        );
    }

    #[test]
    fn nfbtc002_fail_at_boundary() {
        // A ratio of exactly 0.5 is not a saving: the bound is exclusive.
        assert_eq!(
            verdict_from_memory_saving(5_000_000, 10_000_000),
            Nfbtc002Verdict::Fail
        );
    }

    #[test]
    fn nfbtc002_fail_above_50_percent() {
        assert_eq!(
            verdict_from_memory_saving(7_000_000, 10_000_000),
            Nfbtc002Verdict::Fail
        );
    }

    #[test]
    fn nfbtc002_fail_zero() {
        assert_eq!(verdict_from_memory_saving(0, 10_000_000), Nfbtc002Verdict::Fail);
        assert_eq!(verdict_from_memory_saving(5_000_000, 0), Nfbtc002Verdict::Fail);
    }

    // ---- threshold constants ----

    #[test]
    fn provenance_constants() {
        // Pins the published thresholds so an accidental edit is caught.
        assert!((AC_NFTC_001_TOLERANCE - 1e-3).abs() < 1e-9);
        assert!((AC_NFTC_002_MIN_SPEEDUP - 5.0).abs() < 1e-9);
        assert!((AC_NFTC_003_TOLERANCE - 1e-3).abs() < 1e-9);
        assert!((AC_NFBTC_001_TOLERANCE - 1e-3).abs() < 1e-9);
        assert!((AC_NFBTC_002_VRAM_RATIO_MAX - 0.5).abs() < 1e-9);
    }
}