// NOTE(review): 18 bytes per 32 elements resembles GGML's Q4_0 block layout rather than
// Q4_K's 256-element super-block — confirm this simplification is intended.
/// Bytes charged by `bytes_for_params` for one Q4K block of [`AC_QO_BLOCK_ELEMS`] parameters.
pub const AC_QO_Q4K_BYTES_PER_BLOCK: usize = 18;
/// Bytes charged by `bytes_for_params` for one Q6K block of [`AC_QO_BLOCK_ELEMS`] parameters.
pub const AC_QO_Q6K_BYTES_PER_BLOCK: usize = 26;
/// Bytes charged by `bytes_for_params` for one Q8_0 block of [`AC_QO_BLOCK_ELEMS`] parameters.
pub const AC_QO_Q8_0_BYTES_PER_BLOCK: usize = 34;
/// Number of parameters grouped into one quantized block; `bytes_for_params` rounds a
/// partial trailing block up to a whole block.
pub const AC_QO_BLOCK_ELEMS: usize = 32;
// NOTE(review): the two `*_BYTES_PER_PARAM` floats are not read by any code visible in this
// file section; `bytes_for_params` hard-codes the integer factors 2 and 4 instead.
/// Bytes per parameter for F16 storage, expressed as a float.
pub const AC_QO_F16_BYTES_PER_PARAM: f64 = 2.0;
/// Bytes per parameter for F32 storage, expressed as a float.
pub const AC_QO_F32_BYTES_PER_PARAM: f64 = 4.0;
/// Largest relative deviation (inclusive) accepted by
/// `verdict_from_concrete_size_within_20pct`.
pub const AC_QO_CONCRETE_TOLERANCE: f64 = 0.20;
/// Declared tolerance for SIMD-vs-scalar comparisons. `verdict_from_simd_scalar_equivalence`
/// compares raw bit patterns and does not read this constant; only the tests reference it.
pub const AC_QO_SIMD_TOLERANCE: f32 = 0.0;
/// Two-state outcome returned by every `verdict_from_*` check in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QoVerdict {
/// The checked property held for the supplied inputs.
Pass,
/// The property did not hold, or the inputs were rejected as invalid.
Fail,
}
/// Storage formats whose byte footprint `bytes_for_params` can compute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Quant {
/// Block format sized at `AC_QO_Q4K_BYTES_PER_BLOCK` bytes per block.
Q4K,
/// Block format sized at `AC_QO_Q6K_BYTES_PER_BLOCK` bytes per block.
Q6K,
/// Block format sized at `AC_QO_Q8_0_BYTES_PER_BLOCK` bytes per block.
Q8_0,
/// Unblocked format sized at 2 bytes per parameter.
F16,
/// Unblocked format sized at 4 bytes per parameter.
F32,
}
/// Returns the number of bytes needed to store `n_params` parameters in format `q`.
///
/// Block formats (`Q4K`, `Q6K`, `Q8_0`) are charged per block of `AC_QO_BLOCK_ELEMS`
/// parameters, with a partial trailing block rounded up to a whole block. `F16` and `F32`
/// are charged 2 and 4 bytes per parameter respectively.
///
/// Returns `None` when `n_params` is zero, or when the byte count does not fit in `usize`
/// (instead of panicking in debug builds or silently wrapping in release builds).
#[must_use]
pub fn bytes_for_params(n_params: usize, q: Quant) -> Option<usize> {
    if n_params == 0 {
        return None;
    }
    // Rounded-up block count; only the block-quantized arms use it.
    let blocks = n_params.div_ceil(AC_QO_BLOCK_ELEMS);
    // One exhaustive match: adding a `Quant` variant becomes a compile error here rather
    // than a runtime `unreachable!()`.
    match q {
        Quant::Q4K => blocks.checked_mul(AC_QO_Q4K_BYTES_PER_BLOCK),
        Quant::Q6K => blocks.checked_mul(AC_QO_Q6K_BYTES_PER_BLOCK),
        Quant::Q8_0 => blocks.checked_mul(AC_QO_Q8_0_BYTES_PER_BLOCK),
        Quant::F16 => n_params.checked_mul(2),
        Quant::F32 => n_params.checked_mul(4),
    }
}
/// Checks that the byte footprints for `n_params` parameters are strictly increasing in the
/// order Q4K < Q6K < Q8_0 < F16 < F32.
///
/// Returns [`QoVerdict::Fail`] if `bytes_for_params` yields `None` for any format (for
/// example when `n_params` is zero) or if any adjacent pair is not strictly ordered.
/// Because block formats are rounded up to whole blocks, very small counts fail: a single
/// parameter costs one 34-byte Q8_0 block but only 2 bytes as F16.
#[must_use]
pub fn verdict_from_size_ordering(n_params: usize) -> QoVerdict {
    // Formats listed from the smallest expected footprint to the largest.
    let ladder = [Quant::Q4K, Quant::Q6K, Quant::Q8_0, Quant::F16, Quant::F32];

    // Every size is computed, in ladder order, before any comparison is made.
    let mut footprints = [0_usize; 5];
    for (slot, format) in footprints.iter_mut().zip(ladder) {
        let Some(bytes) = bytes_for_params(n_params, format) else {
            return QoVerdict::Fail;
        };
        *slot = bytes;
    }

    let strictly_increasing = footprints.windows(2).all(|pair| pair[0] < pair[1]);
    if strictly_increasing {
        QoVerdict::Pass
    } else {
        QoVerdict::Fail
    }
}
/// Checks that `observed_scale` matches `alpha / rank`.
///
/// Returns [`QoVerdict::Fail`] when `rank` is zero, when `alpha` or `observed_scale` is not
/// finite, or when `alpha` is not strictly positive. Otherwise the observed value passes if
/// it lies within `1e-6 * max(|alpha / rank|, 1.0)` of the expected scale.
#[must_use]
pub fn verdict_from_alpha_scaling(alpha: f32, rank: u32, observed_scale: f32) -> QoVerdict {
    let inputs_usable =
        rank != 0 && alpha.is_finite() && observed_scale.is_finite() && alpha > 0.0;
    if !inputs_usable {
        return QoVerdict::Fail;
    }

    let target = alpha / rank as f32;
    // Relative tolerance, floored so that targets below 1.0 still get an absolute 1e-6 band.
    let slack = 1e-6 * target.abs().max(1.0);
    let deviation = (target - observed_scale).abs();

    if deviation <= slack {
        QoVerdict::Pass
    } else {
        QoVerdict::Fail
    }
}
/// Checks that `observed_gb` is within `AC_QO_CONCRETE_TOLERANCE` (relative, inclusive) of
/// `expected_gb`.
///
/// Returns [`QoVerdict::Fail`] when either value is not finite or when `expected_gb` is not
/// strictly positive. The deviation is measured as `|observed - expected| / expected`.
#[must_use]
pub fn verdict_from_concrete_size_within_20pct(expected_gb: f64, observed_gb: f64) -> QoVerdict {
    let both_finite = expected_gb.is_finite() && observed_gb.is_finite();
    if !both_finite || expected_gb <= 0.0 {
        return QoVerdict::Fail;
    }

    let relative_error = (observed_gb - expected_gb).abs() / expected_gb;
    if relative_error <= AC_QO_CONCRETE_TOLERANCE {
        QoVerdict::Pass
    } else {
        QoVerdict::Fail
    }
}
/// Checks that `observed_mean_mask` is within `tol` (absolute, inclusive) of `1.0 - p`.
///
/// Returns [`QoVerdict::Fail`] when any argument is not finite, when `p` lies outside the
/// half-open range `[0.0, 1.0)`, or when `tol` is negative.
#[must_use]
pub fn verdict_from_dropout_expectation(p: f32, observed_mean_mask: f32, tol: f32) -> QoVerdict {
    let all_finite = [p, observed_mean_mask, tol]
        .iter()
        .all(|value| value.is_finite());
    if !all_finite {
        return QoVerdict::Fail;
    }

    // `p == 1.0` is deliberately excluded by the half-open range.
    let probability_valid = (0.0..1.0).contains(&p);
    if tol < 0.0 || !probability_valid {
        return QoVerdict::Fail;
    }

    let keep_fraction = 1.0 - p;
    let deviation = (observed_mean_mask - keep_fraction).abs();
    if deviation <= tol {
        QoVerdict::Pass
    } else {
        QoVerdict::Fail
    }
}
/// Checks that two `f32` slices are bit-for-bit identical.
///
/// Elements are compared through `f32::to_bits`, so `0.0` and `-0.0` are treated as
/// different, and two NaNs match only when their bit patterns are equal. Returns
/// [`QoVerdict::Fail`] when the slices differ in length or are empty.
#[must_use]
pub fn verdict_from_simd_scalar_equivalence(
    simd_output: &[f32],
    scalar_output: &[f32],
) -> QoVerdict {
    let comparable = !simd_output.is_empty() && simd_output.len() == scalar_output.len();
    if !comparable {
        return QoVerdict::Fail;
    }

    let bit_identical = simd_output
        .iter()
        .zip(scalar_output.iter())
        .all(|(lhs, rhs)| lhs.to_bits() == rhs.to_bits());

    if bit_identical {
        QoVerdict::Pass
    } else {
        QoVerdict::Fail
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for the verdict helpers. Test names carry the check they target:
    //   fqo001 -> verdict_from_size_ordering
    //   fqo002 -> verdict_from_alpha_scaling
    //   fqo003 -> verdict_from_concrete_size_within_20pct
    //   fqo004 -> verdict_from_dropout_expectation
    //   fqo005 -> verdict_from_simd_scalar_equivalence
    use super::*;

    // ---- Provenance: pin the published constants so silent edits are caught. ----

    #[test]
    fn provenance_block_byte_constants() {
        assert_eq!(AC_QO_Q4K_BYTES_PER_BLOCK, 18);
        assert_eq!(AC_QO_Q6K_BYTES_PER_BLOCK, 26);
        assert_eq!(AC_QO_Q8_0_BYTES_PER_BLOCK, 34);
        assert_eq!(AC_QO_BLOCK_ELEMS, 32);
    }

    #[test]
    fn provenance_concrete_tolerance_20pct() {
        assert_eq!(AC_QO_CONCRETE_TOLERANCE, 0.20);
    }

    #[test]
    fn provenance_simd_tolerance_zero() {
        assert_eq!(AC_QO_SIMD_TOLERANCE, 0.0);
    }

    // ---- fqo001: size ordering Q4K < Q6K < Q8_0 < F16 < F32. ----

    #[test]
    fn fqo001_pass_one_block() {
        assert_eq!(verdict_from_size_ordering(AC_QO_BLOCK_ELEMS), QoVerdict::Pass);
    }

    #[test]
    fn fqo001_pass_one_billion_params() {
        assert_eq!(verdict_from_size_ordering(1_000_000_000), QoVerdict::Pass);
    }

    #[test]
    fn fqo001_fail_zero_params() {
        assert_eq!(verdict_from_size_ordering(0), QoVerdict::Fail);
    }

    #[test]
    fn fqo001_pass_partial_block() {
        // 48 = one full 32-element block plus a half-filled one, so the quantized sizes are
        // rounded up to two blocks (36 / 52 / 68 bytes) while F16 / F32 need 96 / 192 bytes.
        // The previous input (64) was exactly two full blocks and never exercised rounding.
        assert_eq!(verdict_from_size_ordering(48), QoVerdict::Pass);
    }

    #[test]
    fn fqo001_pass_realistic_layer_size() {
        assert_eq!(verdict_from_size_ordering(4096 * 4096), QoVerdict::Pass);
    }

    // ---- fqo002: observed scale must equal alpha / rank. ----

    #[test]
    fn fqo002_pass_standard_qlora_16_64() {
        assert_eq!(verdict_from_alpha_scaling(16.0, 64, 0.25), QoVerdict::Pass);
    }

    #[test]
    fn fqo002_pass_alpha_32_rank_16() {
        assert_eq!(verdict_from_alpha_scaling(32.0, 16, 2.0), QoVerdict::Pass);
    }

    #[test]
    fn fqo002_fail_wrong_scale() {
        assert_eq!(verdict_from_alpha_scaling(16.0, 64, 0.5), QoVerdict::Fail);
    }

    #[test]
    fn fqo002_fail_zero_rank() {
        assert_eq!(verdict_from_alpha_scaling(16.0, 0, 0.0), QoVerdict::Fail);
    }

    #[test]
    fn fqo002_fail_negative_alpha() {
        // The observed value matches alpha / rank numerically; the sign of alpha is what fails.
        assert_eq!(verdict_from_alpha_scaling(-16.0, 64, -0.25), QoVerdict::Fail);
    }

    #[test]
    fn fqo002_fail_nan() {
        assert_eq!(verdict_from_alpha_scaling(f32::NAN, 64, 0.25), QoVerdict::Fail);
    }

    // ---- fqo003: observed size within the relative tolerance of the expected size. ----

    #[test]
    fn fqo003_pass_q4k_9b_at_expected() {
        assert_eq!(verdict_from_concrete_size_within_20pct(5.0, 5.0), QoVerdict::Pass);
    }

    #[test]
    fn fqo003_pass_q4k_9b_within_20pct_high() {
        assert_eq!(verdict_from_concrete_size_within_20pct(5.0, 5.9), QoVerdict::Pass);
    }

    #[test]
    fn fqo003_pass_q4k_9b_within_20pct_low() {
        assert_eq!(verdict_from_concrete_size_within_20pct(5.0, 4.1), QoVerdict::Pass);
    }

    #[test]
    fn fqo003_fail_q4k_9b_30pct_high() {
        assert_eq!(verdict_from_concrete_size_within_20pct(5.0, 6.5), QoVerdict::Fail);
    }

    #[test]
    fn fqo003_pass_f16_9b_typical() {
        assert_eq!(verdict_from_concrete_size_within_20pct(18.0, 17.5), QoVerdict::Pass);
    }

    // ---- fqo004: observed mean mask must be close to 1 - p. ----

    #[test]
    fn fqo004_pass_p_zero_inference() {
        assert_eq!(verdict_from_dropout_expectation(0.0, 1.0, 1e-3), QoVerdict::Pass);
    }

    #[test]
    fn fqo004_pass_p_50_pct_with_noise() {
        assert_eq!(verdict_from_dropout_expectation(0.5, 0.502, 0.01), QoVerdict::Pass);
    }

    #[test]
    fn fqo004_fail_observed_mean_too_high() {
        assert_eq!(verdict_from_dropout_expectation(0.5, 0.7, 0.01), QoVerdict::Fail);
    }

    #[test]
    fn fqo004_fail_p_at_one() {
        // p == 1.0 is outside the accepted half-open range [0, 1).
        assert_eq!(verdict_from_dropout_expectation(1.0, 0.0, 0.01), QoVerdict::Fail);
    }

    #[test]
    fn fqo004_fail_negative_tolerance() {
        assert_eq!(verdict_from_dropout_expectation(0.5, 0.5, -1e-3), QoVerdict::Fail);
    }

    // ---- fqo005: SIMD and scalar outputs must be bit-identical. ----

    #[test]
    fn fqo005_pass_bit_exact() {
        let simd = vec![1.0_f32, 2.5, -3.0, 0.0];
        let scalar = vec![1.0_f32, 2.5, -3.0, 0.0];
        assert_eq!(verdict_from_simd_scalar_equivalence(&simd, &scalar), QoVerdict::Pass);
    }

    #[test]
    fn fqo005_fail_one_bit_off() {
        let simd = vec![1.0_f32, 2.5, -3.0, 0.0];
        // Next representable f32 above 2.5: differs from 2.5 in the lowest mantissa bit only.
        let bumped = f32::from_bits(2.5_f32.to_bits() + 1);
        let scalar = vec![1.0_f32, bumped, -3.0, 0.0];
        assert_eq!(verdict_from_simd_scalar_equivalence(&simd, &scalar), QoVerdict::Fail);
    }

    #[test]
    fn fqo005_fail_length_mismatch() {
        let simd = vec![1.0_f32, 2.5];
        let scalar = vec![1.0_f32, 2.5, -3.0];
        assert_eq!(verdict_from_simd_scalar_equivalence(&simd, &scalar), QoVerdict::Fail);
    }

    #[test]
    fn fqo005_fail_empty() {
        assert_eq!(verdict_from_simd_scalar_equivalence(&[], &[]), QoVerdict::Fail);
    }

    #[test]
    fn fqo005_fail_nan_pattern_mismatch() {
        // Both values are NaN but carry different payload bits.
        let simd = vec![f32::NAN];
        let scalar = vec![f32::from_bits(0x7fc0_0001)];
        assert_eq!(verdict_from_simd_scalar_equivalence(&simd, &scalar), QoVerdict::Fail);
    }

    // ---- Surveys: sweep a range of inputs through a single check. ----

    #[test]
    fn mutation_survey_001_param_counts_all_pass_above_one_block() {
        // Every count here is at least two full blocks. Counts only slightly above one block
        // are not covered: at 33 parameters Q8_0 pads to 68 bytes while F16 needs 66.
        for n in [64_usize, 128, 1024, 10_000, 1_000_000, 1_000_000_000] {
            let v = verdict_from_size_ordering(n);
            assert_eq!(v, QoVerdict::Pass, "n_params={n}");
        }
    }

    #[test]
    fn mutation_survey_003_tolerance_band() {
        let expected = 5.0_f64;
        for pct in [0_u32, 5, 10, 15, 19, 20, 21, 25] {
            let observed = expected * (1.0 + pct as f64 / 100.0);
            let v = verdict_from_concrete_size_within_20pct(expected, observed);
            // The tolerance is inclusive, so exactly 20 % must still pass.
            let want = if pct <= 20 {
                QoVerdict::Pass
            } else {
                QoVerdict::Fail
            };
            assert_eq!(v, want, "pct={pct}");
        }
    }

    // ---- End-to-end scenarios: all five checks together. ----

    #[test]
    fn realistic_healthy_quant_passes_all_5() {
        // 9_000_000_000 exceeds u32::MAX, so this test assumes a 64-bit `usize`.
        let v1 = verdict_from_size_ordering(9_000_000_000);
        let v2 = verdict_from_alpha_scaling(16.0, 64, 0.25);
        let v3 = verdict_from_concrete_size_within_20pct(5.0, 5.0);
        let v4 = verdict_from_dropout_expectation(0.5, 0.501, 0.01);
        let simd = vec![1.0_f32, 2.5];
        let scalar = vec![1.0_f32, 2.5];
        let v5 = verdict_from_simd_scalar_equivalence(&simd, &scalar);
        assert_eq!(v1, QoVerdict::Pass);
        assert_eq!(v2, QoVerdict::Pass);
        assert_eq!(v3, QoVerdict::Pass);
        assert_eq!(v4, QoVerdict::Pass);
        assert_eq!(v5, QoVerdict::Pass);
    }

    #[test]
    fn realistic_pre_fix_all_5_failures() {
        let v1 = verdict_from_size_ordering(0);
        let v2 = verdict_from_alpha_scaling(16.0, 64, 0.5);
        let v3 = verdict_from_concrete_size_within_20pct(5.0, 6.5);
        let v4 = verdict_from_dropout_expectation(0.5, 0.7, 0.01);
        let simd = vec![1.0_f32];
        let bumped = f32::from_bits(1.0_f32.to_bits() + 1);
        let scalar = vec![bumped];
        let v5 = verdict_from_simd_scalar_equivalence(&simd, &scalar);
        assert_eq!(v1, QoVerdict::Fail);
        assert_eq!(v2, QoVerdict::Fail);
        assert_eq!(v3, QoVerdict::Fail);
        assert_eq!(v4, QoVerdict::Fail);
        assert_eq!(v5, QoVerdict::Fail);
    }
}