#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_dot_negative_activations() {
if !is_x86_feature_detected!("avx2") {
return;
}
let block = build_q4_0_test_block(1.0, 0xF); let mut q4_data = Vec::with_capacity(18 * 2);
for _ in 0..2 {
q4_data.extend_from_slice(&block);
}
let q8_scales = vec![1.0f32; 2];
let q8_quants = vec![-3i8; 64];
let scalar = fused_q4_0_q8_0_dot_scalar(&q4_data, &q8_scales, &q8_quants, 64);
let avx2 = unsafe { fused_q4_0_q8_0_dot_avx2(&q4_data, &q8_scales, &q8_quants, 64) };
let diff = (scalar - avx2).abs();
let tol = scalar.abs().max(1.0) * 0.02;
assert!(
diff < tol,
"negative act: scalar={scalar} vs avx2={avx2}, diff={diff}"
);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_4block_dot_parity_with_scalar() {
if !is_x86_feature_detected!("avx2") {
return;
}
let block = build_q4_0_test_block(0.5, 3);
let mut q4_data = Vec::with_capacity(18 * 8);
for _ in 0..8 {
q4_data.extend_from_slice(&block);
}
let q8_scales = vec![1.0f32; 8];
let q8_quants = vec![4i8; 256];
let scalar = fused_q4_0_q8_0_dot_scalar(&q4_data, &q8_scales, &q8_quants, 256);
let avx2_4b = unsafe { fused_q4_0_q8_0_dot_avx2_4block(&q4_data, &q8_scales, &q8_quants, 256) };
let diff = (scalar - avx2_4b).abs();
let tol = scalar.abs().max(1.0) * 0.02;
assert!(
diff < tol,
"4block: scalar={scalar} vs avx2_4b={avx2_4b}, diff={diff}"
);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_4block_dot_large_dim() {
if !is_x86_feature_detected!("avx2") {
return;
}
let block = build_q4_0_test_block(1.0, 10);
let mut q4_data = Vec::with_capacity(18 * 16);
for _ in 0..16 {
q4_data.extend_from_slice(&block);
}
let q8_scales: Vec<f32> = (0..16).map(|i| 0.5 + i as f32 * 0.1).collect();
let q8_quants = vec![1i8; 512];
let scalar = fused_q4_0_q8_0_dot_scalar(&q4_data, &q8_scales, &q8_quants, 512);
let avx2_4b = unsafe { fused_q4_0_q8_0_dot_avx2_4block(&q4_data, &q8_scales, &q8_quants, 512) };
let diff = (scalar - avx2_4b).abs();
let tol = scalar.abs().max(1.0) * 0.02;
assert!(
diff < tol,
"large dim: scalar={scalar} vs avx2_4b={avx2_4b}, diff={diff}"
);
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_4block_dot_varying_scales() {
if !is_x86_feature_detected!("avx2") {
return;
}
let block = build_q4_0_test_block(2.0, 7);
let mut q4_data = Vec::with_capacity(18 * 12);
for _ in 0..12 {
q4_data.extend_from_slice(&block);
}
let q8_scales: Vec<f32> = (0..12)
.map(|i| if i % 2 == 0 { 1.0 } else { -0.5 })
.collect();
let q8_quants = vec![5i8; 384];
let scalar = fused_q4_0_q8_0_dot_scalar(&q4_data, &q8_scales, &q8_quants, 384);
let avx2_4b = unsafe { fused_q4_0_q8_0_dot_avx2_4block(&q4_data, &q8_scales, &q8_quants, 384) };
let diff = (scalar - avx2_4b).abs();
let tol = scalar.abs().max(1.0) * 0.02;
assert!(
diff < tol,
"varying: scalar={scalar} vs avx2_4b={avx2_4b}, diff={diff}"
);
}