#[test]
fn test_quantize_to_q8_blocks_large_values() {
    // Large-magnitude inputs should still round-trip within one quantization step.
    let values: Vec<f32> = (0..32).map(|i| (i as f32 - 16.0) * 100.0).collect();
    let blocks = quantize_to_q8_blocks(&values).expect("should work");
    assert_eq!(blocks.len(), 1);
    let dequant = dequantize_q8_blocks(&blocks);
    // Each dequantized value must land within two quantization steps of the input.
    for (o, d) in values.iter().zip(dequant.iter()) {
        let diff = (o - d).abs();
        assert!(
            diff < blocks[0].scale * 2.0,
            "Too large error: {} vs {}",
            o,
            d
        );
    }
}
#[test]
fn test_quantize_to_q8_blocks_zeros() {
    // An all-zero input must quantize to a single block of all-zero quants.
    let values = vec![0.0f32; 32];
    let blocks = quantize_to_q8_blocks(&values).expect("should work");
    assert_eq!(blocks.len(), 1);
    for &q in blocks[0].quants.iter() {
        assert_eq!(q, 0);
    }
}
#[test]
fn test_quantize_activations_q8k_into_large_values() {
    // A wide dynamic range should drive at least one quant close to the i8 limit.
    let activations: Vec<f32> = (0..256).map(|i| (i as f32 - 128.0) * 100.0).collect();
    let mut scales = vec![0.0f32; 1];
    let mut quants = vec![0i8; 256];
    let result = quantize_activations_q8k_into(&activations, &mut scales, &mut quants);
    assert!(result.is_ok());
    assert!(scales[0] > 0.0);
    let max_quant = quants.iter().fold(0u8, |acc, q| acc.max(q.unsigned_abs()));
    assert!(
        max_quant >= 126,
        "Max quant should be near 127, got {}",
        max_quant
    );
}
#[test]
fn test_fused_q4_0_q8_0_parallel_matvec_multi_row() {
    // 4 rows × one 18-byte Q4_0 block each = 72 bytes of zeroed weight data.
    let weight_data = vec![0u8; 72];
    let activations = vec![0.0f32; 32];
    let result = fused_q4_0_q8_0_parallel_matvec(&weight_data, &activations, 32, 4);
    assert!(result.is_ok());
    let output = result.unwrap();
    assert_eq!(output.len(), 4);
    // Zero weights against zero activations must yield exactly zero everywhere.
    for &v in &output {
        assert_eq!(v, 0.0);
    }
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_weight_too_small() {
    // 10 bytes is far short of the Q8_0 data required for 2 rows of 32 values.
    let activations = vec![1.0f32; 32];
    let weight_data = vec![0u8; 10];
    let result = fused_q8_0_q8_0_parallel_matvec(&weight_data, &activations, 32, 2);
    assert!(result.is_err());
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_activation_mismatch() {
    // 64 activations contradict the declared in_dim of 32, so the call must fail.
    let activations = vec![1.0f32; 64];
    let weight_data = vec![0u8; 34];
    let result = fused_q8_0_q8_0_parallel_matvec(&weight_data, &activations, 32, 1);
    assert!(result.is_err());
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_success() {
    // One row, one 34-byte Q8_0 block: the happy path returns a single output.
    let activations = vec![0.0f32; 32];
    let weight_data = vec![0u8; 34];
    let result = fused_q8_0_q8_0_parallel_matvec(&weight_data, &activations, 32, 1);
    assert!(result.is_ok());
    assert_eq!(result.unwrap().len(), 1);
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_multi_row() {
    // 4 rows × one 34-byte Q8_0 block each = 136 bytes of zeroed weight data.
    let weight_data = vec![0u8; 136];
    let activations = vec![0.0f32; 32];
    let result = fused_q8_0_q8_0_parallel_matvec(&weight_data, &activations, 32, 4);
    assert!(result.is_ok());
    let output = result.unwrap();
    assert_eq!(output.len(), 4);
    // Zero weights × zero activations must produce exactly 0.0 in every row
    // (mirrors test_fused_q4_0_q8_0_parallel_matvec_multi_row, which the
    // original version of this test failed to do — it never checked values).
    for &v in &output {
        assert_eq!(v, 0.0);
    }
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_into_weight_too_small() {
    // 10 bytes cannot hold the Q8_0 data needed for 2 rows of 32 values.
    let weight_data = vec![0u8; 10];
    let activations = vec![1.0f32; 32];
    let mut output = vec![0.0f32; 2];
    assert!(
        fused_q8_0_q8_0_parallel_matvec_into(&weight_data, &activations, 32, 2, &mut output)
            .is_err()
    );
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_into_activation_mismatch() {
    // 64 activations contradict the declared in_dim of 32, so the call must fail.
    let weight_data = vec![0u8; 34];
    let activations = vec![1.0f32; 64];
    let mut output = vec![0.0f32; 1];
    assert!(
        fused_q8_0_q8_0_parallel_matvec_into(&weight_data, &activations, 32, 1, &mut output)
            .is_err()
    );
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_into_output_too_small() {
    // 2 rows require 2 output slots; a 1-element buffer must be rejected.
    let weight_data = vec![0u8; 68];
    let activations = vec![0.0f32; 32];
    let mut output = vec![0.0f32; 1];
    assert!(
        fused_q8_0_q8_0_parallel_matvec_into(&weight_data, &activations, 32, 2, &mut output)
            .is_err()
    );
}
#[test]
fn test_fused_q8_0_q8_0_parallel_matvec_into_success() {
    // One row, one 34-byte Q8_0 block: the call succeeds, and since both the
    // weights and activations are zero, the output row must stay exactly 0.0.
    // (The original test only checked is_ok and never inspected the buffer.)
    let weight_data = vec![0u8; 34];
    let activations = vec![0.0f32; 32];
    let mut output = vec![0.0f32; 1];
    let result =
        fused_q8_0_q8_0_parallel_matvec_into(&weight_data, &activations, 32, 1, &mut output);
    assert!(result.is_ok());
    assert_eq!(output[0], 0.0);
}
#[test]
fn test_f16_to_f32_lut_half() {
    // 0x3800 is the IEEE-754 half-precision encoding of 0.5.
    let decoded = f16_to_f32_lut(0x3800);
    assert!(
        (decoded - 0.5).abs() < 0.001,
        "Expected 0.5, got {}",
        decoded
    );
}
#[test]
fn test_f16_to_f32_lut_two() {
    // 0x4000 is the IEEE-754 half-precision encoding of 2.0.
    let decoded = f16_to_f32_lut(0x4000);
    assert!(
        (decoded - 2.0).abs() < 0.001,
        "Expected 2.0, got {}",
        decoded
    );
}
#[test]
fn test_f16_to_f32_lut_infinity() {
    // 0x7C00 (all-ones exponent, zero mantissa) encodes +infinity.
    let decoded = f16_to_f32_lut(0x7C00);
    assert!(decoded.is_infinite());
    assert!(decoded > 0.0);
}
#[test]
fn test_f16_to_f32_lut_neg_infinity() {
    // 0xFC00 is 0x7C00 with the sign bit set: -infinity.
    let decoded = f16_to_f32_lut(0xFC00);
    assert!(decoded.is_infinite());
    assert!(decoded < 0.0);
}
#[test]
fn test_f16_to_f32_lut_nan() {
    // All-ones exponent with a non-zero mantissa encodes NaN.
    assert!(f16_to_f32_lut(0x7C01).is_nan());
}
#[test]
fn test_f16_to_f32_lut_negative_zero() {
    // 0x8000 (sign bit only) encodes -0.0, which compares equal to 0.0
    // but carries a negative sign.
    let decoded = f16_to_f32_lut(0x8000);
    assert_eq!(decoded, 0.0);
    assert!(decoded.is_sign_negative());
}
#[test]
fn test_interleaved_q4k_extracts_d_dmin() {
    // Super-block header: d = f16 0x4000 (2.0), dmin = f16 0x3400 (0.25).
    let mut data = vec![0u8; 144];
    let d_bits: u16 = 0x4000;
    let dmin_bits: u16 = 0x3400;
    data[0..2].copy_from_slice(&d_bits.to_le_bytes());
    data[2..4].copy_from_slice(&dmin_bits.to_le_bytes());
    let iq = InterleavedQ4K::from_q4k(&data).expect("valid data");
    assert!((iq.d[0] - 2.0).abs() < 0.01, "d should be 2.0, got {}", iq.d[0]);
    assert!(
        (iq.dmin[0] - 0.25).abs() < 0.01,
        "dmin should be 0.25, got {}",
        iq.dmin[0]
    );
}
#[test]
fn test_q8_0_block_quantize_extreme_values() {
    // The largest-magnitude values in the block should saturate to ±127.
    let mut values = [0.0f32; 32];
    values[..2].copy_from_slice(&[1000.0, -1000.0]);
    let block = Q8_0Block::quantize(&values);
    assert_eq!(block.quants[0], 127);
    assert_eq!(block.quants[1], -127);
}
#[test]
fn test_q8k_superblock_roundtrip_varied() {
    // A 256-value linear ramp must survive quantize→dequantize within 2×scale.
    let mut values = [0.0f32; 256];
    for (i, v) in values.iter_mut().enumerate() {
        *v = (i as f32 - 128.0) * 0.5;
    }
    let block = Q8KSuperBlock::quantize(&values);
    let dequant = block.dequantize();
    for (orig, deq) in values.iter().zip(dequant.iter()) {
        let diff = (orig - deq).abs();
        assert!(
            diff < block.scale * 2.0,
            "Roundtrip error too large: orig={}, deq={}, diff={}, scale={}",
            orig,
            deq,
            diff,
            block.scale
        );
    }
}
#[test]
fn test_q4_k_block_clone() {
    // Clone must copy every field of a Q4_K block verbatim.
    let original = Q4_KBlock {
        d: 1.5,
        dmin: 0.3,
        scales: [7; 12],
        qs: [0xAB; 128],
    };
    let copy = original.clone();
    assert_eq!(copy.d, 1.5);
    assert_eq!(copy.dmin, 0.3);
    assert_eq!(copy.scales, [7; 12]);
    assert_eq!(copy.qs, [0xAB; 128]);
}
#[test]
fn test_q4_k_block_debug() {
    // Debug formatting should mention the struct's type name.
    let rendered = format!(
        "{:?}",
        Q4_KBlock {
            d: 1.0,
            dmin: 0.5,
            scales: [0; 12],
            qs: [0; 128],
        }
    );
    assert!(rendered.contains("Q4_KBlock"));
}
#[test]
fn test_q5_k_block_clone() {
    // Clone must preserve every field — the original test only checked
    // d and qh, leaving dmin, scales, and qs unverified.
    let block = Q5_KBlock {
        d: 2.0,
        dmin: 0.1,
        scales: [1; 12],
        qh: [0xFF; 32],
        qs: [0x55; 128],
    };
    let cloned = block.clone();
    assert_eq!(cloned.d, 2.0);
    assert_eq!(cloned.dmin, 0.1);
    assert_eq!(cloned.scales, [1; 12]);
    assert_eq!(cloned.qh, [0xFF; 32]);
    assert_eq!(cloned.qs, [0x55; 128]);
}
#[test]
fn test_q5_k_block_debug() {
    // Debug formatting should mention the struct's type name.
    let rendered = format!(
        "{:?}",
        Q5_KBlock {
            d: 1.0,
            dmin: 0.5,
            scales: [0; 12],
            qh: [0; 32],
            qs: [0; 128],
        }
    );
    assert!(rendered.contains("Q5_KBlock"));
}
#[test]
fn test_q6_k_block_clone() {
    // Clone must preserve every field — the original test never checked
    // qh or qs after cloning.
    let block = Q6_KBlock {
        d: 0.5,
        scales: [3; 16],
        qh: [0xAA; 64],
        qs: [0x33; 128],
    };
    let cloned = block.clone();
    assert_eq!(cloned.d, 0.5);
    assert_eq!(cloned.scales, [3; 16]);
    assert_eq!(cloned.qh, [0xAA; 64]);
    assert_eq!(cloned.qs, [0x33; 128]);
}
#[test]
fn test_q6_k_block_debug() {
    // Debug formatting should mention the struct's type name.
    let rendered = format!(
        "{:?}",
        Q6_KBlock {
            d: 1.0,
            scales: [0; 16],
            qh: [0; 64],
            qs: [0; 128],
        }
    );
    assert!(rendered.contains("Q6_KBlock"));
}
#[test]
fn test_interleaved_q4k_clone_debug() {
    // One zeroed 144-byte super-block is valid input; clone and Debug both work.
    let data = vec![0u8; 144];
    let cloned = InterleavedQ4K::from_q4k(&data).expect("valid").clone();
    assert_eq!(cloned.num_super_blocks, 1);
    assert!(format!("{:?}", cloned).contains("InterleavedQ4K"));
}
#[test]
fn test_q4_0_block_clone_debug() {
    // Clone preserves the scale; Debug output names the type.
    let original = Q4_0Block {
        scale: 1.0,
        quants: [0x55; 16],
    };
    let cloned = original.clone();
    assert_eq!(cloned.scale, 1.0);
    assert!(format!("{:?}", cloned).contains("Q4_0Block"));
}
#[test]
fn test_dequant_stats_clone_round_trip() {
    // Clone preserves both counters and the recorded SIMD backend.
    let stats = DequantStats {
        blocks_processed: 42,
        bytes_processed: 756,
        simd_backend: SimdBackend::Neon,
    };
    let copy = stats.clone();
    assert_eq!(copy.blocks_processed, 42);
    assert_eq!(copy.bytes_processed, 756);
    assert_eq!(copy.simd_backend, SimdBackend::Neon);
}
/// Builds a single Q4_0 test block: an f16 `scale` (2 bytes, little-endian)
/// followed by 16 bytes, each packing `nibble_val` into both 4-bit halves.
fn build_q4_0_test_block(scale: f32, nibble_val: u8) -> [u8; 18] {
    let mut block = [0u8; 18];
    block[0..2].copy_from_slice(&half::f16::from_f32(scale).to_bits().to_le_bytes());
    let nib = nibble_val & 0x0F;
    block[2..].fill(nib | (nib << 4));
    block
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_dot_parity_with_scalar() {
    // The AVX2 kernel must agree with the scalar reference within 2% relative error.
    if !is_x86_feature_detected!("avx2") {
        return; // Host lacks AVX2; nothing to compare.
    }
    let block = build_q4_0_test_block(1.0, 5);
    let q4_data = block.repeat(4);
    let q8_scales = vec![1.0f32; 4];
    let q8_quants = vec![2i8; 128];
    let scalar = fused_q4_0_q8_0_dot_scalar(&q4_data, &q8_scales, &q8_quants, 128);
    // SAFETY: AVX2 availability was verified by the feature check above.
    let avx2 = unsafe { fused_q4_0_q8_0_dot_avx2(&q4_data, &q8_scales, &q8_quants, 128) };
    let diff = (scalar - avx2).abs();
    let tol = scalar.abs().max(1.0) * 0.02;
    assert!(diff < tol, "scalar={scalar} vs avx2={avx2}, diff={diff}");
}
#[cfg(target_arch = "x86_64")]
#[test]
fn test_q4_0_avx2_dot_zero_quants() {
    // All-zero Q8 quants should drive the AVX2 dot product to (numerically) zero.
    if !is_x86_feature_detected!("avx2") {
        return; // Host lacks AVX2; skip.
    }
    let block = build_q4_0_test_block(1.0, 8);
    let q4_data = block.repeat(2);
    let q8_scales = vec![1.0f32; 2];
    let q8_quants = vec![0i8; 64];
    // SAFETY: AVX2 availability was verified by the feature check above.
    let result = unsafe { fused_q4_0_q8_0_dot_avx2(&q4_data, &q8_scales, &q8_quants, 64) };
    assert!(
        result.abs() < 1e-3,
        "zero × zero should produce ~0, got {result}"
    );
}