use crate::errors::Result;
use crate::tensor::Tensor;
use anyhow::anyhow;
use serde::{Deserialize, Serialize};
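/// GGML-style block-quantization formats supported by this module.
///
/// `Q5_0`/`Q5_1` use 32-weight blocks; `Q5K`/`Q6K` use 256-weight super-blocks.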
#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
pub enum GGMLQuantType {
Q5_0,
Q5_1,
Q5K,
Q6K,
}
impl GGMLQuantType {
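    /// Number of weights stored per quantization block.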
pub fn block_size(&self) -> usize {
match self {
Self::Q5_0 | Self::Q5_1 => 32,
Self::Q5K | Self::Q6K => 256,
}
}
    /// Average storage cost in bits per weight, including scales and minimums.
    pub fn bits_per_weight(&self) -> f32 {
        match self {
            // Q5_0: 22-byte block per 32 weights. Q5K currently falls back to
            // Q5_0 blocks (see `AdvancedGGMLQuantizer`), so it shares the figure.
            Self::Q5_0 | Self::Q5K => 5.5,
            // Q5_1: 24-byte block (d, m, qh, qs) per 32 weights.
            Self::Q5_1 => 6.0,
            // Q6_K: 210-byte super-block per 256 weights.
            Self::Q6K => 6.5625,
        }
    }
}
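/// Q5_0 block: 32 weights as 5-bit levels with one f16 scale `d`.
/// `qs` packs the low nibbles two per byte; `qh` holds the 32 fifth bits.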
#[derive(Debug, Clone)]
pub struct BlockQ5_0 {
pub d: F16,
pub qh: [u8; 4],
pub qs: [u8; 16],
}
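/// Q5_1 block: like Q5_0 but affine, with an f16 minimum `m` added back
/// during dequantization.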
#[derive(Debug, Clone)]
pub struct BlockQ5_1 {
pub d: F16,
pub m: F16,
pub qh: [u8; 4],
pub qs: [u8; 16],
}
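/// Q5_K super-block layout (256 weights). Currently unused: the quantizer
/// falls back to Q5_0 blocks for `GGMLQuantType::Q5K`.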
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct BlockQ5K {
pub d: [F16; 8],
pub dmin: [F16; 8],
pub scales: [u8; 12],
pub qh: [u8; 32],
pub qs: [u8; 128],
}
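/// Q6_K super-block: 256 weights as 6-bit levels. `ql` packs the low nibbles,
/// `qh` the high two bits (four per byte), with one scale byte for each of
/// the 16 sub-blocks of 16 weights.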
#[derive(Debug, Clone)]
pub struct BlockQ6K {
pub d: F16,
pub scales: [u8; 16],
pub ql: [u8; 128],
pub qh: [u8; 64],
}
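/// IEEE 754 half-precision value stored as its raw bit pattern.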
type F16 = u16;
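/// Minimal f32 -> f16 conversion: truncating, with subnormals flushed to zero.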
fn f32_to_f16(val: f32) -> F16 {
let bits = val.to_bits();
let sign = (bits >> 31) & 0x1;
let exp = ((bits >> 23) & 0xFF) as i32;
let frac = bits & 0x7FFFFF;
    if exp == 0xFF {
        // Inf or NaN. Keep the mantissa nonzero for NaN so NaN-ness survives
        // the 13-bit truncation even when the payload sits in the low bits.
        let f16_frac = if frac == 0 { 0 } else { (frac >> 13) | 0x200 };
        ((sign << 15) | (0x1F << 10) | f16_frac) as u16
    } else if exp == 0 {
        // Zero or f32 subnormal: flush to signed zero (far below the f16 range).
        (sign << 15) as u16
    } else {
        let new_exp = exp - 127 + 15;
        if new_exp >= 0x1F {
            // Overflows the f16 exponent: saturate to infinity.
            ((sign << 15) | (0x1F << 10)) as u16
        } else if new_exp <= 0 {
            // Would be an f16 subnormal; flush to signed zero for simplicity.
            (sign << 15) as u16
        } else {
            // Normal number; the mantissa is truncated (round toward zero).
            ((sign << 15) | ((new_exp as u32) << 10) | (frac >> 13)) as u16
        }
    }
}
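/// Minimal f16 -> f32 conversion, the inverse of `f32_to_f16`.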
fn f16_to_f32(val: F16) -> f32 {
let sign = (val >> 15) & 0x1;
let exp = (val >> 10) & 0x1F;
let frac = val & 0x3FF;
    if exp == 0x1F {
        // Inf or NaN: widen the mantissa into the f32 NaN payload bits.
        f32::from_bits(((sign as u32) << 31) | (0xFF << 23) | ((frac as u32) << 13))
    } else if exp == 0 {
        // Zero or f16 subnormal: treated as signed zero. `f32_to_f16` above
        // never emits subnormals, so nothing is lost on a round trip.
        f32::from_bits((sign as u32) << 31)
} else {
let new_exp = (exp as i32) - 15 + 127;
f32::from_bits(((sign as u32) << 31) | ((new_exp as u32) << 23) | ((frac as u32) << 13))
}
}
/// Extract tensor contents as a flat `Vec<f32>`, converting from every
/// supported source dtype. Shared by all quantization entry points below.
fn tensor_to_f32_vec(tensor: &Tensor) -> Result<Vec<f32>> {
    match tensor {
        Tensor::F32(data) => {
            Ok(data.as_slice().ok_or_else(|| anyhow!("Failed to get tensor data"))?.to_vec())
        },
        Tensor::F64(data) => Ok(data
            .as_slice()
            .ok_or_else(|| anyhow!("Failed to get tensor data"))?
            .iter()
            .map(|&v| v as f32)
            .collect()),
        Tensor::F16(data) => Ok(data
            .as_slice()
            .ok_or_else(|| anyhow!("Failed to get tensor data"))?
            .iter()
            .map(|&v| v.to_f32())
            .collect()),
        Tensor::BF16(data) => Ok(data
            .as_slice()
            .ok_or_else(|| anyhow!("Failed to get tensor data"))?
            .iter()
            .map(|&v| v.to_f32())
            .collect()),
        Tensor::I64(data) => Ok(data
            .as_slice()
            .ok_or_else(|| anyhow!("Failed to get tensor data"))?
            .iter()
            .map(|&v| v as f32)
            .collect()),
        Tensor::C32(_) => {
            Err(anyhow!("Complex32 tensors not yet supported for quantization").into())
        },
        Tensor::C64(_) => {
            Err(anyhow!("Complex64 tensors not yet supported for quantization").into())
        },
        Tensor::CF16(_) => {
            Err(anyhow!("Complex16 tensors not yet supported for quantization").into())
        },
        Tensor::CBF16(_) => {
            Err(anyhow!("Complex BFloat16 tensors not yet supported for quantization").into())
        },
        Tensor::Sparse(_) => {
            Err(anyhow!("Sparse tensors not yet supported for quantization").into())
        },
        #[cfg(feature = "torch")]
        Tensor::Torch(_) => Err(anyhow!("Torch tensors not yet supported").into()),
        #[cfg(feature = "candle")]
        Tensor::Candle(_) => Err(anyhow!("Candle tensors not yet supported").into()),
        #[cfg(all(target_os = "macos", feature = "metal"))]
        Tensor::Metal(_) => {
            Err(anyhow!("Metal tensors not yet supported for quantization").into())
        },
        #[cfg(feature = "cuda")]
        Tensor::CUDA(_) => {
            Err(anyhow!("CUDA tensors not yet supported for quantization").into())
        },
    }
}

/// Quantize a tensor into Q5_0 blocks (symmetric: one f16 scale per 32 weights).
pub fn quantize_q5_0(tensor: &Tensor) -> Result<Vec<BlockQ5_0>> {
    let values = tensor_to_f32_vec(tensor)?;
    let n = values.len();
    if n % 32 != 0 {
        return Err(anyhow!("Q5_0 quantization requires a multiple of 32 values, got {}", n).into());
    }
    let nb = n / 32;
    let mut blocks = Vec::with_capacity(nb);
for i in 0..nb {
let start = i * 32;
let block_data = &values[start..start + 32];
        // Symmetric scheme: 5-bit levels centred on 15, one step of `scale`
        // apart. This is a simplified quantizer and is not bit-identical to
        // ggml's reference Q5_0 rounding.
        let amax = block_data.iter().fold(0.0f32, |a, &b| a.max(b.abs()));
        let scale = amax / 15.0;
        let iscale = if scale != 0.0 { 1.0 / scale } else { 0.0 };
        let mut qs = [0u8; 16];
        let mut qh = [0u8; 4];
for j in 0..32 {
let val = block_data[j];
let qi = (val * iscale + 15.5) as i32;
let qi = qi.clamp(0, 31) as u8;
            // Pack the low nibble two per byte (even j in the low half)...
            if j % 2 == 0 {
                qs[j / 2] = qi & 0xF;
            } else {
                qs[j / 2] |= (qi & 0xF) << 4;
            }
            // ...and store the fifth (high) bit in the `qh` bitfield.
            if qi & 0x10 != 0 {
                qh[j / 8] |= 1 << (j % 8);
            }
}
blocks.push(BlockQ5_0 {
d: f32_to_f16(scale),
qh,
qs,
});
}
Ok(blocks)
}
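/// Reconstruct an f32 tensor of the given shape from Q5_0 blocks.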
pub fn dequantize_q5_0(blocks: &[BlockQ5_0], shape: &[usize]) -> Result<Tensor> {
let n = blocks.len() * 32;
let mut values = vec![0.0f32; n];
for (i, block) in blocks.iter().enumerate() {
let scale = f16_to_f32(block.d);
let offset = i * 32;
for j in 0..32 {
let ql = if j % 2 == 0 { block.qs[j / 2] & 0xF } else { block.qs[j / 2] >> 4 };
let qh = if block.qh[j / 8] & (1 << (j % 8)) != 0 { 0x10 } else { 0x00 };
let qi = ql | qh;
values[offset + j] = ((qi as f32) - 15.0) * scale;
}
}
Tensor::from_vec(values, shape)
}
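/// Quantize a tensor into Q5_1 blocks (affine: f16 scale and minimum per 32 weights).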
pub fn quantize_q5_1(tensor: &Tensor) -> Result<Vec<BlockQ5_1>> {
    let values = tensor_to_f32_vec(tensor)?;
    let n = values.len();
    if n % 32 != 0 {
        return Err(anyhow!("Q5_1 quantization requires a multiple of 32 values, got {}", n).into());
    }
    let nb = n / 32;
    let mut blocks = Vec::with_capacity(nb);
for i in 0..nb {
let start = i * 32;
let block_data = &values[start..start + 32];
        // Affine scheme: val ~= min + q * scale with q in 0..=31.
        let min = block_data.iter().fold(f32::MAX, |a, &b| a.min(b));
        let max = block_data.iter().fold(f32::MIN, |a, &b| a.max(b));
        let scale = (max - min) / 31.0;
        let iscale = if scale != 0.0 { 1.0 / scale } else { 0.0 };
let mut qs = [0u8; 16];
let mut qh = [0u8; 4];
for j in 0..32 {
let val = block_data[j];
let qi = ((val - min) * iscale + 0.5) as i32;
let qi = qi.clamp(0, 31) as u8;
if j % 2 == 0 {
qs[j / 2] = qi & 0xF;
} else {
qs[j / 2] |= (qi & 0xF) << 4;
}
if qi & 0x10 != 0 {
qh[j / 8] |= 1 << (j % 8);
}
}
blocks.push(BlockQ5_1 {
d: f32_to_f16(scale),
m: f32_to_f16(min),
qh,
qs,
});
}
Ok(blocks)
}
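/// Quantize a tensor into Q6_K super-blocks: 16 sub-blocks of 16 weights, each
/// with a 6-bit scale taken relative to the per-block f16 scale `d`.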
pub fn quantize_q6_k(tensor: &Tensor) -> Result<Vec<BlockQ6K>> {
    let values = tensor_to_f32_vec(tensor)?;
    let n = values.len();
    if n % 256 != 0 {
        return Err(anyhow!("Q6_K quantization requires a multiple of 256 values, got {}", n).into());
    }
    let nb = n / 256;
    let mut blocks = Vec::with_capacity(nb);
for i in 0..nb {
let start = i * 256;
let block_data = &values[start..start + 256];
        let amax = block_data.iter().fold(0.0f32, |a, &b| a.max(b.abs()));
        // `d` is the per-level step of the largest sub-block: the 6-bit levels
        // (qi - 32) span [-32, 31], so one level covers amax / 32.
        let scale = amax / 32.0;
        let mut scales = [0u8; 16];
        let mut ql = [0u8; 128];
        let mut qh = [0u8; 64];
        for (sb, scale_ref) in scales.iter_mut().enumerate() {
            let sb_start = sb * 16;
            let sb_data = &block_data[sb_start..sb_start + 16];
            let sb_max = sb_data.iter().fold(0.0f32, |a, &b| a.max(b.abs()));
            // 6-bit sub-block scale relative to the block maximum, rounded up
            // so the sub-block maximum stays inside the quantization range.
            let sb_scale = if amax != 0.0 { sb_max / amax } else { 0.0 };
            *scale_ref = (sb_scale * 63.0).ceil() as u8;
            // Quantize against the step exactly as the dequantizer will
            // reconstruct it: d * scales[sb] / 63.
            let step = scale * (*scale_ref as f32) / 63.0;
            for (j, &val) in sb_data.iter().enumerate() {
                let qi = if step > 0.0 { (val / step + 32.5) as i32 } else { 32 };
                let qi = qi.clamp(0, 63) as u8;
let idx = sb_start + j;
if idx % 2 == 0 {
ql[idx / 2] = qi & 0xF;
} else {
ql[idx / 2] |= (qi & 0xF) << 4;
}
let qh_idx = idx / 4;
let qh_shift = (idx % 4) * 2;
qh[qh_idx] |= ((qi >> 4) & 0x3) << qh_shift;
}
}
blocks.push(BlockQ6K {
d: f32_to_f16(scale),
scales,
ql,
qh,
});
}
Ok(blocks)
}
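/// Quantizes tensors into GGML-style blocks and reports compression statistics.
///
/// A sketch of the round trip (`?` assumes the caller returns this crate's
/// `Result`):
/// ```ignore
/// let values: Vec<f32> = (0..64).map(|i| i as f32 * 0.1).collect();
/// let tensor = Tensor::from_vec(values, &[64])?;
/// let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
/// let quantized = quantizer.quantize(&tensor)?;
/// let restored = quantized.dequantize()?;
/// ```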
pub struct AdvancedGGMLQuantizer {
pub quant_type: GGMLQuantType,
}
impl AdvancedGGMLQuantizer {
pub fn new(quant_type: GGMLQuantType) -> Self {
Self { quant_type }
}
pub fn quantize(&self, tensor: &Tensor) -> Result<QuantizedGGMLTensor> {
let shape = tensor.shape().to_vec();
let data = match self.quant_type {
GGMLQuantType::Q5_0 => {
let blocks = quantize_q5_0(tensor)?;
GGMLQuantData::Q5_0(blocks)
},
GGMLQuantType::Q5_1 => {
let blocks = quantize_q5_1(tensor)?;
GGMLQuantData::Q5_1(blocks)
},
GGMLQuantType::Q5K => {
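                // Q5_K quantization is not yet implemented; fall back to
                // Q5_0 blocks, which share the 5.5 bits-per-weight footprint.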
let blocks = quantize_q5_0(tensor)?;
GGMLQuantData::Q5_0(blocks)
},
GGMLQuantType::Q6K => {
let blocks = quantize_q6_k(tensor)?;
GGMLQuantData::Q6K(blocks)
},
};
Ok(QuantizedGGMLTensor {
data,
shape,
quant_type: self.quant_type,
})
}
    /// Ratio of f32 storage (32 bits per weight) to the quantized footprint.
    pub fn compression_ratio(&self, original_size: usize) -> f32 {
        let bits_per_weight = self.quant_type.bits_per_weight();
        let compressed_bits = (original_size as f32) * bits_per_weight;
        let original_bits = (original_size * 32) as f32;
        original_bits / compressed_bits
    }
}
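/// A quantized tensor: block payload plus the original shape and format tag.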
#[derive(Debug, Clone)]
pub struct QuantizedGGMLTensor {
pub data: GGMLQuantData,
pub shape: Vec<usize>,
pub quant_type: GGMLQuantType,
}
#[derive(Debug, Clone)]
pub enum GGMLQuantData {
Q5_0(Vec<BlockQ5_0>),
Q5_1(Vec<BlockQ5_1>),
Q6K(Vec<BlockQ6K>),
}
impl QuantizedGGMLTensor {
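    /// Reconstruct an f32 tensor, inverting the corresponding quantizer.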
pub fn dequantize(&self) -> Result<Tensor> {
match &self.data {
GGMLQuantData::Q5_0(blocks) => dequantize_q5_0(blocks, &self.shape),
GGMLQuantData::Q5_1(blocks) => {
let n = blocks.len() * 32;
let mut values = vec![0.0f32; n];
for (i, block) in blocks.iter().enumerate() {
let scale = f16_to_f32(block.d);
let min = f16_to_f32(block.m);
let offset = i * 32;
for j in 0..32 {
let ql =
if j % 2 == 0 { block.qs[j / 2] & 0xF } else { block.qs[j / 2] >> 4 };
let qh = if block.qh[j / 8] & (1 << (j % 8)) != 0 { 0x10 } else { 0x00 };
let qi = ql | qh;
values[offset + j] = min + (qi as f32) * scale;
}
}
Tensor::from_vec(values, &self.shape)
},
GGMLQuantData::Q6K(blocks) => {
let n = blocks.len() * 256;
let mut values = vec![0.0f32; n];
for (i, block) in blocks.iter().enumerate() {
let scale = f16_to_f32(block.d);
let offset = i * 256;
                    for sb in 0..16 {
                        // Sub-block scale relative to the block maximum;
                        // mirrors the encoding in `quantize_q6_k`.
                        let sb_scale = (block.scales[sb] as f32) / 63.0;
let sb_start = sb * 16;
for j in 0..16 {
let idx = sb_start + j;
let ql = if idx % 2 == 0 {
block.ql[idx / 2] & 0xF
} else {
block.ql[idx / 2] >> 4
};
let qh_idx = idx / 4;
let qh_shift = (idx % 4) * 2;
let qh = (block.qh[qh_idx] >> qh_shift) & 0x3;
let qi = ql | (qh << 4);
values[offset + idx] = (qi as f32 - 32.0) * scale * sb_scale;
}
}
}
Tensor::from_vec(values, &self.shape)
},
}
}
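    /// Size of the quantized payload in bytes (block data only, excluding `shape`).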
pub fn memory_usage(&self) -> usize {
match &self.data {
            // 2 (d) + 4 (qh) + 16 (qs) bytes per 32 weights.
            GGMLQuantData::Q5_0(blocks) => blocks.len() * 22,
            // 2 (d) + 2 (m) + 4 (qh) + 16 (qs) bytes per 32 weights.
            GGMLQuantData::Q5_1(blocks) => blocks.len() * 24,
            // 2 (d) + 16 (scales) + 128 (ql) + 64 (qh) bytes per 256 weights.
            GGMLQuantData::Q6K(blocks) => blocks.len() * 210,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_q5_0_quantization() {
let values: Vec<f32> = (0..64).map(|i| i as f32 * 0.1).collect();
let tensor = Tensor::from_vec(values.clone(), &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let compression = quantizer.compression_ratio(64);
assert!(compression > 5.0 && compression < 6.0);
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
let max_error = values
.iter()
.zip(deq_values.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0f32, |a, b| a.max(b));
                assert!(max_error < 0.25);
            },
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_q6_k_quantization() {
let values: Vec<f32> = (0..512).map(|i| (i as f32 * 0.01).sin()).collect();
let tensor = Tensor::from_vec(values.clone(), &[512]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q6K);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let memory = quantized.memory_usage();
assert!(memory < values.len() * 4);
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
assert_eq!(deq_values.len(), values.len());
let mse: f32 =
values.iter().zip(deq_values.iter()).map(|(a, b)| (a - b).powi(2)).sum::<f32>()
/ values.len() as f32;
                assert!(mse < 0.01);
            },
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_f64_i64_tensor_support() {
use crate::tensor::DType;
let values_f32: Vec<f32> = (0..64).map(|i| i as f32 * 0.1).collect();
let base_tensor_f64 =
Tensor::from_vec(values_f32.clone(), &[64]).expect("tensor operation failed");
let tensor_f64 = base_tensor_f64.to_dtype(DType::F64).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized_f64 = quantizer.quantize(&tensor_f64).expect("Quantization failed");
let dequantized_f64 = quantized_f64.dequantize().expect("Dequantization failed");
match dequantized_f64 {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
let max_error = values_f32
.iter()
.zip(deq_values.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0f32, |a, b| a.max(b));
assert!(max_error < 0.25);
},
_ => panic!("Unexpected tensor type"),
}
let values_i32: Vec<f32> = (0..64).map(|i| i as f32).collect();
let base_tensor_i64 =
Tensor::from_vec(values_i32.clone(), &[64]).expect("tensor operation failed");
let tensor_i64 = base_tensor_i64.to_dtype(DType::I64).expect("tensor operation failed");
let quantized_i64 = quantizer.quantize(&tensor_i64).expect("Quantization failed");
let dequantized_i64 = quantized_i64.dequantize().expect("Dequantization failed");
match dequantized_i64 {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
let max_error = values_i32
.iter()
.zip(deq_values.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0f32, |a, b| a.max(b));
assert!(max_error < 2.1);
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_ggml_quant_type_block_sizes() {
assert_eq!(GGMLQuantType::Q5_0.block_size(), 32);
assert_eq!(GGMLQuantType::Q5_1.block_size(), 32);
assert_eq!(GGMLQuantType::Q5K.block_size(), 256);
assert_eq!(GGMLQuantType::Q6K.block_size(), 256);
}
#[test]
fn test_ggml_quant_type_bits_per_weight() {
assert!((GGMLQuantType::Q5_0.bits_per_weight() - 5.5).abs() < 1e-2);
        assert!((GGMLQuantType::Q5_1.bits_per_weight() - 6.0).abs() < 1e-2);
assert!((GGMLQuantType::Q5K.bits_per_weight() - 5.5).abs() < 1e-2);
assert!((GGMLQuantType::Q6K.bits_per_weight() - 6.5625).abs() < 1e-2);
}
#[test]
fn test_ggml_quant_type_eq() {
assert_eq!(GGMLQuantType::Q5_0, GGMLQuantType::Q5_0);
assert_ne!(GGMLQuantType::Q5_0, GGMLQuantType::Q5_1);
assert_ne!(GGMLQuantType::Q5K, GGMLQuantType::Q6K);
}
#[test]
fn test_ggml_quant_type_clone() {
let qt = GGMLQuantType::Q6K;
let cloned = qt;
assert_eq!(qt, cloned);
}
#[test]
fn test_quantizer_q5_1() {
let values: Vec<f32> = (0..64).map(|i| (i as f32 * 0.1).sin()).collect();
let tensor = Tensor::from_vec(values.clone(), &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_1);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
assert_eq!(deq_values.len(), values.len());
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_quantizer_memory_usage() {
let values: Vec<f32> = (0..64).map(|i| i as f32 * 0.01).collect();
let tensor = Tensor::from_vec(values, &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let memory = quantized.memory_usage();
assert!(memory < 256);
}
#[test]
fn test_quantizer_q5_0_all_zeros() {
let values = vec![0.0f32; 64];
let tensor = Tensor::from_vec(values, &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
for val in deq_values {
assert!(val.abs() < 1e-3, "Expected ~0, got {}", val);
}
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_q6k_all_zeros() {
let values = vec![0.0f32; 256];
let tensor = Tensor::from_vec(values, &[256]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q6K);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
for val in deq_values {
assert!(val.abs() < 1e-3, "Expected ~0, got {}", val);
}
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_q5_0_negative_values() {
let values: Vec<f32> = (0..64).map(|i| -(i as f32) * 0.1).collect();
let tensor = Tensor::from_vec(values.clone(), &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
assert_eq!(deq_values.len(), values.len());
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_q6k_large_tensor() {
let values: Vec<f32> = (0..1024).map(|i| (i as f32 * 0.001).sin()).collect();
let tensor = Tensor::from_vec(values.clone(), &[1024]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q6K);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
let memory = quantized.memory_usage();
assert!(memory < values.len() * 4);
let dequantized = quantized.dequantize().expect("Dequantization failed");
match dequantized {
Tensor::F32(data) => {
let deq_values = data.as_slice().expect("operation failed in test");
assert_eq!(deq_values.len(), 1024);
},
_ => panic!("Unexpected tensor type"),
}
}
#[test]
fn test_f16_roundtrip_zero() {
let f16 = f32_to_f16(0.0);
let back = f16_to_f32(f16);
assert!((back).abs() < 1e-6);
}
#[test]
fn test_f16_roundtrip_one() {
let f16 = f32_to_f16(1.0);
let back = f16_to_f32(f16);
assert!((back - 1.0).abs() < 0.001);
}
#[test]
fn test_f16_roundtrip_negative() {
let f16 = f32_to_f16(-42.0);
let back = f16_to_f32(f16);
assert!((back - (-42.0)).abs() < 0.1);
}
#[test]
fn test_f16_special_inf() {
let f16 = f32_to_f16(f32::INFINITY);
let back = f16_to_f32(f16);
assert!(back.is_infinite() && back > 0.0);
}
#[test]
fn test_f16_special_neg_inf() {
let f16 = f32_to_f16(f32::NEG_INFINITY);
let back = f16_to_f32(f16);
assert!(back.is_infinite() && back < 0.0);
}
#[test]
fn test_f16_special_nan() {
let f16 = f32_to_f16(f32::NAN);
let back = f16_to_f32(f16);
assert!(back.is_nan());
}
#[test]
fn test_block_q5_0_clone() {
let block = BlockQ5_0 {
d: f32_to_f16(1.0),
qh: [0, 0, 0, 0],
qs: [0u8; 16],
};
let cloned = block.clone();
assert_eq!(cloned.d, block.d);
}
#[test]
fn test_block_q5_1_clone() {
let block = BlockQ5_1 {
d: f32_to_f16(1.0),
m: f32_to_f16(0.0),
qh: [0, 0, 0, 0],
qs: [0u8; 16],
};
let cloned = block.clone();
assert_eq!(cloned.d, block.d);
assert_eq!(cloned.m, block.m);
}
#[test]
fn test_block_q6k_clone() {
let block = BlockQ6K {
d: f32_to_f16(1.0),
scales: [0u8; 16],
ql: [0u8; 128],
qh: [0u8; 64],
};
let cloned = block.clone();
assert_eq!(cloned.d, block.d);
}
#[test]
fn test_quantized_shape_preserved() {
let values: Vec<f32> = (0..64).map(|i| i as f32).collect();
let tensor = Tensor::from_vec(values, &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_0);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
assert_eq!(quantized.shape, vec![64]);
}
#[test]
fn test_quantized_quant_type_field() {
let values: Vec<f32> = (0..64).map(|i| i as f32).collect();
let tensor = Tensor::from_vec(values, &[64]).expect("tensor operation failed");
let quantizer = AdvancedGGMLQuantizer::new(GGMLQuantType::Q5_1);
let quantized = quantizer.quantize(&tensor).expect("Quantization failed");
assert_eq!(quantized.quant_type, GGMLQuantType::Q5_1);
}
}