use crate::error::{ModelError, ModelResult};
use scirs2_core::ndarray::{Array1, Array2};
/// How real values are mapped onto the `i8` grid.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationMethod {
/// Zero-point fixed at 0; the representable range is symmetric around zero.
Symmetric,
/// Independent scale and zero-point; covers shifted ranges (e.g. [0, max]).
Asymmetric,
}
/// Scope over which one (scale, zero_point) pair applies.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuantizationGranularity {
/// A single parameter pair for the whole tensor.
PerTensor,
/// One parameter pair per output channel (row of a 2-D weight).
PerChannel,
}
/// Parameters describing an i8 quantization mapping.
///
/// `scale` and `zero_point` are parallel vectors: length 1 for
/// `PerTensor`, one entry per channel for `PerChannel`
/// (see `validate` for the enforced invariants).
#[derive(Debug, Clone)]
pub struct QuantizationParams {
/// Multiplier from quantized units back to real values; must be positive and finite.
pub scale: Vec<f32>,
/// Quantized value that represents real 0.0; all zeros for symmetric methods.
pub zero_point: Vec<i8>,
/// Symmetric or asymmetric mapping.
pub method: QuantizationMethod,
/// Per-tensor or per-channel parameter scope.
pub granularity: QuantizationGranularity,
}
impl QuantizationParams {
    /// Per-tensor symmetric parameters: one scale, zero-point fixed at 0.
    pub fn symmetric_per_tensor(scale: f32) -> Self {
        Self {
            scale: vec![scale],
            zero_point: vec![0],
            method: QuantizationMethod::Symmetric,
            granularity: QuantizationGranularity::PerTensor,
        }
    }

    /// Per-tensor asymmetric parameters: one scale/zero-point pair.
    pub fn asymmetric_per_tensor(scale: f32, zero_point: i8) -> Self {
        Self {
            scale: vec![scale],
            zero_point: vec![zero_point],
            method: QuantizationMethod::Asymmetric,
            granularity: QuantizationGranularity::PerTensor,
        }
    }

    /// Per-channel symmetric parameters: one scale per channel, every
    /// zero-point 0.
    pub fn symmetric_per_channel(scales: Vec<f32>) -> Self {
        let zero_point = vec![0i8; scales.len()];
        Self {
            scale: scales,
            zero_point,
            method: QuantizationMethod::Symmetric,
            granularity: QuantizationGranularity::PerChannel,
        }
    }

    /// Checks structural invariants: scales are non-empty, the scale and
    /// zero-point vectors have the same length, and every scale is strictly
    /// positive and finite.
    pub fn validate(&self) -> ModelResult<()> {
        if self.scale.is_empty() {
            return Err(ModelError::invalid_config("scale cannot be empty"));
        }
        if self.scale.len() != self.zero_point.len() {
            return Err(ModelError::invalid_config(
                "scale and zero_point must have same length",
            ));
        }
        // A scale is valid only when finite and > 0; the negated predicate
        // also rejects NaN (NaN fails `is_finite`).
        if let Some(bad) = self
            .scale
            .iter()
            .copied()
            .find(|&s| !(s.is_finite() && s > 0.0))
        {
            return Err(ModelError::invalid_config(format!("invalid scale: {}", bad)));
        }
        Ok(())
    }
}
/// An i8-quantized tensor: raw values, logical shape, and the parameters
/// needed to dequantize it.
#[derive(Debug, Clone)]
pub struct QuantizedWeight {
/// Quantized values in row-major order.
pub data: Vec<i8>,
/// Logical dimensions; `data.len()` must equal the product of these.
pub shape: Vec<usize>,
/// Scale/zero-point parameters used to produce `data`.
pub params: QuantizationParams,
}
impl QuantizedWeight {
    /// Builds a quantized tensor, validating the parameters and checking
    /// that `data` holds exactly `shape.iter().product()` elements.
    pub fn new(data: Vec<i8>, shape: Vec<usize>, params: QuantizationParams) -> ModelResult<Self> {
        params.validate()?;
        let expected: usize = shape.iter().product();
        if expected != data.len() {
            return Err(ModelError::invalid_config(format!(
                "data length {} does not match shape {:?}",
                data.len(),
                shape
            )));
        }
        Ok(Self { data, shape, params })
    }

    /// Reconstructs an `f32` vector from 1-D quantized data.
    ///
    /// Only per-tensor parameters are accepted; per-channel parameters are
    /// rejected for single-axis tensors.
    pub fn dequantize_1d(&self) -> ModelResult<Array1<f32>> {
        if self.shape.len() != 1 {
            return Err(ModelError::invalid_config(format!(
                "expected 1D shape, got {:?}",
                self.shape
            )));
        }
        if self.params.granularity == QuantizationGranularity::PerChannel {
            return Err(ModelError::invalid_config(
                "per-channel quantization not supported for 1D tensors",
            ));
        }
        let scale = self.params.scale[0];
        let zero_point = self.params.zero_point[0] as i32;
        let mut result = Array1::zeros(self.shape[0]);
        // value = (q - zero_point) * scale, widened to i32 to avoid i8 overflow.
        for (i, slot) in result.iter_mut().enumerate() {
            *slot = (self.data[i] as i32 - zero_point) as f32 * scale;
        }
        Ok(result)
    }

    /// Reconstructs an `f32` matrix from 2-D quantized data.
    ///
    /// Per-channel parameters apply row-wise and must supply exactly one
    /// scale per row.
    pub fn dequantize_2d(&self) -> ModelResult<Array2<f32>> {
        if self.shape.len() != 2 {
            return Err(ModelError::invalid_config(format!(
                "expected 2D shape, got {:?}",
                self.shape
            )));
        }
        let (rows, cols) = (self.shape[0], self.shape[1]);
        let mut result = Array2::zeros((rows, cols));
        match self.params.granularity {
            QuantizationGranularity::PerTensor => {
                let scale = self.params.scale[0];
                let zero_point = self.params.zero_point[0] as i32;
                // A freshly-created Array2 is standard (row-major) layout, so
                // the flat iteration index matches `row * cols + col`.
                for (idx, slot) in result.iter_mut().enumerate() {
                    *slot = (self.data[idx] as i32 - zero_point) as f32 * scale;
                }
            }
            QuantizationGranularity::PerChannel => {
                if self.params.scale.len() != rows {
                    return Err(ModelError::invalid_config(format!(
                        "expected {} scales for per-channel, got {}",
                        rows,
                        self.params.scale.len()
                    )));
                }
                for r in 0..rows {
                    let scale = self.params.scale[r];
                    let zero_point = self.params.zero_point[r] as i32;
                    let base = r * cols;
                    for c in 0..cols {
                        result[[r, c]] = (self.data[base + c] as i32 - zero_point) as f32 * scale;
                    }
                }
            }
        }
        Ok(result)
    }

    /// Bytes used by the quantized payload (one byte per i8 element).
    pub fn memory_size(&self) -> usize {
        self.data.len()
    }
}
/// Quantizes a 1-D tensor to `i8` with symmetric per-tensor parameters,
/// scaling so the largest absolute value maps to 127.
///
/// An all-zero (or empty) input degenerates to `scale = 1.0` with all-zero
/// data, so dequantization round-trips exactly.
pub fn quantize_symmetric_1d(array: &Array1<f32>) -> ModelResult<QuantizedWeight> {
    let len = array.len();
    let max_abs = array.iter().fold(0.0f32, |acc, &x| acc.max(x.abs()));
    if max_abs == 0.0 {
        return QuantizedWeight::new(
            vec![0i8; len],
            vec![len],
            QuantizationParams::symmetric_per_tensor(1.0),
        );
    }
    let scale = max_abs / 127.0;
    // Round-to-nearest then clamp into the i8 range.
    let data: Vec<i8> = array
        .iter()
        .map(|&x| ((x / scale).round() as i32).clamp(-128, 127) as i8)
        .collect();
    QuantizedWeight::new(
        data,
        vec![len],
        QuantizationParams::symmetric_per_tensor(scale),
    )
}
/// Quantizes a 2-D tensor to `i8` with symmetric per-tensor parameters,
/// scaling so the largest absolute value maps to 127.
///
/// An all-zero (or empty) input degenerates to `scale = 1.0` with all-zero
/// data.
pub fn quantize_symmetric_2d(array: &Array2<f32>) -> ModelResult<QuantizedWeight> {
    let (rows, cols) = array.dim();
    let shape = vec![rows, cols];
    let max_abs = array.iter().fold(0.0f32, |acc, &x| acc.max(x.abs()));
    if max_abs == 0.0 {
        return QuantizedWeight::new(
            vec![0i8; rows * cols],
            shape,
            QuantizationParams::symmetric_per_tensor(1.0),
        );
    }
    let scale = max_abs / 127.0;
    // ndarray iteration follows logical (row-major) order, so the flat
    // output matches `row * cols + col` indexing.
    let data: Vec<i8> = array
        .iter()
        .map(|&x| ((x / scale).round() as i32).clamp(-128, 127) as i8)
        .collect();
    QuantizedWeight::new(data, shape, QuantizationParams::symmetric_per_tensor(scale))
}
/// Quantizes a 2-D tensor to `i8` with symmetric per-channel (per-row)
/// parameters: each row gets its own scale derived from that row's largest
/// absolute value.
///
/// Rows that are entirely zero fall back to `scale = 1.0` so the parameters
/// stay valid.
pub fn quantize_symmetric_per_channel(array: &Array2<f32>) -> ModelResult<QuantizedWeight> {
    let (rows, cols) = array.dim();
    let mut scales = Vec::with_capacity(rows);
    let mut data = Vec::with_capacity(rows * cols);
    // Single pass: derive the row scale, then immediately quantize the row.
    for r in 0..rows {
        let row = array.row(r);
        let max_abs = row.iter().fold(0.0f32, |acc, &x| acc.max(x.abs()));
        let scale = if max_abs == 0.0 { 1.0 } else { max_abs / 127.0 };
        scales.push(scale);
        for &x in row.iter() {
            data.push(((x / scale).round() as i32).clamp(-128, 127) as i8);
        }
    }
    QuantizedWeight::new(
        data,
        vec![rows, cols],
        QuantizationParams::symmetric_per_channel(scales),
    )
}
/// Quantizes a 1-D tensor to `i8` with asymmetric (affine) per-tensor
/// parameters, mapping the observed `[min, max]` range onto `[-128, 127]`.
///
/// Degenerate inputs — empty or (near-)constant — fall back to
/// `scale = 1.0`, `zero_point = 0` with all-zero data, mirroring
/// `quantize_symmetric_1d`'s handling of the all-zero case.
///
/// # Errors
/// Propagates `QuantizedWeight::new` validation failures.
pub fn quantize_asymmetric_1d(array: &Array1<f32>) -> ModelResult<QuantizedWeight> {
    let min_val = array.iter().copied().fold(f32::INFINITY, f32::min);
    let max_val = array.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    // Bug fix: an empty array leaves min/max at their +/-inf sentinels, which
    // previously slipped past the constant-range check and derived a
    // non-finite scale (surfacing only as a confusing validation error).
    if array.is_empty() || (max_val - min_val).abs() < 1e-8 {
        let data = vec![0i8; array.len()];
        let params = QuantizationParams::asymmetric_per_tensor(1.0, 0);
        return QuantizedWeight::new(data, vec![array.len()], params);
    }
    // 255 quantization steps span the full range; the zero-point shifts
    // `min` onto -128.
    let scale = (max_val - min_val) / 255.0;
    let zero_point_f = -128.0 - min_val / scale;
    let zero_point = zero_point_f.round().clamp(-128.0, 127.0) as i8;
    let data: Vec<i8> = array
        .iter()
        .map(|&x| (x / scale + zero_point as f32).round().clamp(-128.0, 127.0) as i8)
        .collect();
    let params = QuantizationParams::asymmetric_per_tensor(scale, zero_point);
    QuantizedWeight::new(data, vec![array.len()], params)
}
/// Running min/max/count statistics collected over calibration batches,
/// used to derive activation quantization parameters.
#[derive(Debug, Clone)]
pub struct CalibrationStats {
/// Smallest value observed; starts at +inf before any update.
pub min: f32,
/// Largest value observed; starts at -inf before any update.
pub max: f32,
/// Total number of elements observed across all updates.
pub count: usize,
}
impl CalibrationStats {
pub fn new() -> Self {
Self {
min: f32::INFINITY,
max: f32::NEG_INFINITY,
count: 0,
}
}
pub fn update_1d(&mut self, data: &Array1<f32>) {
for &x in data.iter() {
self.min = self.min.min(x);
self.max = self.max.max(x);
}
self.count += data.len();
}
pub fn update_2d(&mut self, data: &Array2<f32>) {
for &x in data.iter() {
self.min = self.min.min(x);
self.max = self.max.max(x);
}
self.count += data.len();
}
pub fn to_symmetric_params(&self) -> ModelResult<QuantizationParams> {
let max_abs = self.max.abs().max(self.min.abs());
if max_abs == 0.0 {
Ok(QuantizationParams::symmetric_per_tensor(1.0))
} else {
Ok(QuantizationParams::symmetric_per_tensor(max_abs / 127.0))
}
}
pub fn to_asymmetric_params(&self) -> ModelResult<QuantizationParams> {
if (self.max - self.min).abs() < 1e-8 {
Ok(QuantizationParams::asymmetric_per_tensor(1.0, 0))
} else {
let scale = (self.max - self.min) / 255.0;
let zero_point_f = -128.0 - self.min / scale;
let zero_point = zero_point_f.round().clamp(-128.0, 127.0) as i8;
Ok(QuantizationParams::asymmetric_per_tensor(scale, zero_point))
}
}
}
impl Default for CalibrationStats {
// Delegates to `new()`: +/-inf sentinels and a zero count.
fn default() -> Self {
Self::new()
}
}
/// Quantizes runtime activations to `i8`, either dynamically per call or
/// using parameters frozen via `calibrate`.
#[derive(Debug, Clone)]
pub struct ActivationQuantizer {
/// Symmetric or asymmetric mapping.
method: QuantizationMethod,
/// Currently only `PerTensor` is set by the provided constructors.
#[allow(dead_code)]
granularity: QuantizationGranularity,
/// Frozen parameters from `calibrate`; `None` means derive per call.
calibration: Option<QuantizationParams>,
}
impl ActivationQuantizer {
pub fn new_symmetric() -> Self {
Self {
method: QuantizationMethod::Symmetric,
granularity: QuantizationGranularity::PerTensor,
calibration: None,
}
}
pub fn new_asymmetric() -> Self {
Self {
method: QuantizationMethod::Asymmetric,
granularity: QuantizationGranularity::PerTensor,
calibration: None,
}
}
pub fn calibrate(&mut self, stats: &CalibrationStats) -> ModelResult<()> {
self.calibration = Some(match self.method {
QuantizationMethod::Symmetric => stats.to_symmetric_params()?,
QuantizationMethod::Asymmetric => stats.to_asymmetric_params()?,
});
Ok(())
}
pub fn quantize_activation_1d(&self, activation: &Array1<f32>) -> ModelResult<Vec<i8>> {
let params = if let Some(ref cal) = self.calibration {
cal.clone()
} else {
let min_val = activation.iter().copied().fold(f32::INFINITY, f32::min);
let max_val = activation.iter().copied().fold(f32::NEG_INFINITY, f32::max);
match self.method {
QuantizationMethod::Symmetric => {
let max_abs = max_val.abs().max(min_val.abs());
QuantizationParams::symmetric_per_tensor(max_abs / 127.0)
}
QuantizationMethod::Asymmetric => {
let scale = (max_val - min_val) / 255.0;
let zero_point = (-128.0 - min_val / scale).round().clamp(-128.0, 127.0) as i8;
QuantizationParams::asymmetric_per_tensor(scale, zero_point)
}
}
};
let scale = params.scale[0];
let zero_point = params.zero_point[0];
let mut quantized = Vec::with_capacity(activation.len());
for &x in activation.iter() {
let q = match self.method {
QuantizationMethod::Symmetric => (x / scale).round().clamp(-128.0, 127.0) as i8,
QuantizationMethod::Asymmetric => {
let q_f = x / scale + zero_point as f32;
q_f.round().clamp(-128.0, 127.0) as i8
}
};
quantized.push(q);
}
Ok(quantized)
}
pub fn dequantize_activation_1d(
&self,
quantized: &[i8],
original_len: usize,
) -> ModelResult<Array1<f32>> {
if quantized.len() != original_len {
return Err(ModelError::invalid_config(format!(
"quantized length {} doesn't match expected {}",
quantized.len(),
original_len
)));
}
let params = self
.calibration
.as_ref()
.ok_or_else(|| ModelError::invalid_config("calibration required for dequantization"))?;
let scale = params.scale[0];
let zero_point = params.zero_point[0];
let mut result = Array1::zeros(original_len);
for (i, &q) in quantized.iter().enumerate() {
result[i] = (q as i32 - zero_point as i32) as f32 * scale;
}
Ok(result)
}
pub fn simulate_quantization(&self, activation: &Array1<f32>) -> ModelResult<Array1<f32>> {
let min_val = activation.iter().copied().fold(f32::INFINITY, f32::min);
let max_val = activation.iter().copied().fold(f32::NEG_INFINITY, f32::max);
let (scale, zero_point) = match self.method {
QuantizationMethod::Symmetric => {
let max_abs = max_val.abs().max(min_val.abs());
(max_abs / 127.0, 0)
}
QuantizationMethod::Asymmetric => {
let scale = (max_val - min_val) / 255.0;
let zp = (-128.0 - min_val / scale).round().clamp(-128.0, 127.0) as i8;
(scale, zp)
}
};
let mut result = Array1::zeros(activation.len());
for (i, &x) in activation.iter().enumerate() {
let q = match self.method {
QuantizationMethod::Symmetric => (x / scale).round().clamp(-128.0, 127.0) as i8,
QuantizationMethod::Asymmetric => {
let q_f = x / scale + zero_point as f32;
q_f.round().clamp(-128.0, 127.0) as i8
}
};
result[i] = (q as i32 - zero_point as i32) as f32 * scale;
}
Ok(result)
}
pub fn memory_savings(&self) -> f32 {
75.0 }
}
impl Default for ActivationQuantizer {
// Symmetric per-tensor quantization is the default configuration.
fn default() -> Self {
Self::new_symmetric()
}
}
#[cfg(test)]
mod tests {
use super::*;
// Absolute-tolerance float comparison for round-trip checks.
fn approx_eq(a: f32, b: f32, epsilon: f32) -> bool {
(a - b).abs() < epsilon
}
// Symmetric 1-D quantize/dequantize round-trips within quantization error.
#[test]
fn test_symmetric_quantization_1d() {
let array = Array1::from_vec(vec![-10.0, -5.0, 0.0, 5.0, 10.0]);
let quantized = quantize_symmetric_1d(&array).expect("Failed to quantize 1d array");
assert_eq!(quantized.shape, vec![5]);
assert_eq!(quantized.params.method, QuantizationMethod::Symmetric);
let dequantized = quantized
.dequantize_1d()
.expect("Failed to dequantize 1d array");
for i in 0..5 {
assert!(approx_eq(array[i], dequantized[i], 0.1));
}
}
// Symmetric 2-D quantize/dequantize round-trips within quantization error.
#[test]
fn test_symmetric_quantization_2d() {
let array = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, -1.0, -2.0, -3.0])
.expect("Failed to create test array");
let quantized = quantize_symmetric_2d(&array).expect("Failed to quantize 2d array");
assert_eq!(quantized.shape, vec![2, 3]);
let dequantized = quantized
.dequantize_2d()
.expect("Failed to dequantize 2d array");
for i in 0..2 {
for j in 0..3 {
assert!(approx_eq(array[[i, j]], dequantized[[i, j]], 0.05));
}
}
}
// Per-channel quantization yields one scale per row and round-trips even
// when row magnitudes differ by 10x.
#[test]
fn test_per_channel_quantization() {
let array = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 10.0, 20.0, 30.0])
.expect("Failed to create test array");
let quantized =
quantize_symmetric_per_channel(&array).expect("Failed to quantize per channel");
assert_eq!(
quantized.params.granularity,
QuantizationGranularity::PerChannel
);
assert_eq!(quantized.params.scale.len(), 2);
let dequantized = quantized
.dequantize_2d()
.expect("Failed to dequantize 2d array");
for i in 0..2 {
for j in 0..3 {
assert!(approx_eq(array[[i, j]], dequantized[[i, j]], 0.3));
}
}
}
// Asymmetric quantization handles a non-negative range accurately.
#[test]
fn test_asymmetric_quantization() {
let array = Array1::from_vec(vec![0.0, 1.0, 2.0, 3.0, 4.0]);
let quantized = quantize_asymmetric_1d(&array).expect("Failed to quantize asymmetric");
assert_eq!(quantized.params.method, QuantizationMethod::Asymmetric);
let dequantized = quantized.dequantize_1d().expect("Failed to dequantize");
for i in 0..5 {
assert!(approx_eq(array[i], dequantized[i], 0.05));
}
}
// Stats accumulate min/max/count across batches and derive the expected
// symmetric scale (max_abs / 127).
#[test]
fn test_calibration_stats() {
let mut stats = CalibrationStats::new();
let data1 = Array1::from_vec(vec![-5.0, 0.0, 5.0]);
let data2 = Array1::from_vec(vec![-10.0, -2.0, 8.0]);
stats.update_1d(&data1);
stats.update_1d(&data2);
assert_eq!(stats.min, -10.0);
assert_eq!(stats.max, 8.0);
assert_eq!(stats.count, 6);
let params = stats.to_symmetric_params().expect("Failed to get params");
assert!(approx_eq(params.scale[0], 10.0 / 127.0, 1e-6));
}
// i8 storage is one byte per element: 4x smaller than f32.
#[test]
fn test_memory_savings() {
let array = Array2::from_shape_vec((100, 100), vec![1.0; 10000])
.expect("Failed to create test array");
let quantized = quantize_symmetric_2d(&array).expect("Failed to quantize");
let original_size = 10000 * 4;
let quantized_size = quantized.memory_size();
assert_eq!(quantized_size, 10000); assert!(quantized_size < original_size / 3); }
// Uncalibrated symmetric quantizer derives dynamic params per call.
#[test]
fn test_activation_quantizer_symmetric() {
let quantizer = ActivationQuantizer::new_symmetric();
let activation = Array1::from_vec(vec![-10.0, -5.0, 0.0, 5.0, 10.0]);
let quantized = quantizer
.quantize_activation_1d(&activation)
.expect("Failed to quantize activation");
assert_eq!(quantized.len(), activation.len());
assert_eq!(quantizer.memory_savings(), 75.0);
}
// Uncalibrated asymmetric quantizer derives dynamic params per call.
#[test]
fn test_activation_quantizer_asymmetric() {
let quantizer = ActivationQuantizer::new_asymmetric();
let activation = Array1::from_vec(vec![0.0, 1.0, 2.0, 3.0, 4.0]);
let quantized = quantizer
.quantize_activation_1d(&activation)
.expect("Failed to quantize activation");
assert_eq!(quantized.len(), activation.len());
}
// Calibrated quantize/dequantize round-trips within one quantization step.
#[test]
fn test_activation_quantizer_with_calibration() {
let mut quantizer = ActivationQuantizer::new_symmetric();
let mut stats = CalibrationStats::new();
stats.update_1d(&Array1::from_vec(vec![-10.0, 0.0, 10.0]));
stats.update_1d(&Array1::from_vec(vec![-5.0, 0.0, 5.0]));
quantizer.calibrate(&stats).expect("Failed to calibrate");
let activation = Array1::from_vec(vec![-8.0, 0.0, 8.0]);
let quantized = quantizer
.quantize_activation_1d(&activation)
.expect("Failed to quantize activation");
let dequantized = quantizer
.dequantize_activation_1d(&quantized, activation.len())
.expect("Failed to dequantize activation");
for i in 0..activation.len() {
assert!((activation[i] - dequantized[i]).abs() < 1.0);
}
}
// Simulated quantization stays close to the original activation.
#[test]
fn test_simulate_quantization() {
let quantizer = ActivationQuantizer::new_symmetric();
let activation = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
let simulated = quantizer
.simulate_quantization(&activation)
.expect("Failed to simulate quantization");
for i in 0..activation.len() {
assert!((activation[i] - simulated[i]).abs() < 0.1);
}
}
}