use thiserror::Error;
use trustformers_core::errors::{Result, TrustformersError};
#[derive(Debug, Error)]
pub enum MobileQuantError {
#[error("empty input tensor")]
EmptyInput,
#[error("invalid group size: {0}")]
InvalidGroupSize(usize),
#[error("shape mismatch: expected {expected}, got {got}")]
ShapeMismatch { expected: usize, got: usize },
#[error("packed buffer length {packed_len} insufficient for {count} values")]
InsufficientBuffer { packed_len: usize, count: usize },
}
impl From<MobileQuantError> for TrustformersError {
fn from(e: MobileQuantError) -> Self {
TrustformersError::invalid_input(e.to_string())
}
}
#[derive(Debug, Clone)]
pub struct Int4QuantConfig {
pub group_size: usize,
pub asymmetric: bool,
pub exclude_layers: Vec<String>,
pub double_quantize: bool,
}
impl Default for Int4QuantConfig {
fn default() -> Self {
Self {
group_size: 128,
asymmetric: false,
exclude_layers: Vec::new(),
double_quantize: false,
}
}
}
#[derive(Debug, Clone)]
pub struct Int4Config {
pub group_size: usize,
pub zero_point: bool,
pub symmetric: bool,
pub per_channel: bool,
}
impl Default for Int4Config {
fn default() -> Self {
Self {
group_size: 128,
zero_point: false,
symmetric: true,
per_channel: false,
}
}
}
impl Int4Config {
pub fn to_quant_config(&self) -> Int4QuantConfig {
Int4QuantConfig {
group_size: self.group_size,
asymmetric: self.zero_point && !self.symmetric,
exclude_layers: Vec::new(),
double_quantize: false,
}
}
}
pub fn pack_int4(values: &[i8]) -> Vec<u8> {
let padded = if values.len() % 2 == 0 {
values.len()
} else {
values.len() + 1
};
let byte_count = padded / 2;
let mut out = Vec::with_capacity(byte_count);
for i in 0..byte_count {
let lo_raw = if 2 * i < values.len() { values[2 * i] } else { 0i8 };
let hi_raw = if 2 * i + 1 < values.len() { values[2 * i + 1] } else { 0i8 };
let lo = (lo_raw.clamp(-8, 7) + 8) as u8;
let hi = (hi_raw.clamp(-8, 7) + 8) as u8;
out.push((hi << 4) | (lo & 0x0F));
}
out
}
pub fn unpack_int4(packed: &[u8], count: usize) -> Vec<i8> {
let mut out = Vec::with_capacity(count);
for i in 0..count {
let byte = packed[i / 2];
let nibble = if i % 2 == 0 {
byte & 0x0F
} else {
byte >> 4
};
out.push(nibble as i8 - 8);
}
out
}
#[derive(Debug, Clone)]
pub struct Int4Tensor {
pub packed_data: Vec<u8>,
pub scales: Vec<f32>,
pub zero_points: Option<Vec<u8>>,
pub shape: Vec<usize>,
pub group_size: usize,
pub asymmetric: bool,
}
impl Int4Tensor {
#[inline]
fn pack_nibbles(lo: u8, hi: u8) -> u8 {
(hi << 4) | (lo & 0x0F)
}
#[inline]
fn unpack_nibbles(packed: u8) -> (u8, u8) {
(packed & 0x0F, packed >> 4)
}
pub fn quantize(data: &[f32], shape: &[usize], config: &Int4QuantConfig) -> Result<Self> {
if data.is_empty() {
return Err(TrustformersError::invalid_input("empty tensor cannot be quantized".to_string()));
}
let total: usize = shape.iter().product();
if total != data.len() {
return Err(TrustformersError::invalid_input(format!(
"shape product {} does not match data length {}",
total,
data.len()
)));
}
if config.group_size == 0 {
return Err(TrustformersError::invalid_input("group_size must be > 0".to_string()));
}
let num_groups = total.div_ceil(config.group_size);
let mut scales: Vec<f32> = Vec::with_capacity(num_groups);
let mut zero_points: Vec<u8> = Vec::with_capacity(num_groups);
let padded_len = if total % 2 == 0 { total } else { total + 1 };
let mut nibbles: Vec<u8> = vec![0u8; padded_len];
for g in 0..num_groups {
let start = g * config.group_size;
let end = (start + config.group_size).min(total);
let group = &data[start..end];
if config.asymmetric {
let min = group.iter().cloned().fold(f32::INFINITY, f32::min);
let max = group.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let range = max - min;
let scale = if range.abs() < f32::EPSILON { 1.0 } else { range / 15.0 };
let zp = (-min / scale).round().clamp(0.0, 15.0) as u8;
scales.push(scale);
zero_points.push(zp);
for (i, &v) in group.iter().enumerate() {
let q = (v / scale + zp as f32).round().clamp(0.0, 15.0) as u8;
nibbles[start + i] = q;
}
} else {
let abs_max = group
.iter()
.cloned()
.fold(0.0_f32, |a, x| a.max(x.abs()));
let scale = if abs_max < f32::EPSILON { 1.0 } else { abs_max / 7.0 };
scales.push(scale);
zero_points.push(8); for (i, &v) in group.iter().enumerate() {
let q = ((v / scale) + 8.0).round().clamp(0.0, 15.0) as u8;
nibbles[start + i] = q;
}
}
}
let byte_count = padded_len / 2;
let mut packed_data: Vec<u8> = Vec::with_capacity(byte_count);
for i in 0..byte_count {
packed_data.push(Self::pack_nibbles(nibbles[2 * i], nibbles[2 * i + 1]));
}
Ok(Self {
packed_data,
scales,
zero_points: if config.asymmetric {
Some(zero_points)
} else {
None
},
shape: shape.to_vec(),
group_size: config.group_size,
asymmetric: config.asymmetric,
})
}
pub fn from_config(tensor: &[f32], config: &Int4Config) -> std::result::Result<Self, MobileQuantError> {
if tensor.is_empty() {
return Err(MobileQuantError::EmptyInput);
}
if config.group_size == 0 {
return Err(MobileQuantError::InvalidGroupSize(0));
}
let shape = vec![tensor.len()];
let qconfig = config.to_quant_config();
Self::quantize(tensor, &shape, &qconfig).map_err(|e| {
MobileQuantError::ShapeMismatch { expected: tensor.len(), got: e.to_string().len() }
})
}
pub fn dequantize(&self) -> Vec<f32> {
let total: usize = self.shape.iter().product();
let padded_len = if total % 2 == 0 { total } else { total + 1 };
let mut nibbles: Vec<u8> = Vec::with_capacity(padded_len);
for &byte in &self.packed_data {
let (lo, hi) = Self::unpack_nibbles(byte);
nibbles.push(lo);
nibbles.push(hi);
}
nibbles.truncate(total);
let mut output: Vec<f32> = Vec::with_capacity(total);
let num_groups = total.div_ceil(self.group_size);
for g in 0..num_groups {
let start = g * self.group_size;
let end = (start + self.group_size).min(total);
let scale = self.scales[g];
if self.asymmetric {
let zp = self.zero_points.as_ref().map(|z| z[g]).unwrap_or(0) as f32;
for i in start..end {
output.push((nibbles[i] as f32 - zp) * scale);
}
} else {
for i in start..end {
output.push((nibbles[i] as f32 - 8.0) * scale);
}
}
}
output
}
pub fn size_bytes(&self) -> usize {
let packed = self.packed_data.len();
let scales_bytes = self.scales.len() * std::mem::size_of::<f32>();
let zp_bytes = self
.zero_points
.as_ref()
.map(|z| z.len())
.unwrap_or(0);
packed + scales_bytes + zp_bytes
}
pub fn compression_ratio(&self) -> f32 {
let total: usize = self.shape.iter().product();
let fp32_bytes = total * std::mem::size_of::<f32>();
fp32_bytes as f32 / self.size_bytes() as f32
}
pub fn num_groups(&self) -> usize {
self.scales.len()
}
}
pub struct Int4Gemv;
impl Int4Gemv {
pub fn compute(
packed_weights: &[u8],
scales: &[f32],
input: &[f32],
rows: usize,
cols: usize,
) -> Vec<f32> {
let bytes_per_row = cols.div_ceil(2);
let mut output = vec![0.0f32; rows];
for r in 0..rows {
let row_start = r * bytes_per_row;
let scale = if r < scales.len() { scales[r] } else { 1.0 };
let mut acc = 0.0f32;
for c in 0..cols {
let byte_idx = row_start + c / 2;
let byte = if byte_idx < packed_weights.len() {
packed_weights[byte_idx]
} else {
0u8
};
let nibble = if c % 2 == 0 {
byte & 0x0F
} else {
byte >> 4
};
let weight = (nibble as f32 - 8.0) * scale;
let inp = if c < input.len() { input[c] } else { 0.0 };
acc += weight * inp;
}
output[r] = acc;
}
output
}
}
#[derive(Debug, Clone)]
pub struct QuantizationMetrics {
pub max_abs_error: f32,
pub mean_abs_error: f32,
pub rmse: f32,
pub snr_db: f32,
pub compression_ratio: f32,
}
impl QuantizationMetrics {
pub fn compute(original: &[f32], dequantized: &[f32]) -> Self {
assert_eq!(original.len(), dequantized.len(), "length mismatch");
let n = original.len() as f32;
let mut max_abs = 0.0_f32;
let mut sum_abs = 0.0_f32;
let mut sum_sq_err = 0.0_f32;
let mut signal_power = 0.0_f32;
for (&o, &d) in original.iter().zip(dequantized.iter()) {
let err = (o - d).abs();
max_abs = max_abs.max(err);
sum_abs += err;
sum_sq_err += (o - d) * (o - d);
signal_power += o * o;
}
let rmse = (sum_sq_err / n).sqrt();
let noise_power = sum_sq_err / n;
let snr_db = if noise_power < f32::EPSILON {
f32::INFINITY
} else {
10.0 * (signal_power / n / noise_power).log10()
};
Self {
max_abs_error: max_abs,
mean_abs_error: sum_abs / n,
rmse,
snr_db,
compression_ratio: 0.0, }
}
}
#[cfg(test)]
mod tests {
use super::*;
fn linspace(start: f32, end: f32, n: usize) -> Vec<f32> {
(0..n)
.map(|i| start + (end - start) * i as f32 / (n - 1) as f32)
.collect()
}
#[test]
fn test_symmetric_quantize_dequantize_basic() {
let data: Vec<f32> = linspace(-1.0, 1.0, 128);
let config = Int4QuantConfig::default();
let tensor = Int4Tensor::quantize(&data, &[128], &config).expect("should quantize");
let deq = tensor.dequantize();
assert_eq!(deq.len(), 128);
let metrics = QuantizationMetrics::compute(&data, &deq);
assert!(metrics.max_abs_error < 0.15, "max_abs_error too large: {}", metrics.max_abs_error);
}
#[test]
fn test_asymmetric_quantize_dequantize() {
let data: Vec<f32> = linspace(0.0, 4.0, 64);
let config = Int4QuantConfig {
group_size: 64,
asymmetric: true,
..Default::default()
};
let tensor = Int4Tensor::quantize(&data, &[64], &config).expect("should quantize");
assert!(tensor.zero_points.is_some());
let deq = tensor.dequantize();
let metrics = QuantizationMetrics::compute(&data, &deq);
assert!(metrics.max_abs_error < 0.3, "asymmetric error too large: {}", metrics.max_abs_error);
}
#[test]
fn test_pack_unpack_nibbles() {
for lo in 0u8..16 {
for hi in 0u8..16 {
let packed = Int4Tensor::pack_nibbles(lo, hi);
let (ul, uh) = Int4Tensor::unpack_nibbles(packed);
assert_eq!(ul, lo, "lo nibble mismatch");
assert_eq!(uh, hi, "hi nibble mismatch");
}
}
}
#[test]
fn test_compression_ratio() {
let data: Vec<f32> = vec![0.5f32; 512];
let config = Int4QuantConfig { group_size: 128, ..Default::default() };
let tensor = Int4Tensor::quantize(&data, &[512], &config).expect("should quantize");
let ratio = tensor.compression_ratio();
assert!(ratio > 4.0, "expected ratio > 4, got {ratio}");
}
#[test]
fn test_size_bytes_is_less_than_fp32() {
let n = 1024usize;
let data: Vec<f32> = vec![1.0f32; n];
let config = Int4QuantConfig { group_size: 128, ..Default::default() };
let tensor = Int4Tensor::quantize(&data, &[n], &config).expect("should quantize");
let fp32_bytes = n * 4;
assert!(tensor.size_bytes() < fp32_bytes);
}
#[test]
fn test_shape_mismatch_returns_error() {
let data = vec![1.0f32; 64];
let config = Int4QuantConfig::default();
let result = Int4Tensor::quantize(&data, &[65], &config);
assert!(result.is_err());
}
#[test]
fn test_zero_tensor_quantizes_without_panic() {
let data = vec![0.0f32; 128];
let config = Int4QuantConfig::default();
let tensor = Int4Tensor::quantize(&data, &[128], &config).expect("should handle zero tensor");
let deq = tensor.dequantize();
for v in &deq {
assert!(v.abs() < 1e-6, "expected ~0 after dequantize, got {v}");
}
}
#[test]
fn test_multi_group_quantization() {
let data: Vec<f32> = linspace(-2.0, 2.0, 256);
let config = Int4QuantConfig { group_size: 64, ..Default::default() };
let tensor = Int4Tensor::quantize(&data, &[256], &config).expect("should quantize");
assert_eq!(tensor.num_groups(), 4); let deq = tensor.dequantize();
assert_eq!(deq.len(), 256);
}
#[test]
fn test_quantization_metrics_perfect() {
let data = vec![1.0f32, 2.0, 3.0, 4.0];
let metrics = QuantizationMetrics::compute(&data, &data);
assert_eq!(metrics.max_abs_error, 0.0);
assert_eq!(metrics.rmse, 0.0);
assert!(metrics.snr_db.is_infinite() || metrics.snr_db > 100.0);
}
#[test]
fn test_odd_length_tensor() {
let data = vec![0.5f32; 5]; let config = Int4QuantConfig { group_size: 4, ..Default::default() };
let tensor = Int4Tensor::quantize(&data, &[5], &config).expect("should handle odd length");
let deq = tensor.dequantize();
assert_eq!(deq.len(), 5);
}
#[test]
fn test_pack_int4_roundtrip_even() {
let values: Vec<i8> = vec![-7, -4, -1, 0, 1, 4, 7, 3];
let packed = pack_int4(&values);
let unpacked = unpack_int4(&packed, values.len());
assert_eq!(unpacked, values, "pack/unpack roundtrip failed");
}
#[test]
fn test_pack_int4_roundtrip_odd() {
let values: Vec<i8> = vec![-3, 0, 5];
let packed = pack_int4(&values);
let unpacked = unpack_int4(&packed, values.len());
assert_eq!(unpacked, values, "odd-length roundtrip failed");
}
#[test]
fn test_pack_int4_byte_count() {
let values: Vec<i8> = vec![0i8; 8];
let packed = pack_int4(&values);
assert_eq!(packed.len(), 4);
}
#[test]
fn test_pack_int4_odd_byte_count() {
let values: Vec<i8> = vec![0i8; 5];
let packed = pack_int4(&values);
assert_eq!(packed.len(), 3);
}
#[test]
fn test_unpack_int4_count_truncation() {
let values: Vec<i8> = vec![1, 2, 3, 4];
let packed = pack_int4(&values);
let unpacked = unpack_int4(&packed, 2);
assert_eq!(unpacked.len(), 2);
assert_eq!(unpacked[0], 1);
assert_eq!(unpacked[1], 2);
}
#[test]
fn test_int4_config_defaults() {
let cfg = Int4Config::default();
assert_eq!(cfg.group_size, 128);
assert!(!cfg.zero_point);
assert!(cfg.symmetric);
assert!(!cfg.per_channel);
}
#[test]
fn test_int4_config_per_channel_flag() {
let cfg = Int4Config { per_channel: true, ..Default::default() };
assert!(cfg.per_channel);
}
#[test]
fn test_int4_config_to_quant_config_symmetric() {
let cfg = Int4Config { symmetric: true, zero_point: false, group_size: 64, per_channel: false };
let qcfg = cfg.to_quant_config();
assert_eq!(qcfg.group_size, 64);
assert!(!qcfg.asymmetric);
}
#[test]
fn test_int4_config_to_quant_config_asymmetric() {
let cfg = Int4Config { symmetric: false, zero_point: true, group_size: 32, per_channel: false };
let qcfg = cfg.to_quant_config();
assert!(qcfg.asymmetric);
}
#[test]
fn test_mobile_quant_error_empty_input() {
let cfg = Int4Config::default();
let result = Int4Tensor::from_config(&[], &cfg);
assert!(matches!(result, Err(MobileQuantError::EmptyInput)));
}
#[test]
fn test_mobile_quant_error_invalid_group_size() {
let cfg = Int4Config { group_size: 0, ..Default::default() };
let result = Int4Tensor::from_config(&[1.0, 2.0], &cfg);
assert!(matches!(result, Err(MobileQuantError::InvalidGroupSize(0))));
}
#[test]
fn test_mobile_quant_error_display() {
let e = MobileQuantError::EmptyInput;
assert!(!e.to_string().is_empty());
let e2 = MobileQuantError::InvalidGroupSize(0);
assert!(e2.to_string().contains('0'));
}
#[test]
fn test_int4_gemv_output_shape() {
let rows: usize = 4;
let cols: usize = 8;
let packed = vec![0x88u8; rows * cols.div_ceil(2)];
let scales = vec![1.0f32; rows];
let input = vec![1.0f32; cols];
let out = Int4Gemv::compute(&packed, &scales, &input, rows, cols);
assert_eq!(out.len(), rows);
}
#[test]
fn test_int4_gemv_zero_weights_produce_zero() {
let rows: usize = 3;
let cols: usize = 4;
let packed = vec![0x88u8; rows * cols.div_ceil(2)];
let scales = vec![1.0f32; rows];
let input = vec![1.0f32; cols];
let out = Int4Gemv::compute(&packed, &scales, &input, rows, cols);
for &v in &out {
assert!(v.abs() < 1e-6, "expected 0.0, got {v}");
}
}
#[test]
fn test_int4_gemv_nonzero_weights() {
let packed = vec![0x79u8]; let scales = vec![2.0f32];
let input = vec![1.0f32, 1.0f32];
let out = Int4Gemv::compute(&packed, &scales, &input, 1, 2);
assert_eq!(out.len(), 1);
assert!((out[0]).abs() < 1e-5, "expected 0.0, got {}", out[0]);
}
#[test]
fn test_int4_gemv_single_positive_weight() {
let packed = vec![0x09u8]; let scales = vec![1.0f32];
let input = vec![3.0f32];
let out = Int4Gemv::compute(&packed, &scales, &input, 1, 1);
assert!((out[0] - 3.0).abs() < 1e-5, "expected 3.0, got {}", out[0]);
}
#[test]
fn test_compression_ratio_large_tensor() {
let n = 8192usize;
let data: Vec<f32> = (0..n).map(|i| (i as f32) * 0.001).collect();
let config = Int4QuantConfig { group_size: 128, ..Default::default() };
let tensor = Int4Tensor::quantize(&data, &[n], &config).expect("quantize");
let ratio = tensor.compression_ratio();
assert!(ratio > 7.0, "expected ratio > 7.0 for large tensor, got {ratio}");
}
#[test]
fn test_from_config_basic() {
let data: Vec<f32> = (0..256).map(|i| i as f32 * 0.01).collect();
let config = Int4Config::default();
let tensor = Int4Tensor::from_config(&data, &config).expect("should succeed");
let deq = tensor.dequantize();
assert_eq!(deq.len(), 256);
}
}