use crate::error::QuantResult;
/// Compression statistics for one tensor or layer.
///
/// The constructors record a 32-bit baseline and derive the effective bit
/// width from the quantization width scaled by `1.0 - sparsity`.
#[derive(Debug, Clone, Default)]
pub struct CompressionMetrics {
    /// Number of parameters these metrics describe.
    pub n_parameters: u64,
    /// Bits per parameter before compression (32.0 when built by a constructor).
    pub original_bits_per_param: f32,
    /// Average bits per parameter after compression.
    pub effective_bits_per_param: f32,
    /// Sparsity as passed to the constructor (0.0 for quantization-only).
    pub sparsity: f32,
    /// Quantization mean squared error as passed to the constructor
    /// (0.0 for pruning-only).
    pub quantization_mse: f32,
}

impl CompressionMetrics {
    /// Bit width every constructor records as the uncompressed baseline.
    const BASELINE_BITS: f32 = 32.0;

    /// Shared constructor body: stores the inputs and derives the effective
    /// width as `dense_bits * (1.0 - sparsity)`.
    fn from_dense_bits(
        n_parameters: u64,
        dense_bits: f32,
        sparsity: f32,
        quantization_mse: f32,
    ) -> Self {
        Self {
            n_parameters,
            original_bits_per_param: Self::BASELINE_BITS,
            effective_bits_per_param: dense_bits * (1.0 - sparsity),
            sparsity,
            quantization_mse,
        }
    }

    /// Metrics for a layer quantized to `quant_bits` bits with no pruning.
    ///
    /// The effective width equals `quant_bits` and the sparsity is 0.0.
    #[must_use]
    pub fn quantized_only(n_parameters: u64, quant_bits: u32, quant_mse: f32) -> Self {
        // Zero sparsity multiplies the width by exactly 1.0, leaving it unchanged.
        Self::from_dense_bits(n_parameters, quant_bits as f32, 0.0, quant_mse)
    }

    /// Metrics for a layer pruned to `sparsity` and kept at the 32-bit baseline.
    ///
    /// The effective width is `32.0 * (1.0 - sparsity)` and the quantization
    /// MSE is recorded as 0.0.
    #[must_use]
    pub fn pruned_only(n_parameters: u64, sparsity: f32) -> Self {
        Self::from_dense_bits(n_parameters, Self::BASELINE_BITS, sparsity, 0.0)
    }

    /// Metrics for a layer that is both quantized and pruned.
    ///
    /// The effective width is `quant_bits * (1.0 - sparsity)`.
    #[must_use]
    pub fn quantized_and_pruned(
        n_parameters: u64,
        quant_bits: u32,
        sparsity: f32,
        quant_mse: f32,
    ) -> Self {
        Self::from_dense_bits(n_parameters, quant_bits as f32, sparsity, quant_mse)
    }

    /// Ratio of original to effective bits per parameter.
    ///
    /// Returns `f32::INFINITY` when the effective width is zero or negative.
    #[must_use]
    pub fn compression_ratio(&self) -> f32 {
        let baseline = self.original_bits_per_param;
        let effective = self.effective_bits_per_param;
        if effective <= 0.0 {
            f32::INFINITY
        } else {
            baseline / effective
        }
    }

    /// Total size before compression, in bits.
    #[must_use]
    pub fn total_original_bits(&self) -> f64 {
        let count = self.n_parameters as f64;
        count * f64::from(self.original_bits_per_param)
    }

    /// Total size after compression, in bits.
    #[must_use]
    pub fn total_compressed_bits(&self) -> f64 {
        let count = self.n_parameters as f64;
        count * f64::from(self.effective_bits_per_param)
    }
}
/// Per-layer compression metrics collected for a whole model.
///
/// `layers` and `names` are parallel vectors: [`ModelCompressionMetrics::add_layer`]
/// pushes one entry onto each, so index `i` refers to the same layer in both
/// as long as the fields are not modified directly.
#[derive(Debug, Clone, Default)]
pub struct ModelCompressionMetrics {
    /// Metrics of each layer, in insertion order.
    pub layers: Vec<CompressionMetrics>,
    /// Name of each layer, in insertion order.
    pub names: Vec<String>,
}

impl ModelCompressionMetrics {
    /// Creates an empty collection with no layers.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends a layer's metrics together with its name.
    pub fn add_layer(&mut self, name: impl Into<String>, m: CompressionMetrics) {
        self.names.push(name.into());
        self.layers.push(m);
    }

    /// Sum of `n_parameters` over all layers.
    #[must_use]
    pub fn total_parameters(&self) -> u64 {
        self.layers.iter().map(|m| m.n_parameters).sum()
    }

    /// Whole-model compression ratio: total original bits divided by total
    /// compressed bits, summed over all layers.
    ///
    /// Returns `f32::INFINITY` when the compressed total is zero or negative,
    /// which includes the case of a model with no layers.
    #[must_use]
    pub fn model_compression_ratio(&self) -> f32 {
        let orig: f64 = self.layers.iter().map(|m| m.total_original_bits()).sum();
        let comp: f64 = self.layers.iter().map(|m| m.total_compressed_bits()).sum();
        if comp <= 0.0 {
            return f32::INFINITY;
        }
        (orig / comp) as f32
    }

    /// Parameter-count-weighted mean of the per-layer quantization MSE.
    ///
    /// Returns 0.0 when the model holds no parameters.
    #[must_use]
    pub fn mean_quantization_mse(&self) -> f32 {
        self.parameter_weighted_mean(|m| m.quantization_mse)
    }

    /// Parameter-count-weighted mean of the per-layer effective bit width.
    ///
    /// Returns 0.0 when the model holds no parameters.
    #[must_use]
    pub fn average_effective_bits(&self) -> f32 {
        self.parameter_weighted_mean(|m| m.effective_bits_per_param)
    }

    /// Adds a quantization-only layer whose parameter count is `weights.len()`.
    ///
    /// `weights` is only inspected for its length; `quantization_mse` is
    /// stored as given rather than computed from the slice.
    ///
    /// # Errors
    ///
    /// Returns `QuantError::EmptyInput` when `weights` is empty; nothing is
    /// added to the model in that case.
    pub fn add_quantized_layer(
        &mut self,
        name: impl Into<String>,
        weights: &[f32],
        quant_bits: u32,
        quantization_mse: f32,
    ) -> QuantResult<()> {
        if weights.is_empty() {
            return Err(crate::error::QuantError::EmptyInput(
                "ModelCompressionMetrics::add_quantized_layer",
            ));
        }
        let m =
            CompressionMetrics::quantized_only(weights.len() as u64, quant_bits, quantization_mse);
        self.add_layer(name, m);
        Ok(())
    }

    /// Mean of `value(layer)` over all layers, weighted by `n_parameters`.
    ///
    /// The accumulation runs in `f64` and is narrowed to `f32` once at the
    /// end. Summing in `f32` silently drops the contribution of small layers
    /// once the running total exceeds 2^24, which is routine for real models.
    fn parameter_weighted_mean(&self, value: impl Fn(&CompressionMetrics) -> f32) -> f32 {
        let total_n = self.total_parameters();
        if total_n == 0 {
            return 0.0;
        }
        let weighted: f64 = self
            .layers
            .iter()
            .map(|m| f64::from(value(m)) * m.n_parameters as f64)
            .sum();
        (weighted / total_n as f64) as f32
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// Absolute tolerance used for `f32` ratio and bit-width comparisons.
    const TOL: f32 = 1e-5;

    /// Quantization-only layer with zero recorded error.
    fn lossless(n_parameters: u64, quant_bits: u32) -> CompressionMetrics {
        CompressionMetrics::quantized_only(n_parameters, quant_bits, 0.0)
    }

    /// 32 -> 8 bits is a 4x reduction.
    #[test]
    fn int8_compression_ratio() {
        let ratio = lossless(1024, 8).compression_ratio();
        assert_abs_diff_eq!(ratio, 4.0, epsilon = TOL);
    }

    /// 32 -> 4 bits is an 8x reduction.
    #[test]
    fn int4_compression_ratio() {
        let ratio = lossless(1024, 4).compression_ratio();
        assert_abs_diff_eq!(ratio, 8.0, epsilon = TOL);
    }

    /// Half the weights pruned at full precision halves the size.
    #[test]
    fn pruned_50_percent_fp32_ratio() {
        let pruned = CompressionMetrics::pruned_only(1024, 0.5);
        assert_abs_diff_eq!(pruned.compression_ratio(), 2.0, epsilon = TOL);
    }

    /// 4-bit quantization plus 50% sparsity gives 2 effective bits (16x).
    #[test]
    fn quantized_and_pruned_metrics() {
        let combined = CompressionMetrics::quantized_and_pruned(1024, 4, 0.5, 0.001);
        assert_abs_diff_eq!(combined.effective_bits_per_param, 2.0, epsilon = TOL);
        assert_abs_diff_eq!(combined.compression_ratio(), 16.0, epsilon = TOL);
    }

    /// The model ratio is dominated by the larger 4-bit layer.
    #[test]
    fn model_compression_ratio_weighted() {
        let mut metrics = ModelCompressionMetrics::new();
        metrics.add_layer("l0", lossless(100, 8));
        metrics.add_layer("l1", lossless(900, 4));

        let ratio = metrics.model_compression_ratio();
        assert!(ratio > 7.0 && ratio < 8.0, "ratio = {ratio}");
    }

    /// Equal-sized 4-bit and 8-bit layers average to 6 bits.
    #[test]
    fn average_effective_bits() {
        let mut metrics = ModelCompressionMetrics::new();
        for (name, bits) in [("l0", 4), ("l1", 8)] {
            metrics.add_layer(name, lossless(100, bits));
        }
        assert_abs_diff_eq!(metrics.average_effective_bits(), 6.0, epsilon = TOL);
    }

    /// Totals are parameter count times bits per parameter.
    #[test]
    fn total_bits_correct() {
        let layer = lossless(1000, 8);
        assert_abs_diff_eq!(layer.total_original_bits(), 32_000.0, epsilon = 1.0);
        assert_abs_diff_eq!(layer.total_compressed_bits(), 8_000.0, epsilon = 1.0);
    }

    /// A zero effective width must not divide by zero.
    #[test]
    fn zero_effective_bits_gives_infinity() {
        let degenerate = CompressionMetrics {
            effective_bits_per_param: 0.0,
            ..Default::default()
        };
        assert!(degenerate.compression_ratio().is_infinite());
    }
}