// oxicuda_quant/analysis/metrics.rs

1//! # Compression Metrics
2//!
3//! Tracks the compression statistics for quantized and pruned models:
4//! effective bits per parameter, compression ratio, and sparsity.
5
6use crate::error::QuantResult;
7
8// ─── CompressionMetrics ───────────────────────────────────────────────────────
9
/// Compression statistics for a single layer or an entire model.
///
/// Normally built via [`CompressionMetrics::quantized_only`],
/// [`CompressionMetrics::pruned_only`], or
/// [`CompressionMetrics::quantized_and_pruned`] rather than constructed
/// field-by-field.
#[derive(Debug, Clone, Default)]
pub struct CompressionMetrics {
    /// Number of parameters covered by these metrics.
    pub n_parameters: u64,
    /// Bit-width of the original (full-precision) representation.
    /// The provided constructors always set this to 32 (FP32).
    pub original_bits_per_param: f32,
    /// Effective bit-width used in the compressed representation.
    /// For quantized weights this equals `quant_bits`; for pruned weights
    /// the effective bits = `quant_bits × (1 − sparsity)`.
    pub effective_bits_per_param: f32,
    /// Fraction of weights that are pruned, expected in [0, 1].
    /// Not validated on construction — TODO confirm callers clamp.
    pub sparsity: f32,
    /// Mean squared quantization error (0 if no quantization was applied).
    pub quantization_mse: f32,
}
26
27impl CompressionMetrics {
28    /// Create metrics for a layer quantized to `quant_bits` bits with no pruning.
29    #[must_use]
30    pub fn quantized_only(n_parameters: u64, quant_bits: u32, quant_mse: f32) -> Self {
31        Self {
32            n_parameters,
33            original_bits_per_param: 32.0,
34            effective_bits_per_param: quant_bits as f32,
35            sparsity: 0.0,
36            quantization_mse: quant_mse,
37        }
38    }
39
40    /// Create metrics for a layer pruned to `sparsity` with FP32 weights.
41    #[must_use]
42    pub fn pruned_only(n_parameters: u64, sparsity: f32) -> Self {
43        Self {
44            n_parameters,
45            original_bits_per_param: 32.0,
46            effective_bits_per_param: 32.0 * (1.0 - sparsity),
47            sparsity,
48            quantization_mse: 0.0,
49        }
50    }
51
52    /// Create metrics for a layer that is both quantized and pruned.
53    #[must_use]
54    pub fn quantized_and_pruned(
55        n_parameters: u64,
56        quant_bits: u32,
57        sparsity: f32,
58        quant_mse: f32,
59    ) -> Self {
60        Self {
61            n_parameters,
62            original_bits_per_param: 32.0,
63            effective_bits_per_param: quant_bits as f32 * (1.0 - sparsity),
64            sparsity,
65            quantization_mse: quant_mse,
66        }
67    }
68
69    /// Ratio of original to compressed storage: `original_bits / effective_bits`.
70    ///
71    /// Returns `f32::INFINITY` if the effective bits per param is 0.
72    #[must_use]
73    pub fn compression_ratio(&self) -> f32 {
74        if self.effective_bits_per_param <= 0.0 {
75            return f32::INFINITY;
76        }
77        self.original_bits_per_param / self.effective_bits_per_param
78    }
79
80    /// Total original bits for this layer.
81    #[must_use]
82    pub fn total_original_bits(&self) -> f64 {
83        self.n_parameters as f64 * self.original_bits_per_param as f64
84    }
85
86    /// Total compressed bits for this layer.
87    #[must_use]
88    pub fn total_compressed_bits(&self) -> f64 {
89        self.n_parameters as f64 * self.effective_bits_per_param as f64
90    }
91}
92
93// ─── ModelCompressionMetrics ─────────────────────────────────────────────────
94
/// Aggregated compression statistics over an entire model.
///
/// `layers` and `names` are parallel vectors kept in sync by
/// `add_layer`: entry `i` of `names` labels entry `i` of `layers`.
#[derive(Debug, Clone, Default)]
pub struct ModelCompressionMetrics {
    /// Per-layer metrics.
    pub layers: Vec<CompressionMetrics>,
    /// Per-layer names (same order and length as `layers`).
    pub names: Vec<String>,
}
103
104impl ModelCompressionMetrics {
105    /// Create an empty model metrics container.
106    #[must_use]
107    pub fn new() -> Self {
108        Self::default()
109    }
110
111    /// Add a layer's metrics.
112    pub fn add_layer(&mut self, name: impl Into<String>, m: CompressionMetrics) {
113        self.names.push(name.into());
114        self.layers.push(m);
115    }
116
117    /// Total number of parameters across all layers.
118    #[must_use]
119    pub fn total_parameters(&self) -> u64 {
120        self.layers.iter().map(|m| m.n_parameters).sum()
121    }
122
123    /// Model-wide compression ratio (total original bits / total compressed bits).
124    #[must_use]
125    pub fn model_compression_ratio(&self) -> f32 {
126        let orig: f64 = self.layers.iter().map(|m| m.total_original_bits()).sum();
127        let comp: f64 = self.layers.iter().map(|m| m.total_compressed_bits()).sum();
128        if comp <= 0.0 {
129            return f32::INFINITY;
130        }
131        (orig / comp) as f32
132    }
133
134    /// Weighted average quantization MSE across all layers.
135    #[must_use]
136    pub fn mean_quantization_mse(&self) -> f32 {
137        let total_n: u64 = self.total_parameters();
138        if total_n == 0 {
139            return 0.0;
140        }
141        let weighted: f32 = self
142            .layers
143            .iter()
144            .map(|m| m.quantization_mse * m.n_parameters as f32)
145            .sum();
146        weighted / total_n as f32
147    }
148
149    /// Average effective bits per parameter across all layers (parameter-weighted).
150    #[must_use]
151    pub fn average_effective_bits(&self) -> f32 {
152        let total_n = self.total_parameters();
153        if total_n == 0 {
154            return 0.0;
155        }
156        let weighted: f32 = self
157            .layers
158            .iter()
159            .map(|m| m.effective_bits_per_param * m.n_parameters as f32)
160            .sum();
161        weighted / total_n as f32
162    }
163
164    /// Compute the MSE for a quantized layer inline and add it.
165    ///
166    /// This is a convenience method to avoid pre-computing MSE externally.
167    ///
168    /// # Errors
169    ///
170    /// Returns [`crate::error::QuantError::EmptyInput`] if `weights` is empty.
171    pub fn add_quantized_layer(
172        &mut self,
173        name: impl Into<String>,
174        weights: &[f32],
175        quant_bits: u32,
176        quantization_mse: f32,
177    ) -> QuantResult<()> {
178        if weights.is_empty() {
179            return Err(crate::error::QuantError::EmptyInput(
180                "ModelCompressionMetrics::add_quantized_layer",
181            ));
182        }
183        let m =
184            CompressionMetrics::quantized_only(weights.len() as u64, quant_bits, quantization_mse);
185        self.add_layer(name, m);
186        Ok(())
187    }
188}
189
190// ─── Tests ───────────────────────────────────────────────────────────────────
191
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    #[test]
    fn int8_compression_ratio() {
        // FP32 → INT8 is a 32/8 = 4× reduction.
        let metrics = CompressionMetrics::quantized_only(1024, 8, 0.0);
        assert_abs_diff_eq!(metrics.compression_ratio(), 4.0, epsilon = 1e-5);
    }

    #[test]
    fn int4_compression_ratio() {
        // FP32 → INT4 is a 32/4 = 8× reduction.
        let metrics = CompressionMetrics::quantized_only(1024, 4, 0.0);
        assert_abs_diff_eq!(metrics.compression_ratio(), 8.0, epsilon = 1e-5);
    }

    #[test]
    fn pruned_50_percent_fp32_ratio() {
        // Half the FP32 weights removed: effective = 32 × 0.5 = 16 bits,
        // so the ratio is 32 / 16 = 2.
        let metrics = CompressionMetrics::pruned_only(1024, 0.5);
        assert_abs_diff_eq!(metrics.compression_ratio(), 2.0, epsilon = 1e-5);
    }

    #[test]
    fn quantized_and_pruned_metrics() {
        // INT4 with 50% sparsity: 4 × 0.5 = 2 effective bits/param,
        // giving a 32 / 2 = 16× ratio.
        let metrics = CompressionMetrics::quantized_and_pruned(1024, 4, 0.5, 0.001);
        assert_abs_diff_eq!(metrics.effective_bits_per_param, 2.0, epsilon = 1e-5);
        assert_abs_diff_eq!(metrics.compression_ratio(), 16.0, epsilon = 1e-5);
    }

    #[test]
    fn model_compression_ratio_weighted() {
        let mut model = ModelCompressionMetrics::new();
        model.add_layer("l0", CompressionMetrics::quantized_only(100, 8, 0.0));
        model.add_layer("l1", CompressionMetrics::quantized_only(900, 4, 0.0));
        // original: 1000 × 32 = 32000 bits
        // compressed: 100×8 + 900×4 = 4400 bits
        // expected ratio ≈ 32000 / 4400 ≈ 7.27
        let ratio = model.model_compression_ratio();
        assert!(ratio > 7.0 && ratio < 8.0, "ratio = {ratio}");
    }

    #[test]
    fn average_effective_bits() {
        let mut model = ModelCompressionMetrics::new();
        model.add_layer("l0", CompressionMetrics::quantized_only(100, 4, 0.0));
        model.add_layer("l1", CompressionMetrics::quantized_only(100, 8, 0.0));
        // Equal parameter counts, so the weighted mean of 4 and 8 is 6.
        assert_abs_diff_eq!(model.average_effective_bits(), 6.0, epsilon = 1e-5);
    }

    #[test]
    fn total_bits_correct() {
        let metrics = CompressionMetrics::quantized_only(1000, 8, 0.0);
        assert_abs_diff_eq!(metrics.total_original_bits(), 32_000.0, epsilon = 1.0);
        assert_abs_diff_eq!(metrics.total_compressed_bits(), 8_000.0, epsilon = 1.0);
    }

    #[test]
    fn zero_effective_bits_gives_infinity() {
        // Degenerate case: nothing stored per parameter.
        let metrics = CompressionMetrics {
            effective_bits_per_param: 0.0,
            ..Default::default()
        };
        assert!(metrics.compression_ratio().is_infinite());
    }
}