oxicuda_quant/analysis/
metrics.rs1use crate::error::QuantResult;
7
/// Compression statistics for a single tensor or layer.
///
/// Bit-widths are averages per parameter. Every constructor on this type
/// assumes an uncompressed baseline of 32 bits per parameter.
#[derive(Debug, Clone, Default)]
pub struct CompressionMetrics {
    /// Number of parameters this record describes.
    pub n_parameters: u64,
    /// Bits per parameter before compression.
    pub original_bits_per_param: f32,
    /// Average bits per parameter after compression.
    pub effective_bits_per_param: f32,
    /// Fraction of parameters removed by pruning (`0.0` = dense, `1.0` = everything pruned).
    pub sparsity: f32,
    /// Quantization error (MSE) as supplied by the caller; `0.0` when no quantization is applied.
    pub quantization_mse: f32,
}

impl CompressionMetrics {
    /// Bit-width of the uncompressed baseline assumed by all constructors.
    const BASELINE_BITS: f32 = 32.0;

    /// Metrics for a layer that was quantized to `quant_bits` bits and not pruned.
    ///
    /// `quant_mse` is stored unchanged as the layer's quantization error.
    #[must_use]
    pub fn quantized_only(n_parameters: u64, quant_bits: u32, quant_mse: f32) -> Self {
        Self::quantized_and_pruned(n_parameters, quant_bits, 0.0, quant_mse)
    }

    /// Metrics for a layer that was pruned to the given `sparsity` and kept at 32 bits.
    ///
    /// `sparsity` is clamped to `[0.0, 1.0]`; see [`CompressionMetrics::quantized_and_pruned`].
    #[must_use]
    pub fn pruned_only(n_parameters: u64, sparsity: f32) -> Self {
        // 32 is exactly representable, so this matches `32.0 * (1.0 - sparsity)`.
        Self::quantized_and_pruned(n_parameters, 32, sparsity, 0.0)
    }

    /// Metrics for a layer that was both quantized to `quant_bits` bits and pruned.
    ///
    /// The effective bit-width is `quant_bits * (1 - sparsity)`.
    ///
    /// `sparsity` is clamped to `[0.0, 1.0]` before use, and the clamped value is
    /// what gets stored. Without the clamp an out-of-range input would produce a
    /// negative effective bit-width (sparsity > 1) or one larger than the
    /// quantized width (sparsity < 0). A NaN sparsity is passed through as NaN.
    #[must_use]
    pub fn quantized_and_pruned(
        n_parameters: u64,
        quant_bits: u32,
        sparsity: f32,
        quant_mse: f32,
    ) -> Self {
        let sparsity = sparsity.clamp(0.0, 1.0);
        Self {
            n_parameters,
            original_bits_per_param: Self::BASELINE_BITS,
            effective_bits_per_param: quant_bits as f32 * (1.0 - sparsity),
            sparsity,
            quantization_mse: quant_mse,
        }
    }

    /// Ratio of original to effective bits per parameter.
    ///
    /// Returns `f32::INFINITY` when the effective bit-width is zero or negative,
    /// so the division never happens with a non-positive denominator.
    #[must_use]
    pub fn compression_ratio(&self) -> f32 {
        if self.effective_bits_per_param <= 0.0 {
            return f32::INFINITY;
        }
        self.original_bits_per_param / self.effective_bits_per_param
    }

    /// Total size of the uncompressed layer, in bits.
    #[must_use]
    pub fn total_original_bits(&self) -> f64 {
        // u64 -> f64 is exact up to 2^53 parameters and rounds beyond that.
        self.n_parameters as f64 * f64::from(self.original_bits_per_param)
    }

    /// Total size of the compressed layer, in bits.
    #[must_use]
    pub fn total_compressed_bits(&self) -> f64 {
        // u64 -> f64 is exact up to 2^53 parameters and rounds beyond that.
        self.n_parameters as f64 * f64::from(self.effective_bits_per_param)
    }
}
92
93#[derive(Debug, Clone, Default)]
97pub struct ModelCompressionMetrics {
98 pub layers: Vec<CompressionMetrics>,
100 pub names: Vec<String>,
102}
103
104impl ModelCompressionMetrics {
105 #[must_use]
107 pub fn new() -> Self {
108 Self::default()
109 }
110
111 pub fn add_layer(&mut self, name: impl Into<String>, m: CompressionMetrics) {
113 self.names.push(name.into());
114 self.layers.push(m);
115 }
116
117 #[must_use]
119 pub fn total_parameters(&self) -> u64 {
120 self.layers.iter().map(|m| m.n_parameters).sum()
121 }
122
123 #[must_use]
125 pub fn model_compression_ratio(&self) -> f32 {
126 let orig: f64 = self.layers.iter().map(|m| m.total_original_bits()).sum();
127 let comp: f64 = self.layers.iter().map(|m| m.total_compressed_bits()).sum();
128 if comp <= 0.0 {
129 return f32::INFINITY;
130 }
131 (orig / comp) as f32
132 }
133
134 #[must_use]
136 pub fn mean_quantization_mse(&self) -> f32 {
137 let total_n: u64 = self.total_parameters();
138 if total_n == 0 {
139 return 0.0;
140 }
141 let weighted: f32 = self
142 .layers
143 .iter()
144 .map(|m| m.quantization_mse * m.n_parameters as f32)
145 .sum();
146 weighted / total_n as f32
147 }
148
149 #[must_use]
151 pub fn average_effective_bits(&self) -> f32 {
152 let total_n = self.total_parameters();
153 if total_n == 0 {
154 return 0.0;
155 }
156 let weighted: f32 = self
157 .layers
158 .iter()
159 .map(|m| m.effective_bits_per_param * m.n_parameters as f32)
160 .sum();
161 weighted / total_n as f32
162 }
163
164 pub fn add_quantized_layer(
172 &mut self,
173 name: impl Into<String>,
174 weights: &[f32],
175 quant_bits: u32,
176 quantization_mse: f32,
177 ) -> QuantResult<()> {
178 if weights.is_empty() {
179 return Err(crate::error::QuantError::EmptyInput(
180 "ModelCompressionMetrics::add_quantized_layer",
181 ));
182 }
183 let m =
184 CompressionMetrics::quantized_only(weights.len() as u64, quant_bits, quantization_mse);
185 self.add_layer(name, m);
186 Ok(())
187 }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;

    /// 32-bit baseline quantized to 8 bits: 32 / 8 = 4.
    #[test]
    fn int8_compression_ratio() {
        let ratio = CompressionMetrics::quantized_only(1024, 8, 0.0).compression_ratio();
        assert_abs_diff_eq!(ratio, 4.0, epsilon = 1e-5);
    }

    /// 32-bit baseline quantized to 4 bits: 32 / 4 = 8.
    #[test]
    fn int4_compression_ratio() {
        let ratio = CompressionMetrics::quantized_only(1024, 4, 0.0).compression_ratio();
        assert_abs_diff_eq!(ratio, 8.0, epsilon = 1e-5);
    }

    /// Half the weights pruned at 32 bits: 32 / (32 * 0.5) = 2.
    #[test]
    fn pruned_50_percent_fp32_ratio() {
        let pruned = CompressionMetrics::pruned_only(1024, 0.5);
        assert_abs_diff_eq!(pruned.compression_ratio(), 2.0, epsilon = 1e-5);
    }

    /// 4 bits with half the weights pruned: 4 * 0.5 = 2 effective bits, 32 / 2 = 16.
    #[test]
    fn quantized_and_pruned_metrics() {
        let combined = CompressionMetrics::quantized_and_pruned(1024, 4, 0.5, 0.001);
        assert_abs_diff_eq!(combined.effective_bits_per_param, 2.0, epsilon = 1e-5);
        assert_abs_diff_eq!(combined.compression_ratio(), 16.0, epsilon = 1e-5);
    }

    /// Original: 1000 * 32 = 32000 bits. Compressed: 100 * 8 + 900 * 4 = 4400 bits.
    /// Ratio is 32000 / 4400, roughly 7.27.
    #[test]
    fn model_compression_ratio_weighted() {
        let mut model = ModelCompressionMetrics::new();
        for (name, n_params, bits) in [("l0", 100, 8), ("l1", 900, 4)] {
            model.add_layer(name, CompressionMetrics::quantized_only(n_params, bits, 0.0));
        }
        let ratio = model.model_compression_ratio();
        assert!(ratio > 7.0 && ratio < 8.0, "ratio = {ratio}");
    }

    /// Two equally sized layers at 4 and 8 bits average to 6 bits.
    #[test]
    fn average_effective_bits() {
        let mut model = ModelCompressionMetrics::new();
        for (name, bits) in [("l0", 4), ("l1", 8)] {
            model.add_layer(name, CompressionMetrics::quantized_only(100, bits, 0.0));
        }
        let mean_bits = model.average_effective_bits();
        assert_abs_diff_eq!(mean_bits, 6.0, epsilon = 1e-5);
    }

    /// 1000 parameters: 32000 bits before, 8000 bits after 8-bit quantization.
    #[test]
    fn total_bits_correct() {
        let layer = CompressionMetrics::quantized_only(1000, 8, 0.0);
        let before = layer.total_original_bits();
        let after = layer.total_compressed_bits();
        assert_abs_diff_eq!(before, 32_000.0, epsilon = 1.0);
        assert_abs_diff_eq!(after, 8_000.0, epsilon = 1.0);
    }

    /// A zero effective bit-width must not divide by zero; the ratio is infinite.
    #[test]
    fn zero_effective_bits_gives_infinity() {
        let degenerate = CompressionMetrics {
            effective_bits_per_param: 0.0,
            ..Default::default()
        };
        let ratio = degenerate.compression_ratio();
        assert!(ratio.is_infinite());
    }
}