// torsh_quantization/analysis/size.rs
use crate::QScheme;
use std::collections::HashMap;

/// Stateless analyzer that estimates model storage/memory sizes under
/// different quantization schemes. All functionality is exposed as
/// associated functions; no instance state is needed.
pub struct SizeAnalyzer;
9impl SizeAnalyzer {
10 pub fn calculate_model_size(num_parameters: usize, scheme: QScheme) -> f32 {
12 let bytes_per_param = match scheme {
13 QScheme::Binary => 0.125, QScheme::Ternary => 0.25, QScheme::Int4PerTensor | QScheme::Int4PerChannel => 0.5, QScheme::PerTensorAffine
17 | QScheme::PerChannelAffine
18 | QScheme::PerTensorSymmetric
19 | QScheme::PerChannelSymmetric => 1.0, QScheme::MixedPrecision => 2.0, QScheme::GroupWise => 1.0, };
23
24 num_parameters as f32 * bytes_per_param
25 }
26
27 pub fn calculate_size_reduction_ratio(num_parameters: usize, scheme: QScheme) -> f32 {
29 let fp32_size = num_parameters as f32 * 4.0; let quantized_size = Self::calculate_model_size(num_parameters, scheme);
31
32 if quantized_size == 0.0 {
33 return 1.0;
34 }
35
36 fp32_size / quantized_size
37 }
38
39 pub fn calculate_total_memory_footprint(
41 num_parameters: usize,
42 num_activations: usize,
43 param_scheme: QScheme,
44 activation_scheme: QScheme,
45 ) -> f32 {
46 let param_size = Self::calculate_model_size(num_parameters, param_scheme);
47 let activation_size = Self::calculate_model_size(num_activations, activation_scheme);
48
49 param_size + activation_size
50 }
51
52 pub fn estimate_compressed_size(base_size_mb: f32, scheme: QScheme) -> f32 {
54 let compression_ratio = match scheme {
55 QScheme::Binary => 0.7, QScheme::Ternary => 0.75, QScheme::Int4PerTensor | QScheme::Int4PerChannel => 0.8,
58 QScheme::PerTensorAffine | QScheme::PerChannelAffine => 0.85,
59 QScheme::PerTensorSymmetric | QScheme::PerChannelSymmetric => 0.82,
60 QScheme::MixedPrecision => 0.9, QScheme::GroupWise => 0.83,
62 };
63
64 base_size_mb * compression_ratio
65 }
66
67 pub fn size_reduction_factor(
69 original_scheme: QScheme,
70 quantized_scheme: QScheme,
71 num_parameters: usize,
72 ) -> f32 {
73 let original_size = Self::calculate_model_size(num_parameters, original_scheme);
74 let quantized_size = Self::calculate_model_size(num_parameters, quantized_scheme);
75
76 if quantized_size == 0.0 {
77 return 1.0;
78 }
79
80 original_size / quantized_size
81 }
82
83 pub fn analyze_size_impact(num_parameters: usize) -> HashMap<QScheme, f32> {
85 let mut size_analysis = HashMap::new();
86
87 let schemes = vec![
88 QScheme::Binary,
89 QScheme::Ternary,
90 QScheme::Int4PerTensor,
91 QScheme::PerTensorAffine,
92 QScheme::PerChannelAffine,
93 QScheme::MixedPrecision,
94 QScheme::GroupWise,
95 ];
96
97 for scheme in schemes {
98 let size_mb = Self::model_size_mb(num_parameters, scheme);
99 size_analysis.insert(scheme, size_mb);
100 }
101
102 size_analysis
103 }
104
105 pub fn model_size_mb(num_parameters: usize, scheme: QScheme) -> f32 {
107 Self::calculate_model_size(num_parameters, scheme) / (1024.0 * 1024.0)
108 }
109
110 pub fn generate_size_report(
112 num_parameters: usize,
113 schemes: &[QScheme],
114 ) -> HashMap<QScheme, SizeReport> {
115 let mut report = HashMap::new();
116 let fp32_size_mb = (num_parameters as f32 * 4.0) / (1024.0 * 1024.0);
117
118 for &scheme in schemes {
119 let quantized_size_mb = Self::model_size_mb(num_parameters, scheme);
120 let reduction_ratio = Self::calculate_size_reduction_ratio(num_parameters, scheme);
121 let compressed_size_mb = Self::estimate_compressed_size(quantized_size_mb, scheme);
122
123 report.insert(
124 scheme,
125 SizeReport {
126 original_size_mb: fp32_size_mb,
127 quantized_size_mb,
128 compressed_size_mb,
129 reduction_ratio,
130 space_saved_mb: fp32_size_mb - quantized_size_mb,
131 compression_efficiency: (fp32_size_mb - compressed_size_mb) / fp32_size_mb,
132 },
133 );
134 }
135
136 report
137 }
138}
139
/// Per-scheme size comparison against the FP32 baseline, as produced by
/// `SizeAnalyzer::generate_size_report`.
#[derive(Debug, Clone)]
pub struct SizeReport {
    /// FP32 baseline size in MB (4 bytes per parameter).
    pub original_size_mb: f32,
    /// Size in MB after quantization (before any compression).
    pub quantized_size_mb: f32,
    /// Estimated size in MB after quantization plus lossless compression.
    pub compressed_size_mb: f32,
    /// original / quantized size ratio (1.0 means no reduction).
    pub reduction_ratio: f32,
    /// original_size_mb - quantized_size_mb; MB saved by quantization alone.
    pub space_saved_mb: f32,
    /// Fraction of the FP32 size removed by quantization + compression.
    pub compression_efficiency: f32,
}
156
157impl SizeReport {
158 pub fn meets_reduction_threshold(&self, min_ratio: f32) -> bool {
160 self.reduction_ratio >= min_ratio
161 }
162
163 pub fn space_savings_percentage(&self) -> f32 {
165 (self.space_saved_mb / self.original_size_mb) * 100.0
166 }
167}