use crate::QScheme;
use std::collections::HashMap;
/// Stateless collection of helpers that estimate how much storage a model
/// needs under the different quantization schemes.
///
/// Every function is an associated function; no instance state is involved.
pub struct SizeAnalyzer;

impl SizeAnalyzer {
    /// Bytes occupied by one unquantized 32-bit float parameter.
    const FP32_BYTES_PER_PARAM: f32 = 4.0;

    /// Divisor that converts a byte count into megabytes (1024 * 1024).
    const BYTES_PER_MB: f32 = 1024.0 * 1024.0;

    /// Schemes covered by [`SizeAnalyzer::analyze_size_impact`].
    ///
    /// `Int4PerChannel` and the two symmetric schemes are not listed; they use
    /// the same bytes-per-parameter value as `Int4PerTensor` and the affine
    /// schemes respectively (see [`SizeAnalyzer::calculate_model_size`]).
    const ANALYZED_SCHEMES: [QScheme; 7] = [
        QScheme::Binary,
        QScheme::Ternary,
        QScheme::Int4PerTensor,
        QScheme::PerTensorAffine,
        QScheme::PerChannelAffine,
        QScheme::MixedPrecision,
        QScheme::GroupWise,
    ];

    /// Returns the size in bytes of `num_parameters` values stored with `scheme`.
    ///
    /// The result is `num_parameters` multiplied by a fixed bytes-per-parameter
    /// factor chosen per scheme. The match is intentionally exhaustive so that
    /// adding a `QScheme` variant forces this table to be updated.
    pub fn calculate_model_size(num_parameters: usize, scheme: QScheme) -> f32 {
        let bytes_per_param: f32 = match scheme {
            // 1 bit per parameter.
            QScheme::Binary => 0.125,
            // 2 bits per parameter.
            QScheme::Ternary => 0.25,
            // 4 bits per parameter.
            QScheme::Int4PerTensor | QScheme::Int4PerChannel => 0.5,
            // 8 bits per parameter.
            QScheme::PerTensorAffine
            | QScheme::PerChannelAffine
            | QScheme::PerTensorSymmetric
            | QScheme::PerChannelSymmetric => 1.0,
            // 16 bits per parameter.
            QScheme::MixedPrecision => 2.0,
            // 8 bits per parameter.
            QScheme::GroupWise => 1.0,
        };
        num_parameters as f32 * bytes_per_param
    }

    /// Returns how many times smaller the quantized model is than the same
    /// model stored at 4 bytes per parameter.
    ///
    /// Returns `1.0` when the quantized size is zero (for example when
    /// `num_parameters` is `0`) so the division never produces NaN or infinity.
    pub fn calculate_size_reduction_ratio(num_parameters: usize, scheme: QScheme) -> f32 {
        let fp32_size = num_parameters as f32 * Self::FP32_BYTES_PER_PARAM;
        let quantized_size = Self::calculate_model_size(num_parameters, scheme);
        if quantized_size == 0.0 {
            return 1.0;
        }
        fp32_size / quantized_size
    }

    /// Returns the combined size in bytes of the parameters and the
    /// activations, each sized with its own quantization scheme.
    pub fn calculate_total_memory_footprint(
        num_parameters: usize,
        num_activations: usize,
        param_scheme: QScheme,
        activation_scheme: QScheme,
    ) -> f32 {
        let param_size = Self::calculate_model_size(num_parameters, param_scheme);
        let activation_size = Self::calculate_model_size(num_activations, activation_scheme);
        param_size + activation_size
    }

    /// Scales `base_size_mb` by a fixed, scheme-specific compression factor.
    ///
    /// The factors are hard-coded estimates; the result is in the same unit as
    /// `base_size_mb`.
    pub fn estimate_compressed_size(base_size_mb: f32, scheme: QScheme) -> f32 {
        let compression_ratio: f32 = match scheme {
            QScheme::Binary => 0.7,
            QScheme::Ternary => 0.75,
            QScheme::Int4PerTensor | QScheme::Int4PerChannel => 0.8,
            QScheme::PerTensorAffine | QScheme::PerChannelAffine => 0.85,
            QScheme::PerTensorSymmetric | QScheme::PerChannelSymmetric => 0.82,
            QScheme::MixedPrecision => 0.9,
            QScheme::GroupWise => 0.83,
        };
        base_size_mb * compression_ratio
    }

    /// Returns the size of the model under `original_scheme` divided by its
    /// size under `quantized_scheme`.
    ///
    /// Returns `1.0` when the size under `quantized_scheme` is zero.
    pub fn size_reduction_factor(
        original_scheme: QScheme,
        quantized_scheme: QScheme,
        num_parameters: usize,
    ) -> f32 {
        let original_size = Self::calculate_model_size(num_parameters, original_scheme);
        let quantized_size = Self::calculate_model_size(num_parameters, quantized_scheme);
        if quantized_size == 0.0 {
            return 1.0;
        }
        original_size / quantized_size
    }

    /// Maps each scheme in a fixed list of seven schemes to the model size in
    /// megabytes for `num_parameters` parameters.
    pub fn analyze_size_impact(num_parameters: usize) -> HashMap<QScheme, f32> {
        Self::ANALYZED_SCHEMES
            .iter()
            .map(|&scheme| (scheme, Self::model_size_mb(num_parameters, scheme)))
            .collect()
    }

    /// Returns [`SizeAnalyzer::calculate_model_size`] converted from bytes to
    /// megabytes (divided by 1024 * 1024).
    pub fn model_size_mb(num_parameters: usize, scheme: QScheme) -> f32 {
        Self::calculate_model_size(num_parameters, scheme) / Self::BYTES_PER_MB
    }

    /// Builds a [`SizeReport`] for every scheme in `schemes`.
    ///
    /// Each report compares the model at 4 bytes per parameter against its
    /// quantized size and its estimated compressed size. A scheme that appears
    /// more than once in `schemes` yields a single map entry.
    ///
    /// For an empty model (`num_parameters == 0`) the baseline size is zero, so
    /// `compression_efficiency` is reported as `0.0` rather than the NaN that
    /// `0.0 / 0.0` would produce.
    pub fn generate_size_report(
        num_parameters: usize,
        schemes: &[QScheme],
    ) -> HashMap<QScheme, SizeReport> {
        let fp32_size_mb =
            (num_parameters as f32 * Self::FP32_BYTES_PER_PARAM) / Self::BYTES_PER_MB;

        schemes
            .iter()
            .map(|&scheme| {
                let quantized_size_mb = Self::model_size_mb(num_parameters, scheme);
                let reduction_ratio =
                    Self::calculate_size_reduction_ratio(num_parameters, scheme);
                let compressed_size_mb =
                    Self::estimate_compressed_size(quantized_size_mb, scheme);

                // Guard the zero baseline: nothing can be saved on an empty model.
                let compression_efficiency = if fp32_size_mb > 0.0 {
                    (fp32_size_mb - compressed_size_mb) / fp32_size_mb
                } else {
                    0.0
                };

                (
                    scheme,
                    SizeReport {
                        original_size_mb: fp32_size_mb,
                        quantized_size_mb,
                        compressed_size_mb,
                        reduction_ratio,
                        space_saved_mb: fp32_size_mb - quantized_size_mb,
                        compression_efficiency,
                    },
                )
            })
            .collect()
    }
}
/// Size figures for one quantization scheme compared against a baseline.
///
/// Fields suffixed `_mb` are sizes in megabytes.
#[derive(Debug, Clone)]
pub struct SizeReport {
    /// Size of the baseline (unquantized) model.
    pub original_size_mb: f32,
    /// Size of the model after quantization.
    pub quantized_size_mb: f32,
    /// Estimated size of the quantized model after compression.
    pub compressed_size_mb: f32,
    /// Baseline size divided by quantized size.
    pub reduction_ratio: f32,
    /// Baseline size minus quantized size.
    pub space_saved_mb: f32,
    /// Fraction of the baseline size saved once compression is applied.
    pub compression_efficiency: f32,
}

impl SizeReport {
    /// Returns `true` when `reduction_ratio` is at least `min_ratio`.
    pub fn meets_reduction_threshold(&self, min_ratio: f32) -> bool {
        self.reduction_ratio >= min_ratio
    }

    /// Returns `space_saved_mb` as a percentage of `original_size_mb`.
    ///
    /// Returns `0.0` when `original_size_mb` is zero, since dividing by a zero
    /// baseline would otherwise yield NaN or infinity.
    pub fn space_savings_percentage(&self) -> f32 {
        if self.original_size_mb == 0.0 {
            return 0.0;
        }
        (self.space_saved_mb / self.original_size_mb) * 100.0
    }
}