pub struct PostTrainingQuantizer<F: Float + Debug> { /* private fields */ }
Post-training quantization manager. Maintains per-layer calibration statistics and quantization parameters, and uses them to quantize tensors to low-precision integers and dequantize them back to floating point.
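A typical round trip, condensed from the repository example shown under new below: construct the quantizer, calibrate once per layer on sample activations, then quantize and dequantize tensors for that layer. This is a sketch mirroring that example (F = f64; Result is the crate's own alias as used there):

// Sketch of the calibrate -> quantize -> dequantize round trip,
// condensed from examples/neural_advanced_features.rs.
use ndarray::Array;

fn roundtrip() -> Result<()> {
    let mut quantizer = PostTrainingQuantizer::<f64>::new(
        QuantizationBits::Int8,
        QuantizationScheme::Symmetric,
        CalibrationMethod::MinMax,
    );
    // Sample activations stand in for real calibration data.
    let acts = Array::from_shape_fn((4, 8), |(i, j)| ((i + j) as f64).sin()).into_dyn();
    quantizer.calibrate("conv1".to_string(), &acts)?; // fit per-layer parameters
    let ints = quantizer.quantize_tensor("conv1", &acts)?; // ArrayD<i32>
    let _back = quantizer.dequantize_tensor("conv1", &ints)?; // ArrayD<f64>
    Ok(())
}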
Implementations
impl<F: Float + Debug + 'static> PostTrainingQuantizer<F>
pub fn new(
    bits: QuantizationBits,
    scheme: QuantizationScheme,
    calibration: CalibrationMethod,
) -> Self
Creates a new post-training quantizer with the given bit width, quantization scheme, and calibration method.
Examples found in repository
examples/neural_advanced_features.rs (lines 184-188)
178 fn demonstrate_model_compression() -> Result<()> {
179 println!("🗜️ Model Compression Demonstration");
180 println!("==================================\n");
181
182 // 1. Post-training quantization
183 println!("1. Post-training Quantization:");
184 let mut quantizer = PostTrainingQuantizer::<f64>::new(
185 QuantizationBits::Int8,
186 QuantizationScheme::Symmetric,
187 CalibrationMethod::MinMax,
188 );
189
190 // Simulate layer activations
191 let activations =
192 Array::from_shape_fn((100, 256), |(i, j)| ((i + j) as f64 / 10.0).sin() * 2.0).into_dyn();
193
194 quantizer.calibrate("conv1".to_string(), &activations)?;
195
196 let quantized = quantizer.quantize_tensor("conv1", &activations)?;
197 let _dequantized = quantizer.dequantize_tensor("conv1", &quantized)?;
198
199 println!(" Original shape: {:?}", activations.shape());
200 println!(" Quantized shape: {:?}", quantized.shape());
201 println!(
202 " Compression ratio: {:.1}x",
203 quantizer.get_compression_ratio()
204 );
205
206 // 2. Model pruning
207 println!("\n2. Model Pruning:");
208 let mut pruner = ModelPruner::<f64>::new(PruningMethod::MagnitudeBased { threshold: 0.1 });
209
210 // Simulate model weights
211 let weights =
212 Array::from_shape_fn((128, 256), |(i, j)| ((i * j) as f64 / 1000.0).tanh()).into_dyn();
213
214 let mask = pruner.generate_pruning_mask("fc1".to_string(), &weights)?;
215 println!(" Weights shape: {:?}", weights.shape());
216 println!(" Pruning mask shape: {:?}", mask.shape());
217 println!(
218 " Model sparsity: {:.1}%",
219 pruner.get_model_sparsity() * 100.0
220 );
221
222 let sparsity_stats = pruner.get_sparsity_statistics();
223 for (layer_name, stats) in sparsity_stats {
224 println!(
225 " {}: {:.1}% sparse ({}/{} params)",
226 layer_name,
227 stats.sparsity_ratio * 100.0,
228 stats.pruned_params,
229 stats.total_params
230 );
231 }
232
233 println!("✅ Model compression demonstration completed!\n");
234 Ok(())
235}
pub fn calibrate(
    &mut self,
    layer_name: String,
    activations: &ArrayD<F>,
) -> Result<()>
Calibrates quantization parameters for the named layer from sample activation data.
Examples found in repository
examples/neural_advanced_features.rs (line 194); see the listing under new above.
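For intuition: with a symmetric scheme, min-max calibration typically reduces to fitting a single scale from the largest magnitude seen during calibrate. The quantizer's fields are private, so the following is a minimal sketch of the standard technique, not the crate's code:

// Illustrative only: standard min-max symmetric calibration for Int8.
// Assumes the usual formulation; the crate's internals are not shown.
fn minmax_symmetric_scale(activations: &[f64]) -> f64 {
    let max_abs = activations.iter().fold(0.0_f64, |m, &x| m.max(x.abs()));
    max_abs / 127.0 // map the largest magnitude onto the int8 limit
}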
pub fn quantize_tensor(
    &self,
    layer_name: &str,
    tensor: &ArrayD<F>,
) -> Result<ArrayD<i32>>
Quantizes a tensor using the parameters calibrated for the named layer; the quantized values are returned widened to i32.
Examples found in repository
examples/neural_advanced_features.rs (line 196); see the listing under new above.
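Under a symmetric scheme, per-element quantization is conventionally round-to-nearest on x / scale followed by clamping to the target integer range. A minimal sketch under that assumption (not the crate's actual implementation):

// Illustrative only: symmetric round-to-nearest quantization of one
// value, clamped to the int8 range; the crate returns values as i32.
fn quantize_symmetric(x: f64, scale: f64) -> i32 {
    (x / scale).round().clamp(-127.0, 127.0) as i32
}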
pub fn dequantize_tensor(
    &self,
    layer_name: &str,
    quantized: &ArrayD<i32>,
) -> Result<ArrayD<F>>
Dequantizes a previously quantized tensor back to floating point using the named layer's calibrated parameters.
Examples found in repository
examples/neural_advanced_features.rs (line 197); see the listing under new above.
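Dequantization conventionally inverts the quantization map, which for a symmetric scheme is just q * scale; the round trip is lossy by at most half a quantization step. A sketch under that assumption:

// Illustrative only: symmetric dequantization; x -> q -> x' differs
// from x by at most scale / 2 when x was within the clamped range.
fn dequantize_symmetric(q: i32, scale: f64) -> f64 {
    q as f64 * scale
}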
pub fn get_compression_ratio(&self) -> f64
Returns the compression ratio achieved by quantization.
Examples found in repository
examples/neural_advanced_features.rs (line 203); see the listing under new above.
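As a worked example of what such a ratio can mean: quantizing f64 (64-bit) values to Int8 (8-bit) storage gives a nominal 64 / 8 = 8x. Whether this method reports exactly that depends on internals not shown here; the sketch below assumes a pure bit-width ratio with storage at the target precision rather than the i32 working type:

// Illustrative only: nominal bit-width compression ratio, assuming
// quantized values are stored at the target precision (Int8).
fn nominal_compression_ratio(source_bits: u32, target_bits: u32) -> f64 {
    source_bits as f64 / target_bits as f64 // f64 -> Int8: 64 / 8 = 8.0
}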
Auto Trait Implementations
impl<F> Freeze for PostTrainingQuantizer<F>
impl<F> RefUnwindSafe for PostTrainingQuantizer<F> where F: RefUnwindSafe
impl<F> Send for PostTrainingQuantizer<F> where F: Send
impl<F> Sync for PostTrainingQuantizer<F> where F: Sync
impl<F> Unpin for PostTrainingQuantizer<F> where F: Unpin
impl<F> UnwindSafe for PostTrainingQuantizer<F> where F: UnwindSafe
Blanket Implementations
impl<T> BorrowMut<T> for T where T: ?Sized
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true; otherwise converts self into a Right variant.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true; otherwise converts self into a Right variant.