pub struct PTQEngine { /* private fields */ }
Post-training quantization engine.
Implementations§
impl PTQEngine
pub fn new(num_bits: u32, signed: bool) -> Self
Create a new PTQ engine.
Examples found in repository?
examples/full_inference_pipeline.rs (line 122)
113fn quantize_model() {
114 println!("3. Model Quantization");
115
116 let calibration_data = vec![
117 Tensor::random(&[1, 224, 224, 3]).data,
118 Tensor::random(&[1, 224, 224, 3]).data,
119 Tensor::random(&[1, 224, 224, 3]).data,
120 ];
121
122 let ptq = PTQEngine::new(8, false);
123 match ptq.calibrate(&calibration_data) {
124 Ok(converter) => {
125 println!(" ✓ Calibration complete");
126
127 let sample = &calibration_data[0];
128 let stats = QuantStats::from_tensor(sample);
129 println!(" Calibration Stats:");
130 println!(" - Min: {:.6}", stats.min_val);
131 println!(" - Max: {:.6}", stats.max_val);
132 println!(" - Mean: {:.6}", stats.mean_val);
133 println!(" - Std: {:.6}", stats.std_val);
134
135 match converter.quantize_tensor(sample) {
136 Ok(quantized) => {
137 println!(" ✓ Quantization complete: {} values", quantized.len());
138 println!(" Compression: {:.2}x\n",
139 (sample.len() * 4) as f64 / quantized.len() as f64
140 );
141 }
142 Err(e) => println!(" ✗ Quantization failed: {}\n", e),
143 }
144 }
145 Err(e) => println!(" ✗ Calibration failed: {}\n", e),
146 }
147}
pub fn calibrate(&self, sample_data: &[ArrayD<f32>]) -> Result<QuantConverter>
Calibrate on sample data.
Examples found in repository?
examples/full_inference_pipeline.rs (line 123)
113fn quantize_model() {
114 println!("3. Model Quantization");
115
116 let calibration_data = vec![
117 Tensor::random(&[1, 224, 224, 3]).data,
118 Tensor::random(&[1, 224, 224, 3]).data,
119 Tensor::random(&[1, 224, 224, 3]).data,
120 ];
121
122 let ptq = PTQEngine::new(8, false);
123 match ptq.calibrate(&calibration_data) {
124 Ok(converter) => {
125 println!(" ✓ Calibration complete");
126
127 let sample = &calibration_data[0];
128 let stats = QuantStats::from_tensor(sample);
129 println!(" Calibration Stats:");
130 println!(" - Min: {:.6}", stats.min_val);
131 println!(" - Max: {:.6}", stats.max_val);
132 println!(" - Mean: {:.6}", stats.mean_val);
133 println!(" - Std: {:.6}", stats.std_val);
134
135 match converter.quantize_tensor(sample) {
136 Ok(quantized) => {
137 println!(" ✓ Quantization complete: {} values", quantized.len());
138 println!(" Compression: {:.2}x\n",
139 (sample.len() * 4) as f64 / quantized.len() as f64
140 );
141 }
142 Err(e) => println!(" ✗ Quantization failed: {}\n", e),
143 }
144 }
145 Err(e) => println!(" ✗ Calibration failed: {}\n", e),
146 }
147}
Auto Trait Implementations§
impl Freeze for PTQEngine
impl RefUnwindSafe for PTQEngine
impl Send for PTQEngine
impl Sync for PTQEngine
impl Unpin for PTQEngine
impl UnwindSafe for PTQEngine
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.