axonml_quant/
calibration.rs

1//! Calibration for Quantization
2//!
3//! Calibration methods for determining optimal quantization parameters.
4//!
5//! @version 0.1.0
6//! @author AutomataNexus Development Team
7
8use axonml_tensor::Tensor;
9
10use crate::error::{QuantError, QuantResult};
11use crate::types::QuantType;
12
13// =============================================================================
14// Calibration Data
15// =============================================================================
16
/// Summary statistics gathered from sample tensors for quantization calibration.
///
/// The public fields describe the value distribution seen so far. Note that the
/// calibration entry points in this module may overwrite `min` / `max` with a
/// clipped range (percentile or mean/std based) after the data is collected.
#[derive(Clone, Debug)]
pub struct CalibrationData {
    /// Smallest value observed (or the lower end of the clipped range).
    pub min: f32,
    /// Largest value observed (or the upper end of the clipped range).
    pub max: f32,
    /// Arithmetic mean of the observed values.
    pub mean: f32,
    /// Standard deviation of the observed values.
    pub std_dev: f32,
    /// How many individual values have been observed.
    pub num_samples: usize,
    /// Per-bin sample counts, used for percentile look-ups.
    histogram: Vec<usize>,
    /// Boundaries of the histogram bins (one more entry than `histogram`).
    bin_edges: Vec<f32>,
}
35
36impl CalibrationData {
37    /// Creates new calibration data from initial tensor.
38    pub fn new(tensor: &Tensor<f32>, num_bins: usize) -> Self {
39        let data = tensor.to_vec();
40        let min = data.iter().fold(f32::INFINITY, |a, &b| a.min(b));
41        let max = data.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
42        let mean = data.iter().sum::<f32>() / data.len() as f32;
43
44        let variance = data.iter().map(|x| (x - mean).powi(2)).sum::<f32>() / data.len() as f32;
45        let std_dev = variance.sqrt();
46
47        // Initialize histogram
48        let bin_width = (max - min) / num_bins as f32;
49        let mut histogram = vec![0usize; num_bins];
50        let bin_edges: Vec<f32> = (0..=num_bins).map(|i| min + i as f32 * bin_width).collect();
51
52        for &val in &data {
53            let bin = ((val - min) / bin_width) as usize;
54            let bin = bin.min(num_bins - 1);
55            histogram[bin] += 1;
56        }
57
58        Self {
59            min,
60            max,
61            mean,
62            std_dev,
63            num_samples: data.len(),
64            histogram,
65            bin_edges,
66        }
67    }
68
69    /// Updates calibration data with more samples.
70    pub fn update(&mut self, tensor: &Tensor<f32>) {
71        let data = tensor.to_vec();
72        let new_min = data.iter().fold(f32::INFINITY, |a, &b| a.min(b));
73        let new_max = data.iter().fold(f32::NEG_INFINITY, |a, &b| a.max(b));
74
75        // Update min/max
76        self.min = self.min.min(new_min);
77        self.max = self.max.max(new_max);
78
79        // Update running mean
80        let old_count = self.num_samples as f32;
81        let new_count = data.len() as f32;
82        let new_mean = data.iter().sum::<f32>() / new_count;
83        self.mean = (self.mean * old_count + new_mean * new_count) / (old_count + new_count);
84
85        // Update histogram (rebuild with new range)
86        self.num_samples += data.len();
87        // Note: For proper histogram update, we'd need to keep all data or use streaming algorithms
88    }
89
90    /// Returns the dynamic range.
91    pub fn dynamic_range(&self) -> f32 {
92        self.max - self.min
93    }
94
95    /// Computes the optimal scale for symmetric quantization.
96    pub fn symmetric_scale(&self, quant_type: QuantType) -> f32 {
97        let max_abs = self.min.abs().max(self.max.abs());
98        let max_int = match quant_type {
99            QuantType::Q8_0 => 127.0,
100            QuantType::Q4_0 | QuantType::Q4_1 => 7.0,
101            QuantType::Q5_0 | QuantType::Q5_1 => 15.0,
102            QuantType::F16 | QuantType::F32 => 1.0,
103        };
104        max_abs / max_int
105    }
106
107    /// Computes the optimal scale for asymmetric quantization.
108    pub fn asymmetric_scale(&self, quant_type: QuantType) -> (f32, f32) {
109        let max_int = match quant_type {
110            QuantType::Q8_0 => 255.0,
111            QuantType::Q4_0 | QuantType::Q4_1 => 15.0,
112            QuantType::Q5_0 | QuantType::Q5_1 => 31.0,
113            QuantType::F16 | QuantType::F32 => 1.0,
114        };
115
116        let scale = (self.max - self.min) / max_int;
117        let zero_point = -self.min / scale;
118
119        (scale, zero_point)
120    }
121
122    /// Returns the percentile value from the histogram.
123    pub fn percentile(&self, p: f32) -> f32 {
124        if p <= 0.0 {
125            return self.min;
126        }
127        if p >= 100.0 {
128            return self.max;
129        }
130
131        let target = (p / 100.0 * self.num_samples as f32) as usize;
132        let mut cumsum = 0usize;
133
134        for (i, &count) in self.histogram.iter().enumerate() {
135            cumsum += count;
136            if cumsum >= target {
137                return self.bin_edges[i];
138            }
139        }
140
141        self.max
142    }
143}
144
145// =============================================================================
146// Calibration Methods
147// =============================================================================
148
/// Strategy used to choose the quantization range from calibration data.
///
/// The integer payloads are fixed-point values scaled by 10.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CalibrationMethod {
    /// Take the observed minimum and maximum as the range.
    MinMax,
    /// Clip to a percentile to reduce the impact of outliers.
    ///
    /// The payload is the percentile times 10 (e.g. `999` means 99.9%).
    Percentile(u32),
    /// Entropy-based calibration (KL divergence).
    Entropy,
    /// Use `mean ± k * std_dev` as the range.
    ///
    /// The payload is `k` times 10 (e.g. `30` means 3.0 sigma).
    MeanStd(u32),
}
161
162/// Calibrates a tensor for quantization.
163///
164/// # Arguments
165/// * `tensor` - The tensor to calibrate
166/// * `method` - The calibration method to use
167///
168/// # Returns
169/// Calibration data for the tensor
170pub fn calibrate(tensor: &Tensor<f32>, method: CalibrationMethod) -> QuantResult<CalibrationData> {
171    let mut data = CalibrationData::new(tensor, 2048);
172
173    match method {
174        CalibrationMethod::MinMax => {
175            // Already computed in new()
176        }
177        CalibrationMethod::Percentile(p) => {
178            let percentile = p as f32 / 10.0;
179            let lower = data.percentile(100.0 - percentile);
180            let upper = data.percentile(percentile);
181            data.min = lower;
182            data.max = upper;
183        }
184        CalibrationMethod::MeanStd(k) => {
185            let k_factor = k as f32 / 10.0;
186            data.min = data.mean - k_factor * data.std_dev;
187            data.max = data.mean + k_factor * data.std_dev;
188        }
189        CalibrationMethod::Entropy => {
190            // Simplified entropy calibration - use 99.99th percentile
191            data.min = data.percentile(0.01);
192            data.max = data.percentile(99.99);
193        }
194    }
195
196    Ok(data)
197}
198
199/// Calibrates multiple tensors and returns combined calibration data.
200pub fn calibrate_batch(
201    tensors: &[&Tensor<f32>],
202    method: CalibrationMethod,
203) -> QuantResult<CalibrationData> {
204    if tensors.is_empty() {
205        return Err(QuantError::CalibrationError(
206            "No tensors provided".to_string(),
207        ));
208    }
209
210    let mut combined = CalibrationData::new(tensors[0], 2048);
211
212    for tensor in tensors.iter().skip(1) {
213        combined.update(tensor);
214    }
215
216    // Apply method-specific adjustments
217    match method {
218        CalibrationMethod::Percentile(p) => {
219            let percentile = p as f32 / 10.0;
220            combined.min = combined.percentile(100.0 - percentile);
221            combined.max = combined.percentile(percentile);
222        }
223        CalibrationMethod::MeanStd(k) => {
224            let k_factor = k as f32 / 10.0;
225            combined.min = combined.mean - k_factor * combined.std_dev;
226            combined.max = combined.mean + k_factor * combined.std_dev;
227        }
228        _ => {}
229    }
230
231    Ok(combined)
232}
233
234// =============================================================================
235// Tests
236// =============================================================================
237
#[cfg(test)]
mod tests {
    use super::*;

    /// Basic statistics are computed from the seed tensor.
    #[test]
    fn test_calibration_data() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let tensor = Tensor::from_vec(data, &[5]).unwrap();

        let calib = CalibrationData::new(&tensor, 10);

        assert_eq!(calib.min, 1.0);
        assert_eq!(calib.max, 5.0);
        assert_eq!(calib.mean, 3.0);
        assert_eq!(calib.num_samples, 5);
    }

    /// Symmetric scale is `max_abs / max_int`.
    #[test]
    fn test_symmetric_scale() {
        let data = vec![-4.0, -2.0, 0.0, 2.0, 4.0];
        let tensor = Tensor::from_vec(data, &[5]).unwrap();

        let calib = CalibrationData::new(&tensor, 10);
        let scale = calib.symmetric_scale(QuantType::Q8_0);

        // max_abs = 4.0, max_int = 127, scale = 4/127
        assert!((scale - 4.0 / 127.0).abs() < 0.001);
    }

    /// Asymmetric scale is `(max - min) / max_int` with zero point `-min / scale`.
    #[test]
    fn test_asymmetric_scale() {
        let data = vec![-1.0, 1.0];
        let tensor = Tensor::from_vec(data, &[2]).unwrap();

        let calib = CalibrationData::new(&tensor, 10);
        let (scale, zero_point) = calib.asymmetric_scale(QuantType::Q8_0);

        // range = 2.0, max_int = 255, scale = 2/255, zero_point = 1 / scale = 127.5
        assert!((scale - 2.0 / 255.0).abs() < 0.001);
        assert!((zero_point - 127.5).abs() < 0.01);
    }

    /// Percentiles at or beyond the ends return min/max; others stay in range.
    #[test]
    fn test_percentile_bounds() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let tensor = Tensor::from_vec(data, &[5]).unwrap();

        let calib = CalibrationData::new(&tensor, 10);

        assert_eq!(calib.percentile(0.0), 1.0);
        assert_eq!(calib.percentile(100.0), 5.0);

        let median = calib.percentile(50.0);
        assert!(median >= 1.0);
        assert!(median <= 5.0);
    }

    #[test]
    fn test_calibration_methods() {
        let data: Vec<f32> = (0..1000).map(|x| x as f32 / 100.0).collect();
        let tensor = Tensor::from_vec(data, &[1000]).unwrap();

        // Min/Max calibration
        let minmax = calibrate(&tensor, CalibrationMethod::MinMax).unwrap();
        assert!((minmax.min - 0.0).abs() < 0.01);
        assert!((minmax.max - 9.99).abs() < 0.01);

        // Percentile calibration (99.9%)
        let percentile = calibrate(&tensor, CalibrationMethod::Percentile(999)).unwrap();
        assert!(percentile.min >= 0.0);
        assert!(percentile.max <= 9.99);
    }

    /// `MeanStd(10)` selects `mean ± 1.0 * std_dev`.
    #[test]
    fn test_mean_std_method() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let tensor = Tensor::from_vec(data, &[5]).unwrap();

        let calib = calibrate(&tensor, CalibrationMethod::MeanStd(10)).unwrap();

        // mean = 3, population variance = 2, std_dev = sqrt(2)
        let std_dev = 2.0f32.sqrt();
        assert!((calib.min - (3.0 - std_dev)).abs() < 0.001);
        assert!((calib.max - (3.0 + std_dev)).abs() < 0.001);
    }

    /// Batch calibration combines range, mean and sample count of all tensors.
    #[test]
    fn test_calibrate_batch_minmax() {
        let first = Tensor::from_vec(vec![1.0, 2.0, 3.0], &[3]).unwrap();
        let second = Tensor::from_vec(vec![-4.0, 10.0], &[2]).unwrap();

        let calib = calibrate_batch(&[&first, &second], CalibrationMethod::MinMax).unwrap();

        assert_eq!(calib.min, -4.0);
        assert_eq!(calib.max, 10.0);
        assert_eq!(calib.num_samples, 5);
        // (1 + 2 + 3 - 4 + 10) / 5 = 2.4
        assert!((calib.mean - 2.4).abs() < 0.001);
    }

    /// An empty tensor list is rejected rather than silently accepted.
    #[test]
    fn test_calibrate_batch_empty() {
        let none: [&Tensor<f32>; 0] = [];
        assert!(calibrate_batch(&none, CalibrationMethod::MinMax).is_err());
    }

    #[test]
    fn test_dynamic_range() {
        let data = vec![-5.0, 10.0];
        let tensor = Tensor::from_vec(data, &[2]).unwrap();

        let calib = CalibrationData::new(&tensor, 10);
        assert_eq!(calib.dynamic_range(), 15.0);
    }
}
axonml_quant/calibration.rs

axonml_quant/
calibration.rs