Skip to main content

entrenar/quant/calibration/
types.rs

1//! Type definitions for PTQ calibration
2//!
3//! Contains the core types used throughout the calibration module.
4
/// Strategy used to pick the quantization range during PTQ calibration.
///
/// [`CalibrationMethod::MinMax`] is the [`Default`] variant.
#[derive(Debug, Clone, Default, PartialEq)]
pub enum CalibrationMethod {
    /// Derive the scale directly from the actual minimum and maximum values.
    #[default]
    MinMax,
    /// Derive the scale from percentile values instead of the raw extremes,
    /// which is more robust to outliers.
    Percentile {
        /// Lower percentile, expressed in percent (e.g. `0.01` means 0.01%).
        lower: f32,
        /// Upper percentile, expressed in percent (e.g. `99.99` means 99.99%).
        upper: f32,
    },
    /// Track a smoothed minimum/maximum across multiple batches.
    MovingAverage {
        // NOTE(review): the two documented endpoints read inconsistently
        // ("no smoothing" vs. "fully use new value") -- confirm the intended
        // meaning against the code that applies the update.
        /// Smoothing factor (`0` = no smoothing, `1` = fully use new value).
        momentum: f32,
    },
}
24
25/// Calibration result containing scale and zero_point
26#[derive(Clone, Debug)]
27pub struct CalibrationResult {
28    /// Scale factor for quantization
29    pub scale: f32,
30    /// Zero point for asymmetric quantization
31    pub zero_point: i32,
32    /// Observed minimum value
33    pub observed_min: f32,
34    /// Observed maximum value
35    pub observed_max: f32,
36    /// Method used for calibration
37    pub method: CalibrationMethod,
38}