entrenar/quant/calibration/types.rs
1//! Type definitions for PTQ calibration
2//!
3//! Contains the core types used throughout the calibration module.
4
/// Strategy used to pick the value range during PTQ calibration.
///
/// [`CalibrationMethod::MinMax`] is the variant returned by `Default`.
#[derive(Debug, Clone, Default, PartialEq)]
pub enum CalibrationMethod {
    /// Min-max: the scale is taken from the actual minimum and maximum values.
    #[default]
    MinMax,
    /// Percentile: the scale is taken from percentile values instead of the
    /// raw extremes, which is more robust to outliers.
    Percentile {
        /// Lower percentile, written in percent (e.g. `0.01` stands for 0.01%).
        lower: f32,
        /// Upper percentile, written in percent (e.g. `99.99` stands for 99.99%).
        upper: f32,
    },
    /// Moving average: min/max smoothed over multiple batches.
    MovingAverage {
        /// Smoothing factor (0 = no smoothing, 1 = fully use new value).
        // NOTE(review): the two endpoints above read as contradictory for a
        // single blend factor; the update rule lives outside this file, so
        // confirm the intended meaning against the calibrator before relying on it.
        momentum: f32,
    },
}
24
25/// Calibration result containing scale and zero_point
26#[derive(Clone, Debug)]
27pub struct CalibrationResult {
28 /// Scale factor for quantization
29 pub scale: f32,
30 /// Zero point for asymmetric quantization
31 pub zero_point: i32,
32 /// Observed minimum value
33 pub observed_min: f32,
34 /// Observed maximum value
35 pub observed_max: f32,
36 /// Method used for calibration
37 pub method: CalibrationMethod,
38}