Skip to main content

entrenar/quant/fake_quantize/
config.rs

//! Fake quantization configuration types.
2
/// Fake quantization configuration.
///
/// Bundles the bit width with the integer range `[qmin, qmax]` that quantized
/// values are expected to fall in. Instances are normally obtained through
/// [`FakeQuantConfig::symmetric`] or [`FakeQuantConfig::asymmetric`], which
/// derive the range from the bit width; the fields are public, so a range can
/// also be filled in by hand.
///
/// `PartialEq`/`Eq` are derived so that configurations can be compared
/// directly (for example with `assert_eq!`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct FakeQuantConfig {
    /// Number of bits for quantization (e.g., 4, 8)
    pub bits: usize,
    /// Whether quantization is symmetric (centered at 0)
    pub symmetric: bool,
    /// Quantization range: min value (inclusive lower bound of the integer grid)
    pub qmin: i32,
    /// Quantization range: max value (inclusive upper bound of the integer grid)
    pub qmax: i32,
}
15
16impl FakeQuantConfig {
17    /// Create symmetric fake quantization config
18    ///
19    /// # Arguments
20    /// * `bits` - Number of bits (4-bit: qmin=-7, qmax=7; 8-bit: qmin=-127, qmax=127)
21    pub fn symmetric(bits: usize) -> Self {
22        let qmax = (1 << (bits - 1)) - 1; // 2^(bits-1) - 1
23        let qmin = -qmax;
24        Self { bits, symmetric: true, qmin, qmax }
25    }
26
27    /// Create asymmetric fake quantization config
28    ///
29    /// # Arguments
30    /// * `bits` - Number of bits (4-bit: qmin=0, qmax=15; 8-bit: qmin=0, qmax=255)
31    pub fn asymmetric(bits: usize) -> Self {
32        let qmax = (1 << bits) - 1; // 2^bits - 1
33        Self { bits, symmetric: false, qmin: 0, qmax }
34    }
35
36    /// 4-bit symmetric quantization
37    pub fn q4_symmetric() -> Self {
38        Self::symmetric(4)
39    }
40
41    /// 8-bit symmetric quantization
42    pub fn q8_symmetric() -> Self {
43        Self::symmetric(8)
44    }
45}
46
47impl Default for FakeQuantConfig {
48    fn default() -> Self {
49        Self::q8_symmetric()
50    }
51}