ai_hwaccel/
quantization.rs1use std::fmt;
4
5use serde::{Deserialize, Serialize};
6
/// Precision level at which a parameter is stored.
///
/// Every variant maps to a fixed bit width (see `bits_per_param`) and to a
/// short uppercase label produced by the `Display` implementation.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[non_exhaustive]
pub enum QuantizationLevel {
    /// No quantization: 32 bits per parameter, displayed as `FP32`.
    None,
    /// 16 bits per parameter, displayed as `FP16`.
    ///
    /// This is the variant `TryFrom<u32>` yields for a width of 16.
    Float16,
    /// 16 bits per parameter, displayed as `BF16`.
    ///
    /// Shares its bit width with `Float16`, so it is never produced by
    /// `TryFrom<u32>`.
    BFloat16,
    /// 8 bits per parameter, displayed as `INT8`.
    Int8,
    /// 4 bits per parameter, displayed as `INT4`.
    Int4,
}
32
33impl QuantizationLevel {
34 #[must_use]
36 #[inline]
37 pub fn bits_per_param(&self) -> u32 {
38 match self {
39 Self::None => 32,
40 Self::Float16 | Self::BFloat16 => 16,
41 Self::Int8 => 8,
42 Self::Int4 => 4,
43 }
44 }
45
46 #[must_use]
48 #[inline]
49 pub fn memory_reduction_factor(&self) -> f64 {
50 32.0 / self.bits_per_param() as f64
51 }
52}
53
54impl TryFrom<u32> for QuantizationLevel {
55 type Error = u32;
56
57 fn try_from(bits: u32) -> Result<Self, u32> {
64 match bits {
65 32 => Ok(Self::None),
66 16 => Ok(Self::Float16),
67 8 => Ok(Self::Int8),
68 4 => Ok(Self::Int4),
69 other => Err(other),
70 }
71 }
72}
73
74impl fmt::Display for QuantizationLevel {
75 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
76 match self {
77 Self::None => write!(f, "FP32"),
78 Self::Float16 => write!(f, "FP16"),
79 Self::BFloat16 => write!(f, "BF16"),
80 Self::Int8 => write!(f, "INT8"),
81 Self::Int4 => write!(f, "INT4"),
82 }
83 }
84}