// Source path: trueno/brick/tracing/quant_type.rs

/// Identifies the numeric format in which elements are stored.
///
/// Each variant has a nominal per-element bit cost, reported by
/// `QuantType::bits_per_element`. The default is [`QuantType::F32`].
///
/// NOTE(review): the `Q*_0` / `Q*_K` names appear to follow the GGML
/// quantization naming scheme — confirm against the producer of these tags.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
// Variant names such as `Q8_0` and `Q4_K` intentionally contain underscores.
#[allow(non_camel_case_types)]
pub enum QuantType {
    /// 32 bits per element; the default and the baseline for compression ratios.
    #[default]
    F32,
    /// 16 bits per element.
    F16,
    /// 16 bits per element (presumably bfloat16 — confirm).
    Bf16,
    /// Nominally 8.0 bits per element.
    Q8_0,
    /// Nominally 4.5 bits per element.
    Q4_0,
    /// Nominally 4.5 bits per element.
    Q4_K,
    /// Nominally 5.5 bits per element.
    Q5_K,
    /// Nominally 6.5 bits per element.
    Q6_K,
    /// Nominally 2.5 bits per element.
    Q2_K,
    /// Nominally 3.5 bits per element.
    Q3_K,
}
33
34impl QuantType {
35 pub fn bits_per_element(self) -> f32 {
37 match self {
38 Self::F32 => 32.0,
39 Self::F16 | Self::Bf16 => 16.0,
40 Self::Q8_0 => 8.0,
41 Self::Q6_K => 6.5,
42 Self::Q5_K => 5.5,
43 Self::Q4_0 | Self::Q4_K => 4.5,
44 Self::Q3_K => 3.5,
45 Self::Q2_K => 2.5,
46 }
47 }
48
49 pub fn compression_ratio(self) -> f32 {
51 32.0 / self.bits_per_element()
52 }
53}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant reports the bit width listed in `bits_per_element`.
    ///
    /// Exact float equality is safe here: all expected values are literals
    /// that are exactly representable in `f32`.
    #[test]
    fn test_quant_type_bits() {
        assert_eq!(QuantType::F32.bits_per_element(), 32.0);
        assert_eq!(QuantType::F16.bits_per_element(), 16.0);
        assert_eq!(QuantType::Bf16.bits_per_element(), 16.0);
        assert_eq!(QuantType::Q8_0.bits_per_element(), 8.0);
        assert_eq!(QuantType::Q6_K.bits_per_element(), 6.5);
        assert_eq!(QuantType::Q5_K.bits_per_element(), 5.5);
        assert_eq!(QuantType::Q4_0.bits_per_element(), 4.5);
        assert_eq!(QuantType::Q4_K.bits_per_element(), 4.5);
        assert_eq!(QuantType::Q3_K.bits_per_element(), 3.5);
        assert_eq!(QuantType::Q2_K.bits_per_element(), 2.5);
    }

    /// Compression ratios are measured against 32-bit storage.
    #[test]
    fn test_quant_type_compression_ratio() {
        // F32 is the baseline, so it does not compress at all.
        assert!((QuantType::F32.compression_ratio() - 1.0).abs() < 0.01);
        // 16-bit formats halve the storage.
        assert!((QuantType::F16.compression_ratio() - 2.0).abs() < 0.01);
        assert!((QuantType::Bf16.compression_ratio() - 2.0).abs() < 0.01);
        // 8 bits per element is a quarter of 32.
        assert!((QuantType::Q8_0.compression_ratio() - 4.0).abs() < 0.01);
        // 32 / 4.5 is roughly 7.11.
        assert!(QuantType::Q4_K.compression_ratio() > 7.0);
        assert!(QuantType::Q4_0.compression_ratio() > 7.0);
        // Ratios for the remaining fractional formats.
        assert!((QuantType::Q6_K.compression_ratio() - 32.0 / 6.5).abs() < 0.01);
        assert!((QuantType::Q5_K.compression_ratio() - 32.0 / 5.5).abs() < 0.01);
        assert!((QuantType::Q3_K.compression_ratio() - 32.0 / 3.5).abs() < 0.01);
        assert!((QuantType::Q2_K.compression_ratio() - 12.8).abs() < 0.01);
    }
}