1#![allow(non_camel_case_types)]
5
6use std::fmt;
7
/// Storage format of model weights, from full precision down to low-bit
/// quantized layouts.
///
/// Sizes quoted on the variants are the ones reported by
/// [`QuantFormat::block_size`] and [`QuantFormat::bytes_per_block`].
// Variant names deliberately match their display labels (e.g. `Q4_K`).
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QuantFormat {
    /// 32 bits per weight; unblocked (4 bytes per element).
    F32,
    /// 16 bits per weight; unblocked (2 bytes per element).
    F16,
    /// 16 bits per weight; unblocked (2 bytes per element).
    BF16,

    /// 32-weight blocks of 18 bytes (4.5 bits per weight).
    Q4_0,
    /// 256-weight blocks of 144 bytes (4.5 bits per weight).
    Q4_K,
    /// 256-weight blocks of 176 bytes (5.5 bits per weight).
    Q5_K,
    /// 256-weight blocks of 210 bytes (6.5625 bits per weight).
    Q6_K,
    /// 32-weight blocks of 34 bytes (8.5 bits per weight).
    Q8_0,

    /// GPTQ with `bits` bits per packed weight and `group_size` weights per group.
    Gptq { bits: u8, group_size: u16 },
    /// AWQ with `bits` bits per packed weight; the group size is fixed at 128.
    Awq { bits: u8 },
}

impl QuantFormat {
    /// Weights per AWQ group. `Awq` carries no group-size field, so it is fixed here.
    const AWQ_GROUP_SIZE: usize = 128;

    /// Fixed per-group overhead, in bytes, stored next to packed GPTQ/AWQ weights.
    // NOTE(review): presumably scale + zero-point storage - confirm against the packer.
    const GROUP_OVERHEAD_BYTES: usize = 4;

    /// Size in bytes of one group of `weights` values packed at `bits` bits each,
    /// rounded up to whole bytes, plus the fixed per-group overhead.
    fn packed_group_bytes(weights: usize, bits: u8) -> usize {
        (weights * usize::from(bits)).div_ceil(8) + Self::GROUP_OVERHEAD_BYTES
    }

    /// Average number of bits stored per weight, including block metadata.
    ///
    /// For the fixed-layout formats this is exactly
    /// `bytes_per_block() * 8 / block_size()`. GPTQ and AWQ report `bits` plus a
    /// flat 0.5-bit overhead estimate that does not depend on the group size.
    pub fn bits_per_weight(&self) -> f64 {
        match *self {
            QuantFormat::F32 => 32.0,
            QuantFormat::F16 | QuantFormat::BF16 => 16.0,
            QuantFormat::Q4_0 | QuantFormat::Q4_K => 4.5,
            QuantFormat::Q5_K => 5.5,
            // 210 bytes per 256 weights = 6.5625 bits; 6.5 under-reported the footprint.
            QuantFormat::Q6_K => 6.5625,
            QuantFormat::Q8_0 => 8.5,
            QuantFormat::Gptq { bits, .. } | QuantFormat::Awq { bits } => f64::from(bits) + 0.5,
        }
    }

    /// Memory footprint relative to a 16-bit-per-weight format.
    ///
    /// `F16` and `BF16` yield `1.0`, `F32` yields `2.0`.
    pub fn memory_ratio(&self) -> f64 {
        self.bits_per_weight() / 16.0
    }

    /// Expected perplexity delta for this format; `F32` and `F16` are the `0.0`
    /// reference points.
    ///
    /// The values are a fixed lookup table, not a measurement.
    pub fn expected_ppl_delta(&self) -> f64 {
        match *self {
            QuantFormat::F32 | QuantFormat::F16 => 0.0,
            QuantFormat::BF16 | QuantFormat::Q8_0 => 0.01,
            QuantFormat::Q4_0 => 0.5,
            QuantFormat::Q4_K => 0.3,
            QuantFormat::Q5_K => 0.1,
            QuantFormat::Q6_K => 0.05,
            QuantFormat::Gptq { bits: 4, .. } => 0.4,
            QuantFormat::Gptq { bits: 8, .. } => 0.02,
            // Any other GPTQ width falls back to the most pessimistic estimate.
            QuantFormat::Gptq { .. } => 0.5,
            QuantFormat::Awq { bits: 4 } => 0.2,
            // NOTE(review): every non-4-bit AWQ width (including 8-bit) gets 0.3,
            // which is worse than the 4-bit value - confirm this is intended.
            QuantFormat::Awq { .. } => 0.3,
        }
    }

    /// Number of weights that are quantized together as one block.
    ///
    /// Unquantized formats use a block of a single weight. For GPTQ this is the
    /// caller-supplied `group_size`, so it is `0` when `group_size` is `0`.
    pub fn block_size(&self) -> usize {
        match *self {
            QuantFormat::F32 | QuantFormat::F16 | QuantFormat::BF16 => 1,
            QuantFormat::Q4_0 | QuantFormat::Q8_0 => 32,
            QuantFormat::Q4_K | QuantFormat::Q5_K | QuantFormat::Q6_K => 256,
            QuantFormat::Gptq { group_size, .. } => usize::from(group_size),
            QuantFormat::Awq { .. } => Self::AWQ_GROUP_SIZE,
        }
    }

    /// Size in bytes of one block of [`block_size`](Self::block_size) weights.
    ///
    /// GPTQ and AWQ blocks are the packed weights rounded up to whole bytes plus
    /// 4 bytes of per-group overhead.
    pub fn bytes_per_block(&self) -> usize {
        match *self {
            QuantFormat::F32 => 4,
            QuantFormat::F16 | QuantFormat::BF16 => 2,
            QuantFormat::Q4_0 => 18,
            QuantFormat::Q4_K => 144,
            QuantFormat::Q5_K => 176,
            QuantFormat::Q6_K => 210,
            QuantFormat::Q8_0 => 34,
            QuantFormat::Gptq { bits, group_size } => {
                Self::packed_group_bytes(usize::from(group_size), bits)
            }
            QuantFormat::Awq { bits } => Self::packed_group_bytes(Self::AWQ_GROUP_SIZE, bits),
        }
    }
}
116
117impl fmt::Display for QuantFormat {
118 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
119 match self {
120 QuantFormat::F32 => write!(f, "F32"),
121 QuantFormat::F16 => write!(f, "F16"),
122 QuantFormat::BF16 => write!(f, "BF16"),
123 QuantFormat::Q4_0 => write!(f, "Q4_0"),
124 QuantFormat::Q4_K => write!(f, "Q4_K"),
125 QuantFormat::Q5_K => write!(f, "Q5_K"),
126 QuantFormat::Q6_K => write!(f, "Q6_K"),
127 QuantFormat::Q8_0 => write!(f, "Q8_0"),
128 QuantFormat::Gptq { bits, group_size } => {
129 write!(f, "GPTQ-{}bit-g{}", bits, group_size)
130 }
131 QuantFormat::Awq { bits } => write!(f, "AWQ-{}bit", bits),
132 }
133 }
134}
135
/// How quantized weights are turned back into usable values.
///
/// The default strategy is [`DequantStrategy::Fused`].
// NOTE(review): the variant descriptions below are inferred from the names only;
// the code that interprets them is not visible here - confirm against it.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum DequantStrategy {
    /// Presumably dequantizes as part of the consuming computation.
    #[default]
    Fused,
    /// Presumably dequantizes ahead of use, `lookahead_blocks` blocks in advance.
    Prefetch { lookahead_blocks: usize },
    /// Presumably dequantizes only when the values are requested.
    OnDemand,
}