entrenar/quant/gguf_quant/
q4_0.rs1use super::GGUF_BLOCK_SIZE;
4use serde::{Deserialize, Serialize};
5
6#[derive(Clone, Debug, Serialize, Deserialize)]
11pub struct Q4_0 {
12 pub scales: Vec<f32>,
14 pub data: Vec<u8>,
16 pub len: usize,
18}
19
20impl Q4_0 {
21 pub fn quantize(values: &[f32]) -> Self {
23 let len = values.len();
24 let num_blocks = len.div_ceil(GGUF_BLOCK_SIZE);
25
26 let mut scales = Vec::with_capacity(num_blocks);
27 let mut data = Vec::with_capacity(num_blocks * 16); for block_idx in 0..num_blocks {
30 let start = block_idx * GGUF_BLOCK_SIZE;
31 let end = (start + GGUF_BLOCK_SIZE).min(len);
32 let block = &values[start..end];
33
34 let max_abs = block
36 .iter()
37 .map(|v| v.abs())
38 .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
39 .unwrap_or(0.0);
40
41 let scale = if max_abs < 1e-10 { 1e-10 } else { max_abs / 7.0 };
42 scales.push(scale);
43
44 let mut block_data = [0u8; 16];
46 for i in 0..GGUF_BLOCK_SIZE {
47 let val = if start + i < end { block[i] } else { 0.0 };
48
49 let q = ((val / scale).round().clamp(-8.0, 7.0) as i8) & 0x0F;
51
52 if i % 2 == 0 {
54 block_data[i / 2] = (q as u8) & 0x0F;
55 } else {
56 block_data[i / 2] |= ((q as u8) & 0x0F) << 4;
57 }
58 }
59 data.extend_from_slice(&block_data);
60 }
61
62 Self { scales, data, len }
63 }
64
65 pub fn dequantize(&self) -> Vec<f32> {
67 let mut result = Vec::with_capacity(self.len);
68 let num_blocks = self.scales.len();
69
70 for block_idx in 0..num_blocks {
71 let scale = self.scales[block_idx];
72 let start = block_idx * GGUF_BLOCK_SIZE;
73 let block_len = (self.len - start).min(GGUF_BLOCK_SIZE);
74
75 for i in 0..block_len {
76 let byte_idx = block_idx * 16 + i / 2;
77 let byte = self.data[byte_idx];
78
79 let nibble = if i % 2 == 0 { byte & 0x0F } else { (byte >> 4) & 0x0F };
81
82 let q = if nibble & 0x08 != 0 { (nibble | 0xF0) as i8 } else { nibble as i8 };
84
85 result.push(f32::from(q) * scale);
86 }
87 }
88
89 result
90 }
91
92 pub fn memory_bytes(&self) -> usize {
94 self.scales.len() * 4 + self.data.len() }
96
97 pub fn gguf_bytes(&self) -> usize {
99 self.scales.len() * 2 + self.data.len() }
101
102 pub fn compression_ratio(&self) -> f32 {
104 let original = self.len * 4;
105 original as f32 / self.gguf_bytes() as f32
106 }
107
108 pub fn num_blocks(&self) -> usize {
110 self.scales.len()
111 }
112}