use super::GGUF_BLOCK_SIZE;
use serde::{Deserialize, Serialize};
/// A tensor quantized in GGUF Q8_0 style: the input f32 values are grouped
/// into fixed-size blocks (`GGUF_BLOCK_SIZE` elements each), and every block
/// is stored as one f32 scale plus one `i8` per value.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Q8_0 {
/// Per-block scale factors, one per `GGUF_BLOCK_SIZE` chunk of the input.
pub scales: Vec<f32>,
/// Quantized values; one `i8` per original f32 (no padding is retained).
pub data: Vec<i8>,
/// Number of original f32 values (equals `data.len()`).
pub len: usize,
}
impl Q8_0 {
    /// Quantizes `values` into Q8_0 form.
    ///
    /// The input is split into chunks of `GGUF_BLOCK_SIZE`. Each chunk gets a
    /// scale of `max_abs / 127.0` — floored at `1e-10` so an all-zero chunk
    /// never divides by ~0 — and its values are rounded to `i8`. A trailing
    /// partial chunk is quantized as-is, so `data.len() == values.len()`.
    /// (The old pad-with-zeros-then-truncate step was dead work: the padding
    /// was always removed again, so it is simply no longer emitted.)
    pub fn quantize(values: &[f32]) -> Self {
        let len = values.len();
        let mut scales = Vec::with_capacity(len.div_ceil(GGUF_BLOCK_SIZE));
        let mut data = Vec::with_capacity(len);
        for block in values.chunks(GGUF_BLOCK_SIZE) {
            let max_abs = block
                .iter()
                .map(|v| v.abs())
                .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
                .unwrap_or(0.0);
            let scale = if max_abs < 1e-10 { 1e-10 } else { max_abs / 127.0 };
            scales.push(scale);
            // |val / scale| <= 127 by construction of `scale`; the clamp only
            // guards rounding edge cases ahead of the narrowing cast.
            data.extend(
                block
                    .iter()
                    .map(|&val| (val / scale).round().clamp(-128.0, 127.0) as i8),
            );
        }
        Self { scales, data, len }
    }

    /// Reconstructs the (lossy) f32 values as `q * scale` per element, using
    /// each element's block scale.
    pub fn dequantize(&self) -> Vec<f32> {
        self.data
            .iter()
            .enumerate()
            .map(|(i, &q)| f32::from(q) * self.scales[i / GGUF_BLOCK_SIZE])
            .collect()
    }

    /// In-memory footprint of the payload in bytes: 4 bytes per f32 scale
    /// plus 1 byte per quantized value (excludes `Vec` headers and `len`).
    pub fn memory_bytes(&self) -> usize {
        self.scales.len() * 4 + self.data.len()
    }

    /// Serialized size in the GGUF on-disk layout: 2 bytes per scale
    /// (presumably stored as f16 on disk — hence 2, not 4) plus 1 byte per
    /// quantized value.
    pub fn gguf_bytes(&self) -> usize {
        self.scales.len() * 2 + self.data.len()
    }

    /// Ratio of the original f32 byte size to the GGUF byte size.
    ///
    /// Returns 1.0 for an empty tensor; previously this computed 0.0 / 0.0
    /// and yielded NaN.
    pub fn compression_ratio(&self) -> f32 {
        let compressed = self.gguf_bytes();
        if compressed == 0 {
            return 1.0;
        }
        (self.len * 4) as f32 / compressed as f32
    }

    /// Number of quantization blocks (one scale per block).
    pub fn num_blocks(&self) -> usize {
        self.scales.len()
    }
}