/// Converts IEEE 754 half-precision bits to an `f32`.
///
/// Thin wrapper delegating to `trueno::f16_to_f32`, giving the dequantizers
/// in this module a single local conversion entry point.
#[inline]
pub fn f16_to_f32(bits: u16) -> f32 {
    trueno::f16_to_f32(bits)
}
/// Decodes a buffer of little-endian f16 values into `f32`s.
///
/// Yields at most `num_elements` values; a trailing odd byte is ignored, and a
/// short buffer yields fewer than `num_elements` values (callers handle the
/// short read). Unlike the previous version, this stops decoding once
/// `num_elements` values have been produced instead of converting the whole
/// buffer and truncating afterwards.
pub fn dequantize_f16(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    bytes
        .chunks_exact(2)
        .take(num_elements)
        .map(|pair| f16_to_f32(u16::from_le_bytes([pair[0], pair[1]])))
        .collect()
}
/// Dequantizes GGML Q8_0 data: 34-byte blocks of one f16 scale followed by
/// 32 signed 8-bit quants. Yields at most `num_elements` values and stops
/// early on a truncated buffer.
pub fn dequantize_q8_0(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + BLOCK_SIZE;
    let mut out = Vec::with_capacity(num_elements);
    for block in bytes.chunks_exact(BLOCK_BYTES) {
        if out.len() >= num_elements {
            break;
        }
        let scale = f16_to_f32(u16::from_le_bytes([block[0], block[1]]));
        let budget = num_elements - out.len();
        out.extend(
            block[2..]
                .iter()
                .take(budget)
                .map(|&b| f32::from(b as i8) * scale),
        );
    }
    out.truncate(num_elements);
    out
}
/// Appends one 4-bit quant per input byte as `d_scale * q - d_min`, taking
/// either the low (`shift == 0`) or high (`shift == 4`) nibble of each byte.
/// Never grows `result` past `num_elements`.
#[inline]
fn push_q4k_nibbles(
    result: &mut Vec<f32>,
    num_elements: usize,
    bytes: &[u8],
    d_scale: f32,
    d_min: f32,
    shift: u8,
) {
    let budget = num_elements.saturating_sub(result.len());
    result.extend(
        bytes
            .iter()
            .take(budget)
            .map(|&b| d_scale * f32::from((b >> shift) & 0x0F) - d_min),
    );
}
/// Dequantizes GGML Q4_K super-blocks (256 elements each).
///
/// Super-block layout (144 bytes): f16 `d`, f16 `dmin`, 12 bytes of packed
/// 6-bit scales/mins, then 128 bytes of 4-bit quants. Each 64-element chunk
/// is emitted low-nibbles-first, then high-nibbles, with its own scale/min
/// pair. Yields at most `num_elements` values and stops early on a truncated
/// buffer.
pub fn dequantize_q4_k(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    const QK_K: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 2 + 2 + 12 + 128;
    let mut out = Vec::with_capacity(num_elements);
    for block in bytes.chunks_exact(SUPER_BLOCK_BYTES) {
        if out.len() >= num_elements {
            break;
        }
        let d = f16_to_f32(u16::from_le_bytes([block[0], block[1]]));
        let dmin = f16_to_f32(u16::from_le_bytes([block[2], block[3]]));
        let mut scales = [0u8; 12];
        scales.copy_from_slice(&block[4..16]);
        let qs = &block[16..16 + 128];
        for j in (0..QK_K).step_by(64) {
            // 32 bytes hold this chunk's 64 nibbles.
            let q = &qs[j / 2..j / 2 + 32];
            let pair = j / 32;
            let (sc_lo, m_lo) = extract_scale_min_q4k(&scales, pair);
            push_q4k_nibbles(&mut out, num_elements, q, d * sc_lo, dmin * m_lo, 0);
            let (sc_hi, m_hi) = extract_scale_min_q4k(&scales, pair + 1);
            push_q4k_nibbles(&mut out, num_elements, q, d * sc_hi, dmin * m_hi, 4);
        }
    }
    out.truncate(num_elements);
    out
}
/// Unpacks the 6-bit (scale, min) pair for sub-block `block_idx` (0..8) from
/// the 12-byte packed Q4_K scale field. The first four pairs live in the low
/// 6 bits of bytes 0-7; the last four are split across the high bits of
/// bytes 0-7 and the nibbles of bytes 8-11.
#[inline]
fn extract_scale_min_q4k(scales: &[u8; 12], block_idx: usize) -> (f32, f32) {
    let (scale_bits, min_bits) = match block_idx {
        j @ 0..=3 => (scales[j] & 63, scales[j + 4] & 63),
        j => {
            let scale = (scales[j + 4] & 0x0F) | ((scales[j - 4] >> 6) << 4);
            let min = (scales[j + 4] >> 4) | ((scales[j] >> 6) << 4);
            (scale, min)
        }
    };
    (f32::from(scale_bits), f32::from(min_bits))
}
/// Emits one 32-element Q6_K quadrant: `d * sc[sc_offset + l/16] * q(l)`,
/// where `extract_q` reassembles the signed 6-bit quant for position `l`.
/// Never grows `result` past `num_elements`.
#[inline]
#[allow(clippy::cast_possible_wrap)]
fn dequantize_q6k_quadrant(
    result: &mut Vec<f32>,
    num_elements: usize,
    d: f32,
    sc: &[i8],
    sc_offset: usize,
    extract_q: impl Fn(usize) -> i32,
) {
    let budget = num_elements.saturating_sub(result.len());
    result.extend((0..32).take(budget).map(|l| {
        // Each 16-element half of the quadrant has its own block scale.
        let block_scale = f32::from(sc[sc_offset + l / 16]);
        d * block_scale * extract_q(l) as f32
    }));
}
/// Dequantizes GGML Q6_K super-blocks (256 elements each).
///
/// Super-block layout (210 bytes): 128 bytes of low 4-bit quants (`ql`),
/// 64 bytes of high 2-bit quants (`qh`), 16 signed 8-bit block scales, then
/// an f16 super-block scale `d`. Each 6-bit quant is reassembled as
/// `ql_bits | (qh_bits << 4)` and re-centered by subtracting 32.
///
/// Yields at most `num_elements` values and stops early on a truncated
/// buffer.
#[allow(clippy::cast_possible_wrap)]
pub fn dequantize_q6_k(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    const QK_K: usize = 256;
    // 128 (ql) + 64 (qh) + 16 (scales) + 2 (f16 d) bytes per super-block.
    const SUPER_BLOCK_BYTES: usize = 128 + 64 + 16 + 2;
    let mut result = Vec::with_capacity(num_elements);
    let mut offset = 0;
    while result.len() < num_elements && offset + SUPER_BLOCK_BYTES <= bytes.len() {
        let ql = &bytes[offset..offset + 128];
        offset += 128;
        let qh = &bytes[offset..offset + 64];
        offset += 64;
        // 16 per-block scales, stored as signed bytes.
        let mut scales = [0i8; 16];
        for (i, scale) in scales.iter_mut().enumerate() {
            *scale = bytes[offset + i] as i8;
        }
        offset += 16;
        let d = f16_to_f32(u16::from_le_bytes([bytes[offset], bytes[offset + 1]]));
        offset += 2;
        // Each 128-element half is decoded as four sequential 32-element
        // quadrants; pushing them in this order reproduces the reference
        // interleaved layout (y[l], y[l+32], y[l+64], y[l+96] per pass l).
        for n in (0..QK_K).step_by(128) {
            let idx = n / 128;
            let sc = &scales[8 * idx..];
            let ql_slice = &ql[64 * idx..];
            let qh_slice = &qh[32 * idx..];
            // Quadrant 1: low nibbles of ql[0..32], qh bits 0-1.
            dequantize_q6k_quadrant(&mut result, num_elements, d, sc, 0, |l| {
                ((ql_slice[l] & 0xF) | ((qh_slice[l] & 3) << 4)) as i32 - 32
            });
            // Quadrant 2: low nibbles of ql[32..64], qh bits 2-3.
            dequantize_q6k_quadrant(&mut result, num_elements, d, sc, 2, |l| {
                ((ql_slice[l + 32] & 0xF) | (((qh_slice[l] >> 2) & 3) << 4)) as i32 - 32
            });
            // Quadrant 3: high nibbles of ql[0..32], qh bits 4-5.
            dequantize_q6k_quadrant(&mut result, num_elements, d, sc, 4, |l| {
                ((ql_slice[l] >> 4) | (((qh_slice[l] >> 4) & 3) << 4)) as i32 - 32
            });
            // Quadrant 4: high nibbles of ql[32..64], qh bits 6-7.
            dequantize_q6k_quadrant(&mut result, num_elements, d, sc, 6, |l| {
                ((ql_slice[l + 32] >> 4) | (((qh_slice[l] >> 6) & 3) << 4)) as i32 - 32
            });
        }
    }
    result.truncate(num_elements);
    result
}
#[inline]
pub fn dtype_to_ggml_qtype(dtype: &str) -> Option<u32> {
crate::gguf::GgmlQuantType::from_str_lossy(dtype)
.filter(|qt| {
matches!(
qt,
crate::gguf::GgmlQuantType::Q4_0
| crate::gguf::GgmlQuantType::Q4_1
| crate::gguf::GgmlQuantType::Q5_0
| crate::gguf::GgmlQuantType::Q5_1
| crate::gguf::GgmlQuantType::Q8_0
| crate::gguf::GgmlQuantType::Q8_1
| crate::gguf::GgmlQuantType::Q2K
| crate::gguf::GgmlQuantType::Q3K
| crate::gguf::GgmlQuantType::Q4K
| crate::gguf::GgmlQuantType::Q5K
| crate::gguf::GgmlQuantType::Q6K
| crate::gguf::GgmlQuantType::IQ2XXS
| crate::gguf::GgmlQuantType::IQ2XS
)
})
.map(crate::gguf::GgmlQuantType::as_id)
}
/// Returns `true` when `dtype` names a GGML quantized format recognized by
/// [`dtype_to_ggml_qtype`]. APR-native strings such as "q8"/"q4" are not
/// GGML types and return `false` (see the tests below).
#[inline]
pub fn is_quantized_dtype(dtype: &str) -> bool {
    dtype_to_ggml_qtype(dtype).is_some()
}
/// Dequantizes APR-native Q8 data.
///
/// Layout: one little-endian `f32` scale followed by one `i8` quant per
/// element (distinct from GGML Q8_0, which uses per-block f16 scales).
/// Returns `num_elements` zeros when the buffer is too short to hold the
/// scale header; returns fewer than `num_elements` values when the quant
/// payload is shorter than requested (callers handle the short read).
pub fn dequantize_apr_q8(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    // Need at least the 4-byte scale header.
    if bytes.len() < 4 {
        return vec![0.0; num_elements];
    }
    let scale = f32::from_le_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]);
    bytes[4..]
        .iter()
        .take(num_elements)
        .map(|&b| f32::from(b as i8) * scale)
        .collect()
}
/// Dequantizes APR-native Q4 data: 18-byte blocks of one f16 scale followed
/// by 16 bytes packing 32 4-bit values (low nibble first). Each nibble is
/// biased by +8, so the decoded value is `(nibble - 8) * scale`. Yields at
/// most `num_elements` values and stops early on a truncated buffer.
pub fn dequantize_apr_q4(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + BLOCK_SIZE / 2;
    let mut out = Vec::with_capacity(num_elements);
    for block in bytes.chunks_exact(BLOCK_BYTES) {
        if out.len() >= num_elements {
            break;
        }
        let scale = f16_to_f32(u16::from_le_bytes([block[0], block[1]]));
        let budget = num_elements - out.len();
        // Unpack each byte into its low then high nibble, in storage order.
        let nibbles = block[2..]
            .iter()
            .flat_map(|&byte| [byte & 0x0F, (byte >> 4) & 0x0F])
            .take(budget);
        out.extend(nibbles.map(|n| (f32::from(n) - 8.0) * scale));
    }
    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Quantize a small vector to APR Q8 by hand and check the decoder
    /// reproduces it within quantization error.
    #[test]
    fn test_dequantize_apr_q8_round_trip() {
        let original = vec![1.0f32, -0.5, 0.3, -0.8, 0.0, 0.7];
        let max_abs = original.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
        let scale = max_abs / 127.0;
        let mut bytes = Vec::with_capacity(4 + original.len());
        bytes.extend_from_slice(&scale.to_le_bytes());
        for &v in &original {
            let q = (v / scale).round().clamp(-127.0, 127.0) as i8;
            bytes.push(q as u8);
        }
        let result = dequantize_apr_q8(&bytes, original.len());
        assert_eq!(result.len(), original.len());
        for (i, (&orig, &dequant)) in original.iter().zip(result.iter()).enumerate() {
            assert!(
                (orig - dequant).abs() < 0.02,
                "APR Q8 mismatch at {i}: orig={orig}, dequant={dequant}"
            );
        }
    }

    /// All-zero quants must decode to all-zero floats regardless of scale.
    #[test]
    fn test_dequantize_apr_q8_zeros() {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&1.0f32.to_le_bytes());
        bytes.extend_from_slice(&[0u8; 10]);
        let result = dequantize_apr_q8(&bytes, 10);
        assert_eq!(result.len(), 10);
        for &v in &result {
            assert_eq!(v, 0.0);
        }
    }

    /// A buffer too short for the scale header decodes to zeros.
    #[test]
    fn test_dequantize_apr_q8_empty() {
        let result = dequantize_apr_q8(&[], 10);
        assert_eq!(result, vec![0.0; 10]);
    }

    /// Minimal f32 -> f16 encoder for building test fixtures.
    ///
    /// Truncates the mantissa (round-toward-zero) and flushes the subnormal
    /// range to signed zero; accurate enough for the tolerances used here.
    fn f32_to_f16_bits(value: f32) -> u16 {
        let bits = value.to_bits();
        let sign = (bits >> 31) & 1;
        let exp = ((bits >> 23) & 0xFF) as i32 - 127;
        let mant = bits & 0x7FFFFF;
        if exp > 15 {
            // Overflow: encode infinity.
            ((sign << 15) | (0x1F << 10)) as u16
        } else if exp < -14 {
            // Subnormal/zero range: flush to signed zero.
            (sign << 15) as u16
        } else {
            let f16_exp = (exp + 15) as u32;
            let f16_mant = mant >> 13;
            ((sign << 15) | (f16_exp << 10) | f16_mant) as u16
        }
    }

    /// Quantize one full 32-element APR Q4 block by hand and round-trip it.
    #[test]
    fn test_dequantize_apr_q4_round_trip() {
        let original: Vec<f32> = (0..32).map(|i| (i as f32 - 16.0) / 16.0).collect();
        let max_abs = original.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
        let scale = if max_abs == 0.0 { 1.0 } else { max_abs / 7.0 };
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&f32_to_f16_bits(scale).to_le_bytes());
        let mut packed = [0u8; 16];
        for (i, &v) in original.iter().enumerate() {
            let q = (v / scale).round().clamp(-8.0, 7.0) as i8;
            let nibble = ((q + 8) as u8) & 0x0F;
            if i % 2 == 0 {
                packed[i / 2] = nibble;
            } else {
                packed[i / 2] |= nibble << 4;
            }
        }
        bytes.extend_from_slice(&packed);
        let result = dequantize_apr_q4(&bytes, 32);
        assert_eq!(result.len(), 32);
        for (i, (&orig, &dequant)) in original.iter().zip(result.iter()).enumerate() {
            assert!(
                (orig - dequant).abs() < 0.25,
                "APR Q4 mismatch at {i}: orig={orig}, dequant={dequant}"
            );
        }
    }

    /// An empty buffer holds no complete block, so nothing is decoded.
    #[test]
    fn test_dequantize_apr_q4_empty() {
        let result = dequantize_apr_q4(&[], 10);
        assert!(result.is_empty());
    }

    /// The same bytes decoded as APR Q8 (f32 scale header) vs GGML Q8_0
    /// (per-block f16 scale) must disagree — the formats are not compatible.
    #[test]
    fn test_apr_q8_not_ggml_q8_0() {
        let mut apr_bytes = Vec::new();
        apr_bytes.extend_from_slice(&0.01f32.to_le_bytes());
        for i in 0..32 {
            apr_bytes.push((i as i8 - 16) as u8);
        }
        let apr_result = dequantize_apr_q8(&apr_bytes, 32);
        let ggml_result = dequantize_q8_0(&apr_bytes, 32);
        assert_ne!(
            apr_result, ggml_result,
            "APR Q8 and GGML Q8_0 should produce different results from same bytes"
        );
    }

    /// APR-native dtype strings are not GGML quant types; real GGML names
    /// map to their numeric ids.
    #[test]
    fn test_dtype_to_ggml_qtype_apr_native_returns_none() {
        assert_eq!(dtype_to_ggml_qtype("q8"), None);
        assert_eq!(dtype_to_ggml_qtype("Q8"), None);
        assert_eq!(dtype_to_ggml_qtype("q4"), None);
        assert_eq!(dtype_to_ggml_qtype("Q4"), None);
        assert_eq!(dtype_to_ggml_qtype("Q8_0"), Some(8));
        assert_eq!(dtype_to_ggml_qtype("Q4_K"), Some(12));
        assert_eq!(dtype_to_ggml_qtype("Q6_K"), Some(14));
    }
}