use crate::error::{AprenderError, Result};
use provable_contracts_macros::ensures;
/// Converts a 16-bit pattern to `f32` by delegating to `trueno_quant::f16_to_f32`.
///
/// `bits` is presumably an IEEE 754 half-precision encoding (going by the
/// function name); the decoding itself lives in `trueno_quant` and is not
/// visible from this file.
#[provable_contracts_macros::contract("f16-conversion-v1", equation = "f16_to_f32_bias")]
pub(crate) fn f16_to_f32(bits: u16) -> f32 {
trueno_quant::f16_to_f32(bits)
}
/// Decodes a block scale from its 16-bit encoding, replacing non-finite
/// results with `0.0`.
///
/// The bits are decoded with [`f16_to_f32`]. A NaN or infinite scale is mapped
/// to zero, so every value multiplied by it becomes zero rather than NaN or
/// infinity.
#[inline]
fn safe_f16_scale(bits: u16) -> f32 {
    match f16_to_f32(bits) {
        decoded if decoded.is_finite() => decoded,
        // NaN and +/- infinity both land here.
        _ => 0.0,
    }
}
/// Dequantizes Q4_0 data into `f32` values.
///
/// Each block is 18 bytes and yields 32 values: a 2-byte little-endian scale
/// (decoded with `safe_f16_scale`) followed by 16 bytes that each pack two
/// 4-bit quants. Every quant is re-centred by subtracting 8 and multiplied by
/// the block scale. Within a block the 16 low nibbles are emitted first,
/// followed by the 16 high nibbles.
///
/// * `data` - buffer containing the quantized blocks.
/// * `start` - byte offset of the first block inside `data`.
/// * `num_elements` - number of values to return; a trailing partial block is
///   decoded in full and then truncated.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub fn dequantize_q4_0(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + 16;
    let num_blocks = num_elements.div_ceil(BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let Some(blocks) = num_blocks
        .checked_mul(BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .and_then(|end| data.get(start..end))
    else {
        return Err(AprenderError::FormatError {
            message: "Q4_0 data exceeds file size".to_string(),
        });
    };
    // Cannot overflow: `num_blocks * BLOCK_BYTES` was just shown to fit inside
    // `data`, and a block holds fewer than twice as many values as bytes.
    let mut result = Vec::with_capacity(num_blocks * BLOCK_SIZE);
    for block in blocks.chunks_exact(BLOCK_BYTES) {
        let scale = safe_f16_scale(u16::from_le_bytes([block[0], block[1]]));
        let quants = &block[2..];
        // Elements 0..16 of the block: low nibbles.
        result.extend(
            quants
                .iter()
                .map(|&byte| f32::from(i16::from(byte & 0x0F) - 8) * scale),
        );
        // Elements 16..32 of the block: high nibbles.
        result.extend(
            quants
                .iter()
                .map(|&byte| f32::from(i16::from(byte >> 4) - 8) * scale),
        );
    }
    // Drop the padding values produced by a partial final block.
    result.truncate(num_elements);
    Ok(result)
}
/// Dequantizes Q8_0 data into `f32` values.
///
/// Each block is 34 bytes and yields 32 values: a 2-byte little-endian scale
/// (decoded with `safe_f16_scale`) followed by 32 bytes, each reinterpreted as
/// a signed 8-bit quant and multiplied by the block scale.
///
/// * `data` - buffer containing the quantized blocks.
/// * `start` - byte offset of the first block inside `data`.
/// * `num_elements` - number of values to return; a trailing partial block is
///   decoded in full and then truncated.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub fn dequantize_q8_0(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + 32;
    let num_blocks = num_elements.div_ceil(BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let Some(blocks) = num_blocks
        .checked_mul(BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .and_then(|end| data.get(start..end))
    else {
        return Err(AprenderError::FormatError {
            message: "Q8_0 data exceeds file size".to_string(),
        });
    };
    // Cannot overflow: a block holds fewer values than bytes, and the byte
    // count was just shown to fit inside `data`.
    let mut result = Vec::with_capacity(num_blocks * BLOCK_SIZE);
    for block in blocks.chunks_exact(BLOCK_BYTES) {
        let scale = safe_f16_scale(u16::from_le_bytes([block[0], block[1]]));
        // `as i8` deliberately reinterprets the byte as two's complement.
        result.extend(block[2..].iter().map(|&q| f32::from(q as i8) * scale));
    }
    // Drop the padding values produced by a partial final block.
    result.truncate(num_elements);
    Ok(result)
}
/// Dequantizes Q5_0 data into `f32` values.
///
/// Each block is 22 bytes and yields 32 values:
///
/// * bytes 0..2 - little-endian scale, decoded with `safe_f16_scale`;
/// * bytes 2..6 - little-endian 32-bit mask holding the fifth (high) bit of
///   every quant;
/// * bytes 6..22 - 16 bytes that each pack the low four bits of two quants.
///
/// Output order follows the ggml reference dequantizer (and matches
/// `dequantize_q4_0`): the low nibbles of the 16 packed bytes are elements
/// 0..16 of the block and the high nibbles are elements 16..32. The fifth bit
/// of element `k` is bit `k` of the mask. Each 5-bit quant is re-centred by
/// subtracting 16 and multiplied by the block scale.
///
/// * `data` - buffer containing the quantized blocks.
/// * `start` - byte offset of the first block inside `data`.
/// * `num_elements` - number of values to return; a trailing partial block is
///   decoded in full and then truncated.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub fn dequantize_q5_0(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + 4 + 16;
    let num_blocks = num_elements.div_ceil(BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let Some(blocks) = num_blocks
        .checked_mul(BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .and_then(|end| data.get(start..end))
    else {
        return Err(AprenderError::FormatError {
            message: "Q5_0 data exceeds file size".to_string(),
        });
    };
    // Cannot overflow: a block holds fewer than twice as many values as bytes,
    // and the byte count was just shown to fit inside `data`.
    let mut result = Vec::with_capacity(num_blocks * BLOCK_SIZE);
    for block in blocks.chunks_exact(BLOCK_BYTES) {
        let scale = safe_f16_scale(u16::from_le_bytes([block[0], block[1]]));
        let high_bits = u32::from_le_bytes([block[2], block[3], block[4], block[5]]);
        let quants = &block[6..];
        // Elements 0..16: low nibble of byte `j`, fifth bit from mask bit `j`.
        result.extend(quants.iter().enumerate().map(|(j, &byte)| {
            let high = ((high_bits >> j) & 1) as u8;
            f32::from(i16::from((high << 4) | (byte & 0x0F)) - 16) * scale
        }));
        // Elements 16..32: high nibble of byte `j`, fifth bit from mask bit `j + 16`.
        result.extend(quants.iter().enumerate().map(|(j, &byte)| {
            let high = ((high_bits >> (j + 16)) & 1) as u8;
            f32::from(i16::from((high << 4) | (byte >> 4)) - 16) * scale
        }));
    }
    // Drop the padding values produced by a partial final block.
    result.truncate(num_elements);
    Ok(result)
}
/// Dequantizes Q5_1 data into `f32` values.
///
/// Each block is 24 bytes and yields 32 values:
///
/// * bytes 0..2 - little-endian scale, decoded with `safe_f16_scale`;
/// * bytes 2..4 - little-endian minimum, decoded the same way;
/// * bytes 4..8 - little-endian 32-bit mask holding the fifth (high) bit of
///   every quant;
/// * bytes 8..24 - 16 bytes that each pack the low four bits of two quants.
///
/// Output order follows the ggml reference dequantizer (and matches
/// `dequantize_q4_0`): the low nibbles of the 16 packed bytes are elements
/// 0..16 of the block and the high nibbles are elements 16..32. The fifth bit
/// of element `k` is bit `k` of the mask. Each unsigned 5-bit quant `q` becomes
/// `q * scale + min`.
///
/// * `data` - buffer containing the quantized blocks.
/// * `start` - byte offset of the first block inside `data`.
/// * `num_elements` - number of values to return; a trailing partial block is
///   decoded in full and then truncated.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub(crate) fn dequantize_q5_1(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + 2 + 4 + 16;
    let num_blocks = num_elements.div_ceil(BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let Some(blocks) = num_blocks
        .checked_mul(BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .and_then(|end| data.get(start..end))
    else {
        return Err(AprenderError::FormatError {
            message: "Q5_1 data exceeds file size".to_string(),
        });
    };
    // Cannot overflow: a block holds fewer than twice as many values as bytes,
    // and the byte count was just shown to fit inside `data`.
    let mut result = Vec::with_capacity(num_blocks * BLOCK_SIZE);
    for block in blocks.chunks_exact(BLOCK_BYTES) {
        let scale = safe_f16_scale(u16::from_le_bytes([block[0], block[1]]));
        let min_val = safe_f16_scale(u16::from_le_bytes([block[2], block[3]]));
        let high_bits = u32::from_le_bytes([block[4], block[5], block[6], block[7]]);
        let quants = &block[8..];
        // Elements 0..16: low nibble of byte `j`, fifth bit from mask bit `j`.
        result.extend(quants.iter().enumerate().map(|(j, &byte)| {
            let high = ((high_bits >> j) & 1) as u8;
            f32::from((high << 4) | (byte & 0x0F)) * scale + min_val
        }));
        // Elements 16..32: high nibble of byte `j`, fifth bit from mask bit `j + 16`.
        result.extend(quants.iter().enumerate().map(|(j, &byte)| {
            let high = ((high_bits >> (j + 16)) & 1) as u8;
            f32::from((high << 4) | (byte >> 4)) * scale + min_val
        }));
    }
    // Drop the padding values produced by a partial final block.
    result.truncate(num_elements);
    Ok(result)
}
/// Dequantizes Q4_K data into `f32` values.
///
/// Checks that `data` holds enough 144-byte super-blocks (256 values each)
/// starting at byte offset `start` to cover `num_elements` values, then
/// delegates the decoding to `trueno_quant::dequantize_q4_k_to_f32`, passing
/// everything from `start` to the end of `data`.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub(crate) fn dequantize_q4_k(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const SUPER_BLOCK_SIZE: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 144;
    let num_blocks = num_elements.div_ceil(SUPER_BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let in_bounds = num_blocks
        .checked_mul(SUPER_BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .is_some_and(|end| end <= data.len());
    if !in_bounds {
        return Err(AprenderError::FormatError {
            message: "Q4_K data exceeds file size".to_string(),
        });
    }
    // `start <= data.len()` is implied by the check above, so slicing cannot panic.
    Ok(trueno_quant::dequantize_q4_k_to_f32(
        &data[start..],
        num_elements,
    ))
}
/// Dequantizes Q5_K data into `f32` values.
///
/// Checks that `data` holds enough 176-byte super-blocks (256 values each)
/// starting at byte offset `start` to cover `num_elements` values, then
/// delegates the decoding to `trueno_quant::dequantize_q5_k_to_f32`, passing
/// everything from `start` to the end of `data`.
///
/// # Errors
///
/// Returns [`AprenderError::FormatError`] when the required byte range does
/// not fit inside `data`, including when computing that range overflows
/// `usize`.
#[ensures(ret.as_ref().map_or(true, |v| v.len() == num_elements))]
pub(crate) fn dequantize_q5_k(data: &[u8], start: usize, num_elements: usize) -> Result<Vec<f32>> {
    const SUPER_BLOCK_SIZE: usize = 256;
    const SUPER_BLOCK_BYTES: usize = 176;
    let num_blocks = num_elements.div_ceil(SUPER_BLOCK_SIZE);
    // A wrapped product or sum could slip past a plain `>` comparison in
    // release builds, so arithmetic overflow is treated as "out of range".
    let in_bounds = num_blocks
        .checked_mul(SUPER_BLOCK_BYTES)
        .and_then(|total_bytes| start.checked_add(total_bytes))
        .is_some_and(|end| end <= data.len());
    if !in_bounds {
        return Err(AprenderError::FormatError {
            message: "Q5_K data exceeds file size".to_string(),
        });
    }
    // `start <= data.len()` is implied by the check above, so slicing cannot panic.
    Ok(trueno_quant::dequantize_q5_k_to_f32(
        &data[start..],
        num_elements,
    ))
}
include!("dequantize.rs");
include!("dequant_f16_tests.rs");