/// Number of f32 values grouped into one quantization block (one scale each).
pub const NF4_BLOCK_SIZE: usize = 64;
/// Byte footprint of one packed block in the GPU layout: a 4-byte f32 scale
/// followed by NF4_BLOCK_SIZE / 2 = 32 bytes of packed 4-bit indices
/// (see `pack_nf4_for_gpu`).
pub const NF4_BLOCK_BYTES: usize = 36;
/// 16-entry NF4 dequantization codebook: strictly increasing over
/// [-1.0, 1.0], with an exact 0.0 at index 7. Values match the NormalFloat4
/// quantile codebook popularized by QLoRA — NOTE(review): confirm against the
/// reference (bitsandbytes) table if fidelity matters.
#[allow(clippy::excessive_precision, clippy::unreadable_literal)]
pub const NF4_LUT: [f32; 16] = [
-1.0,
-0.6961928009986877,
-0.5250730514526367,
-0.39491748809814453,
-0.28444138169288635,
-0.18477343022823334,
-0.09105003625154495,
0.0,
0.07958029955625534,
0.16093020141124725,
0.24611230194568634,
0.33791524171829224,
0.44070982933044434,
0.5626170039176941,
0.7229568362236023,
1.0,
];
/// A matrix quantized to 4-bit NormalFloat blocks.
#[derive(Debug, Clone)]
pub struct Nf4Quantized {
    /// One absmax scale per block of `NF4_BLOCK_SIZE` values.
    pub scales: Vec<f32>,
    /// Packed 4-bit codebook indices, two per byte (low nibble = even element).
    pub data: Vec<u8>,
    /// Logical (rows, cols) shape of the original matrix.
    pub shape: (usize, usize),
}

impl Nf4Quantized {
    /// Number of quantization blocks (one per stored scale).
    #[must_use]
    pub fn num_blocks(&self) -> usize {
        self.scales.len()
    }

    /// Total logical element count, `rows * cols`.
    #[must_use]
    pub fn num_values(&self) -> usize {
        let (rows, cols) = self.shape;
        rows * cols
    }

    /// Size in bytes of the packed index payload.
    #[must_use]
    pub fn data_bytes(&self) -> usize {
        self.data.len()
    }

    /// Combined storage cost: 4 bytes per f32 scale plus the packed payload.
    #[must_use]
    pub fn total_bytes(&self) -> usize {
        self.num_blocks() * 4 + self.data_bytes()
    }
}
/// Index of the `NF4_LUT` entry closest to `normalized`.
///
/// Ties resolve to the lower index (`min_by` keeps the first minimum, which
/// matches the original strict `<` linear scan). A NaN input yields index 0,
/// as before: every distance is NaN, so the first entry wins.
fn nearest_nf4_index(normalized: f32) -> u8 {
    NF4_LUT
        .iter()
        .enumerate()
        .min_by(|(_, a), (_, b)| {
            let da = (normalized - **a).abs();
            let db = (normalized - **b).abs();
            da.total_cmp(&db)
        })
        .map(|(i, _)| i as u8)
        .expect("NF4_LUT is non-empty")
}
#[must_use]
/// Quantize `values` (interpreted as a `rows` × `cols` matrix) to NF4 blocks.
///
/// Each block of `NF4_BLOCK_SIZE` values stores its absmax as an f32 scale;
/// values are normalized into [-1, 1] and mapped to the nearest codebook
/// entry, with two 4-bit indices packed per byte (low nibble = even element).
///
/// # Panics
/// Panics with a "C-NF4-002" message when the value count is not a multiple
/// of `NF4_BLOCK_SIZE` or does not equal `rows * cols`.
#[must_use]
pub fn quantize_nf4(values: &[f32], rows: usize, cols: usize) -> Nf4Quantized {
    let n = values.len();
    assert!(
        n % NF4_BLOCK_SIZE == 0,
        "C-NF4-002: value count {n} not divisible by NF4 block size {NF4_BLOCK_SIZE}"
    );
    assert_eq!(rows * cols, n, "C-NF4-002: shape ({rows}, {cols}) does not match value count {n}");
    let mut scales = Vec::with_capacity(n / NF4_BLOCK_SIZE);
    let mut data = Vec::with_capacity(n / 2);
    for block in values.chunks_exact(NF4_BLOCK_SIZE) {
        // Per-block absmax scale. An all-zero block gets inv_scale = 0, so
        // every element normalizes to 0.0 and maps to codebook index 7
        // (the exact 0.0 entry), round-tripping back to 0.0.
        let absmax = block.iter().map(|v| v.abs()).fold(0.0f32, f32::max);
        let inv_scale = if absmax > 0.0 { absmax.recip() } else { 0.0 };
        scales.push(absmax);
        data.extend(block.chunks_exact(2).map(|pair| {
            let lo = nearest_nf4_index(pair[0] * inv_scale);
            let hi = nearest_nf4_index(pair[1] * inv_scale);
            lo | (hi << 4)
        }));
    }
    Nf4Quantized { scales, data, shape: (rows, cols) }
}
#[must_use]
/// Reconstruct f32 values from NF4 blocks.
///
/// Each packed byte expands to two codebook lookups — low nibble first,
/// mirroring the packing order in `quantize_nf4` — multiplied by the block's
/// scale. Output length equals `q.num_values()` for well-formed inputs.
#[must_use]
pub fn dequantize_nf4(q: &Nf4Quantized) -> Vec<f32> {
    const HALF_BLOCK: usize = NF4_BLOCK_SIZE / 2;
    let mut output = Vec::with_capacity(q.num_values());
    for (block_idx, &scale) in q.scales.iter().enumerate() {
        let packed_block = &q.data[block_idx * HALF_BLOCK..(block_idx + 1) * HALF_BLOCK];
        for &byte in packed_block {
            output.push(NF4_LUT[(byte & 0x0F) as usize] * scale);
            output.push(NF4_LUT[(byte >> 4) as usize] * scale);
        }
    }
    output
}
#[must_use]
/// Serialize to the flat GPU layout: for every block, a little-endian f32
/// scale (4 bytes) followed by its 32 bytes of packed indices — exactly
/// `NF4_BLOCK_BYTES` per block.
#[must_use]
pub fn pack_nf4_for_gpu(q: &Nf4Quantized) -> Vec<u8> {
    const HALF_BLOCK: usize = NF4_BLOCK_SIZE / 2;
    let mut packed = Vec::with_capacity(q.num_blocks() * NF4_BLOCK_BYTES);
    for (block_idx, scale) in q.scales.iter().enumerate() {
        packed.extend_from_slice(&scale.to_le_bytes());
        packed.extend_from_slice(&q.data[block_idx * HALF_BLOCK..(block_idx + 1) * HALF_BLOCK]);
    }
    packed
}
#[must_use]
/// Inverse of `pack_nf4_for_gpu`: rebuild an `Nf4Quantized` from the flat
/// GPU layout (per block: little-endian f32 scale, then 32 packed bytes).
///
/// # Panics
/// Panics with a "C-NF4-002" message when `rows * cols` is not a multiple of
/// `NF4_BLOCK_SIZE`, or when `packed` is not exactly
/// `num_blocks * NF4_BLOCK_BYTES` bytes. The original version validated
/// neither: a misaligned shape silently floored `num_blocks` (yielding a
/// value whose `num_values()` exceeded its decodable payload), and the
/// "buffer too short" `expect` was unreachable because the slice index
/// panicked first. Valid inputs behave exactly as before.
#[must_use]
pub fn unpack_nf4_from_gpu(packed: &[u8], rows: usize, cols: usize) -> Nf4Quantized {
    let n = rows * cols;
    assert!(
        n % NF4_BLOCK_SIZE == 0,
        "C-NF4-002: value count {n} not divisible by NF4 block size {NF4_BLOCK_SIZE}"
    );
    let num_blocks = n / NF4_BLOCK_SIZE;
    let expected_len = num_blocks * NF4_BLOCK_BYTES;
    assert!(
        packed.len() == expected_len,
        "C-NF4-002: packed buffer has {} bytes, expected {expected_len} for {num_blocks} blocks",
        packed.len()
    );
    let mut scales = Vec::with_capacity(num_blocks);
    let mut data = Vec::with_capacity(n / 2);
    for block in packed.chunks_exact(NF4_BLOCK_BYTES) {
        // Layout per block: [0..4) = f32 scale (LE), [4..36) = nibble data.
        let (scale_bytes, nibble_bytes) = block.split_at(4);
        let scale_arr: [u8; 4] = scale_bytes.try_into().expect("split_at(4) yields 4 bytes");
        scales.push(f32::from_le_bytes(scale_arr));
        data.extend_from_slice(nibble_bytes);
    }
    Nf4Quantized { scales, data, shape: (rows, cols) }
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// C-NF4-001: on pseudo-normal data, the per-element normalized
    /// quantization error stays below 0.16 (roughly half the widest codebook
    /// gap) in every block.
    #[test]
    fn test_c_nf4_001_codebook_fidelity() {
        let n = 1024;
        let mut values = Vec::with_capacity(n);
        for i in 0..n {
            // Two LCG streams feed a Box-Muller transform for deterministic,
            // roughly-normal samples. Widen to u64 before multiplying:
            // `6364136223` does not fit a 32-bit `usize` literal and
            // `i * 1103515245` can overflow `usize` on 32-bit targets.
            // The generated values are unchanged on 64-bit hosts.
            let k = i as u64;
            let u1 = ((k * 1103515245 + 12345) % 65536) as f32 / 65536.0;
            let u2 = ((k * 6364136223 + 1442695) % 65536) as f32 / 65536.0;
            // Clamp away from 0 and 1 so ln() stays finite.
            let u1_clamped = u1.max(1e-6).min(1.0 - 1e-6);
            let z = (-2.0 * u1_clamped.ln()).sqrt() * (2.0 * std::f32::consts::PI * u2).cos();
            values.push(z);
        }
        let q = quantize_nf4(&values, 1, n);
        let deq = dequantize_nf4(&q);
        assert_eq!(deq.len(), n);
        let mut max_err = 0.0f32;
        for block_idx in 0..q.num_blocks() {
            let start = block_idx * NF4_BLOCK_SIZE;
            let absmax = q.scales[block_idx];
            // A zero-scale block has no meaningful normalized error.
            if absmax == 0.0 {
                continue;
            }
            for i in 0..NF4_BLOCK_SIZE {
                let orig_norm = values[start + i] / absmax;
                let deq_norm = deq[start + i] / absmax;
                let err = (orig_norm - deq_norm).abs();
                max_err = max_err.max(err);
                assert!(
                    err < 0.16,
                    "C-NF4-001 violated: block {block_idx} element {i}: \
                     orig_norm={orig_norm:.4}, deq_norm={deq_norm:.4}, error={err:.4}"
                );
            }
        }
        // Sanity: quantization must actually be lossy on random data.
        assert!(max_err > 0.0, "Max error should be non-zero for random data");
    }

    /// C-NF4-002: block counts, payload size, and packed size are consistent
    /// for a block-aligned matrix.
    #[test]
    fn test_c_nf4_002_block_alignment() {
        let rows = 896;
        let cols = 896;
        let n = rows * cols;
        let values = vec![0.1f32; n];
        let q = quantize_nf4(&values, rows, cols);
        let expected_blocks = n / NF4_BLOCK_SIZE;
        assert_eq!(q.num_blocks(), expected_blocks);
        assert_eq!(q.data.len(), n / 2);
        assert_eq!(q.total_bytes(), expected_blocks * 4 + n / 2);
        let packed = pack_nf4_for_gpu(&q);
        assert_eq!(packed.len(), expected_blocks * NF4_BLOCK_BYTES);
    }

    /// C-NF4-004: NF4 storage compresses fp32 by at least 7.1x
    /// (theoretical ratio is 32 / 4.5 ≈ 7.11 with the per-block scale).
    #[test]
    fn test_c_nf4_004_compression_ratio() {
        let rows = 896;
        let cols = 896;
        let n = rows * cols;
        let values = vec![0.5f32; n];
        let q = quantize_nf4(&values, rows, cols);
        let fp32_bytes = n * 4;
        let nf4_bytes = q.total_bytes();
        let ratio = fp32_bytes as f64 / nf4_bytes as f64;
        assert!(
            ratio >= 7.1,
            "C-NF4-004 violated: compression ratio {ratio:.2}x < 7.1x \
             (fp32={fp32_bytes}, nf4={nf4_bytes})"
        );
    }

    /// Structural invariants of the codebook: 16 entries, endpoints at ±1,
    /// strictly increasing.
    #[test]
    fn test_nf4_codebook_properties() {
        assert_eq!(NF4_LUT.len(), 16);
        assert_eq!(NF4_LUT[0], -1.0);
        assert_eq!(NF4_LUT[15], 1.0);
        for i in 1..16 {
            assert!(NF4_LUT[i] > NF4_LUT[i - 1], "NF4_LUT not monotonic at index {i}");
        }
    }

    /// An all-zero block must round-trip to exact zeros (scale 0 path).
    #[test]
    fn test_nf4_zero_block() {
        let values = vec![0.0f32; 64];
        let q = quantize_nf4(&values, 1, 64);
        let deq = dequantize_nf4(&q);
        for (i, &v) in deq.iter().enumerate() {
            assert_eq!(v, 0.0, "zero block element {i} = {v}");
        }
    }

    /// GPU pack → unpack must be a lossless round trip of scales, data,
    /// and shape.
    #[test]
    fn test_nf4_gpu_pack_roundtrip() {
        let n = 256;
        let values: Vec<f32> = (0..n).map(|i| (i as f32 - 128.0) / 128.0).collect();
        let q = quantize_nf4(&values, 4, 64);
        let packed = pack_nf4_for_gpu(&q);
        let unpacked = unpack_nf4_from_gpu(&packed, 4, 64);
        assert_eq!(unpacked.scales, q.scales);
        assert_eq!(unpacked.data, q.data);
        assert_eq!(unpacked.shape, q.shape);
    }

    /// Exact codebook entries must map to their own indices.
    #[test]
    fn test_nearest_nf4_index_boundaries() {
        assert_eq!(nearest_nf4_index(-1.0), 0);
        assert_eq!(nearest_nf4_index(1.0), 15);
        assert_eq!(nearest_nf4_index(0.0), 7);
    }

    /// Inputs not divisible by the block size must be rejected with the
    /// C-NF4-002 contract message.
    #[test]
    #[should_panic(expected = "C-NF4-002")]
    fn test_nf4_rejects_misaligned_input() {
        let values = vec![0.0f32; 63];
        let _ = quantize_nf4(&values, 1, 63);
    }
}