trueno_quant/lib.rs
1#![cfg_attr(
2 test,
3 allow(
4 clippy::expect_used,
5 clippy::unwrap_used,
6 clippy::disallowed_methods,
7 clippy::float_cmp,
8 clippy::panic,
9 clippy::cast_precision_loss
10 )
11)]
12//! K-Quantization formats for GGUF/APR model weights (Toyota Way: ONE source of truth)
13//!
14//! This crate provides quantization functions for converting F32 data to
15//! K-quantization formats (`Q4_K`, `Q5_K`, `Q6_K`). This is the ONLY implementation
16//! in the Sovereign AI Stack - aprender and realizar import from here.
17//!
18//! ## Stack Architecture (Toyota Way)
19//!
20//! ```text
21//! ┌─────────┐
22//! │ apr CLI │
23//! └────┬────┘
24//! │
25//! ┌───────┼───────┬───────────┐
26//! ▼ ▼ ▼ ▼
27//! ┌────────┐ ┌────────┐ ┌─────────┐
28//! │entrenar│ │aprender│ │realizar │
29//! └───┬────┘ └───┬────┘ └────┬────┘
30//! │ │ │
31//! └────┬─────┴───────────┴────┘
32//! ▼
33//! ┌────────────────┐
34//! │ trueno-quant │ ← YOU ARE HERE
35//! └───────┬────────┘
36//! ▼
37//! ┌────────────────┐
38//! │ trueno │
39//! └────────────────┘
40//! ```
41//!
42//! ## Format Specifications
43//!
44//! - `Q4_K`: 256-element super-blocks, 144 bytes (4.5 bits/weight)
45//! - `Q5_K`: 256-element super-blocks, 176 bytes (5.5 bits/weight)
//! - `Q6_K`: 256-element super-blocks, 210 bytes (6.5625 bits/weight)
47//!
48//! ## Usage
49//!
50//! ```rust
51//! use trueno_quant::{quantize_q4_k, dequantize_q4_k_to_f32};
52//!
53//! let data: Vec<f32> = (0..256).map(|i| i as f32 / 10.0).collect();
54//! let quantized = quantize_q4_k(&data);
55//! let restored = dequantize_q4_k_to_f32(&quantized, 256);
56//! ```
57
58#![warn(missing_docs)]
59
60mod dequantize;
61mod quantize;
62mod transpose;
63
64#[cfg(test)]
65mod tests;
66
67// Re-export all public functions so the public API doesn't change
68pub use dequantize::{dequantize_q4_k_to_f32, dequantize_q5_k_to_f32, dequantize_q6_k_to_f32};
69pub use quantize::{
70 quantize_q4_k, quantize_q4_k_matrix, quantize_q5_k, quantize_q5_k_matrix, quantize_q6_k,
71 quantize_q6_k_matrix,
72};
73pub use transpose::{transpose_q4k_for_matmul, transpose_q5k_for_matmul, transpose_q6k_for_matmul};
74
75// ============================================================================
76// Constants
77// ============================================================================
78
/// Floor applied to magnitudes before f16 encoding (~6.1e-5).
///
/// The smallest positive *normal* f16 value is 2^-14 ≈ 6.1035e-5; this
/// constant sits marginally below that exact threshold. Per the original
/// intent, clamping to at least this magnitude prevents NaN on a
/// round-trip through f16 encoding.
/// NOTE(review): values in (6.1e-5, 6.1035e-5) are still f16 subnormals —
/// confirm in `quantize`/`dequantize` (not visible here) that this slack
/// is intentional before tightening the constant.
pub const F16_MIN_NORMAL: f32 = 6.1e-5;

/// `Q4_K` super-block size (elements per block)
pub const Q4_K_BLOCK_SIZE: usize = 256;

/// `Q4_K` super-block byte size (144 B × 8 / 256 elems = 4.5 bits/weight)
pub const Q4_K_BLOCK_BYTES: usize = 144;

/// `Q5_K` super-block size (elements per block)
pub const Q5_K_BLOCK_SIZE: usize = 256;

/// `Q5_K` super-block byte size (176 B × 8 / 256 elems = 5.5 bits/weight)
pub const Q5_K_BLOCK_BYTES: usize = 176;

/// `Q6_K` super-block size (elements per block)
pub const Q6_K_BLOCK_SIZE: usize = 256;

/// `Q6_K` super-block byte size (210 B × 8 / 256 elems = 6.5625 bits/weight)
pub const Q6_K_BLOCK_BYTES: usize = 210;
100
101// ============================================================================
102// f16 Conversion Helpers
103// ============================================================================
104
105/// Convert f32 to f16 (using half crate)
106#[inline]
107#[must_use]
108pub fn f32_to_f16(value: f32) -> u16 {
109 half::f16::from_f32(value).to_bits()
110}
111
112/// Convert f16 to f32 (using half crate)
113#[inline]
114#[must_use]
115pub fn f16_to_f32(bits: u16) -> f32 {
116 half::f16::from_bits(bits).to_f32()
117}