// trueno_quant/src/lib.rs
// Relax pedantic lints inside the test harness only; production code still
// forbids unwrap/expect/panic etc.
#![cfg_attr(
    test,
    allow(
        clippy::expect_used,
        clippy::unwrap_used,
        clippy::disallowed_methods,
        clippy::float_cmp,
        clippy::panic,
        clippy::cast_precision_loss
    )
)]
//! K-Quantization formats for GGUF/APR model weights (Toyota Way: ONE source of truth)
//!
//! This crate provides quantization functions for converting F32 data to
//! K-quantization formats (`Q4_K`, `Q5_K`, `Q6_K`). This is the ONLY implementation
//! in the Sovereign AI Stack - aprender and realizar import from here.
//!
//! ## Stack Architecture (Toyota Way)
//!
//! ```text
//!        ┌─────────┐
//!        │ apr CLI │
//!        └────┬────┘
//!             │
//!     ┌───────┼───────┬───────────┐
//!     ▼       ▼       ▼           ▼
//! ┌────────┐ ┌────────┐ ┌─────────┐
//! │entrenar│ │aprender│ │realizar │
//! └───┬────┘ └───┬────┘ └────┬────┘
//!     │          │           │
//!     └────┬─────┴───────────┴────┘
//!          ▼
//!       ┌────────────────┐
//!       │  trueno-quant  │  ← YOU ARE HERE
//!       └───────┬────────┘
//!               ▼
//!       ┌────────────────┐
//!       │     trueno     │
//!       └────────────────┘
//! ```
//!
//! ## Format Specifications
//!
//! - `Q4_K`: 256-element super-blocks, 144 bytes (4.5 bits/weight)
//! - `Q5_K`: 256-element super-blocks, 176 bytes (5.5 bits/weight)
//! - `Q6_K`: 256-element super-blocks, 210 bytes (6.5 bits/weight)
//!
//! ## Usage
//!
//! ```rust
//! use trueno_quant::{quantize_q4_k, dequantize_q4_k_to_f32};
//!
//! let data: Vec<f32> = (0..256).map(|i| i as f32 / 10.0).collect();
//! let quantized = quantize_q4_k(&data);
//! let restored = dequantize_q4_k_to_f32(&quantized, 256);
//! ```

// Every public item below must carry a doc comment.
#![warn(missing_docs)]

mod dequantize;
mod quantize;
mod transpose;

#[cfg(test)]
mod tests;

// Re-export all public functions so the public API doesn't change
pub use dequantize::{dequantize_q4_k_to_f32, dequantize_q5_k_to_f32, dequantize_q6_k_to_f32};
pub use quantize::{
    quantize_q4_k, quantize_q4_k_matrix, quantize_q5_k, quantize_q5_k_matrix, quantize_q6_k,
    quantize_q6_k_matrix,
};
pub use transpose::{transpose_q4k_for_matmul, transpose_q5k_for_matmul, transpose_q6k_for_matmul};
// ============================================================================
// Constants
// ============================================================================

/// Minimum valid f16 normal value (~6.1e-5).
/// Used as a floor so values survive a round-trip through f16 encoding
/// without producing NaN (per the original comment; see quantize/dequantize).
pub const F16_MIN_NORMAL: f32 = 6.1e-5;

/// `Q4_K` super-block size (elements per block)
pub const Q4_K_BLOCK_SIZE: usize = 256;

/// `Q4_K` super-block byte size (144 bytes / 256 elems = 4.5 bits per weight)
pub const Q4_K_BLOCK_BYTES: usize = 144;

/// `Q5_K` super-block size (elements per block)
pub const Q5_K_BLOCK_SIZE: usize = 256;

/// `Q5_K` super-block byte size (176 bytes / 256 elems = 5.5 bits per weight)
pub const Q5_K_BLOCK_BYTES: usize = 176;

/// `Q6_K` super-block size (elements per block)
pub const Q6_K_BLOCK_SIZE: usize = 256;

/// `Q6_K` super-block byte size (210 bytes / 256 elems = 6.5 bits per weight)
pub const Q6_K_BLOCK_BYTES: usize = 210;

101// ============================================================================
102// f16 Conversion Helpers
103// ============================================================================
104
105/// Convert f32 to f16 (using half crate)
106#[inline]
107#[must_use]
108pub fn f32_to_f16(value: f32) -> u16 {
109    half::f16::from_f32(value).to_bits()
110}
111
112/// Convert f16 to f32 (using half crate)
113#[inline]
114#[must_use]
115pub fn f16_to_f32(bits: u16) -> f32 {
116    half::f16::from_bits(bits).to_f32()
117}