1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//! K-Quantization formats for GGUF/APR model weights (Toyota Way: ONE source of truth)
//!
//! This crate provides quantization functions for converting F32 data to
//! K-quantization formats (`Q4_K`, `Q5_K`, `Q6_K`). This is the ONLY implementation
//! in the Sovereign AI Stack - aprender and realizar import from here.
//!
//! ## Stack Architecture (Toyota Way)
//!
//! ```text
//!              ┌─────────┐
//!              │ apr CLI │
//!              └────┬────┘
//!                   │
//!     ┌─────────────┼──────────────┐
//!     ▼             ▼              ▼
//! ┌────────┐    ┌────────┐    ┌─────────┐
//! │entrenar│    │aprender│    │realizar │
//! └───┬────┘    └───┬────┘    └────┬────┘
//!     │             │              │
//!     └─────────────┼──────────────┘
//!                   ▼
//!           ┌────────────────┐
//!           │  trueno-quant  │ ← YOU ARE HERE
//!           └───────┬────────┘
//!                   ▼
//!           ┌────────────────┐
//!           │     trueno     │
//!           └────────────────┘
//! ```
//!
//! ## Format Specifications
//!
//! - `Q4_K`: 256-element super-blocks, 144 bytes (4.5 bits/weight)
//! - `Q5_K`: 256-element super-blocks, 176 bytes (5.5 bits/weight)
//! - `Q6_K`: 256-element super-blocks, 210 bytes (6.5625 bits/weight)
//!
//! ## Usage
//!
//! ```rust
//! use trueno_quant::{quantize_q4_k, dequantize_q4_k_to_f32};
//!
//! let data: Vec<f32> = (0..256).map(|i| i as f32 / 10.0).collect();
//! let quantized = quantize_q4_k(&data);
//! let restored = dequantize_q4_k_to_f32(&quantized, 256);
//! ```
// Re-export all public functions so the public API doesn't change
pub use ;
pub use ;
pub use ;
// ============================================================================
// Constants
// ============================================================================
/// Smallest magnitude (~6.1e-5) treated as a safe normal value for f16 encoding.
///
/// Stated purpose: avoid NaN when a value round-trips through f16 encoding.
/// NOTE(review): the exact smallest positive normal f16 is 2^-14 ≈ 6.1035e-5,
/// marginally above this literal — confirm the round-down is intentional.
pub const F16_MIN_NORMAL: f32 = 6.1e-5;

/// Number of weights packed into one `Q4_K` super-block.
pub const Q4_K_BLOCK_SIZE: usize = 256;
/// Number of weights packed into one `Q5_K` super-block (same count as `Q4_K`).
pub const Q5_K_BLOCK_SIZE: usize = Q4_K_BLOCK_SIZE;
/// Number of weights packed into one `Q6_K` super-block (same count as `Q4_K`).
pub const Q6_K_BLOCK_SIZE: usize = Q4_K_BLOCK_SIZE;

/// Encoded size in bytes of one `Q4_K` super-block (144 * 8 / 256 = 4.5 bits per weight).
pub const Q4_K_BLOCK_BYTES: usize = 144;
/// Encoded size in bytes of one `Q5_K` super-block (176 * 8 / 256 = 5.5 bits per weight).
pub const Q5_K_BLOCK_BYTES: usize = 176;
/// Encoded size in bytes of one `Q6_K` super-block (210 * 8 / 256 = 6.5625 bits per weight).
pub const Q6_K_BLOCK_BYTES: usize = 210;
// ============================================================================
// f16 Conversion Helpers
// ============================================================================
/// Convert f32 to f16 (using half crate)
/// Convert f16 to f32 (using half crate)