irithyll_core/packed_i16.rs
1//! 8-byte quantized packed node format for integer-only inference.
2//!
3//! # Binary Format
4//!
5//! ```text
6//! [QuantizedEnsembleHeader: 16 bytes]
7//! [leaf_scale: f32] // 4 bytes
8//! [feature_scales: n_features × f32] // n_features × 4 bytes
9//! [TreeEntry × n_trees: 8 bytes each]
10//! [PackedNodeI16 × total_nodes: 8 bytes each]
11//! ```
12//!
13//! # Quantization scheme
14//!
15//! Per-feature quantization with shared scale: each feature gets its own scale
16//! factor `scale_f = 32767.0 / max(|threshold_values_for_feature_f|)`. Thresholds
17//! are stored as `(threshold * feature_scale[feat_idx]) as i16`. At predict time,
18//! features are quantized once: `(feature * feature_scale[feat_idx]) as i16`.
19//! Comparison is pure integer: `quantized_feature > quantized_threshold`.
20//!
21//! Leaf values use a separate global leaf scale: `leaf_i16 = (lr * leaf_f64 * leaf_scale) as i16`.
22//! Final prediction: `base + sum(leaf_i16) / leaf_scale` — accumulate in i32,
23//! dequantize once at the end.
24//!
25//! On Cortex-M0+ (no FPU), this eliminates all float ops from the hot loop.
26//! The only float ops happen once at the end for dequantization.
27
28/// 8-byte quantized decision tree node. Integer-only traversal for FPU-less targets.
29///
30/// 8 nodes per 64-byte cache line. All comparisons are i16 — no floating point
31/// in the hot loop. On Cortex-M0+ (no FPU), this is ~25x faster than f32.
32///
33/// # Field layout
34///
35/// - `value`: quantized split threshold (internal nodes) or quantized leaf prediction (leaf nodes).
36/// For splits: `(threshold * feature_scale[feat_idx]) as i16`.
37/// For leaves: `(lr * leaf_f64 * leaf_scale) as i16`.
38/// - `feature_flags`: bit 15 = is_leaf flag. Bits 14:0 = feature index (max 32767).
39/// For leaves, the feature index is unused.
40/// - `children`: packed left (low u16) and right (high u16) child indices.
41/// For leaves, this field is unused (set to 0).
42#[repr(C, align(4))]
43#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
44pub struct PackedNodeI16 {
45 /// Quantized split threshold (internal) or quantized leaf prediction (leaf).
46 pub value: i16,
47 /// Bit 15 = is_leaf. Bits 14:0 = feature index.
48 pub feature_flags: u16,
49 /// Packed children: left = low u16, right = high u16.
50 pub children: u32,
51}
52
53impl PackedNodeI16 {
54 /// Bit mask for the is_leaf flag in `feature_flags`.
55 pub const LEAF_FLAG: u16 = 0x8000;
56
57 /// Create a leaf node with a quantized prediction value.
58 #[inline]
59 pub const fn leaf(value: i16) -> Self {
60 Self {
61 value,
62 children: 0,
63 feature_flags: Self::LEAF_FLAG,
64 }
65 }
66
67 /// Create an internal (split) node with a quantized threshold.
68 #[inline]
69 pub const fn split(threshold: i16, feature_idx: u16, left: u16, right: u16) -> Self {
70 Self {
71 value: threshold,
72 children: (left as u32) | ((right as u32) << 16),
73 feature_flags: feature_idx & 0x7FFF,
74 }
75 }
76
77 /// Returns `true` if this is a leaf node.
78 #[inline]
79 pub const fn is_leaf(&self) -> bool {
80 self.feature_flags & Self::LEAF_FLAG != 0
81 }
82
83 /// Feature index (bits 14:0). Only meaningful for internal nodes.
84 #[inline]
85 pub const fn feature_idx(&self) -> u16 {
86 self.feature_flags & 0x7FFF
87 }
88
89 /// Left child index (low 16 bits of `children`).
90 #[inline]
91 pub const fn left_child(&self) -> u16 {
92 self.children as u16
93 }
94
95 /// Right child index (high 16 bits of `children`).
96 #[inline]
97 pub const fn right_child(&self) -> u16 {
98 (self.children >> 16) as u16
99 }
100}
101
102/// Header for quantized ensemble binary. 16 bytes, 4-byte aligned.
103///
104/// Uses magic `"IR16"` (0x49523136 LE) to distinguish from the f32 format (`"IRIT"`).
105/// Appears at the start of every quantized binary, followed by `leaf_scale` (f32),
106/// then `n_features` feature scale factors (f32 each), then the tree table and nodes.
107///
108/// Binary layout:
109/// ```text
110/// [QuantizedEnsembleHeader: 16 bytes]
111/// [leaf_scale: f32] // 4 bytes
112/// [feature_scales: n_features × f32] // n_features × 4 bytes
113/// [TreeEntry × n_trees: 8 bytes each]
114/// [PackedNodeI16 × total_nodes: 8 bytes each]
115/// ```
116#[repr(C, align(4))]
117#[derive(Clone, Copy, Debug, PartialEq)]
118pub struct QuantizedEnsembleHeader {
119 /// Magic bytes: 0x49523136 ("IR16" in LE ASCII).
120 pub magic: u32,
121 /// Format version (1).
122 pub version: u16,
123 /// Number of trees.
124 pub n_trees: u16,
125 /// Number of input features.
126 pub n_features: u16,
127 /// Reserved.
128 pub _reserved: u16,
129 /// Base prediction (f32).
130 pub base_prediction: f32,
131}
132
133impl QuantizedEnsembleHeader {
134 /// Magic value: "IR16" in little-endian ASCII.
135 pub const MAGIC: u32 = u32::from_le_bytes(*b"IR16");
136 /// Current format version.
137 pub const VERSION: u16 = 1;
138}
139
140// Re-export TreeEntry for convenience — same struct as packed.rs.
141pub use crate::packed::TreeEntry as QuantizedTreeEntry;
142
143#[cfg(test)]
144mod tests {
145 use super::*;
146 use core::mem::{align_of, size_of};
147
148 #[test]
149 fn packed_node_i16_is_8_bytes() {
150 assert_eq!(size_of::<PackedNodeI16>(), 8);
151 }
152
153 #[test]
154 fn packed_node_i16_alignment_is_4() {
155 assert_eq!(align_of::<PackedNodeI16>(), 4);
156 }
157
158 #[test]
159 fn quantized_header_is_16_bytes() {
160 assert_eq!(size_of::<QuantizedEnsembleHeader>(), 16);
161 }
162
163 #[test]
164 fn leaf_node_i16_roundtrip() {
165 let node = PackedNodeI16::leaf(1234);
166 assert!(node.is_leaf());
167 assert_eq!(node.value, 1234);
168 assert_eq!(node.children, 0);
169 }
170
171 #[test]
172 fn split_node_i16_roundtrip() {
173 let node = PackedNodeI16::split(5000, 7, 1, 2);
174 assert!(!node.is_leaf());
175 assert_eq!(node.feature_idx(), 7);
176 assert_eq!(node.value, 5000);
177 assert_eq!(node.left_child(), 1);
178 assert_eq!(node.right_child(), 2);
179 }
180
181 #[test]
182 fn max_feature_index_i16() {
183 // 15 bits = max 32767
184 let node = PackedNodeI16::split(0, 0x7FFF, 0, 0);
185 assert_eq!(node.feature_idx(), 0x7FFF);
186 assert!(!node.is_leaf());
187 }
188
189 #[test]
190 fn max_child_indices_i16() {
191 let node = PackedNodeI16::split(0, 0, u16::MAX, u16::MAX);
192 assert_eq!(node.left_child(), u16::MAX);
193 assert_eq!(node.right_child(), u16::MAX);
194 }
195
196 #[test]
197 fn eight_nodes_per_cache_line() {
198 // 8 x 8 = 64 bytes, fits exactly in 64-byte cache line
199 assert!(8 * size_of::<PackedNodeI16>() <= 64);
200 }
201
202 #[test]
203 fn header_magic_is_ir16() {
204 // "IR16" as little-endian bytes: I=0x49, R=0x52, 1=0x31, 6=0x36
205 let bytes = QuantizedEnsembleHeader::MAGIC.to_le_bytes();
206 assert_eq!(&bytes, b"IR16");
207 }
208
209 #[test]
210 fn leaf_negative_value_roundtrip() {
211 let node = PackedNodeI16::leaf(-32000);
212 assert!(node.is_leaf());
213 assert_eq!(node.value, -32000);
214 }
215
216 #[test]
217 fn split_negative_threshold_roundtrip() {
218 let node = PackedNodeI16::split(-16000, 3, 10, 20);
219 assert!(!node.is_leaf());
220 assert_eq!(node.value, -16000);
221 assert_eq!(node.feature_idx(), 3);
222 assert_eq!(node.left_child(), 10);
223 assert_eq!(node.right_child(), 20);
224 }
225
226 #[test]
227 fn tree_entry_reused_from_packed() {
228 // Verify QuantizedTreeEntry is the same type as TreeEntry
229 let entry: QuantizedTreeEntry = QuantizedTreeEntry {
230 n_nodes: 5,
231 offset: 40,
232 };
233 assert_eq!(entry.n_nodes, 5);
234 assert_eq!(entry.offset, 40);
235 assert_eq!(size_of::<QuantizedTreeEntry>(), 8);
236 }
237}