// irithyll_core/packed_i16.rs
//! 8-byte quantized packed node format for integer-only inference.
//!
//! # Binary Format
//!
//! ```text
//! [QuantizedEnsembleHeader: 16 bytes]
//! [leaf_scale: f32]                      // 4 bytes
//! [feature_scales: n_features × f32]     // n_features × 4 bytes
//! [TreeEntry × n_trees: 8 bytes each]
//! [PackedNodeI16 × total_nodes: 8 bytes each]
//! ```
//!
//! # Quantization scheme
//!
//! Per-feature quantization with shared scale: each feature gets its own scale
//! factor `scale_f = 32767.0 / max(|threshold_values_for_feature_f|)`. Thresholds
//! are stored as `(threshold * feature_scale[feat_idx]) as i16`. At predict time,
//! features are quantized once: `(feature * feature_scale[feat_idx]) as i16`.
//! Comparison is pure integer: `quantized_feature > quantized_threshold`.
//!
//! Leaf values use a separate global leaf scale: `leaf_i16 = (lr * leaf_f64 * leaf_scale) as i16`.
//! Final prediction: `base + sum(leaf_i16) / leaf_scale` — accumulate in i32,
//! dequantize once at the end.
//!
//! On Cortex-M0+ (no FPU), this eliminates all float ops from the hot loop.
//! The only float ops happen once at the end for dequantization.

/// 8-byte quantized decision tree node. Integer-only traversal for FPU-less targets.
///
/// 8 nodes per 64-byte cache line. All comparisons are i16 — no floating point
/// in the hot loop. On Cortex-M0+ (no FPU), this is ~25x faster than f32.
///
/// # Field layout
///
/// - `value`: quantized split threshold (internal nodes) or quantized leaf prediction (leaf nodes).
///   For splits: `(threshold * feature_scale[feat_idx]) as i16`.
///   For leaves: `(lr * leaf_f64 * leaf_scale) as i16`.
/// - `feature_flags`: bit 15 = is_leaf flag. Bits 14:0 = feature index (max 32767).
///   For leaves, the feature index is unused.
/// - `children`: packed left (low u16) and right (high u16) child indices.
///   For leaves, this field is unused (set to 0).
#[repr(C, align(4))]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct PackedNodeI16 {
    /// Quantized split threshold (internal) or quantized leaf prediction (leaf).
    pub value: i16,
    /// Bit 15 = is_leaf. Bits 14:0 = feature index.
    pub feature_flags: u16,
    /// Packed children: left = low u16, right = high u16.
    pub children: u32,
}

impl PackedNodeI16 {
    /// Bit mask for the is_leaf flag in `feature_flags`.
    pub const LEAF_FLAG: u16 = 0x8000;

    /// Bit mask for the feature-index bits (14:0) in `feature_flags`.
    pub const FEATURE_MASK: u16 = 0x7FFF;

    /// Create a leaf node with a quantized prediction value.
    ///
    /// `children` is zeroed and only the leaf flag is set; the feature-index
    /// bits are meaningless for leaves and stay clear.
    #[inline]
    pub const fn leaf(value: i16) -> Self {
        Self {
            value,
            children: 0,
            feature_flags: Self::LEAF_FLAG,
        }
    }

    /// Create an internal (split) node with a quantized threshold.
    ///
    /// # Panics
    ///
    /// Debug builds panic if `feature_idx` exceeds the 15-bit maximum (32767).
    /// Release builds keep the previous behavior and silently mask the index.
    #[inline]
    pub const fn split(threshold: i16, feature_idx: u16, left: u16, right: u16) -> Self {
        // Bit 15 of `feature_flags` is the leaf flag; an out-of-range index
        // would be silently truncated by the mask below, corrupting the tree.
        // Catch that programming error early in debug builds.
        debug_assert!(feature_idx <= Self::FEATURE_MASK);
        Self {
            value: threshold,
            children: (left as u32) | ((right as u32) << 16),
            feature_flags: feature_idx & Self::FEATURE_MASK,
        }
    }

    /// Returns `true` if this is a leaf node.
    #[inline]
    pub const fn is_leaf(&self) -> bool {
        self.feature_flags & Self::LEAF_FLAG != 0
    }

    /// Feature index (bits 14:0). Only meaningful for internal nodes.
    #[inline]
    pub const fn feature_idx(&self) -> u16 {
        self.feature_flags & Self::FEATURE_MASK
    }

    /// Left child index (low 16 bits of `children`).
    #[inline]
    pub const fn left_child(&self) -> u16 {
        self.children as u16
    }

    /// Right child index (high 16 bits of `children`).
    #[inline]
    pub const fn right_child(&self) -> u16 {
        (self.children >> 16) as u16
    }
}

/// Header for quantized ensemble binary. 16 bytes, 4-byte aligned.
///
/// Uses magic `"IR16"` (the four bytes 0x49 0x52 0x31 0x36, which read as the
/// little-endian u32 0x36315249) to distinguish from the f32 format (`"IRIT"`).
/// Appears at the start of every quantized binary, followed by `leaf_scale` (f32),
/// then `n_features` feature scale factors (f32 each), then the tree table and nodes.
///
/// Binary layout:
/// ```text
/// [QuantizedEnsembleHeader: 16 bytes]
/// [leaf_scale: f32]                      // 4 bytes
/// [feature_scales: n_features × f32]     // n_features × 4 bytes
/// [TreeEntry × n_trees: 8 bytes each]
/// [PackedNodeI16 × total_nodes: 8 bytes each]
/// ```
#[repr(C, align(4))]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct QuantizedEnsembleHeader {
    /// Magic: the ASCII bytes "IR16" read as a little-endian u32 (0x36315249).
    pub magic: u32,
    /// Format version (1).
    pub version: u16,
    /// Number of trees.
    pub n_trees: u16,
    /// Number of input features.
    pub n_features: u16,
    /// Reserved.
    pub _reserved: u16,
    /// Base prediction (f32).
    pub base_prediction: f32,
}

impl QuantizedEnsembleHeader {
    /// Magic value: "IR16" in little-endian ASCII (0x36315249 as a u32).
    pub const MAGIC: u32 = u32::from_le_bytes(*b"IR16");
    /// Current format version.
    pub const VERSION: u16 = 1;
}

/// Tree-table entry for the quantized format.
///
/// Re-export of `crate::packed::TreeEntry`: the quantized binary reuses the
/// same 8-byte tree table layout as the f32 format, so no separate type is
/// needed.
pub use crate::packed::TreeEntry as QuantizedTreeEntry;

#[cfg(test)]
mod tests {
    use super::*;
    use core::mem::{align_of, size_of};

    #[test]
    fn packed_node_i16_is_8_bytes() {
        assert_eq!(size_of::<PackedNodeI16>(), 8);
    }

    #[test]
    fn packed_node_i16_alignment_is_4() {
        assert_eq!(align_of::<PackedNodeI16>(), 4);
    }

    #[test]
    fn quantized_header_is_16_bytes() {
        assert_eq!(size_of::<QuantizedEnsembleHeader>(), 16);
    }

    #[test]
    fn quantized_header_alignment_is_4() {
        // repr(C, align(4)) — the header must stay 4-byte aligned for the
        // f32 fields that follow it in the binary.
        assert_eq!(align_of::<QuantizedEnsembleHeader>(), 4);
    }

    #[test]
    fn leaf_node_i16_roundtrip() {
        let node = PackedNodeI16::leaf(1234);
        assert!(node.is_leaf());
        assert_eq!(node.value, 1234);
        assert_eq!(node.children, 0);
    }

    #[test]
    fn split_node_i16_roundtrip() {
        let node = PackedNodeI16::split(5000, 7, 1, 2);
        assert!(!node.is_leaf());
        assert_eq!(node.feature_idx(), 7);
        assert_eq!(node.value, 5000);
        assert_eq!(node.left_child(), 1);
        assert_eq!(node.right_child(), 2);
    }

    #[test]
    fn max_feature_index_i16() {
        // 15 bits = max 32767
        let node = PackedNodeI16::split(0, 0x7FFF, 0, 0);
        assert_eq!(node.feature_idx(), 0x7FFF);
        assert!(!node.is_leaf());
    }

    #[test]
    fn max_child_indices_i16() {
        let node = PackedNodeI16::split(0, 0, u16::MAX, u16::MAX);
        assert_eq!(node.left_child(), u16::MAX);
        assert_eq!(node.right_child(), u16::MAX);
    }

    #[test]
    fn eight_nodes_per_cache_line() {
        // 8 x 8 = 64 bytes: exactly one 64-byte cache line. Use equality so a
        // layout regression in either direction (grow OR shrink) is caught.
        assert_eq!(8 * size_of::<PackedNodeI16>(), 64);
    }

    #[test]
    fn header_magic_is_ir16() {
        // "IR16" as little-endian bytes: I=0x49, R=0x52, 1=0x31, 6=0x36
        let bytes = QuantizedEnsembleHeader::MAGIC.to_le_bytes();
        assert_eq!(&bytes, b"IR16");
    }

    #[test]
    fn leaf_negative_value_roundtrip() {
        let node = PackedNodeI16::leaf(-32000);
        assert!(node.is_leaf());
        assert_eq!(node.value, -32000);
    }

    #[test]
    fn split_negative_threshold_roundtrip() {
        let node = PackedNodeI16::split(-16000, 3, 10, 20);
        assert!(!node.is_leaf());
        assert_eq!(node.value, -16000);
        assert_eq!(node.feature_idx(), 3);
        assert_eq!(node.left_child(), 10);
        assert_eq!(node.right_child(), 20);
    }

    #[test]
    fn tree_entry_reused_from_packed() {
        // Verify QuantizedTreeEntry is the same type as TreeEntry
        let entry: QuantizedTreeEntry = QuantizedTreeEntry {
            n_nodes: 5,
            offset: 40,
        };
        assert_eq!(entry.n_nodes, 5);
        assert_eq!(entry.offset, 40);
        assert_eq!(size_of::<QuantizedTreeEntry>(), 8);
    }
}