// aprender/native/mod.rs
1//! SIMD-Native Model Format (spec §5)
2//!
3//! Provides types optimized for zero-copy SIMD inference with Trueno.
4//! Designed for maximum performance on CPU-based inference:
5//!
6//! - **64-byte alignment**: Compatible with AVX-512
7//! - **Contiguous storage**: No pointer chasing
8//! - **Row-major ordering**: Matches Trueno convention
9//! - **Cache-line optimization**: Efficient prefetch
10//!
11//! # Performance Targets
12//! - Linear (100 features, 1K samples): < 10 μs
13//! - K-Means (10 clusters, 100d, 1K samples): < 50 μs
14//! - Random Forest (100 trees, 1K samples): < 1 ms
15//!
16//! # Reference
17//! [Intel Intrinsics Guide], [Fog 2023] "Optimizing Software in C++"
18
19use std::mem::{align_of, size_of};
20
21use crate::format::ModelType;
22
/// Model format optimized for Trueno SIMD operations (spec §5.2)
///
/// Memory layout guarantees:
/// - 64-byte alignment (AVX-512 compatible)
/// - Contiguous storage (no pointer chasing)
/// - Row-major ordering (matches Trueno convention)
/// - Padding to SIMD width boundaries
///
/// # Example
/// ```
/// use aprender::native::{TruenoNativeModel, AlignedVec, ModelExtra};
/// use aprender::format::ModelType;
///
/// let params = AlignedVec::from_slice(&[0.5, -0.3, 0.8, 0.2]);
/// let bias = AlignedVec::from_slice(&[1.0]);
///
/// let model = TruenoNativeModel::new(
///     ModelType::LinearRegression,
///     4,   // n_params
///     4,   // n_features
///     1,   // n_outputs
/// )
/// .with_params(params)
/// .with_bias(bias);
///
/// assert_eq!(model.n_params, 4);
/// assert!(model.is_aligned());
/// ```
#[derive(Debug, Clone)]
pub struct TruenoNativeModel {
    /// Model type identifier
    pub model_type: ModelType,

    /// Number of parameters; `validate()` checks this against `params.len()`
    pub n_params: u32,

    /// Number of features expected in input; `predict_linear` rejects
    /// feature slices of any other length
    pub n_features: u32,

    /// Number of outputs (classes for classification, 1 for regression)
    pub n_outputs: u32,

    /// Model parameters (64-byte aligned); `None` until attached via `with_params`
    pub params: Option<AlignedVec<f32>>,

    /// Bias terms (64-byte aligned); `None` is treated as zero bias by `predict_linear`
    pub bias: Option<AlignedVec<f32>>,

    /// Additional model-specific data; `None` when the model needs none
    pub extra: Option<ModelExtra>,
}
74
75impl TruenoNativeModel {
76    /// Create a new native model skeleton
77    #[must_use]
78    pub const fn new(
79        model_type: ModelType,
80        n_params: u32,
81        n_features: u32,
82        n_outputs: u32,
83    ) -> Self {
84        Self {
85            model_type,
86            n_params,
87            n_features,
88            n_outputs,
89            params: None,
90            bias: None,
91            extra: None,
92        }
93    }
94
95    /// Set model parameters
96    #[must_use]
97    pub fn with_params(mut self, params: AlignedVec<f32>) -> Self {
98        self.params = Some(params);
99        self
100    }
101
102    /// Set bias terms
103    #[must_use]
104    pub fn with_bias(mut self, bias: AlignedVec<f32>) -> Self {
105        self.bias = Some(bias);
106        self
107    }
108
109    /// Set extra model data
110    #[must_use]
111    pub fn with_extra(mut self, extra: ModelExtra) -> Self {
112        self.extra = Some(extra);
113        self
114    }
115
116    /// Check if all buffers are properly aligned
117    #[must_use]
118    pub fn is_aligned(&self) -> bool {
119        let params_aligned = self.params.as_ref().map_or(true, AlignedVec::is_aligned);
120        let bias_aligned = self.bias.as_ref().map_or(true, AlignedVec::is_aligned);
121        params_aligned && bias_aligned
122    }
123
124    /// Total size in bytes (including alignment padding)
125    #[must_use]
126    pub fn size_bytes(&self) -> usize {
127        let params_size = self.params.as_ref().map_or(0, AlignedVec::size_bytes);
128        let bias_size = self.bias.as_ref().map_or(0, AlignedVec::size_bytes);
129        let extra_size = self.extra.as_ref().map_or(0, ModelExtra::size_bytes);
130        params_size + bias_size + extra_size
131    }
132
133    /// Validate model structure
134    pub fn validate(&self) -> Result<(), NativeModelError> {
135        // Check params match declared count
136        if let Some(ref params) = self.params {
137            if params.len() != self.n_params as usize {
138                return Err(NativeModelError::ParamCountMismatch {
139                    declared: self.n_params as usize,
140                    actual: params.len(),
141                });
142            }
143        }
144
145        // Check for NaN/Inf in params
146        if let Some(ref params) = self.params {
147            for (i, &val) in params.as_slice().iter().enumerate() {
148                if !val.is_finite() {
149                    return Err(NativeModelError::InvalidParameter {
150                        index: i,
151                        value: val,
152                    });
153                }
154            }
155        }
156
157        // Check for NaN/Inf in bias
158        if let Some(ref bias) = self.bias {
159            for (i, &val) in bias.as_slice().iter().enumerate() {
160                if !val.is_finite() {
161                    return Err(NativeModelError::InvalidBias {
162                        index: i,
163                        value: val,
164                    });
165                }
166            }
167        }
168
169        Ok(())
170    }
171
172    /// Get raw pointer to parameters for SIMD operations
173    ///
174    /// # Safety
175    /// Caller must ensure the returned pointer is not used after the model is dropped.
176    #[must_use]
177    pub fn params_ptr(&self) -> Option<*const f32> {
178        self.params.as_ref().map(AlignedVec::as_ptr)
179    }
180
181    /// Get raw pointer to bias for SIMD operations
182    ///
183    /// # Safety
184    /// Caller must ensure the returned pointer is not used after the model is dropped.
185    #[must_use]
186    pub fn bias_ptr(&self) -> Option<*const f32> {
187        self.bias.as_ref().map(AlignedVec::as_ptr)
188    }
189
190    /// Predict for a single sample (linear models only)
191    ///
192    /// Uses naive implementation for validation; production code should use
193    /// Trueno SIMD operations.
194    pub fn predict_linear(&self, features: &[f32]) -> Result<f32, NativeModelError> {
195        if features.len() != self.n_features as usize {
196            return Err(NativeModelError::FeatureMismatch {
197                expected: self.n_features as usize,
198                got: features.len(),
199            });
200        }
201
202        let params = self
203            .params
204            .as_ref()
205            .ok_or(NativeModelError::MissingParams)?;
206
207        let dot: f32 = params
208            .as_slice()
209            .iter()
210            .zip(features.iter())
211            .map(|(p, x)| p * x)
212            .sum();
213
214        let bias = self
215            .bias
216            .as_ref()
217            .and_then(|b| b.as_slice().first().copied())
218            .unwrap_or(0.0);
219
220        Ok(dot + bias)
221    }
222}
223
224impl Default for TruenoNativeModel {
225    fn default() -> Self {
226        Self::new(ModelType::LinearRegression, 0, 0, 1)
227    }
228}
229
/// 64-byte aligned vector for SIMD operations (spec §5.2)
///
/// Provides memory-aligned storage for efficient SIMD access.
/// Capacity is rounded up so the backing byte size is a multiple of 64
/// (AVX-512 cache-line width).
///
/// # Memory Layout
/// - Data is stored in a Vec with additional alignment tracking
/// - Capacity is rounded up to 64-byte boundaries (including after growth)
/// - Provides raw pointers for FFI/SIMD operations
///
/// # Example
/// ```
/// use aprender::native::AlignedVec;
///
/// let vec = AlignedVec::from_slice(&[1.0_f32, 2.0, 3.0, 4.0]);
/// assert!(vec.is_aligned());
/// assert_eq!(vec.len(), 4);
///
/// // Access as slice
/// assert_eq!(vec.as_slice(), &[1.0, 2.0, 3.0, 4.0]);
/// ```
#[derive(Debug, Clone)]
pub struct AlignedVec<T: Copy + Default> {
    /// The underlying data (length equals the aligned capacity)
    data: Vec<T>,
    /// Logical length (may be less than capacity)
    len: usize,
    /// Aligned capacity in elements
    capacity: usize,
}

impl<T: Copy + Default> AlignedVec<T> {
    /// Round an element count up so the backing byte size is a multiple of 64.
    ///
    /// Zero-sized types are returned unchanged. For element sizes that do not
    /// divide 64 the final division can round down, so the result is clamped
    /// to at least `requested`.
    fn aligned_len(requested: usize) -> usize {
        let elem = size_of::<T>();
        if elem == 0 {
            return requested;
        }
        let bytes = requested * elem;
        let padded = (bytes + 63) / 64 * 64;
        (padded / elem).max(requested)
    }

    /// Create with capacity rounded up to a 64-byte boundary.
    ///
    /// The logical length starts at 0; the backing storage is
    /// default-initialized.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let aligned_cap = Self::aligned_len(capacity);
        Self {
            data: vec![T::default(); aligned_cap],
            len: 0,
            capacity: aligned_cap,
        }
    }

    /// Create from a slice, copying data into aligned storage.
    #[must_use]
    pub fn from_slice(slice: &[T]) -> Self {
        let mut vec = Self::with_capacity(slice.len());
        vec.data[..slice.len()].copy_from_slice(slice);
        vec.len = slice.len();
        vec
    }

    /// Create with `len` elements set to `T::default()` (zero for numeric types).
    #[must_use]
    pub fn zeros(len: usize) -> Self {
        let mut vec = Self::with_capacity(len);
        vec.len = len;
        vec
    }

    /// Logical length (number of initialized elements).
    #[must_use]
    pub const fn len(&self) -> usize {
        self.len
    }

    /// Check if empty.
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Aligned capacity in elements.
    #[must_use]
    pub const fn capacity(&self) -> usize {
        self.capacity
    }

    /// Get raw pointer to the start of the backing storage.
    #[must_use]
    pub fn as_ptr(&self) -> *const T {
        self.data.as_ptr()
    }

    /// Get mutable raw pointer to the start of the backing storage.
    #[must_use]
    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.data.as_mut_ptr()
    }

    /// Get the logical contents as a slice (padding excluded).
    #[must_use]
    pub fn as_slice(&self) -> &[T] {
        &self.data[..self.len]
    }

    /// Get the logical contents as a mutable slice (padding excluded).
    pub fn as_mut_slice(&mut self) -> &mut [T] {
        &mut self.data[..self.len]
    }

    /// Check alignment (for debugging).
    ///
    /// Note: Standard Rust Vec does not guarantee 64-byte alignment.
    /// This function checks if the data pointer happens to be aligned.
    /// For true SIMD-aligned allocations, use a specialized allocator.
    #[must_use]
    pub fn is_aligned(&self) -> bool {
        // Empty buffers and zero-sized types are trivially aligned.
        if self.data.is_empty() || size_of::<T>() == 0 {
            return true;
        }
        // Check at least the type's natural alignment.
        self.data.as_ptr() as usize % align_of::<T>() == 0
    }

    /// Size in bytes of the logical data (not the padded capacity).
    #[must_use]
    pub fn size_bytes(&self) -> usize {
        self.len * size_of::<T>()
    }

    /// Push a value, reallocating if at capacity.
    pub fn push(&mut self, value: T) {
        if self.len >= self.data.len() {
            // BUG FIX: growth previously used a raw doubled count, so the new
            // capacity was not rounded to a 64-byte boundary and the type's
            // alignment-capacity invariant was lost after the first realloc.
            // Round the doubled capacity through the same helper as
            // `with_capacity`.
            let new_cap = Self::aligned_len((self.capacity * 2).max(16));
            let mut new_data = vec![T::default(); new_cap];
            new_data[..self.len].copy_from_slice(&self.data[..self.len]);
            self.data = new_data;
            self.capacity = new_cap;
        }
        self.data[self.len] = value;
        self.len += 1;
    }

    /// Clear the vector (keeps capacity).
    pub fn clear(&mut self) {
        self.len = 0;
    }

    /// Get element by index; `None` if `index >= len`.
    #[must_use]
    pub fn get(&self, index: usize) -> Option<&T> {
        if index < self.len {
            Some(&self.data[index])
        } else {
            None
        }
    }

    /// Get mutable element by index; `None` if `index >= len`.
    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
        if index < self.len {
            Some(&mut self.data[index])
        } else {
            None
        }
    }

    /// Set element by index; returns `false` if `index >= len`.
    pub fn set(&mut self, index: usize, value: T) -> bool {
        if index < self.len {
            self.data[index] = value;
            true
        } else {
            false
        }
    }
}

impl<T: Copy + Default> Default for AlignedVec<T> {
    /// An empty vector with zero capacity.
    fn default() -> Self {
        Self::with_capacity(0)
    }
}

impl<T: Copy + Default> std::ops::Index<usize> for AlignedVec<T> {
    type Output = T;

    /// # Panics
    /// Panics if `index >= self.len()`.
    fn index(&self, index: usize) -> &Self::Output {
        // BUG FIX: bound by the logical length, not the padded capacity.
        // Previously `v[i]` for len <= i < capacity silently returned
        // default-initialized padding, inconsistent with `get`/`as_slice`.
        assert!(
            index < self.len,
            "index {} out of bounds (len {})",
            index,
            self.len
        );
        &self.data[index]
    }
}

impl<T: Copy + Default> std::ops::IndexMut<usize> for AlignedVec<T> {
    /// # Panics
    /// Panics if `index >= self.len()`.
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        // Same logical-length bound as `Index` (see above).
        assert!(
            index < self.len,
            "index {} out of bounds (len {})",
            index,
            self.len
        );
        &mut self.data[index]
    }
}

impl<T: Copy + Default> FromIterator<T> for AlignedVec<T> {
    /// Collect into aligned storage (materializes a `Vec` first to size the
    /// allocation, then copies into the aligned buffer).
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
        let vec: Vec<T> = iter.into_iter().collect();
        Self::from_slice(&vec)
    }
}

impl<T: Copy + Default + PartialEq> PartialEq for AlignedVec<T> {
    /// Equality compares logical contents only; capacity/padding is ignored.
    fn eq(&self, other: &Self) -> bool {
        self.as_slice() == other.as_slice()
    }
}
442
443mod model_extra;
444pub use model_extra::*;