Skip to main content

trueno/vector/
mod.rs

1//! Vector type with multi-backend support
2//!
3//! This module provides the core `Vector<T>` type with SIMD-optimized operations
4//! across multiple backends (Scalar, SSE2, AVX2, AVX-512, NEON, WASM SIMD).
5//!
6//! GPU thresholds intentionally set to usize::MAX to disable GPU for element-wise ops.
7//! See docs/performance-analysis.md - GPU is 2-65,000x SLOWER than scalar for these ops.
8
9#![allow(clippy::absurd_extreme_comparisons)]
10
11// Submodules
12pub mod dispatch;
13mod ops;
14
15// Tests (~10K lines extracted for TDG compliance)
16#[cfg(test)]
17mod tests;
18
19use crate::{Backend, Result, TruenoError};
20
21/// High-performance vector with multi-backend support
22///
23/// # Examples
24///
25/// ```
26/// use trueno::Vector;
27///
28/// let a = Vector::from_slice(&[1.0, 2.0, 3.0]);
29/// let b = Vector::from_slice(&[4.0, 5.0, 6.0]);
30/// let result = a.add(&b).unwrap();
31///
32/// assert_eq!(result.as_slice(), &[5.0, 7.0, 9.0]);
33/// ```
34#[derive(Debug, Clone, PartialEq)]
35pub struct Vector<T> {
36    data: Vec<T>,
37    backend: Backend,
38}
39
40impl<T> Vector<T>
41where
42    T: Clone,
43{
44    /// Create vector from slice using auto-selected optimal backend
45    ///
46    /// # Performance
47    ///
48    /// Auto-selects the best available backend at creation time based on:
49    /// - CPU feature detection (AVX-512 > AVX2 > AVX > SSE2)
50    /// - Vector size (GPU for large workloads)
51    /// - Platform availability (NEON on ARM, WASM SIMD in browser)
52    ///
53    /// # Examples
54    ///
55    /// ```
56    /// use trueno::Vector;
57    ///
58    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
59    /// assert_eq!(v.len(), 4);
60    /// ```
61    pub fn from_slice(data: &[T]) -> Self {
62        Self { data: data.to_vec(), backend: crate::select_best_available_backend() }
63    }
64
65    /// Create vector from an existing Vec (takes ownership, no copy)
66    ///
67    /// This is more efficient than `from_slice` when you already have a Vec
68    /// and don't need to keep it, as it avoids an extra allocation and copy.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// use trueno::Vector;
74    ///
75    /// let data = vec![1.0, 2.0, 3.0];
76    /// let v = Vector::from_vec(data);
77    /// assert_eq!(v.len(), 3);
78    /// ```
79    pub fn from_vec(data: Vec<T>) -> Self {
80        Self { data, backend: crate::select_best_available_backend() }
81    }
82
83    /// Create vector with specific backend (for benchmarking or testing)
84    ///
85    /// # Examples
86    ///
87    /// ```
88    /// use trueno::{Vector, Backend};
89    ///
90    /// let v = Vector::from_slice_with_backend(&[1.0, 2.0], Backend::Scalar);
91    /// assert_eq!(v.len(), 2);
92    /// ```
93    pub fn from_slice_with_backend(data: &[T], backend: Backend) -> Self {
94        let resolved_backend = match backend {
95            Backend::Auto => crate::select_best_available_backend(),
96            other => other,
97        };
98
99        Self { data: data.to_vec(), backend: resolved_backend }
100    }
101}
102
103impl Vector<f32> {
104    /// Create vector with specified alignment for optimal SIMD performance
105    ///
106    /// This method attempts to create a vector with memory aligned to the specified byte boundary.
107    /// Note: Rust's Vec allocator may already provide sufficient alignment for most use cases.
108    /// This method validates the alignment requirement but uses standard Vec allocation.
109    ///
110    /// # Arguments
111    ///
112    /// * `size` - Number of elements to allocate
113    /// * `backend` - Backend to use for operations
114    /// * `alignment` - Requested alignment in bytes (must be power of 2: 16, 32, 64)
115    ///
116    /// # Recommended Alignments
117    ///
118    /// - SSE2: 16 bytes (128-bit)
119    /// - AVX2: 32 bytes (256-bit)
120    /// - AVX-512: 64 bytes (512-bit)
121    ///
122    /// # Note on Implementation
123    ///
124    /// Currently uses Rust's default Vec allocator, which typically provides 16-byte alignment
125    /// on modern systems. Custom allocators for specific alignments will be added in future versions.
126    ///
127    /// # Examples
128    ///
129    /// ```
130    /// use trueno::{Vector, Backend};
131    ///
132    /// // Create vector with requested 16-byte alignment
133    /// let v = Vector::with_alignment(100, Backend::SSE2, 16).unwrap();
134    /// assert_eq!(v.len(), 100);
135    /// ```
136    ///
137    /// # Errors
138    ///
139    /// Returns `TruenoError::InvalidInput` if alignment is not a power of 2.
140    pub fn with_alignment(size: usize, backend: Backend, alignment: usize) -> Result<Self> {
141        // Validate alignment is power of 2
142        if alignment == 0 || (alignment & (alignment - 1)) != 0 {
143            return Err(TruenoError::InvalidInput(format!(
144                "Alignment must be power of 2, got {}",
145                alignment
146            )));
147        }
148
149        // Resolve backend
150        let resolved_backend = match backend {
151            Backend::Auto => crate::select_best_available_backend(),
152            other => other,
153        };
154
155        // For now, use standard Vec allocation which typically provides good alignment
156        // Future enhancement: use custom allocator for guaranteed alignment > 16 bytes
157        let data = vec![0.0f32; size];
158
159        // Verify actual alignment (for informational purposes)
160        let ptr = data.as_ptr() as usize;
161        let actual_alignment = ptr & !(ptr - 1); // Find lowest set bit
162
163        // Log warning if alignment requirement not met (for future enhancement)
164        if alignment > actual_alignment {
165            // Note: This is not an error, just informational
166            // The unaligned loads in SSE2 (_mm_loadu_ps) will still work correctly
167            eprintln!(
168                "Note: Requested {}-byte alignment, got {}-byte alignment. Using unaligned loads.",
169                alignment, actual_alignment
170            );
171        }
172
173        Ok(Self { data, backend: resolved_backend })
174    }
175}
176
177impl<T> Vector<T>
178where
179    T: Clone,
180{
181    /// Get underlying data as slice
182    ///
183    /// # Examples
184    ///
185    /// ```
186    /// use trueno::Vector;
187    ///
188    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0]);
189    /// assert_eq!(v.as_slice(), &[1.0, 2.0, 3.0]);
190    /// ```
191    pub fn as_slice(&self) -> &[T] {
192        &self.data
193    }
194
195    /// Get vector length
196    ///
197    /// # Examples
198    ///
199    /// ```
200    /// use trueno::Vector;
201    ///
202    /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
203    /// assert_eq!(v.len(), 5);
204    /// ```
205    pub fn len(&self) -> usize {
206        self.data.len()
207    }
208
209    /// Check if vector is empty
210    ///
211    /// # Examples
212    ///
213    /// ```
214    /// use trueno::Vector;
215    ///
216    /// let v1: Vector<f32> = Vector::from_slice(&[]);
217    /// assert!(v1.is_empty());
218    ///
219    /// let v2 = Vector::from_slice(&[1.0]);
220    /// assert!(!v2.is_empty());
221    /// ```
222    pub fn is_empty(&self) -> bool {
223        self.data.is_empty()
224    }
225
226    /// Get the backend being used
227    pub fn backend(&self) -> Backend {
228        self.backend
229    }
230}
231
232// Note: Vector<f32> operations have been moved to submodules in ops/:
233// - ops/normalization.rs: zscore, minmax_normalize, layer_norm, layer_norm_simple, normalize
234// - ops/norms.rs: norm_l1, norm_l2, norm_linf
235// - ops/transforms.rs: abs, clamp, clip, lerp, sqrt, recip, pow
236// - ops/arithmetic.rs: add, sub, mul, div, scale, fma
237// - ops/reductions.rs: dot, sum, max, min, argmax, argmin, mean, variance, stddev, covariance, correlation
238// - ops/activations.rs: relu, sigmoid, gelu, etc.
239// - ops/transcendental.rs: exp, log, sin, cos, etc.
240// - ops/rounding.rs: floor, ceil, round, trunc, etc.