trueno/vector/mod.rs
1//! Vector type with multi-backend support
2//!
3//! This module provides the core `Vector<T>` type with SIMD-optimized operations
4//! across multiple backends (Scalar, SSE2, AVX2, AVX-512, NEON, WASM SIMD).
5//!
6//! GPU thresholds intentionally set to usize::MAX to disable GPU for element-wise ops.
7//! See docs/performance-analysis.md - GPU is 2-65,000x SLOWER than scalar for these ops.
8
9#![allow(clippy::absurd_extreme_comparisons)]
10
11// Submodules
12pub mod dispatch;
13mod ops;
14
15// Tests (~10K lines extracted for TDG compliance)
16#[cfg(test)]
17mod tests;
18
19use crate::{Backend, Result, TruenoError};
20
21/// High-performance vector with multi-backend support
22///
23/// # Examples
24///
25/// ```
26/// use trueno::Vector;
27///
28/// let a = Vector::from_slice(&[1.0, 2.0, 3.0]);
29/// let b = Vector::from_slice(&[4.0, 5.0, 6.0]);
30/// let result = a.add(&b).unwrap();
31///
32/// assert_eq!(result.as_slice(), &[5.0, 7.0, 9.0]);
33/// ```
34#[derive(Debug, Clone, PartialEq)]
35pub struct Vector<T> {
36 data: Vec<T>,
37 backend: Backend,
38}
39
40impl<T> Vector<T>
41where
42 T: Clone,
43{
44 /// Create vector from slice using auto-selected optimal backend
45 ///
46 /// # Performance
47 ///
48 /// Auto-selects the best available backend at creation time based on:
49 /// - CPU feature detection (AVX-512 > AVX2 > AVX > SSE2)
50 /// - Vector size (GPU for large workloads)
51 /// - Platform availability (NEON on ARM, WASM SIMD in browser)
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// use trueno::Vector;
57 ///
58 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0]);
59 /// assert_eq!(v.len(), 4);
60 /// ```
61 pub fn from_slice(data: &[T]) -> Self {
62 Self { data: data.to_vec(), backend: crate::select_best_available_backend() }
63 }
64
65 /// Create vector from an existing Vec (takes ownership, no copy)
66 ///
67 /// This is more efficient than `from_slice` when you already have a Vec
68 /// and don't need to keep it, as it avoids an extra allocation and copy.
69 ///
70 /// # Examples
71 ///
72 /// ```
73 /// use trueno::Vector;
74 ///
75 /// let data = vec![1.0, 2.0, 3.0];
76 /// let v = Vector::from_vec(data);
77 /// assert_eq!(v.len(), 3);
78 /// ```
79 pub fn from_vec(data: Vec<T>) -> Self {
80 Self { data, backend: crate::select_best_available_backend() }
81 }
82
83 /// Create vector with specific backend (for benchmarking or testing)
84 ///
85 /// # Examples
86 ///
87 /// ```
88 /// use trueno::{Vector, Backend};
89 ///
90 /// let v = Vector::from_slice_with_backend(&[1.0, 2.0], Backend::Scalar);
91 /// assert_eq!(v.len(), 2);
92 /// ```
93 pub fn from_slice_with_backend(data: &[T], backend: Backend) -> Self {
94 let resolved_backend = match backend {
95 Backend::Auto => crate::select_best_available_backend(),
96 other => other,
97 };
98
99 Self { data: data.to_vec(), backend: resolved_backend }
100 }
101}
102
103impl Vector<f32> {
104 /// Create vector with specified alignment for optimal SIMD performance
105 ///
106 /// This method attempts to create a vector with memory aligned to the specified byte boundary.
107 /// Note: Rust's Vec allocator may already provide sufficient alignment for most use cases.
108 /// This method validates the alignment requirement but uses standard Vec allocation.
109 ///
110 /// # Arguments
111 ///
112 /// * `size` - Number of elements to allocate
113 /// * `backend` - Backend to use for operations
114 /// * `alignment` - Requested alignment in bytes (must be power of 2: 16, 32, 64)
115 ///
116 /// # Recommended Alignments
117 ///
118 /// - SSE2: 16 bytes (128-bit)
119 /// - AVX2: 32 bytes (256-bit)
120 /// - AVX-512: 64 bytes (512-bit)
121 ///
122 /// # Note on Implementation
123 ///
124 /// Currently uses Rust's default Vec allocator, which typically provides 16-byte alignment
125 /// on modern systems. Custom allocators for specific alignments will be added in future versions.
126 ///
127 /// # Examples
128 ///
129 /// ```
130 /// use trueno::{Vector, Backend};
131 ///
132 /// // Create vector with requested 16-byte alignment
133 /// let v = Vector::with_alignment(100, Backend::SSE2, 16).unwrap();
134 /// assert_eq!(v.len(), 100);
135 /// ```
136 ///
137 /// # Errors
138 ///
139 /// Returns `TruenoError::InvalidInput` if alignment is not a power of 2.
140 pub fn with_alignment(size: usize, backend: Backend, alignment: usize) -> Result<Self> {
141 // Validate alignment is power of 2
142 if alignment == 0 || (alignment & (alignment - 1)) != 0 {
143 return Err(TruenoError::InvalidInput(format!(
144 "Alignment must be power of 2, got {}",
145 alignment
146 )));
147 }
148
149 // Resolve backend
150 let resolved_backend = match backend {
151 Backend::Auto => crate::select_best_available_backend(),
152 other => other,
153 };
154
155 // For now, use standard Vec allocation which typically provides good alignment
156 // Future enhancement: use custom allocator for guaranteed alignment > 16 bytes
157 let data = vec![0.0f32; size];
158
159 // Verify actual alignment (for informational purposes)
160 let ptr = data.as_ptr() as usize;
161 let actual_alignment = ptr & !(ptr - 1); // Find lowest set bit
162
163 // Log warning if alignment requirement not met (for future enhancement)
164 if alignment > actual_alignment {
165 // Note: This is not an error, just informational
166 // The unaligned loads in SSE2 (_mm_loadu_ps) will still work correctly
167 eprintln!(
168 "Note: Requested {}-byte alignment, got {}-byte alignment. Using unaligned loads.",
169 alignment, actual_alignment
170 );
171 }
172
173 Ok(Self { data, backend: resolved_backend })
174 }
175}
176
177impl<T> Vector<T>
178where
179 T: Clone,
180{
181 /// Get underlying data as slice
182 ///
183 /// # Examples
184 ///
185 /// ```
186 /// use trueno::Vector;
187 ///
188 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0]);
189 /// assert_eq!(v.as_slice(), &[1.0, 2.0, 3.0]);
190 /// ```
191 pub fn as_slice(&self) -> &[T] {
192 &self.data
193 }
194
195 /// Get vector length
196 ///
197 /// # Examples
198 ///
199 /// ```
200 /// use trueno::Vector;
201 ///
202 /// let v = Vector::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0]);
203 /// assert_eq!(v.len(), 5);
204 /// ```
205 pub fn len(&self) -> usize {
206 self.data.len()
207 }
208
209 /// Check if vector is empty
210 ///
211 /// # Examples
212 ///
213 /// ```
214 /// use trueno::Vector;
215 ///
216 /// let v1: Vector<f32> = Vector::from_slice(&[]);
217 /// assert!(v1.is_empty());
218 ///
219 /// let v2 = Vector::from_slice(&[1.0]);
220 /// assert!(!v2.is_empty());
221 /// ```
222 pub fn is_empty(&self) -> bool {
223 self.data.is_empty()
224 }
225
226 /// Get the backend being used
227 pub fn backend(&self) -> Backend {
228 self.backend
229 }
230}
231
232// Note: Vector<f32> operations have been moved to submodules in ops/:
233// - ops/normalization.rs: zscore, minmax_normalize, layer_norm, layer_norm_simple, normalize
234// - ops/norms.rs: norm_l1, norm_l2, norm_linf
235// - ops/transforms.rs: abs, clamp, clip, lerp, sqrt, recip, pow
236// - ops/arithmetic.rs: add, sub, mul, div, scale, fma
237// - ops/reductions.rs: dot, sum, max, min, argmax, argmin, mean, variance, stddev, covariance, correlation
238// - ops/activations.rs: relu, sigmoid, gelu, etc.
239// - ops/transcendental.rs: exp, log, sin, cos, etc.
240// - ops/rounding.rs: floor, ceil, round, trunc, etc.