// scirs2_core/ndarray_ext/preprocessing.rs
//! SIMD-accelerated preprocessing operations for array normalization and standardization
//!
//! This module provides high-performance implementations of common data preprocessing
//! operations that are critical for machine learning pipelines, statistical analysis,
//! and scientific computing.
//!
//! # Operations
//!
//! - **L2 Normalization** (`normalize_simd`): Converts vectors to unit length
//! - **Z-Score Standardization** (`standardize_simd`): Zero mean, unit variance
//! - **Value Clipping** (`clip_simd`): Bounds values to a specified range
//! - **Softmax** (`softmax_simd`): Converts scores to a probability distribution
//! - **ReLU / Leaky ReLU** (`relu_simd`, `leaky_relu_simd`): Common neural-network activations
//!
//! # Performance
//!
//! All operations automatically use SIMD acceleration when:
//! - Platform supports AVX2 (x86_64) or NEON (ARM)
//! - Array size is large enough to benefit from vectorization
//! - Array memory layout is contiguous
//!
//! Falls back to scalar implementations for small arrays or unsupported platforms.
//!
//! # Examples
//!
//! ```
//! use scirs2_core::ndarray::array;
//! use scirs2_core::ndarray_ext::preprocessing::{normalize_simd, standardize_simd, clip_simd};
//!
//! // L2 normalization - convert to unit vector
//! let x = array![3.0, 4.0]; // norm = 5
//! let normalized = normalize_simd(&x.view());
//! // Result: [0.6, 0.8]
//!
//! // Z-score standardization
//! let data = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
//! let standardized = standardize_simd(&data.view());
//! // Result: mean ≈ 0, std ≈ 1
//!
//! // Value clipping
//! let values = array![-10.0, -5.0, 0.0, 5.0, 10.0];
//! let clipped = clip_simd(&values.view(), -3.0, 7.0);
//! // Result: [-3.0, -3.0, 0.0, 5.0, 7.0]
//! ```
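//!
//! The activation helpers in this module (`softmax_simd`, `relu_simd`, `leaky_relu_simd`)
//! follow the same SIMD/scalar dispatch. A minimal sketch (assuming they are reachable
//! through the same module path as the functions above):
//!
//! ```
//! use scirs2_core::ndarray::array;
//! use scirs2_core::ndarray_ext::preprocessing::{relu_simd, softmax_simd};
//!
//! // ReLU zeroes out negative values
//! let activated = relu_simd(&array![-1.0_f64, 2.0].view());
//! assert_eq!(activated[0], 0.0);
//! assert_eq!(activated[1], 2.0);
//!
//! // Softmax turns raw scores into a probability distribution
//! let probs = softmax_simd(&array![0.0_f64, 1.0].view());
//! assert!((probs.iter().sum::<f64>() - 1.0).abs() < 1e-10);
//! ```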

use crate::numeric::Float;
use crate::simd_ops::{AutoOptimizer, SimdUnifiedOps};
use ::ndarray::{Array1, ArrayView1};

/// Normalize a 1D array to unit length using L2 norm (SIMD-accelerated).
///
/// Computes x / ||x||₂ where ||x||₂ is the Euclidean (L2) norm.
/// The resulting vector will have unit length (norm = 1).
///
/// # Arguments
///
/// * `x` - Input 1D array to normalize
///
/// # Returns
///
/// `Array1<F>` with the same length as input, normalized to unit length.
/// Returns zero array if input norm is zero or NaN.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// normalize(x) = x / ||x||₂
/// where ||x||₂ = sqrt(Σ xᵢ²)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::normalize_simd;
///
/// // 3-4-5 triangle
/// let x = array![3.0_f64, 4.0];
/// let result = normalize_simd(&x.view());
///
/// // norm = sqrt(9 + 16) = 5
/// // result = [3/5, 4/5] = [0.6, 0.8]
/// assert!((result[0] - 0.6_f64).abs() < 1e-10);
/// assert!((result[1] - 0.8_f64).abs() < 1e-10);
///
/// // Verify unit norm
/// let norm: f64 = result.iter().map(|x| x * x).sum::<f64>().sqrt();
/// assert!((norm - 1.0_f64).abs() < 1e-10);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero vector**: Returns zero array (undefined normalization)
/// - **Single element**: Returns array with single element ±1
/// - **NaN values**: Returns zero array
///
/// # Applications
///
/// - **Machine Learning**: Feature scaling for algorithms sensitive to magnitude
/// - **Cosine Similarity**: Prerequisite for efficient similarity computation (see the example below)
/// - **Direction Vectors**: Unit direction vectors in physics/graphics
/// - **Text Processing**: TF-IDF normalization in NLP
/// - **Signal Processing**: Normalized cross-correlation
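///
/// # Example: Cosine Similarity
///
/// Once two vectors are unit-normalized, their dot product equals their cosine
/// similarity. A minimal sketch (the dot product is computed inline; no dedicated
/// helper from this module is assumed):
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::normalize_simd;
///
/// let a = normalize_simd(&array![1.0_f64, 0.0].view());
/// let b = normalize_simd(&array![1.0_f64, 1.0].view());
///
/// // The angle between the vectors is 45°, so the cosine is 1/√2
/// let cosine: f64 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
/// assert!((cosine - std::f64::consts::FRAC_1_SQRT_2).abs() < 1e-10);
/// ```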
pub fn normalize_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_normalize(x);
    }

    // Scalar fallback for small arrays
    let norm = x
        .iter()
        .map(|&val| val * val)
        .fold(F::zero(), |acc, val| acc + val)
        .sqrt();

    if norm == F::zero() || norm.is_nan() {
        return Array1::zeros(x.len());
    }

    x.mapv(|val| val / norm)
}

/// Standardize a 1D array to zero mean and unit variance (SIMD-accelerated).
///
/// Computes (x - μ) / σ where μ is the mean and σ is the sample standard deviation.
/// The resulting array will have mean ≈ 0 and standard deviation ≈ 1.
///
/// # Arguments
///
/// * `x` - Input 1D array to standardize
///
/// # Returns
///
/// `Array1<F>` with the same length as input, standardized to zero mean and unit variance.
/// Returns zero array if input has <= 1 element or zero standard deviation.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// standardize(x) = (x - μ) / σ
/// where:
///   μ = (1/n) Σ xᵢ                    (sample mean)
///   σ = sqrt((1/(n-1)) Σ (xᵢ - μ)²)   (sample std, ddof=1)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::standardize_simd;
///
/// let x = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
/// let result = standardize_simd(&x.view());
///
/// // Verify mean ≈ 0
/// let mean: f64 = result.iter().sum::<f64>() / result.len() as f64;
/// assert!(mean.abs() < 1e-10);
///
/// // Verify std ≈ 1 (sample std with ddof=1)
/// let variance: f64 = result.iter()
///     .map(|&x| x * x)
///     .sum::<f64>() / (result.len() - 1) as f64;
/// let std = variance.sqrt();
/// assert!((std - 1.0).abs() < 1e-10);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Single element**: Returns zero array (std undefined for n=1)
/// - **Constant array**: Returns zero array (std = 0)
/// - **NaN values**: Returns zero array
///
/// # Applications
///
/// - **Machine Learning**: Feature preprocessing for models assuming normally distributed features
/// - **Statistical Analysis**: Z-score computation for outlier detection
/// - **Time Series**: Detrending and variance normalization
/// - **Data Science**: Preparing features for PCA, clustering, regression
/// - **Neural Networks**: Input normalization for faster convergence
///
/// # Implementation Notes
///
/// Uses the **sample standard deviation** (ddof=1, Bessel's correction) rather than
/// the population standard deviation (ddof=0). This matches the pandas default;
/// note that NumPy's `np.std` and `scipy.stats.zscore` default to ddof=0, so pass
/// `ddof=1` to those functions to reproduce this behavior exactly.
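///
/// For a quick worked illustration of the ddof=1 convention (not additional API):
///
/// ```text
/// x = [1, 2, 3]
/// μ = (1 + 2 + 3) / 3 = 2
/// sample variance  = ((1-2)² + (2-2)² + (3-2)²) / (3 - 1) = 2 / 2 = 1
/// sample std (σ)   = 1
/// population std (ddof=0) would instead be sqrt(2/3) ≈ 0.816
/// ```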
pub fn standardize_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    if x.len() <= 1 {
        return Array1::zeros(x.len());
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_standardize(x);
    }

    // Scalar fallback for small arrays
    let n = F::from(x.len()).unwrap();
    let mean = x.iter().fold(F::zero(), |acc, &val| acc + val) / n;

    let n_minus_1 = F::from(x.len() - 1).unwrap();
    let variance = x
        .iter()
        .map(|&val| {
            let diff = val - mean;
            diff * diff
        })
        .fold(F::zero(), |acc, val| acc + val)
        / n_minus_1;

    let std = variance.sqrt();

    if std == F::zero() || std.is_nan() {
        return Array1::zeros(x.len());
    }

    x.mapv(|val| (val - mean) / std)
}

/// Clip (clamp) array values to a specified range (SIMD-accelerated).
///
/// Constrains each element to be within [min_val, max_val]:
/// - Values < min_val are set to min_val
/// - Values > max_val are set to max_val
/// - Values within range are unchanged
///
/// # Arguments
///
/// * `x` - Input 1D array to clip
/// * `min_val` - Minimum allowed value
/// * `max_val` - Maximum allowed value
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with values clipped to [min_val, max_val].
///
/// # Panics
///
/// Panics if `min_val > max_val`.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// clip(x, min, max) = {
///     min   if x < min
///     max   if x > max
///     x     otherwise
/// }
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::clip_simd;
///
/// let x = array![-10.0, -5.0, 0.0, 5.0, 10.0];
/// let result = clip_simd(&x.view(), -3.0, 7.0);
///
/// // Values clipped to [-3.0, 7.0]
/// assert_eq!(result[0], -3.0); // -10 -> -3
/// assert_eq!(result[1], -3.0); // -5 -> -3
/// assert_eq!(result[2], 0.0);  // 0 unchanged
/// assert_eq!(result[3], 5.0);  // 5 unchanged
/// assert_eq!(result[4], 7.0);  // 10 -> 7
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **min_val == max_val**: All values set to that value
/// - **NaN values**: Handling differs by code path; the scalar fallback passes NaN
///   through unchanged, while the SIMD path depends on the underlying `simd_clip` implementation
/// - **Infinite values**: Clipped to min/max as appropriate
///
/// # Applications
///
/// - **Gradient Clipping**: Prevent exploding gradients in neural network training
/// - **Outlier Handling**: Remove or bound extreme values in statistical analysis
/// - **Data Preprocessing**: Enforce valid ranges for features
/// - **Numerical Stability**: Prevent overflow/underflow in computations
/// - **Image Processing**: Clamp pixel values to valid ranges
/// - **Audio Processing**: Prevent clipping distortion
///
/// # Example: Gradient Clipping
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::clip_simd;
///
/// // Clip gradients to prevent exploding gradients
/// let gradients = array![10.5, -8.2, 0.3, 15.7, -12.1];
/// let clipped = clip_simd(&gradients.view(), -5.0, 5.0);
///
/// // All gradients now in [-5.0, 5.0]
/// assert_eq!(clipped[0], 5.0);  // 10.5 -> 5.0
/// assert_eq!(clipped[1], -5.0); // -8.2 -> -5.0
/// assert_eq!(clipped[2], 0.3);  // unchanged
/// assert_eq!(clipped[3], 5.0);  // 15.7 -> 5.0
/// assert_eq!(clipped[4], -5.0); // -12.1 -> -5.0
/// ```
pub fn clip_simd<F>(x: &ArrayView1<F>, min_val: F, max_val: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    assert!(min_val <= max_val, "min_val must be <= max_val");

    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_clip(x, min_val, max_val);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| {
        if val < min_val {
            min_val
        } else if val > max_val {
            max_val
        } else {
            val
        }
    })
}

/// Compute the softmax activation function with SIMD acceleration (Phase 33).
///
/// The softmax function converts a vector of real numbers into a probability distribution
/// where all values are in (0, 1) and sum to 1. The implementation is numerically stable
/// because it uses the max-subtraction trick.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` containing the softmax probabilities, where:
/// - All values are in the range (0, 1)
/// - Sum of all values equals 1.0
/// - Empty input returns empty array
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 4-8x for large arrays on AVX2/NEON systems
/// - Uses max_simd (Phase 29) and sum_simd (Phase 30) internally
///
/// # Mathematical Definition
///
/// ```text
/// softmax(x)ᵢ = exp(xᵢ - max(x)) / Σⱼ exp(xⱼ - max(x))
/// ```
///
/// The max-subtraction trick (xᵢ - max(x)) ensures numerical stability by preventing
/// overflow in the exponential function.
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::softmax_simd;
///
/// let x = array![1.0, 2.0, 3.0];
/// let result = softmax_simd(&x.view());
///
/// // Verify probabilities sum to 1
/// let sum: f64 = result.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
///
/// // Verify all values in (0, 1)
/// for &val in result.iter() {
///     assert!(val > 0.0 && val < 1.0);
/// }
/// ```
///
/// # Applications
///
/// - **Attention Mechanisms**: Compute attention weights in Transformers (primary use case)
/// - **Multi-class Classification**: Convert logits to class probabilities
/// - **Neural Networks**: Final layer activation for multi-class problems
/// - **Reinforcement Learning**: Action probability distributions
/// - **Natural Language Processing**: Token probability distributions
///
/// # Numerical Stability
///
/// The implementation subtracts the maximum value before exponentiation to prevent
/// overflow. This is mathematically equivalent to the standard softmax but numerically
/// stable even for large input values.
///
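/// As a worked sketch of the shifted computation:
///
/// ```text
/// x            = [1000, 1001, 1002]      (exp(1002) alone would overflow f64)
/// x - max(x)   = [-2, -1, 0]
/// exp(x - max) ≈ [0.1353, 0.3679, 1.0000]
/// sum          ≈ 1.5032
/// softmax(x)   ≈ [0.0900, 0.2447, 0.6652]
/// ```
///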
/// # Example: Attention Weights
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::softmax_simd;
///
/// // Attention scores (before softmax)
/// let scores = array![2.0, 4.0, 1.0, 3.0];
/// let attention_weights = softmax_simd(&scores.view());
///
/// // Highest score (4.0) gets highest probability
/// let max_idx = attention_weights
///     .iter()
///     .enumerate()
///     .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
///     .map(|(idx, _)| idx)
///     .unwrap();
/// assert_eq!(max_idx, 1); // Index of score 4.0
///
/// // All weights sum to 1
/// let sum: f64 = attention_weights.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
/// ```
#[allow(dead_code)]
pub fn softmax_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        // SIMD fast path using Phase 29-30 operations
        // 1. Find max for numerical stability (Phase 29: max_simd)
        let max_val = F::simd_max_element(x);

        // 2. Subtract max and exponentiate (SIMD exp)
        let shifted = x.mapv(|val| val - max_val);
        let exp_vals = F::simd_exp(&shifted.view());

        // 3. Sum the exponents (Phase 30: sum_simd)
        let sum = F::simd_sum(&exp_vals.view());

        // 4. Normalize by dividing by sum (scalar multiplication with 1/sum)
        if sum > F::zero() {
            let inv_sum = F::one() / sum;
            F::simd_scalar_mul(&exp_vals.view(), inv_sum)
        } else {
            Array1::zeros(x.len())
        }
    } else {
        // Scalar fallback for small arrays
        // Find max for numerical stability
        let max_val = x.fold(F::neg_infinity(), |max, &val| max.max(val));

        // Subtract max and exponentiate
        let mut exp_vals = x.mapv(|val| (val - max_val).exp());

        // Sum the exponents
        let sum = exp_vals.iter().fold(F::zero(), |acc, &val| acc + val);

        // Normalize
        if sum > F::zero() {
            exp_vals.mapv_inplace(|val| val / sum);
        }

        exp_vals
    }
}

// ============================================================================
// Activation Functions (Phase 33.5)
// ============================================================================

/// Compute ReLU (Rectified Linear Unit) activation with SIMD acceleration.
///
/// ReLU is one of the most common activation functions in deep learning, defined as:
/// ReLU(x) = max(0, x)
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` containing the ReLU activation output, where negative values are zeroed
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 3-5x for large arrays on AVX2/NEON systems
/// - Most common activation in CNNs, ResNets, and modern architectures
///
/// # Mathematical Definition
///
/// ```text
/// ReLU(x) = { x   if x > 0
///           { 0   if x ≤ 0
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::relu_simd;
///
/// let x = array![-2.0, -1.0, 0.0, 1.0, 2.0];
/// let result = relu_simd(&x.view());
///
/// assert_eq!(result[0], 0.0); // -2.0 -> 0.0
/// assert_eq!(result[1], 0.0); // -1.0 -> 0.0
/// assert_eq!(result[2], 0.0); // 0.0 -> 0.0
/// assert_eq!(result[3], 1.0); // 1.0 -> 1.0
/// assert_eq!(result[4], 2.0); // 2.0 -> 2.0
/// ```
///
/// # Applications
///
/// - **Convolutional Neural Networks**: Primary activation in CNN layers
/// - **ResNet/DenseNet**: Core activation in residual blocks
/// - **Fully Connected Layers**: Standard activation for hidden layers
/// - **Feature Extraction**: Non-linear transformation in deep networks
/// - **Modern Architectures**: Default choice for most deep learning models
///
/// # Advantages of ReLU
///
/// - **Computational Efficiency**: Very fast to compute (simple max operation)
/// - **No Gradient Vanishing**: Gradients don't saturate for positive values
/// - **Sparse Activation**: Promotes sparsity (zeros out negative values)
/// - **SIMD-Friendly**: Perfect for vectorization
#[allow(dead_code)]
pub fn relu_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        F::simd_relu(x)
    } else {
        // Scalar fallback: max(0, x)
        x.mapv(|val| if val > F::zero() { val } else { F::zero() })
    }
}

/// Compute Leaky ReLU activation with SIMD acceleration.
///
/// Leaky ReLU is a variant of ReLU that allows small negative values:
/// LeakyReLU(x) = max(αx, x) where α is a small constant (typically 0.01)
///
/// # Arguments
///
/// * `x` - Input 1D array
/// * `alpha` - Negative slope coefficient (typically 0.01)
///
/// # Returns
///
/// `Array1<F>` containing the Leaky ReLU activation output
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Speedup**: 3-5x for large arrays
///
/// # Mathematical Definition
///
/// ```text
/// LeakyReLU(x) = { x    if x > 0
///                { αx   if x ≤ 0
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::leaky_relu_simd;
///
/// let x = array![-2.0, -1.0, 0.0, 1.0, 2.0];
/// let result = leaky_relu_simd(&x.view(), 0.01);
///
/// assert_eq!(result[0], -0.02); // -2.0 * 0.01
/// assert_eq!(result[1], -0.01); // -1.0 * 0.01
/// assert_eq!(result[2], 0.0);   // 0.0 * 0.01
/// assert_eq!(result[3], 1.0);   // 1.0 (unchanged)
/// assert_eq!(result[4], 2.0);   // 2.0 (unchanged)
/// ```
///
/// # Applications
///
/// - **Addressing Dying ReLU Problem**: Allows gradient flow for negative values
/// - **GANs**: Common in discriminator networks
/// - **ResNet Variants**: Alternative to standard ReLU
#[allow(dead_code)]
pub fn leaky_relu_simd<F>(x: &ArrayView1<F>, alpha: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        F::simd_leaky_relu(x, alpha)
    } else {
        // Scalar fallback: max(alpha * x, x)
        x.mapv(|val| if val > F::zero() { val } else { val * alpha })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ::ndarray::array;

    #[test]
    fn test_normalize_simd_basic_f64() {
        let x = array![3.0, 4.0];
        let result = normalize_simd(&x.view());

        // 3-4-5 triangle: norm = 5, so [0.6, 0.8]
        assert!((result[0] - 0.6).abs() < 1e-10);
        assert!((result[1] - 0.8).abs() < 1e-10);

        // Verify unit norm
        let norm: f64 = result.iter().map(|x| x * x).sum::<f64>().sqrt();
        assert!((norm - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_normalize_simd_basic_f32() {
        let x = array![3.0f32, 4.0];
        let result = normalize_simd(&x.view());

        assert!((result[0] - 0.6).abs() < 1e-6);
        assert!((result[1] - 0.8).abs() < 1e-6);

        // Verify unit norm
        let norm: f32 = result.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_normalize_simd_empty() {
        let x: Array1<f64> = array![];
        let result = normalize_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_normalize_simd_zero_vector() {
        let x = array![0.0, 0.0, 0.0];
        let result = normalize_simd(&x.view());

        // Zero norm should return zero array
        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
    }

    #[test]
    fn test_standardize_simd_basic_f64() {
        let x = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let result = standardize_simd(&x.view());

        // Verify mean ≈ 0
        let mean: f64 = result.iter().sum::<f64>() / result.len() as f64;
        assert!(mean.abs() < 1e-10, "Mean should be ~0, got {}", mean);

        // Verify std ≈ 1 (sample std with ddof=1)
        let variance: f64 = result.iter().map(|&x| x * x).sum::<f64>() / (result.len() - 1) as f64;
        let std = variance.sqrt();
        assert!((std - 1.0).abs() < 1e-10, "Std should be ~1, got {}", std);
    }

    #[test]
    fn test_standardize_simd_basic_f32() {
        let x = array![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let result = standardize_simd(&x.view());

        // Verify mean ≈ 0
        let mean: f32 = result.iter().sum::<f32>() / result.len() as f32;
        assert!(mean.abs() < 1e-5, "Mean should be ~0, got {}", mean);

        // Verify std ≈ 1
        let variance: f32 = result.iter().map(|&x| x * x).sum::<f32>() / (result.len() - 1) as f32;
        let std = variance.sqrt();
        assert!((std - 1.0).abs() < 1e-5, "Std should be ~1, got {}", std);
    }

    #[test]
    fn test_standardize_simd_empty() {
        let x: Array1<f64> = array![];
        let result = standardize_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_standardize_simd_single_element() {
        let x = array![5.0];
        let result = standardize_simd(&x.view());

        // Single element has undefined std, should return zero
        assert_eq!(result[0], 0.0);
    }

    #[test]
    fn test_standardize_simd_constant() {
        let x = array![5.0, 5.0, 5.0, 5.0];
        let result = standardize_simd(&x.view());

        // Constant array has zero std, should return zero array
        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 0.0);
    }

    // ========================================================================
    // Tests for Phase 33: softmax_simd
    // ========================================================================

    #[test]
    fn test_softmax_simd_f64_basic() {
        let x = array![1.0f64, 2.0, 3.0];
        let result = softmax_simd(&x.view());

        // Verify probabilities sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Verify all values in (0, 1)
        for &val in result.iter() {
            assert!(val > 0.0 && val < 1.0);
        }

        // Verify highest input gets highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2); // Index of value 3.0
    }

    #[test]
    fn test_softmax_simd_f32_basic() {
        let x = array![1.0f32, 2.0, 3.0, 4.0];
        let result = softmax_simd(&x.view());

        // Verify probabilities sum to 1
        let sum: f32 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-6);

        // Verify all values in (0, 1)
        for &val in result.iter() {
            assert!(val > 0.0 && val < 1.0);
        }
    }

    #[test]
    fn test_softmax_simd_empty() {
        let x: Array1<f64> = array![];
        let result = softmax_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_softmax_simd_single() {
        let x = array![42.0f64];
        let result = softmax_simd(&x.view());

        // Single element softmax should be 1.0
        assert!((result[0] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_softmax_simd_uniform() {
        let x = array![2.0f64, 2.0, 2.0, 2.0];
        let result = softmax_simd(&x.view());

        // Uniform input should give uniform probabilities (0.25 each)
        for &val in result.iter() {
            assert!((val - 0.25).abs() < 1e-10);
        }

        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_softmax_simd_large_values() {
        // Test numerical stability with large values
        let x = array![1000.0f64, 1001.0, 1002.0];
        let result = softmax_simd(&x.view());

        // Should not overflow
        assert!(!result.iter().any(|&v| v.is_infinite() || v.is_nan()));

        // Should still sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest value should get highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2);
    }

    #[test]
    fn test_softmax_simd_negative_values() {
        let x = array![-10.0f64, -5.0, -2.0, -8.0];
        let result = softmax_simd(&x.view());

        // Should sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest value (-2.0) should get highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2); // Index of -2.0
    }

    #[test]
    fn test_softmax_simd_attention_scores() {
        // Realistic attention scores scenario
        let scores = array![2.0f64, 4.0, 1.0, 3.0];
        let attention_weights = softmax_simd(&scores.view());

        // Verify it's a valid probability distribution
        let sum: f64 = attention_weights.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest score (4.0) should get highest weight
        let max_idx = attention_weights
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 1); // Index of score 4.0

        // Verify all weights are positive
        for &weight in attention_weights.iter() {
            assert!(weight > 0.0);
        }
    }

    #[test]
    fn test_softmax_simd_large_array() {
        // Test SIMD path with large array (>1000 elements)
        let x: Array1<f64> = Array1::from_vec((0..5000).map(|i| (i as f64) * 0.01).collect());
        let result = softmax_simd(&x.view());

        // Should sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-9);

        // All values should be positive
        assert!(result.iter().all(|&v| v > 0.0));

        // Highest input should give highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 4999); // Last element has highest value
    }

    #[test]
    fn test_softmax_simd_temperature_scaling() {
        // Test temperature scaling (dividing logits by temperature)
        let logits = array![1.0f64, 2.0, 3.0];

        // High temperature (T=2.0) -> more uniform distribution
        let high_temp = softmax_simd(&logits.mapv(|x| x / 2.0).view());

        // Low temperature (T=0.5) -> more peaked distribution
        let low_temp = softmax_simd(&logits.mapv(|x| x * 2.0).view());

        // Both should sum to 1
        let sum_high: f64 = high_temp.iter().sum();
        let sum_low: f64 = low_temp.iter().sum();
        assert!((sum_high - 1.0).abs() < 1e-10);
        assert!((sum_low - 1.0).abs() < 1e-10);

        // Low temperature should have higher max probability (more peaked)
        let max_high = high_temp.iter().cloned().fold(0.0f64, f64::max);
        let max_low = low_temp.iter().cloned().fold(0.0f64, f64::max);
        assert!(max_low > max_high);
    }

    // ========================================================================
    // Tests for Phase 33.5: relu_simd and leaky_relu_simd
    // ========================================================================

    #[test]
    fn test_relu_simd_f64_basic() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];
        let result = relu_simd(&x.view());

        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_relu_simd_f32_basic() {
        let x = array![-2.0f32, -1.0, 0.0, 1.0, 2.0];
        let result = relu_simd(&x.view());

        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_relu_simd_all_positive() {
        let x = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let result = relu_simd(&x.view());

        // All values should remain unchanged
        for i in 0..5 {
            assert_eq!(result[i], x[i]);
        }
    }

    #[test]
    fn test_relu_simd_all_negative() {
        let x = array![-5.0f64, -4.0, -3.0, -2.0, -1.0];
        let result = relu_simd(&x.view());

        // All values should be zero
        for i in 0..5 {
            assert_eq!(result[i], 0.0);
        }
    }

    #[test]
    fn test_relu_simd_empty() {
        let x: Array1<f64> = array![];
        let result = relu_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_relu_simd_large_array() {
        // Test SIMD path with large array (>1000 elements)
        let x: Array1<f64> = Array1::from_vec((0..5000).map(|i| (i as f64) - 2500.0).collect());
        let result = relu_simd(&x.view());

        // First 2500 should be zero (negative)
        for i in 0..2500 {
            assert_eq!(result[i], 0.0);
        }

        // Element at 2500 is exactly 0 (2500 - 2500 = 0)
        assert_eq!(result[2500], 0.0);

        // Last 2499 should be positive (2501-2500=1, ..., 4999-2500=2499)
        for i in 2501..5000 {
            assert!(result[i] > 0.0);
        }
    }

    #[test]
    fn test_leaky_relu_simd_f64_basic() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        assert_eq!(result[0], -0.02);
        assert_eq!(result[1], -0.01);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_leaky_relu_simd_f32_basic() {
        let x = array![-2.0f32, -1.0, 0.0, 1.0, 2.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        assert!((result[0] - (-0.02)).abs() < 1e-6);
        assert!((result[1] - (-0.01)).abs() < 1e-6);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_leaky_relu_simd_different_alpha() {
        let x = array![-10.0f64, -5.0, 0.0, 5.0, 10.0];

        // Test with alpha = 0.1
        let result_01 = leaky_relu_simd(&x.view(), 0.1);
        assert_eq!(result_01[0], -1.0); // -10.0 * 0.1
        assert_eq!(result_01[1], -0.5); // -5.0 * 0.1

        // Test with alpha = 0.2
        let result_02 = leaky_relu_simd(&x.view(), 0.2);
        assert_eq!(result_02[0], -2.0); // -10.0 * 0.2
        assert_eq!(result_02[1], -1.0); // -5.0 * 0.2
    }

    #[test]
    fn test_leaky_relu_simd_preserves_positive() {
        let x = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        // All positive values should remain unchanged
        for i in 0..5 {
            assert_eq!(result[i], x[i]);
        }
    }

    #[test]
    fn test_leaky_relu_simd_empty() {
        let x: Array1<f64> = array![];
        let result = leaky_relu_simd(&x.view(), 0.01);
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_relu_vs_leaky_relu() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];

        let relu_result = relu_simd(&x.view());
        let leaky_result = leaky_relu_simd(&x.view(), 0.01);

        // Positive values should be the same
        assert_eq!(relu_result[3], leaky_result[3]);
        assert_eq!(relu_result[4], leaky_result[4]);

        // Negative values should differ
        assert!(relu_result[0] != leaky_result[0]);
        assert!(relu_result[1] != leaky_result[1]);

        // ReLU zeros negatives, Leaky ReLU scales them
        assert_eq!(relu_result[0], 0.0);
        assert_eq!(leaky_result[0], -0.02);
    }
}