scirs2_core/ndarray_ext/preprocessing.rs

//! SIMD-accelerated preprocessing operations for array normalization, standardization, clipping, and activation
//!
//! This module provides high-performance implementations of common data preprocessing
//! operations that are critical for machine learning pipelines, statistical analysis,
//! and scientific computing.
//!
//! # Operations
//!
//! - **L2 Normalization** (`normalize_simd`): Converts vectors to unit length
//! - **Z-Score Standardization** (`standardize_simd`): Zero mean, unit variance
//! - **Value Clipping** (`clip_simd`): Bounds values to a specified range
//! - **Softmax** (`softmax_simd`): Converts a vector into a probability distribution
//! - **ReLU / Leaky ReLU** (`relu_simd`, `leaky_relu_simd`): Common activation functions
//!
//! # Performance
//!
//! All operations automatically use SIMD acceleration when:
//! - The platform supports AVX2 (x86_64) or NEON (ARM)
//! - The array is large enough to benefit from vectorization
//! - The array's memory layout is contiguous
//!
//! Falls back to scalar implementations for small arrays or unsupported platforms.
//!
//! # Examples
//!
//! ```
//! use scirs2_core::ndarray::array;
//! use scirs2_core::ndarray_ext::preprocessing::{normalize_simd, standardize_simd, clip_simd};
//!
//! // L2 normalization - convert to unit vector
//! let x = array![3.0, 4.0];  // norm = 5
//! let normalized = normalize_simd(&x.view());
//! // Result: [0.6, 0.8]
//!
//! // Z-score standardization
//! let data = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
//! let standardized = standardize_simd(&data.view());
//! // Result: mean ≈ 0, std ≈ 1
//!
//! // Value clipping
//! let values = array![-10.0, -5.0, 0.0, 5.0, 10.0];
//! let clipped = clip_simd(&values.view(), -3.0, 7.0);
//! // Result: [-3.0, -3.0, 0.0, 5.0, 7.0]
//! ```

use crate::numeric::Float;
use crate::simd_ops::{AutoOptimizer, SimdUnifiedOps};
use ::ndarray::{Array1, ArrayView1};

/// Normalize a 1D array to unit length using the L2 norm (SIMD-accelerated).
///
/// Computes x / ||x||₂ where ||x||₂ is the Euclidean (L2) norm.
/// The resulting vector has unit length (norm = 1).
///
/// # Arguments
///
/// * `x` - Input 1D array to normalize
///
/// # Returns
///
/// `Array1<F>` with the same length as the input, normalized to unit length.
/// Returns a zero array if the input norm is zero or NaN.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// normalize(x) = x / ||x||₂
/// where ||x||₂ = sqrt(Σ xᵢ²)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::normalize_simd;
///
/// // 3-4-5 triangle
/// let x = array![3.0_f64, 4.0];
/// let result = normalize_simd(&x.view());
///
/// // norm = sqrt(9 + 16) = 5
/// // result = [3/5, 4/5] = [0.6, 0.8]
/// assert!((result[0] - 0.6_f64).abs() < 1e-10);
/// assert!((result[1] - 0.8_f64).abs() < 1e-10);
///
/// // Verify unit norm
/// let norm: f64 = result.iter().map(|x| x * x).sum::<f64>().sqrt();
/// assert!((norm - 1.0_f64).abs() < 1e-10);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns an empty array
/// - **Zero vector**: Returns a zero array (normalization is undefined)
/// - **Single element**: Returns an array with the single element ±1
/// - **NaN values**: Returns a zero array
///
/// # Applications
///
/// - **Machine Learning**: Feature scaling for algorithms sensitive to magnitude
/// - **Cosine Similarity**: Prerequisite for efficient similarity computation (see the example below)
/// - **Direction Vectors**: Unit direction vectors in physics/graphics
/// - **Text Processing**: TF-IDF normalization in NLP
/// - **Signal Processing**: Normalized cross-correlation
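///
/// # Example: Cosine Similarity
///
/// A minimal sketch of how unit-normalized vectors reduce cosine similarity to a plain dot
/// product; the vectors and tolerance here are illustrative, not taken from the crate's
/// test suite.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::normalize_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![2.0_f64, 4.0, 6.0];
///
/// // After L2 normalization, cosine similarity is just the dot product.
/// let na = normalize_simd(&a.view());
/// let nb = normalize_simd(&b.view());
/// let cosine: f64 = na.iter().zip(nb.iter()).map(|(x, y)| x * y).sum();
///
/// // b is a positive scalar multiple of a, so the two directions coincide.
/// assert!((cosine - 1.0).abs() < 1e-10);
/// ```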
pub fn normalize_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_normalize(x);
    }

    // Scalar fallback for small arrays
    let norm = x
        .iter()
        .map(|&val| val * val)
        .fold(F::zero(), |acc, val| acc + val)
        .sqrt();

    if norm == F::zero() || norm.is_nan() {
        return Array1::zeros(x.len());
    }

    x.mapv(|val| val / norm)
}

/// Standardize a 1D array to zero mean and unit variance (SIMD-accelerated).
///
/// Computes (x - μ) / σ where μ is the mean and σ is the sample standard deviation.
/// The resulting array has mean ≈ 0 and standard deviation ≈ 1.
///
/// # Arguments
///
/// * `x` - Input 1D array to standardize
///
/// # Returns
///
/// `Array1<F>` with the same length as the input, standardized to zero mean and unit variance.
/// Returns an empty array for empty input, and a zero array for a single element or zero
/// standard deviation.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// standardize(x) = (x - μ) / σ
/// where:
///   μ = (1/n) Σ xᵢ                    (sample mean)
///   σ = sqrt((1/(n-1)) Σ (xᵢ - μ)²)   (sample std, ddof=1)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::standardize_simd;
///
/// let x = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
/// let result = standardize_simd(&x.view());
///
/// // Verify mean ≈ 0
/// let mean: f64 = result.iter().sum::<f64>() / result.len() as f64;
/// assert!(mean.abs() < 1e-10);
///
/// // Verify std ≈ 1 (sample std with ddof=1)
/// let variance: f64 = result.iter()
///     .map(|&x| x * x)
///     .sum::<f64>() / (result.len() - 1) as f64;
/// let std = variance.sqrt();
/// assert!((std - 1.0).abs() < 1e-10);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns an empty array
/// - **Single element**: Returns a zero array (std undefined for n=1)
/// - **Constant array**: Returns a zero array (std = 0)
/// - **NaN values**: Returns a zero array
///
/// # Applications
///
/// - **Machine Learning**: Feature preprocessing for models assuming normally distributed features
/// - **Statistical Analysis**: Z-score computation for outlier detection (see the example below)
/// - **Time Series**: Detrending and variance normalization
/// - **Data Science**: Preparing features for PCA, clustering, regression
/// - **Neural Networks**: Input normalization for faster convergence
///
/// # Implementation Notes
///
/// Uses the **sample standard deviation** (ddof=1, Bessel's correction) rather than the
/// population standard deviation (ddof=0). This matches the pandas default; NumPy's `np.std`
/// defaults to the population form (ddof=0), so pass `ddof=1` there to reproduce these results.
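///
/// # Example: Z-Score Outlier Detection
///
/// A minimal sketch of flagging outliers by |z-score|; the data and the 2σ threshold are
/// illustrative assumptions, not values prescribed by this crate.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::standardize_simd;
///
/// // One value (50.0) is far from the rest.
/// let data = array![1.0_f64, 2.0, 1.5, 2.5, 1.8, 50.0];
/// let z = standardize_simd(&data.view());
///
/// // Flag indices whose |z| exceeds 2 standard deviations.
/// let outliers: Vec<usize> = (0..z.len()).filter(|&i| z[i].abs() > 2.0).collect();
/// assert_eq!(outliers, vec![5]);
/// ```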
pub fn standardize_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    if x.len() <= 1 {
        return Array1::zeros(x.len());
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_standardize(x);
    }

    // Scalar fallback for small arrays
    let n = F::from(x.len()).unwrap();
    let mean = x.iter().fold(F::zero(), |acc, &val| acc + val) / n;

    let n_minus_1 = F::from(x.len() - 1).unwrap();
    let variance = x
        .iter()
        .map(|&val| {
            let diff = val - mean;
            diff * diff
        })
        .fold(F::zero(), |acc, val| acc + val)
        / n_minus_1;

    let std = variance.sqrt();

    if std == F::zero() || std.is_nan() {
        return Array1::zeros(x.len());
    }

    x.mapv(|val| (val - mean) / std)
}

/// Clip (clamp) array values to a specified range (SIMD-accelerated).
///
/// Constrains each element to be within [min_val, max_val]:
/// - Values < min_val are set to min_val
/// - Values > max_val are set to max_val
/// - Values within the range are unchanged
///
/// # Arguments
///
/// * `x` - Input 1D array to clip
/// * `min_val` - Minimum allowed value
/// * `max_val` - Maximum allowed value
///
/// # Returns
///
/// `Array1<F>` with the same length as the input, with values clipped to [min_val, max_val].
///
/// # Panics
///
/// Panics if `min_val > max_val`.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// clip(x, min, max) = {
///     min     if x < min
///     max     if x > max
///     x       otherwise
/// }
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::clip_simd;
///
/// let x = array![-10.0, -5.0, 0.0, 5.0, 10.0];
/// let result = clip_simd(&x.view(), -3.0, 7.0);
///
/// // Values clipped to [-3.0, 7.0]
/// assert_eq!(result[0], -3.0);  // -10 -> -3
/// assert_eq!(result[1], -3.0);  // -5 -> -3
/// assert_eq!(result[2], 0.0);   // 0 unchanged
/// assert_eq!(result[3], 5.0);   // 5 unchanged
/// assert_eq!(result[4], 7.0);   // 10 -> 7
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns an empty array
/// - **min_val == max_val**: All values are set to that value
/// - **NaN values**: The scalar fallback passes NaN through unchanged (both comparisons are
///   false); the SIMD path may handle NaN differently depending on the platform
/// - **Infinite values**: Clipped to min/max as appropriate
///
/// # Applications
///
/// - **Gradient Clipping**: Prevent exploding gradients in neural network training
/// - **Outlier Handling**: Remove or bound extreme values in statistical analysis
/// - **Data Preprocessing**: Enforce valid ranges for features
/// - **Numerical Stability**: Prevent overflow/underflow in computations
/// - **Image Processing**: Clamp pixel values to valid ranges
/// - **Audio Processing**: Prevent clipping distortion
///
/// # Example: Gradient Clipping
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::clip_simd;
///
/// // Clip gradients to prevent exploding gradients
/// let gradients = array![10.5, -8.2, 0.3, 15.7, -12.1];
/// let clipped = clip_simd(&gradients.view(), -5.0, 5.0);
///
/// // All gradients now in [-5.0, 5.0]
/// assert_eq!(clipped[0], 5.0);   // 10.5 -> 5.0
/// assert_eq!(clipped[1], -5.0);  // -8.2 -> -5.0
/// assert_eq!(clipped[2], 0.3);   // unchanged
/// assert_eq!(clipped[3], 5.0);   // 15.7 -> 5.0
/// assert_eq!(clipped[4], -5.0);  // -12.1 -> -5.0
/// ```
pub fn clip_simd<F>(x: &ArrayView1<F>, min_val: F, max_val: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    assert!(min_val <= max_val, "min_val must be <= max_val");

    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_clip(x, min_val, max_val);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| {
        if val < min_val {
            min_val
        } else if val > max_val {
            max_val
        } else {
            val
        }
    })
}

/// Compute the softmax activation function with SIMD acceleration (Phase 33).
///
/// The softmax function converts a vector of real numbers into a probability distribution
/// where all values are in (0, 1) and sum to 1. It is numerically stable thanks to the
/// max-subtraction trick.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` containing the softmax probabilities, where:
/// - All values are in the range (0, 1)
/// - The sum of all values equals 1.0
/// - Empty input returns an empty array
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
/// - **Speedup**: 4-8x for large arrays on AVX2/NEON systems
/// - Uses max_simd (Phase 29) and sum_simd (Phase 30)
///
/// # Mathematical Definition
///
/// ```text
/// softmax(x)ᵢ = exp(xᵢ - max(x)) / Σⱼ exp(xⱼ - max(x))
/// ```
///
/// The max-subtraction trick (xᵢ - max(x)) ensures numerical stability by preventing
/// overflow in the exponential function.
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::softmax_simd;
///
/// let x = array![1.0, 2.0, 3.0];
/// let result = softmax_simd(&x.view());
///
/// // Verify probabilities sum to 1
/// let sum: f64 = result.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
///
/// // Verify all values in (0, 1)
/// for &val in result.iter() {
///     assert!(val > 0.0 && val < 1.0);
/// }
/// ```
///
/// # Applications
///
/// - **Attention Mechanisms**: Compute attention weights in Transformers (the primary use case)
/// - **Multi-class Classification**: Convert logits to class probabilities
/// - **Neural Networks**: Final layer activation for multi-class problems
/// - **Reinforcement Learning**: Action probability distributions
/// - **Natural Language Processing**: Token probability distributions
///
/// # Numerical Stability
///
/// The implementation subtracts the maximum value before exponentiation to prevent
/// overflow. This is mathematically equivalent to the standard softmax but remains
/// numerically stable even for large input values.
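///
/// # Example: Large Logits Stay Finite
///
/// A minimal sketch mirroring the crate's own `test_softmax_simd_large_values` test: even
/// logits around 1000 do not overflow thanks to the max-subtraction trick.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::softmax_simd;
///
/// let logits = array![1000.0_f64, 1001.0, 1002.0];
/// let probs = softmax_simd(&logits.view());
///
/// // No overflow: every probability is finite and they still sum to 1.
/// assert!(probs.iter().all(|p| p.is_finite()));
/// let sum: f64 = probs.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
/// ```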
///
/// # Example: Attention Weights
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::softmax_simd;
///
/// // Attention scores (before softmax)
/// let scores = array![2.0, 4.0, 1.0, 3.0];
/// let attention_weights = softmax_simd(&scores.view());
///
/// // Highest score (4.0) gets highest probability
/// let max_idx = attention_weights
///     .iter()
///     .enumerate()
///     .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
///     .map(|(idx, _)| idx)
///     .unwrap();
/// assert_eq!(max_idx, 1); // Index of score 4.0
///
/// // All weights sum to 1
/// let sum: f64 = attention_weights.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
/// ```
#[allow(dead_code)]
pub fn softmax_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        // SIMD fast path using Phase 29-30 operations
        // 1. Find max for numerical stability (Phase 29: max_simd)
        let max_val = F::simd_max_element(x);

        // 2. Subtract max and exponentiate (SIMD exp)
        let shifted = x.mapv(|val| val - max_val);
        let exp_vals = F::simd_exp(&shifted.view());

        // 3. Sum the exponents (Phase 30: sum_simd)
        let sum = F::simd_sum(&exp_vals.view());

        // 4. Normalize by dividing by sum (scalar multiplication with 1/sum)
        if sum > F::zero() {
            let inv_sum = F::one() / sum;
            F::simd_scalar_mul(&exp_vals.view(), inv_sum)
        } else {
            Array1::zeros(x.len())
        }
    } else {
        // Scalar fallback for small arrays
        // Find max for numerical stability
        let max_val = x.fold(F::neg_infinity(), |max, &val| max.max(val));

        // Subtract max and exponentiate
        let mut exp_vals = x.mapv(|val| (val - max_val).exp());

        // Sum the exponents
        let sum = exp_vals.iter().fold(F::zero(), |acc, &val| acc + val);

        // Normalize
        if sum > F::zero() {
            exp_vals.mapv_inplace(|val| val / sum);
        }

        exp_vals
    }
}

// ============================================================================
// Activation Functions (Phase 33.5)
// ============================================================================

/// Compute the ReLU (Rectified Linear Unit) activation with SIMD acceleration.
///
/// ReLU is one of the most common activation functions in deep learning, defined as
/// ReLU(x) = max(0, x).
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` containing the ReLU activation output, where negative values are zeroed
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
/// - **Speedup**: 3-5x for large arrays on AVX2/NEON systems
/// - Most common activation in CNNs, ResNets, and modern architectures
///
/// # Mathematical Definition
///
/// ```text
/// ReLU(x) = { x  if x > 0
///           { 0  if x ≤ 0
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::relu_simd;
///
/// let x = array![-2.0, -1.0, 0.0, 1.0, 2.0];
/// let result = relu_simd(&x.view());
///
/// assert_eq!(result[0], 0.0); // -2.0 -> 0.0
/// assert_eq!(result[1], 0.0); // -1.0 -> 0.0
/// assert_eq!(result[2], 0.0); //  0.0 -> 0.0
/// assert_eq!(result[3], 1.0); //  1.0 -> 1.0
/// assert_eq!(result[4], 2.0); //  2.0 -> 2.0
/// ```
///
/// # Applications
///
/// - **Convolutional Neural Networks**: Primary activation in CNN layers
/// - **ResNet/DenseNet**: Core activation in residual blocks
/// - **Fully Connected Layers**: Standard activation for hidden layers
/// - **Feature Extraction**: Non-linear transformation in deep networks
/// - **Modern Architectures**: Default choice for most deep learning models
///
/// # Advantages of ReLU
///
/// - **Computational Efficiency**: Very fast to compute (simple max operation)
/// - **No Gradient Vanishing**: Gradients don't saturate for positive values
/// - **Sparse Activation**: Promotes sparsity by zeroing negative values (see the example below)
/// - **SIMD-Friendly**: Perfect for vectorization
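///
/// # Example: Sparse Activations
///
/// A minimal sketch of the sparsity effect mentioned above; the input values are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::relu_simd;
///
/// let pre_activation = array![-0.5_f64, 1.2, -3.0, 0.7, -0.1, 2.4];
/// let activated = relu_simd(&pre_activation.view());
///
/// // Every negative pre-activation becomes exactly zero.
/// let zeros = activated.iter().filter(|&&v| v == 0.0).count();
/// assert_eq!(zeros, 3);
/// ```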
#[allow(dead_code)]
pub fn relu_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        F::simd_relu(x)
    } else {
        // Scalar fallback: max(0, x)
        x.mapv(|val| if val > F::zero() { val } else { F::zero() })
    }
}

/// Compute the Leaky ReLU activation with SIMD acceleration.
///
/// Leaky ReLU is a variant of ReLU that allows small negative values:
/// LeakyReLU(x) = max(αx, x), where α is a small constant (typically 0.01).
///
/// # Arguments
///
/// * `x` - Input 1D array
/// * `alpha` - Negative slope coefficient (typically 0.01)
///
/// # Returns
///
/// `Array1<F>` containing the Leaky ReLU activation output
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Speedup**: 3-5x for large arrays
///
/// # Mathematical Definition
///
/// ```text
/// LeakyReLU(x) = { x     if x > 0
///                { αx    if x ≤ 0
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::leaky_relu_simd;
///
/// let x = array![-2.0, -1.0, 0.0, 1.0, 2.0];
/// let result = leaky_relu_simd(&x.view(), 0.01);
///
/// assert_eq!(result[0], -0.02); // -2.0 * 0.01
/// assert_eq!(result[1], -0.01); // -1.0 * 0.01
/// assert_eq!(result[2], 0.0);   //  0.0 * 0.01
/// assert_eq!(result[3], 1.0);   //  1.0 (unchanged)
/// assert_eq!(result[4], 2.0);   //  2.0 (unchanged)
/// ```
///
/// # Applications
///
/// - **Addressing the Dying ReLU Problem**: Allows gradient flow for negative values
///   (see the comparison below)
/// - **GANs**: Common in discriminator networks
/// - **ResNet Variants**: Alternative to standard ReLU
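///
/// # Example: ReLU vs. Leaky ReLU
///
/// A minimal sketch contrasting the two activations on the same input, mirroring the crate's
/// `test_relu_vs_leaky_relu` test; the α of 0.01 is the conventional default.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::preprocessing::{leaky_relu_simd, relu_simd};
///
/// let x = array![-2.0_f64, 3.0];
///
/// let relu = relu_simd(&x.view());
/// let leaky = leaky_relu_simd(&x.view(), 0.01);
///
/// // ReLU zeroes the negative input; Leaky ReLU keeps a small, scaled value.
/// assert_eq!(relu[0], 0.0);
/// assert_eq!(leaky[0], -0.02);
///
/// // Positive inputs pass through both unchanged.
/// assert_eq!(relu[1], leaky[1]);
/// ```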
#[allow(dead_code)]
pub fn leaky_relu_simd<F>(x: &ArrayView1<F>, alpha: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();

    if optimizer.should_use_simd(x.len()) {
        F::simd_leaky_relu(x, alpha)
    } else {
        // Scalar fallback: max(alpha * x, x)
        x.mapv(|val| if val > F::zero() { val } else { val * alpha })
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use ::ndarray::array;

    #[test]
    fn test_normalize_simd_basic_f64() {
        let x = array![3.0, 4.0];
        let result = normalize_simd(&x.view());

        // 3-4-5 triangle: norm = 5, so [0.6, 0.8]
        assert!((result[0] - 0.6).abs() < 1e-10);
        assert!((result[1] - 0.8).abs() < 1e-10);

        // Verify unit norm
        let norm: f64 = result.iter().map(|x| x * x).sum::<f64>().sqrt();
        assert!((norm - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_normalize_simd_basic_f32() {
        let x = array![3.0f32, 4.0];
        let result = normalize_simd(&x.view());

        assert!((result[0] - 0.6).abs() < 1e-6);
        assert!((result[1] - 0.8).abs() < 1e-6);

        // Verify unit norm
        let norm: f32 = result.iter().map(|x| x * x).sum::<f32>().sqrt();
        assert!((norm - 1.0).abs() < 1e-6);
    }

    #[test]
    fn test_normalize_simd_empty() {
        let x: Array1<f64> = array![];
        let result = normalize_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_normalize_simd_zero_vector() {
        let x = array![0.0, 0.0, 0.0];
        let result = normalize_simd(&x.view());

        // Zero norm should return zero array
        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
    }

    #[test]
    fn test_standardize_simd_basic_f64() {
        let x = array![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let result = standardize_simd(&x.view());

        // Verify mean ≈ 0
        let mean: f64 = result.iter().sum::<f64>() / result.len() as f64;
        assert!(mean.abs() < 1e-10, "Mean should be ~0, got {}", mean);

        // Verify std ≈ 1 (sample std with ddof=1)
        let variance: f64 = result.iter().map(|&x| x * x).sum::<f64>() / (result.len() - 1) as f64;
        let std = variance.sqrt();
        assert!((std - 1.0).abs() < 1e-10, "Std should be ~1, got {}", std);
    }

    #[test]
    fn test_standardize_simd_basic_f32() {
        let x = array![1.0f32, 2.0, 3.0, 4.0, 5.0];
        let result = standardize_simd(&x.view());

        // Verify mean ≈ 0
        let mean: f32 = result.iter().sum::<f32>() / result.len() as f32;
        assert!(mean.abs() < 1e-5, "Mean should be ~0, got {}", mean);

        // Verify std ≈ 1
        let variance: f32 = result.iter().map(|&x| x * x).sum::<f32>() / (result.len() - 1) as f32;
        let std = variance.sqrt();
        assert!((std - 1.0).abs() < 1e-5, "Std should be ~1, got {}", std);
    }

    #[test]
    fn test_standardize_simd_empty() {
        let x: Array1<f64> = array![];
        let result = standardize_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_standardize_simd_single_element() {
        let x = array![5.0];
        let result = standardize_simd(&x.view());

        // Single element has undefined std, should return zero
        assert_eq!(result[0], 0.0);
    }

    #[test]
    fn test_standardize_simd_constant() {
        let x = array![5.0, 5.0, 5.0, 5.0];
        let result = standardize_simd(&x.view());

        // Constant array has zero std, should return zero array
        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 0.0);
    }

    // ========================================================================
    // Tests for Phase 33: softmax_simd
    // ========================================================================

    #[test]
    fn test_softmax_simd_f64_basic() {
        let x = array![1.0f64, 2.0, 3.0];
        let result = softmax_simd(&x.view());

        // Verify probabilities sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Verify all values in (0, 1)
        for &val in result.iter() {
            assert!(val > 0.0 && val < 1.0);
        }

        // Verify highest input gets highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2); // Index of value 3.0
    }

    #[test]
    fn test_softmax_simd_f32_basic() {
        let x = array![1.0f32, 2.0, 3.0, 4.0];
        let result = softmax_simd(&x.view());

        // Verify probabilities sum to 1
        let sum: f32 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-6);

        // Verify all values in (0, 1)
        for &val in result.iter() {
            assert!(val > 0.0 && val < 1.0);
        }
    }

    #[test]
    fn test_softmax_simd_empty() {
        let x: Array1<f64> = array![];
        let result = softmax_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_softmax_simd_single() {
        let x = array![42.0f64];
        let result = softmax_simd(&x.view());

        // Single element softmax should be 1.0
        assert!((result[0] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_softmax_simd_uniform() {
        let x = array![2.0f64, 2.0, 2.0, 2.0];
        let result = softmax_simd(&x.view());

        // Uniform input should give uniform probabilities (0.25 each)
        for &val in result.iter() {
            assert!((val - 0.25).abs() < 1e-10);
        }

        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_softmax_simd_large_values() {
        // Test numerical stability with large values
        let x = array![1000.0f64, 1001.0, 1002.0];
        let result = softmax_simd(&x.view());

        // Should not overflow
        assert!(!result.iter().any(|&v| v.is_infinite() || v.is_nan()));

        // Should still sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest value should get highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2);
    }

    #[test]
    fn test_softmax_simd_negative_values() {
        let x = array![-10.0f64, -5.0, -2.0, -8.0];
        let result = softmax_simd(&x.view());

        // Should sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest value (-2.0) should get highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 2); // Index of -2.0
    }

    #[test]
    fn test_softmax_simd_attention_scores() {
        // Realistic attention scores scenario
        let scores = array![2.0f64, 4.0, 1.0, 3.0];
        let attention_weights = softmax_simd(&scores.view());

        // Verify it's a valid probability distribution
        let sum: f64 = attention_weights.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);

        // Highest score (4.0) should get highest weight
        let max_idx = attention_weights
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 1); // Index of score 4.0

        // Verify all weights are positive
        for &weight in attention_weights.iter() {
            assert!(weight > 0.0);
        }
    }

    #[test]
    fn test_softmax_simd_large_array() {
        // Test SIMD path with large array (>1000 elements)
        let x: Array1<f64> = Array1::from_vec((0..5000).map(|i| (i as f64) * 0.01).collect());
        let result = softmax_simd(&x.view());

        // Should sum to 1
        let sum: f64 = result.iter().sum();
        assert!((sum - 1.0).abs() < 1e-9);

        // All values should be positive
        assert!(result.iter().all(|&v| v > 0.0));

        // Highest input should give highest probability
        let max_idx = result
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(idx, _)| idx)
            .unwrap();
        assert_eq!(max_idx, 4999); // Last element has highest value
    }

    #[test]
    fn test_softmax_simd_temperature_scaling() {
        // Test temperature scaling (dividing logits by temperature)
        let logits = array![1.0f64, 2.0, 3.0];

        // High temperature (T=2.0) -> more uniform distribution
        let high_temp = softmax_simd(&logits.mapv(|x| x / 2.0).view());

        // Low temperature (T=0.5) -> more peaked distribution
        let low_temp = softmax_simd(&logits.mapv(|x| x * 2.0).view());

        // Both should sum to 1
        let sum_high: f64 = high_temp.iter().sum();
        let sum_low: f64 = low_temp.iter().sum();
        assert!((sum_high - 1.0).abs() < 1e-10);
        assert!((sum_low - 1.0).abs() < 1e-10);

        // Low temperature should have higher max probability (more peaked)
        let max_high = high_temp.iter().cloned().fold(0.0f64, f64::max);
        let max_low = low_temp.iter().cloned().fold(0.0f64, f64::max);
        assert!(max_low > max_high);
    }

    // ========================================================================
    // Tests for Phase 33.5: relu_simd and leaky_relu_simd
    // ========================================================================

    #[test]
    fn test_relu_simd_f64_basic() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];
        let result = relu_simd(&x.view());

        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_relu_simd_f32_basic() {
        let x = array![-2.0f32, -1.0, 0.0, 1.0, 2.0];
        let result = relu_simd(&x.view());

        assert_eq!(result[0], 0.0);
        assert_eq!(result[1], 0.0);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_relu_simd_all_positive() {
        let x = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let result = relu_simd(&x.view());

        // All values should remain unchanged
        for i in 0..5 {
            assert_eq!(result[i], x[i]);
        }
    }

    #[test]
    fn test_relu_simd_all_negative() {
        let x = array![-5.0f64, -4.0, -3.0, -2.0, -1.0];
        let result = relu_simd(&x.view());

        // All values should be zero
        for i in 0..5 {
            assert_eq!(result[i], 0.0);
        }
    }

    #[test]
    fn test_relu_simd_empty() {
        let x: Array1<f64> = array![];
        let result = relu_simd(&x.view());
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_relu_simd_large_array() {
        // Test SIMD path with large array (>1000 elements)
        let x: Array1<f64> = Array1::from_vec((0..5000).map(|i| (i as f64) - 2500.0).collect());
        let result = relu_simd(&x.view());

        // First 2500 should be zero (negative)
        for i in 0..2500 {
            assert_eq!(result[i], 0.0);
        }

        // Element at 2500 is exactly 0 (2500 - 2500 = 0)
        assert_eq!(result[2500], 0.0);

        // Last 2499 should be positive (2501-2500=1, ..., 4999-2500=2499)
        for i in 2501..5000 {
            assert!(result[i] > 0.0);
        }
    }

    #[test]
    fn test_leaky_relu_simd_f64_basic() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        assert_eq!(result[0], -0.02);
        assert_eq!(result[1], -0.01);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_leaky_relu_simd_f32_basic() {
        let x = array![-2.0f32, -1.0, 0.0, 1.0, 2.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        assert!((result[0] - (-0.02)).abs() < 1e-6);
        assert!((result[1] - (-0.01)).abs() < 1e-6);
        assert_eq!(result[2], 0.0);
        assert_eq!(result[3], 1.0);
        assert_eq!(result[4], 2.0);
    }

    #[test]
    fn test_leaky_relu_simd_different_alpha() {
        let x = array![-10.0f64, -5.0, 0.0, 5.0, 10.0];

        // Test with alpha = 0.1
        let result_01 = leaky_relu_simd(&x.view(), 0.1);
        assert_eq!(result_01[0], -1.0); // -10.0 * 0.1
        assert_eq!(result_01[1], -0.5); // -5.0 * 0.1

        // Test with alpha = 0.2
        let result_02 = leaky_relu_simd(&x.view(), 0.2);
        assert_eq!(result_02[0], -2.0); // -10.0 * 0.2
        assert_eq!(result_02[1], -1.0); // -5.0 * 0.2
    }

    #[test]
    fn test_leaky_relu_simd_preserves_positive() {
        let x = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
        let result = leaky_relu_simd(&x.view(), 0.01);

        // All positive values should remain unchanged
        for i in 0..5 {
            assert_eq!(result[i], x[i]);
        }
    }

    #[test]
    fn test_leaky_relu_simd_empty() {
        let x: Array1<f64> = array![];
        let result = leaky_relu_simd(&x.view(), 0.01);
        assert_eq!(result.len(), 0);
    }

    #[test]
    fn test_relu_vs_leaky_relu() {
        let x = array![-2.0f64, -1.0, 0.0, 1.0, 2.0];

        let relu_result = relu_simd(&x.view());
        let leaky_result = leaky_relu_simd(&x.view(), 0.01);

        // Positive values should be the same
        assert_eq!(relu_result[3], leaky_result[3]);
        assert_eq!(relu_result[4], leaky_result[4]);

        // Negative values should differ
        assert!(relu_result[0] != leaky_result[0]);
        assert!(relu_result[1] != leaky_result[1]);

        // ReLU zeros negatives, Leaky ReLU scales them
        assert_eq!(relu_result[0], 0.0);
        assert_eq!(leaky_result[0], -0.02);
    }
}