scirs2_core/ndarray_ext/elementwise.rs

//! SIMD-accelerated element-wise mathematical operations
//!
//! This module provides high-performance implementations of common element-wise
//! mathematical functions that are fundamental for scientific computing, numerical
//! analysis, and data processing.
//!
//! # Operations
//!
//! ## Basic Operations
//! - **Absolute Value** (`abs_simd`): Computes |x| for each element
//! - **Sign** (`sign_simd`): Computes sign(x) = +1, 0, or -1 for each element
//! - **Square Root** (`sqrt_simd`): Computes √x for each element
//! - **Reciprocal** (`recip_simd`): Computes 1/x for each element
//!
//! ## Exponential and Logarithmic
//! - **Exponential** (`exp_simd`): Computes e^x for each element
//! - **Natural Logarithm** (`ln_simd`): Computes ln(x) for each element
//!
//! ## Trigonometric Functions
//! - **Sine** (`sin_simd`): Computes sin(x) for each element
//! - **Cosine** (`cos_simd`): Computes cos(x) for each element
//! - **Tangent** (`tan_simd`): Computes tan(x) for each element
//!
//! ## Hyperbolic Functions
//! - **Hyperbolic Sine** (`sinh_simd`): Computes sinh(x) for each element
//! - **Hyperbolic Cosine** (`cosh_simd`): Computes cosh(x) for each element
//! - **Hyperbolic Tangent** (`tanh_simd`): Computes tanh(x) for each element
//!
//! ## Power Functions
//! - **Power (scalar)** (`powf_simd`): Computes base^exp with a scalar exponent
//! - **Power (array)** (`pow_simd`): Computes `base[i]^exp[i]` element-wise
//! - **Power (integer)** (`powi_simd`): Computes base^n with an integer exponent
//!
//! ## Rounding Functions
//! - **Floor** (`floor_simd`): Rounds down to the largest integer <= x
//! - **Ceiling** (`ceil_simd`): Rounds up to the smallest integer >= x
//! - **Round** (`round_simd`): Rounds to the nearest integer (ties away from zero)
//! - **Fractional Part** (`fract_simd`): Returns the signed fractional part (x - trunc(x))
//!
//! ## Inverse Trigonometric Functions
//! - **Arctangent** (`atan_simd`): Computes atan(x) for each element
//! - **Arcsine** (`asin_simd`): Computes asin(x) for each element
//! - **Arccosine** (`acos_simd`): Computes acos(x) for each element
//! - **Two-argument arctangent** (`atan2_simd`): Computes atan2(y, x) element-wise
//!
//! ## Logarithm Variants
//! - **Base-10 Logarithm** (`log10_simd`): Computes log₁₀(x) for each element
//! - **Base-2 Logarithm** (`log2_simd`): Computes log₂(x) for each element
//!
//! ## Clamping Operations
//! - **Clamp** (`clamp_simd`): Constrains each element to the [min, max] range
//!
//! ## Special Functions
//! - **Gamma** (`gamma_simd`): Computes Γ(x) for each element
//!
//! # Performance
//!
//! All operations automatically use SIMD acceleration when:
//! - The platform supports AVX2 (x86_64) or NEON (ARM)
//! - The array is large enough to benefit from vectorization
//! - The array's memory layout is contiguous
//!
//! Falls back to scalar implementations for small arrays or unsupported platforms.
//!
//! # Examples
//!
//! ```
//! use scirs2_core::ndarray::array;
//! use scirs2_core::ndarray_ext::elementwise::{abs_simd, sign_simd, sqrt_simd, sinh_simd, tanh_simd, floor_simd, atan2_simd, log10_simd, clamp_simd};
//!
//! // Absolute value
//! let x = array![-3.0, -1.0, 0.0, 1.0, 3.0];
//! let abs_x = abs_simd(&x.view());
//! // Result: [3.0, 1.0, 0.0, 1.0, 3.0]
//!
//! // Sign function (signum)
//! let signs = sign_simd(&x.view());
//! // Result: [-1.0, -1.0, 0.0, 1.0, 1.0]
//!
//! // Square root
//! let y = array![1.0, 4.0, 9.0, 16.0, 25.0];
//! let sqrt_y = sqrt_simd(&y.view());
//! // Result: [1.0, 2.0, 3.0, 4.0, 5.0]
//!
//! // Hyperbolic tangent (neural network activation)
//! let z = array![-1.0, 0.0, 1.0];
//! let tanh_z = tanh_simd(&z.view());
//! // Result: [-0.762, 0.0, 0.762]
//!
//! // Floor (rounding down)
//! let w = array![1.2, 2.7, -1.3, -2.9];
//! let floor_w = floor_simd(&w.view());
//! // Result: [1.0, 2.0, -2.0, -3.0]
//!
//! // Two-argument arctangent (angle from coordinates)
//! let y_coords = array![1.0, 1.0, -1.0, -1.0];
//! let x_coords = array![1.0, -1.0, -1.0, 1.0];
//! let angles = atan2_simd(&y_coords.view(), &x_coords.view());
//! // Result: [π/4, 3π/4, -3π/4, -π/4]
//!
//! // Base-10 logarithm (decibels, pH scale)
//! let powers = array![1.0, 10.0, 100.0, 1000.0];
//! let log10_powers = log10_simd(&powers.view());
//! // Result: [0.0, 1.0, 2.0, 3.0]
//!
//! // Clamp values to range (pixel normalization)
//! let pixels = array![-0.5, 0.3, 0.7, 1.2, 1.8];
//! let normalized = clamp_simd(&pixels.view(), 0.0, 1.0);
//! // Result: [0.0, 0.3, 0.7, 1.0, 1.0]
//! ```

use crate::numeric::Float;
use crate::simd_ops::{AutoOptimizer, SimdUnifiedOps};
use ::ndarray::{Array1, ArrayView1};

/// Compute the absolute value of each element (SIMD-accelerated).
///
/// Computes |x| for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with absolute values.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// abs(x) = |x| = {
///     x   if x >= 0
///     -x  if x < 0
/// }
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::abs_simd;
///
/// let x = array![-3.0, -1.5, 0.0, 1.5, 3.0];
/// let result = abs_simd(&x.view());
///
/// assert_eq!(result[0], 3.0);
/// assert_eq!(result[1], 1.5);
/// assert_eq!(result[2], 0.0);
/// assert_eq!(result[3], 1.5);
/// assert_eq!(result[4], 3.0);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: Returns zero
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Infinity**: Returns positive infinity
///
/// # Applications
///
/// - **Statistics**: Absolute deviation, MAE (Mean Absolute Error)
/// - **Signal Processing**: Envelope detection, magnitude calculation
/// - **Optimization**: L1 norm computation, absolute loss functions
/// - **Numerical Analysis**: Error analysis, residual computation
/// - **Image Processing**: Gradient magnitude, difference images
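///
/// # Example: Mean Absolute Error (sketch)
///
/// A minimal sketch of the MAE computation mentioned above; the `pred` and
/// `target` arrays are illustrative inputs, not part of this module.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::abs_simd;
///
/// let pred = array![2.5_f64, 0.0, 2.0];
/// let target = array![3.0_f64, -0.5, 2.0];
/// // MAE = mean(|pred - target|)
/// let diff = &pred - &target;
/// let mae = abs_simd(&diff.view()).sum() / pred.len() as f64;
/// assert!((mae - 1.0 / 3.0).abs() < 1e-12);
/// ```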
pub fn abs_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_abs(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.abs())
}

/// Compute the sign (signum) of each element (SIMD-accelerated).
///
/// Returns +1.0 for positive values, -1.0 for negative values, and 0.0 for zero.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is the sign of the input.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large arrays on AVX2/NEON systems
///
/// # Mathematical Definition
///
/// ```text
/// sign(x) = { +1.0  if x > 0
///           {  0.0  if x = 0
///           { -1.0  if x < 0
/// ```
///
/// # Properties
///
/// - sign(-x) = -sign(x) (odd function)
/// - sign(0) = 0
/// - sign(x) * |x| = x
/// - sign(x) * sign(y) = sign(x * y)
/// - |sign(x)| ≤ 1 for all x
///
/// # Applications
///
/// - **Numerical Analysis**: Gradient descent direction, optimization algorithms
/// - **Signal Processing**: Phase detection, zero-crossing analysis
/// - **Physics Simulations**: Force direction, velocity direction
/// - **Machine Learning**: Feature engineering, binary classification features
/// - **Control Systems**: Error sign for PID controllers
/// - **Game Development**: Direction vectors, collision normals
/// - **Statistics**: Wilcoxon signed-rank test, sign test
/// - **Finance**: Market trend indicators (bull/bear)
/// - **Geometry**: Surface normal orientation, vector direction
/// - **Image Processing**: Edge direction, gradient orientation
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sign_simd;
///
/// let x = array![-3.0_f64, -1.5, 0.0, 1.5, 3.0];
/// let result = sign_simd(&x.view());
/// assert_eq!(result[0], -1.0_f64); // negative → -1
/// assert_eq!(result[1], -1.0_f64); // negative → -1
/// assert_eq!(result[2],  0.0_f64); // zero → 0
/// assert_eq!(result[3],  1.0_f64); // positive → +1
/// assert_eq!(result[4],  1.0_f64); // positive → +1
///
/// // Property: sign(x) * |x| = x
/// let values = array![-5.0_f64, -2.0, 0.0, 2.0, 5.0];
/// let signs = sign_simd(&values.view());
/// let abs_values = values.mapv(|x: f64| x.abs());
/// let reconstructed = signs * abs_values;
/// for i in 0..values.len() {
///     assert!((reconstructed[i] - values[i]).abs() < 1e-10);
/// }
/// ```
///
/// # See Also
///
/// - [`abs_simd`]: Magnitude without sign
/// - [`clamp_simd`]: Constrain values to a range
pub fn sign_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_sign(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| {
        if val > F::zero() {
            F::one()
        } else if val < F::zero() {
            -F::one()
        } else {
            F::zero()
        }
    })
}

/// Compute the square root of each element (SIMD-accelerated).
///
/// Computes √x for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array (must contain non-negative values)
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with square roots.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large f32 arrays on AVX2 systems
///
/// # Mathematical Definition
///
/// ```text
/// sqrt(x) = √x = y such that y² = x
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sqrt_simd;
///
/// let x = array![1.0_f64, 4.0, 9.0, 16.0, 25.0];
/// let result = sqrt_simd(&x.view());
///
/// assert_eq!(result[0], 1.0);
/// assert_eq!(result[1], 2.0);
/// assert_eq!(result[2], 3.0);
/// assert_eq!(result[3], 4.0);
/// assert_eq!(result[4], 5.0);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: Returns zero
/// - **Negative numbers**: Returns NaN (undefined in real numbers)
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Positive infinity**: Returns positive infinity
///
/// # Applications
///
/// - **Statistics**: Standard deviation, RMS (Root Mean Square)
/// - **Geometry**: Distance calculations, Euclidean metrics
/// - **Physics**: Velocity from kinetic energy, magnitude calculations
/// - **Machine Learning**: Gradient scaling, learning rate schedules
/// - **Image Processing**: Gaussian blur, distance transforms
///
/// # Note on Negative Values
///
/// The square root of a negative number is undefined in real arithmetic.
/// The function will return NaN for negative inputs, following the IEEE 754 standard.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sqrt_simd;
///
/// let x = array![-1.0_f64];
/// let result = sqrt_simd(&x.view());
/// assert!(result[0].is_nan());
/// ```
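///
/// # Example: Euclidean distances (sketch)
///
/// A minimal sketch of the distance use case above, assuming the squared
/// distances (x² + y²) have already been accumulated; the inputs are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sqrt_simd;
///
/// // Squared distances for three points
/// let squared = array![9.0_f64, 25.0, 100.0];
/// let distances = sqrt_simd(&squared.view());
/// assert_eq!(distances[1], 5.0);
/// ```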
pub fn sqrt_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_sqrt(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.sqrt())
}

/// Compute the exponential (e^x) of each element (SIMD-accelerated).
///
/// Computes e^x for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with exponential values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// exp(x) = e^x where e ≈ 2.71828...
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::exp_simd;
///
/// let x = array![0.0_f64, 1.0, 2.0];
/// let result = exp_simd(&x.view());
///
/// assert!((result[0] - 1.0).abs() < 1e-10);
/// assert!((result[1] - 2.718281828).abs() < 1e-9);
/// assert!((result[2] - 7.389056099).abs() < 1e-9);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: exp(0) = 1
/// - **Large positive**: May overflow to infinity
/// - **Large negative**: Approaches zero
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Machine Learning**: Softmax, sigmoid activation
/// - **Optimization**: Exponential decay, learning rate schedules
/// - **Probability**: Exponential distribution, Gaussian PDF
/// - **Neural Networks**: Attention mechanisms, transformer models
/// - **Reinforcement Learning**: Policy gradients, Q-learning
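///
/// # Example: Numerically stable softmax (sketch)
///
/// A minimal sketch of the softmax use case above; shifting by the maximum
/// before exponentiating is a standard trick to avoid overflow. The `logits`
/// values are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::exp_simd;
///
/// let logits = array![1.0_f64, 2.0, 3.0];
/// let max = logits.fold(f64::NEG_INFINITY, |a, &b| a.max(b));
/// let exps = exp_simd(&logits.mapv(|v| v - max).view());
/// let softmax = &exps / exps.sum();
/// assert!((softmax.sum() - 1.0).abs() < 1e-12);
/// ```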
pub fn exp_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_exp(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.exp())
}

/// Compute the natural logarithm (ln(x)) of each element (SIMD-accelerated).
///
/// Computes ln(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array (must contain positive values)
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with natural logarithm values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// ln(x) = log_e(x) = y such that e^y = x
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::ln_simd;
///
/// let x = array![1.0_f64, 2.718281828, 7.389056099];
/// let result = ln_simd(&x.view());
///
/// assert!((result[0] - 0.0).abs() < 1e-10);
/// assert!((result[1] - 1.0).abs() < 1e-9);
/// assert!((result[2] - 2.0).abs() < 1e-9);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **One**: ln(1) = 0
/// - **Zero**: Returns negative infinity
/// - **Negative numbers**: Returns NaN (undefined in reals)
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Positive infinity**: Returns positive infinity
///
/// # Applications
///
/// - **Machine Learning**: Log-likelihood, cross-entropy loss
/// - **Statistics**: Log-normal distribution, Shannon entropy
/// - **Optimization**: Log-barrier methods, logarithmic objectives
/// - **Automatic Differentiation**: Logarithmic derivatives
/// - **Information Theory**: Mutual information, KL divergence
///
/// # Note on Negative Values
///
/// The natural logarithm of a negative number is undefined in real arithmetic.
/// The function will return NaN for negative inputs, following the IEEE 754 standard.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::ln_simd;
///
/// let x = array![-1.0_f64];
/// let result = ln_simd(&x.view());
/// assert!(result[0].is_nan());
/// ```
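///
/// # Example: Shannon entropy (sketch)
///
/// A minimal sketch of the entropy use case above for a small discrete
/// distribution; the result is in nats, divided by ln 2 to get bits. The
/// probabilities are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::ln_simd;
///
/// let p = array![0.5_f64, 0.25, 0.25];
/// // H(p) = -Σ p_i ln(p_i)
/// let entropy = -(&p * &ln_simd(&p.view())).sum();
/// assert!((entropy / std::f64::consts::LN_2 - 1.5).abs() < 1e-12);
/// ```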
pub fn ln_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_ln(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.ln())
}

/// Compute the sine of each element (SIMD-accelerated).
///
/// Computes sin(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array (angles in radians)
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with sine values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// sin(x) = opposite/hypotenuse in a right triangle
/// Periodic with period 2π: sin(x + 2π) = sin(x)
/// Range: [-1, 1]
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sin_simd;
/// use std::f64::consts::PI;
///
/// let x = array![0.0_f64, PI/6.0, PI/4.0, PI/2.0, PI];
/// let result = sin_simd(&x.view());
///
/// // sin(0) = 0, sin(π/2) = 1, sin(π) ≈ 0
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: sin(0) = 0
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Infinity**: Returns NaN (undefined for infinity)
///
/// # Applications
///
/// - **Signal Processing**: Wave generation, modulation, Fourier analysis
/// - **Computer Graphics**: Rotation matrices, smooth animations
/// - **Physics Simulations**: Oscillations, wave propagation
/// - **Audio**: Synthesizers, tone generation
/// - **Robotics**: Trajectory planning, inverse kinematics
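///
/// # Example: Sampling a sine wave (sketch)
///
/// A minimal sketch of the wave-generation use case above, sampling one
/// period of a unit-amplitude tone; the sample count is illustrative.
///
/// ```
/// use scirs2_core::ndarray::Array1;
/// use scirs2_core::ndarray_ext::elementwise::sin_simd;
/// use std::f64::consts::PI;
///
/// let n = 64;
/// let t = Array1::linspace(0.0, 1.0, n);
/// let wave = sin_simd(&t.mapv(|ti| 2.0 * PI * ti).view());
/// assert!(wave.iter().all(|&v| (-1.0..=1.0).contains(&v)));
/// ```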
pub fn sin_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_sin(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.sin())
}

/// Compute the cosine of each element (SIMD-accelerated).
///
/// Computes cos(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array (angles in radians)
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with cosine values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// cos(x) = adjacent/hypotenuse in a right triangle
/// Periodic with period 2π: cos(x + 2π) = cos(x)
/// Range: [-1, 1]
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::cos_simd;
/// use std::f64::consts::PI;
///
/// let x = array![0.0_f64, PI/3.0, PI/2.0, PI];
/// let result = cos_simd(&x.view());
///
/// // cos(0) = 1, cos(π/2) ≈ 0, cos(π) = -1
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: cos(0) = 1
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Infinity**: Returns NaN (undefined for infinity)
///
/// # Applications
///
/// - **Computer Vision**: Rotation matrices, coordinate transformations
/// - **Signal Processing**: Fourier transforms, filtering
/// - **Path Planning**: Dubins curves, trajectory optimization
/// - **Geometry**: Circle parametrization, spherical coordinates
/// - **Neural Networks**: Positional encoding (transformers)
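///
/// # Example: Points on the unit circle (sketch)
///
/// A minimal sketch of the circle-parametrization use case above, pairing
/// `cos_simd` with `sin_simd` and checking cos²θ + sin²θ = 1.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{cos_simd, sin_simd};
/// use std::f64::consts::PI;
///
/// let angles = array![0.0_f64, PI / 3.0, PI / 2.0, PI];
/// let xs = cos_simd(&angles.view());
/// let ys = sin_simd(&angles.view());
/// for i in 0..angles.len() {
///     assert!((xs[i] * xs[i] + ys[i] * ys[i] - 1.0).abs() < 1e-12);
/// }
/// ```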
pub fn cos_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_cos(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.cos())
}

/// Compute the tangent of each element (SIMD-accelerated).
///
/// Computes tan(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array (angles in radians)
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with tangent values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// tan(x) = sin(x) / cos(x)
/// Periodic with period π: tan(x + π) = tan(x)
/// Range: (-∞, ∞)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::tan_simd;
/// use std::f64::consts::PI;
///
/// let x = array![0.0_f64, PI/4.0, PI/6.0];
/// let result = tan_simd(&x.view());
///
/// // tan(0) = 0, tan(π/4) = 1
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: tan(0) = 0
/// - **π/2, 3π/2, ...**: Returns ±infinity (undefined at cos(x)=0)
/// - **NaN**: Returns NaN (preserves NaN)
/// - **Infinity**: Returns NaN (undefined for infinity)
///
/// # Applications
///
/// - **Computer Graphics**: Perspective projection, field of view
/// - **Navigation**: Bearing calculations, angle determination
/// - **Physics**: Slope calculations, inclined planes
/// - **Image Processing**: Gradient direction, edge angles
/// - **Surveying**: Distance and height measurements
///
/// # Note on Singularities
///
/// Tangent has singularities at x = π/2 + nπ where n is any integer.
/// At these points, cos(x) = 0 and tan(x) approaches ±infinity.
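///
/// # Example: Slope from an inclination angle (sketch)
///
/// A minimal sketch of the slope use case above: the grade of an inclined
/// surface is tan(θ) for inclination angle θ. The angles are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::tan_simd;
/// use std::f64::consts::PI;
///
/// let angles = array![PI / 6.0, PI / 4.0, PI / 3.0];
/// let slopes = tan_simd(&angles.view());
/// assert!((slopes[1] - 1.0).abs() < 1e-12); // 45° ⇒ slope 1
/// ```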
pub fn tan_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_tan(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.tan())
}

/// Compute the power of each element with a scalar exponent (SIMD-accelerated).
///
/// Computes base^exp for each element in the base array.
///
/// # Arguments
///
/// * `base` - Input 1D array of base values
/// * `exp` - Scalar exponent value
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with power values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// powf(base, exp) = base^exp
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::powf_simd;
///
/// let base = array![2.0_f64, 3.0, 4.0, 5.0];
/// let result = powf_simd(&base.view(), 2.0);
///
/// // [4.0, 9.0, 16.0, 25.0]
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **x^0**: Returns 1 for any x (including 0^0 = 1 by convention)
/// - **x^1**: Returns x
/// - **0^negative**: Returns infinity
/// - **negative^non-integer**: Returns NaN
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Statistics**: Variance, standard deviation, moment calculations
/// - **Machine Learning**: Polynomial features, L2 regularization
/// - **Signal Processing**: Power spectral density, energy calculations
/// - **Physics**: Kinetic energy, gravitational potential
/// - **Finance**: Compound interest, exponential growth models
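///
/// # Example: Population variance (sketch)
///
/// A minimal sketch of the variance use case above, squaring deviations from
/// the mean with a scalar exponent; the input values are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::powf_simd;
///
/// let x = array![1.0_f64, 2.0, 3.0, 4.0];
/// let mean = x.sum() / x.len() as f64;
/// let deviations = x.mapv(|v| v - mean);
/// let variance = powf_simd(&deviations.view(), 2.0).sum() / x.len() as f64;
/// assert!((variance - 1.25).abs() < 1e-12);
/// ```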
pub fn powf_simd<F>(base: &ArrayView1<F>, exp: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if base.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(base.len()) {
        return F::simd_powf(base, exp);
    }

    // Scalar fallback for small arrays
    base.mapv(|val| val.powf(exp))
}

/// Compute the element-wise power with array exponents (SIMD-accelerated).
///
/// Computes `base[i]^exp[i]` for each pair of elements.
///
/// # Arguments
///
/// * `base` - Input 1D array of base values
/// * `exp` - Input 1D array of exponent values (must match base length)
///
/// # Returns
///
/// `Array1<F>` with the same length as inputs, with power values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// pow(base, exp)[i] = base[i]^exp[i]
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::pow_simd;
///
/// let base = array![2.0_f64, 3.0, 4.0, 5.0];
/// let exp = array![2.0, 3.0, 2.0, 1.0];
/// let result = pow_simd(&base.view(), &exp.view());
///
/// // [4.0, 27.0, 16.0, 5.0]
/// ```
///
/// # Edge Cases
///
/// - **Empty arrays**: Returns empty array
/// - **Mismatched lengths**: Panics (arrays must have same length)
/// - **x^0**: Returns 1 for any x
/// - **0^0**: Returns 1 by convention
/// - **0^negative**: Returns infinity
/// - **negative^non-integer**: Returns NaN
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Machine Learning**: Custom activation functions, attention mechanisms
/// - **Statistics**: Generalized power transformations
/// - **Optimization**: Power-law scaling, Pareto distributions
/// - **Signal Processing**: Non-linear transformations, compression
/// - **Physics**: Variable exponent models, fractal dimensions
pub fn pow_simd<F>(base: &ArrayView1<F>, exp: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    assert_eq!(
        base.len(),
        exp.len(),
        "Base and exponent arrays must have the same length"
    );

    if base.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(base.len()) {
        return F::simd_pow(base, exp);
    }

    // Scalar fallback for small arrays
    base.iter()
        .zip(exp.iter())
        .map(|(&b, &e)| b.powf(e))
        .collect::<Vec<_>>()
        .into()
}

/// Compute the hyperbolic sine of each element (SIMD-accelerated).
///
/// Computes sinh(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with hyperbolic sine values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// sinh(x) = (e^x - e^(-x)) / 2
/// Range: (-∞, ∞)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sinh_simd;
///
/// let x = array![0.0_f64, 1.0, -1.0];
/// let result = sinh_simd(&x.view());
///
/// // sinh(0) = 0, sinh(1) ≈ 1.175, sinh(-1) ≈ -1.175
/// assert!((result[0] - 0.0).abs() < 1e-10);
/// assert!((result[1] - 1.1752011936).abs() < 1e-9);
/// assert!((result[2] + 1.1752011936).abs() < 1e-9);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: sinh(0) = 0
/// - **Large positive**: May overflow to infinity
/// - **Large negative**: May overflow to negative infinity
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Neural Networks**: Tanh activation (related via tanh = sinh/cosh)
/// - **Numerical Integration**: Tanh-sinh quadrature
/// - **Physics**: Catenary curves, special relativity
/// - **Engineering**: Transmission line theory, heat transfer
/// - **Mathematics**: Hyperbolic geometry, complex analysis
pub fn sinh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_sinh(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.sinh())
}

/// Compute the hyperbolic cosine of each element (SIMD-accelerated).
///
/// Computes cosh(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with hyperbolic cosine values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// cosh(x) = (e^x + e^(-x)) / 2
/// Range: [1, ∞) (always >= 1)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::cosh_simd;
///
/// let x = array![0.0_f64, 1.0, -1.0];
/// let result = cosh_simd(&x.view());
///
/// // cosh(0) = 1, cosh(1) ≈ 1.543, cosh(-1) ≈ 1.543
/// assert!((result[0] - 1.0).abs() < 1e-10);
/// assert!((result[1] - 1.5430806348).abs() < 1e-9);
/// assert!((result[2] - 1.5430806348).abs() < 1e-9);
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: cosh(0) = 1
/// - **Symmetric**: cosh(-x) = cosh(x)
/// - **Large values**: May overflow to infinity
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Neural Networks**: Activation functions, normalization
/// - **Physics**: Wave propagation, special relativity
/// - **Engineering**: Cable suspension, arch design
/// - **Mathematics**: Hyperbolic identities (cosh² - sinh² = 1)
/// - **Numerical Methods**: Stability analysis
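///
/// # Example: Hyperbolic identity (sketch)
///
/// A minimal sketch checking the identity cosh²(x) − sinh²(x) = 1 noted
/// above, pairing `cosh_simd` with `sinh_simd`.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{cosh_simd, sinh_simd};
///
/// let x = array![-2.0_f64, -0.5, 0.0, 0.5, 2.0];
/// let c = cosh_simd(&x.view());
/// let s = sinh_simd(&x.view());
/// for i in 0..x.len() {
///     assert!((c[i] * c[i] - s[i] * s[i] - 1.0).abs() < 1e-10);
/// }
/// ```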
pub fn cosh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_cosh(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.cosh())
}

/// Compute the hyperbolic tangent of each element (SIMD-accelerated).
///
/// Computes tanh(x) for each element in the array.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, with hyperbolic tangent values.
///
/// # Performance
///
/// - **Auto-vectorization**: Compiler optimizations provide excellent performance
/// - **Speedup**: 2-4x on large arrays via auto-vectorization
///
/// # Mathematical Definition
///
/// ```text
/// tanh(x) = sinh(x) / cosh(x) = (e^x - e^(-x)) / (e^x + e^(-x))
/// Range: (-1, 1)
/// ```
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::tanh_simd;
///
/// let x = array![0.0_f64, 1.0, -1.0, 10.0];
/// let result = tanh_simd(&x.view());
///
/// // tanh(0) = 0, tanh(1) ≈ 0.762, tanh(-1) ≈ -0.762, tanh(∞) → 1
/// assert!((result[0] - 0.0_f64).abs() < 1e-10);
/// assert!((result[1] - 0.7615941559_f64).abs() < 1e-9);
/// assert!((result[2] + 0.7615941559_f64).abs() < 1e-9);
/// assert!((result[3] - 1.0_f64).abs() < 1e-8); // tanh(10) ≈ 1 (1 - tanh(10) ≈ 4e-9)
/// ```
///
/// # Edge Cases
///
/// - **Empty array**: Returns empty array
/// - **Zero**: tanh(0) = 0
/// - **Asymptotic**: tanh(x) → ±1 as x → ±∞
/// - **Anti-symmetric**: tanh(-x) = -tanh(x)
/// - **NaN**: Returns NaN (preserves NaN)
///
/// # Applications
///
/// - **Neural Networks**: Tanh activation function (classic activation)
/// - **Machine Learning**: Gradient clipping, normalization layers
/// - **Reinforcement Learning**: Policy networks, value functions
/// - **Signal Processing**: Soft limiting, saturation
/// - **Optimization**: Smooth approximation to the sign function
/// - **Physics**: Relativistic velocity addition
///
/// # Note on Neural Networks
///
/// Tanh was historically the most popular activation function before ReLU.
/// It is still widely used in RNNs, LSTMs, and GRUs for gating mechanisms.
/// Gradient: d/dx tanh(x) = 1 - tanh²(x) = sech²(x)
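///
/// # Example: Tanh gradient (sketch)
///
/// A minimal sketch of the gradient formula above, d/dx tanh(x) = 1 − tanh²(x),
/// computed from the forward values.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::tanh_simd;
///
/// let x = array![-1.0_f64, 0.0, 1.0];
/// let t = tanh_simd(&x.view());
/// let grad = t.mapv(|v| 1.0 - v * v);
/// assert_eq!(grad[1], 1.0); // the gradient is largest at x = 0
/// ```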
pub fn tanh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_tanh(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.tanh())
}

/// Compute the floor (round down) of each element (SIMD-accelerated).
///
/// Computes the largest integer less than or equal to x for each element.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is rounded down
/// to the nearest integer.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
///
/// # Mathematical Properties
///
/// - For any x: floor(x) <= x
/// - floor(x) is the largest integer <= x
/// - floor(-x) = -ceil(x)
/// - floor(x) = x if x is already an integer
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::floor_simd;
///
/// let x = array![1.2, 2.7, -1.3, -2.9, 3.0];
/// let result = floor_simd(&x.view());
/// // Result: [1.0, 2.0, -2.0, -3.0, 3.0]
/// ```
///
/// # Applications
///
/// - **Binning**: Discretizing continuous values into bins
/// - **Indexing**: Converting continuous coordinates to discrete indices
/// - **Quantization**: Reducing precision for data compression
/// - **Digital Signal Processing**: Sample rate conversion, downsampling
/// - **Computer Graphics**: Pixel coordinate calculations
/// - **Financial**: Rounding down monetary amounts
///
/// # See Also
///
/// - [`ceil_simd`]: Round up to smallest integer >= x
/// - [`round_simd`]: Round to nearest integer
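///
/// # Example: Binning continuous values (sketch)
///
/// A minimal sketch of the binning use case above, mapping values into bins
/// of width 0.5; the bin width and inputs are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::floor_simd;
///
/// let values = array![0.3_f64, 0.7, 1.2, 2.9];
/// let bin_width = 0.5;
/// let bin_indices = floor_simd(&values.mapv(|v| v / bin_width).view());
/// // Result: [0.0, 1.0, 2.0, 5.0]
/// assert_eq!(bin_indices[3], 5.0);
/// ```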
pub fn floor_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_floor(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.floor())
}

/// Compute the ceiling (round up) of each element (SIMD-accelerated).
///
/// Computes the smallest integer greater than or equal to x for each element.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is rounded up
/// to the nearest integer.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
///
/// # Mathematical Properties
///
/// - For any x: ceil(x) >= x
/// - ceil(x) is the smallest integer >= x
/// - ceil(-x) = -floor(x)
/// - ceil(x) = x if x is already an integer
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::ceil_simd;
///
/// let x = array![1.2, 2.7, -1.3, -2.9, 3.0];
/// let result = ceil_simd(&x.view());
/// // Result: [2.0, 3.0, -1.0, -2.0, 3.0]
/// ```
///
/// # Applications
///
/// - **Memory Allocation**: Rounding up buffer sizes
/// - **Pagination**: Calculating number of pages needed
/// - **Resource Planning**: Determining minimum resources required
/// - **Digital Signal Processing**: Sample rate conversion, upsampling
/// - **Computer Graphics**: Bounding box calculations
/// - **Financial**: Rounding up monetary amounts (conservative estimation)
///
/// # See Also
///
/// - [`floor_simd`]: Round down to largest integer <= x
/// - [`round_simd`]: Round to nearest integer
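///
/// # Example: Page counts (sketch)
///
/// A minimal sketch of the pagination use case above: the number of pages is
/// ceil(items / page_size). The counts and page size are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::ceil_simd;
///
/// let item_counts = array![1.0_f64, 10.0, 11.0, 95.0];
/// let page_size = 10.0;
/// let pages = ceil_simd(&item_counts.mapv(|n| n / page_size).view());
/// // Result: [1.0, 1.0, 2.0, 10.0]
/// assert_eq!(pages[2], 2.0);
/// ```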
pub fn ceil_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_ceil(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.ceil())
}

/// Round each element to the nearest integer (SIMD-accelerated).
///
/// Rounds each element to the nearest integer, with ties (x.5) rounding away
/// from zero (standard rounding).
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is rounded to
/// the nearest integer.
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD is unavailable
///
/// # Rounding Behavior
///
/// This function uses "round half away from zero" (standard rounding):
/// - 0.5 rounds to 1.0
/// - 1.5 rounds to 2.0
/// - 2.5 rounds to 3.0
/// - -0.5 rounds to -1.0
/// - -1.5 rounds to -2.0
///
/// # Mathematical Properties
///
/// - |round(x) - x| <= 0.5 for all x
/// - round(x) = x if x is already an integer
/// - round(-x) = -round(x)
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::round_simd;
///
/// let x = array![1.2, 2.7, -1.3, -2.9, 3.0, 2.5];
/// let result = round_simd(&x.view());
/// // Result: [1.0, 3.0, -1.0, -3.0, 3.0, 3.0]
/// //          note: 2.5 rounds to 3.0 (away from zero)
/// ```
///
/// # Applications
///
/// - **Data Visualization**: Rounding values for display
/// - **Statistics**: Rounding statistical summaries
/// - **Financial**: General-purpose monetary rounding
/// - **Machine Learning**: Quantization for model compression
/// - **Image Processing**: Pixel value normalization
/// - **Scientific Computing**: Reducing numerical noise
///
/// # See Also
///
/// - [`floor_simd`]: Round down to largest integer <= x
/// - [`ceil_simd`]: Round up to smallest integer >= x
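///
/// # Example: Rounding to two decimal places (sketch)
///
/// A minimal sketch of the display-rounding use case above: scale, round,
/// then scale back. The inputs are illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::round_simd;
///
/// let x = array![1.2345_f64, 2.7182, -0.0051];
/// let rounded = round_simd(&x.mapv(|v| v * 100.0).view()).mapv(|v| v / 100.0);
/// assert!((rounded[0] - 1.23).abs() < 1e-12);
/// assert!((rounded[1] - 2.72).abs() < 1e-12);
/// ```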
pub fn round_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_round(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.round())
}

/// Compute the fractional part of each element (SIMD-accelerated).
///
/// Returns the signed fractional component of each value (x - trunc(x)).
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is the signed fractional part
/// in the range (-1, 1).
///
/// # Performance
///
/// - **SIMD**: Automatically used for large arrays (1000+ elements)
/// - **Scalar**: Used for small arrays or when SIMD unavailable
/// - **Speedup**: 2-4x for large arrays on AVX2/NEON systems
///
/// # Mathematical Definition
///
/// ```text
/// fract(x) = x - trunc(x)
/// ```
///
/// For positive x: same as x - floor(x)
/// For negative x: preserves sign (e.g., fract(-1.5) = -0.5)
///
/// # Properties
///
/// - -1 < fract(x) < 1 for all finite x
/// - fract(x + n) = fract(x) for any integer n (periodic with period 1)
/// - fract(x) = 0 if and only if x is an integer
/// - fract(x) + trunc(x) = x
/// - fract(-x) = -fract(x) (odd function)
///
/// # Applications
///
/// - **Computer Graphics**: Texture coordinate wrapping, repeating patterns
/// - **Animation**: Cyclic motion, looping animations
/// - **Signal Processing**: Modulo 1 operations, phase wrapping
/// - **Numerical Methods**: Fractional part extraction, decimal decomposition
/// - **Game Development**: Tile-based rendering, repeating textures
/// - **Scientific Computing**: Periodic boundary conditions, modulo arithmetic
/// - **Audio Processing**: Phase accumulation, waveform generation
/// - **Time Calculations**: Extracting fractional seconds, subsecond precision
/// - **Cryptography**: Linear congruential generators, pseudo-random sequences
/// - **Financial**: Fractional share calculations, interest accrual
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::fract_simd;
///
/// let x = array![1.5_f64, 2.7, -1.3, 0.0, 3.0];
/// let result = fract_simd(&x.view());
/// assert!((result[0] - 0.5_f64).abs() < 1e-10);    // 1.5 - trunc(1.5) = 0.5
/// assert!((result[1] - 0.7_f64).abs() < 1e-10);    // 2.7 - trunc(2.7) = 0.7
/// assert!((result[2] - (-0.3_f64)).abs() < 1e-10); // -1.3 - trunc(-1.3) = -0.3
/// assert_eq!(result[3], 0.0_f64);                   // 0.0 - trunc(0.0) = 0.0
/// assert_eq!(result[4], 0.0_f64);                   // 3.0 - trunc(3.0) = 0.0
///
/// // Property: fract(x) + trunc(x) = x
/// let values = array![5.25_f64, -2.75, 0.5, 10.0];
/// let fract_parts = fract_simd(&values.view());
/// let trunc_parts = values.mapv(|v: f64| v.trunc());
/// let reconstructed = &fract_parts + &trunc_parts;
/// for i in 0..values.len() {
///     assert!((reconstructed[i] - values[i]).abs() < 1e-10);
/// }
/// ```
///
/// # See Also
///
/// - [`floor_simd`]: Round down to integer
/// - [`round_simd`]: Round to nearest integer
pub fn fract_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_fract(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.fract())
}

/// Compute the reciprocal (multiplicative inverse) of each element (SIMD-accelerated).
///
/// Returns 1/x for each element in the array. For zero values, the result is infinity.
///
/// # Arguments
///
/// * `x` - Input 1D array
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is the reciprocal (1/x).
/// Zero elements yield infinity values.
///
/// # Mathematical Definition
///
/// ```text
/// recip(x) = 1/x for x ≠ 0
/// recip(0) = ∞
/// ```
///
/// # Properties
///
/// - recip(recip(x)) = x (involutive property)
/// - recip(x * y) = recip(x) * recip(y) (multiplicative property)
/// - recip(x/y) = y/x (division inversion)
/// - recip(1) = 1 (identity element)
/// - recip(-x) = -recip(x) (odd function)
/// - recip(x^n) = (recip(x))^n (power property)
///
/// # Applications
///
/// ## Numerical Analysis
/// - Matrix inversions: Computing A^(-1) in linear systems
/// - Newton's method: Division-free iterations via reciprocal approximations
/// - Condition number estimation: ||A|| * ||A^(-1)||
/// - Iterative refinement: Improving solution accuracy in linear solvers
///
/// ## Computer Graphics
/// - Projection transformations: Converting world coordinates to screen space
/// - Lighting calculations: Inverse square law for light attenuation
/// - Texture mapping: Computing texture coordinate gradients
/// - Depth buffer operations: Converting depth to 1/z for perspective
///
/// ## Physics Simulations
/// - Inverse square laws: Gravitational force (F = G * m1 * m2 / r^2)
/// - Wave propagation: Intensity attenuation (I ∝ 1/r^2)
/// - Electromagnetic fields: Coulomb's law (F = k * q1 * q2 / r^2)
/// - Fluid dynamics: Pressure gradient computations
///
/// ## Signal Processing
/// - Filter design: Converting frequency response to impulse response
/// - Normalization: Scaling signals by reciprocal of maximum amplitude
/// - Frequency domain analysis: Converting period to frequency (f = 1/T)
/// - Deconvolution: Inverse filtering for signal restoration
///
/// ## Machine Learning
/// - Normalization layers: Scaling features by reciprocal of variance
/// - Activation functions: Sigmoid (1 / (1 + exp(-x)))
/// - Loss functions: Reciprocal of predictions in certain regression tasks
/// - Learning rate schedules: Inverse decay (lr = initial_lr / (1 + decay * t))
///
/// ## Financial Modeling
/// - Discount rates: Present value calculations (PV = FV * recip(1 + r)^n)
/// - Interest rate conversions: Converting rates to factors
/// - Portfolio optimization: Inverse volatility weighting
/// - Option pricing: Black-Scholes model components
///
/// # Edge Cases
///
/// - recip(0.0) = f64::INFINITY
/// - recip(-0.0) = f64::NEG_INFINITY
/// - recip(INFINITY) = 0.0
/// - recip(NEG_INFINITY) = -0.0
/// - recip(NaN) = NaN
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::recip_simd;
///
/// let x = array![2.0_f64, 4.0, 0.5, 1.0];
/// let result = recip_simd(&x.view());
///
/// assert!((result[0] - 0.5_f64).abs() < 1e-10);    // recip(2.0) = 0.5
/// assert!((result[1] - 0.25_f64).abs() < 1e-10);   // recip(4.0) = 0.25
/// assert!((result[2] - 2.0_f64).abs() < 1e-10);    // recip(0.5) = 2.0
/// assert!((result[3] - 1.0_f64).abs() < 1e-10);    // recip(1.0) = 1.0
/// ```
///
/// # Performance
///
/// This function uses SIMD acceleration for arrays larger than 1000 elements,
/// providing significant speedups through vectorized reciprocal approximations.
/// For smaller arrays, a scalar fallback is used to avoid SIMD overhead.
///
/// # Panics
///
/// This function does not panic. Division by zero results in infinity values
/// as per IEEE 754 floating-point semantics.
pub fn recip_simd<F>(x: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if x.is_empty() {
        return Array1::zeros(0);
    }

    let optimizer = AutoOptimizer::new();
    if optimizer.should_use_simd(x.len()) {
        return F::simd_recip(x);
    }

    // Scalar fallback for small arrays
    x.mapv(|val| val.recip())
}

/// Compute integer power (base^n) for each element (SIMD-accelerated).
///
/// Returns base^n for each element in the array, where n is an integer exponent.
/// More efficient than powf for integer exponents.
///
/// # Arguments
///
/// * `base` - Input 1D array
/// * `n` - Integer exponent
///
/// # Returns
///
/// `Array1<F>` with the same length as input, where each element is base^n.
///
/// # Mathematical Definition
///
/// ```text
/// powi(x, n) = x^n for integer n
/// ```
///
/// # Properties
///
/// - powi(x, 0) = 1 for all x (including 0^0 = 1 by convention)
/// - powi(x, 1) = x
/// - powi(x, -n) = 1 / powi(x, n)
/// - powi(x, n+m) = powi(x, n) * powi(x, m) (exponent addition)
/// - powi(x*y, n) = powi(x, n) * powi(y, n) (distributive over multiplication)
/// - powi(x, n*m) = powi(powi(x, n), m) (exponent multiplication)
///
/// # Applications
///
/// ## Statistics
/// - Chi-square distributions: Σ(x_i - μ)^2
/// - Polynomial distributions: Computing moments
/// - Variance calculations: E\[X^2\] - (E\[X\])^2
/// - Higher-order moments: Skewness (3rd moment), kurtosis (4th moment)
///
/// ## Linear Algebra
/// - Matrix powers: A^n for matrix operations
/// - Eigenvalue problems: Computing characteristic polynomials
/// - Norm calculations: ||x||_p = (Σ|x_i|^p)^(1/p)
/// - Gram matrices: X^T X operations
///
/// ## Signal Processing
/// - Polynomial filters: Computing filter responses
/// - Power spectral density: |X(f)|^2
/// - Autocorrelation: r(k) = Σ x(n) * x(n-k)
/// - Window functions: Raised cosine windows with integer powers
///
/// ## Machine Learning
/// - Polynomial features: x^2, x^3 for feature engineering
/// - Loss functions: L2 loss with (y - ŷ)^2
/// - Regularization: Ridge regression with ||w||^2
/// - Gradient descent: Computing squared gradients
///
/// ## Numerical Analysis
/// - Taylor series: Computing polynomial approximations
/// - Newton's method: f(x) and f'(x) evaluations
/// - Polynomial interpolation: Lagrange basis functions
/// - Finite differences: Computing higher-order derivatives
///
/// ## Physics Simulations
/// - Inverse square laws: 1/r^2 for gravity and electromagnetism
/// - Kinetic energy: (1/2)mv^2
/// - Potential energy: Polynomial potentials
/// - Power laws: Scaling relationships
///
/// # Edge Cases
///
/// - powi(0, n) = 0 for n > 0
/// - powi(0, 0) = 1 (mathematical convention)
/// - powi(x, 0) = 1 for all x
/// - powi(∞, n) = ∞ for n > 0, 0 for n < 0
/// - powi(NaN, n) = NaN
///
/// # Examples
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::powi_simd;
///
/// let x = array![2.0_f64, 3.0, 4.0, 5.0];
///
/// // Square
/// let squared = powi_simd(&x.view(), 2);
/// assert!((squared[0] - 4.0_f64).abs() < 1e-10);    // 2^2 = 4
/// assert!((squared[1] - 9.0_f64).abs() < 1e-10);    // 3^2 = 9
/// assert!((squared[2] - 16.0_f64).abs() < 1e-10);   // 4^2 = 16
/// assert!((squared[3] - 25.0_f64).abs() < 1e-10);   // 5^2 = 25
///
/// // Cube
/// let cubed = powi_simd(&x.view(), 3);
/// assert!((cubed[0] - 8.0_f64).abs() < 1e-10);      // 2^3 = 8
/// assert!((cubed[1] - 27.0_f64).abs() < 1e-10);     // 3^3 = 27
///
/// // Negative exponent (reciprocal)
/// let inv_squared = powi_simd(&x.view(), -2);
/// assert!((inv_squared[0] - 0.25_f64).abs() < 1e-10);   // 2^(-2) = 0.25
/// assert!((inv_squared[1] - (1.0_f64/9.0)).abs() < 1e-10); // 3^(-2) = 1/9
/// ```
///
/// # Performance
///
/// This function uses SIMD acceleration for arrays larger than 1000 elements.
/// Integer power is computed using efficient exponentiation by squaring,
/// providing better performance than powf for integer exponents.
///
/// For small arrays, a scalar fallback is used to avoid SIMD overhead.
///
/// # Panics
///
/// This function does not panic. Edge cases like 0^0 return 1 as per
/// mathematical convention and IEEE 754 semantics.
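///
/// # Example: Squared L2 norm (sketch)
///
/// A minimal sketch of the norm use case above: ||w||² = Σ wᵢ². The weight
/// vector is illustrative.
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::powi_simd;
///
/// let w = array![3.0_f64, 4.0];
/// let l2_squared = powi_simd(&w.view(), 2).sum();
/// assert!((l2_squared - 25.0).abs() < 1e-12);
/// assert!((l2_squared.sqrt() - 5.0).abs() < 1e-12);
/// ```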
1640pub fn powi_simd<F>(base: &ArrayView1<F>, n: i32) -> Array1<F>
1641where
1642    F: Float + SimdUnifiedOps,
1643{
1644    if base.is_empty() {
1645        return Array1::zeros(0);
1646    }
1647
1648    let optimizer = AutoOptimizer::new();
1649    if optimizer.should_use_simd(base.len()) {
1650        return F::simd_powi(base, n);
1651    }
1652
1653    // Scalar fallback for small arrays
1654    base.mapv(|val| val.powi(n))
1655}
1656
1657/// Compute the gamma function Γ(x) for each element (SIMD-accelerated).
1658///
1659/// Evaluates the gamma function using the Lanczos approximation, providing high accuracy
1660/// across the entire real domain. The gamma function is a generalization of the factorial
1661/// function to non-integer values.
1662///
1663/// # Arguments
1664///
1665/// * `x` - Input 1D array
1666///
1667/// # Returns
1668///
1669/// `Array1<F>` with the same length as input, where each element is Γ(x).
1670///
1671/// # Mathematical Definition
1672///
1673/// The gamma function is defined by Euler's integral:
1674/// ```text
1675/// Γ(z) = ∫₀^∞ t^(z-1) e^(-t) dt,    for Re(z) > 0
1676/// ```
1677///
1678/// For other values, it is defined by analytic continuation using the functional equation:
1679/// ```text
1680/// Γ(z+1) = z·Γ(z)
1681/// ```
1682///
1683/// # Key Properties
1684///
1685/// **Fundamental Properties**:
1686/// - Γ(1) = 1
1687/// - Γ(n+1) = n! for positive integers n
1688/// - Γ(1/2) = √π
1689/// - Γ(z+1) = z·Γ(z) (functional equation)
1690/// - Γ(z)·Γ(1-z) = π/sin(πz) (reflection formula)
1691///
1692/// **Special Values**:
1693/// - Γ(0) = ∞ (pole)
1694/// - Γ(-n) = ∞ for negative integers (poles)
1695/// - Γ(n+1/2) = (2n-1)!!·√π/2ⁿ for non-negative integers n
1696///
1697/// # Applications
1698///
1699/// ## 1. Statistical Distributions
1700/// - **Gamma Distribution**: PDF = (x^(α-1) e^(-x/β)) / (β^α Γ(α))
1701/// - **Beta Distribution**: B(α,β) = Γ(α)Γ(β)/Γ(α+β)
1702/// - **Chi-Square Distribution**: Uses Γ(k/2)
1703/// - **Student's t-Distribution**: Normalization involves Γ((ν+1)/2) and Γ(ν/2)
1704///
1705/// ## 2. Special Functions
1706/// - **Incomplete Gamma**: γ(s,x), Γ(s,x)
1707/// - **Beta Function**: B(α,β) = Γ(α)Γ(β)/Γ(α+β)
1708/// - **Bessel Functions**: Many representations involve gamma
1709/// - **Hypergeometric Functions**: Coefficients use gamma ratios
1710///
1711/// ## 3. Combinatorics & Number Theory
1712/// - **Binomial Coefficients**: C(n,k) = Γ(n+1)/(Γ(k+1)Γ(n-k+1))
1713/// - **Stirling Numbers**: Involve gamma function ratios
1714/// - **Riemann Zeta Function**: Functional equation uses Γ(s/2)
1715///
1716/// ## 4. Bayesian Inference
1717/// - **Conjugate Priors**: Gamma and Beta distributions
1718/// - **Dirichlet Distribution**: Normalization uses gamma products
1719/// - **Variational Inference**: Optimization involves gamma and digamma
1720///
1721/// ## 5. Physics & Engineering
1722/// - **Quantum Mechanics**: Angular momentum eigenstates
1723/// - **String Theory**: Veneziano amplitude
1724/// - **Statistical Mechanics**: Partition functions
1725/// - **Signal Processing**: Window functions
1726///
1727/// ## 6. Numerical Analysis
1728/// - **Integration**: Change of variables
1729/// - **Asymptotic Analysis**: Stirling's approximation
1730/// - **Interpolation**: Gamma function interpolates factorial
1731///
1732/// # Implementation
1733///
1734/// Uses the **Lanczos approximation** with g=7 and 9 coefficients, providing
1735/// ~15 decimal digits of accuracy for x ∈ [0.5, 100]. For x < 0.5, uses the
1736/// reflection formula to leverage the accurate computation of Γ(1-x).
1737///
1738/// **Algorithm**:
1739/// ```text
1740/// Γ(z+1) = √(2π) * (z + g + 1/2)^(z + 1/2) * e^(-(z + g + 1/2)) * A_g(z)
1741///
1742/// where A_g(z) = c₀ + c₁/(z+1) + c₂/(z+2) + ... + c₈/(z+9)
1743/// ```
1744///
1745/// **Coefficients** (from Boost C++ Library):
1746/// - Optimized using Remez exchange algorithm
1747/// - Minimax criterion for optimal approximation
1748/// - Provides uniform accuracy across domain
1749///
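/// As a concrete reference, a scalar Lanczos evaluation with the widely
/// published g = 7, n = 9 coefficients looks as follows (a sketch only;
/// the SIMD kernel's exact constants may differ):
///
/// ```
/// fn gamma_scalar(x: f64) -> f64 {
///     use std::f64::consts::PI;
///     // Widely published Lanczos coefficients for g = 7, n = 9.
///     const G: f64 = 7.0;
///     const C: [f64; 9] = [
///         0.99999999999980993, 676.5203681218851, -1259.1392167224028,
///         771.32342877765313, -176.61502916214059, 12.507343278686905,
///         -0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
///     ];
///     if x < 0.5 {
///         // Reflection formula: Γ(x) = π / (sin(πx) · Γ(1 − x))
///         PI / ((PI * x).sin() * gamma_scalar(1.0 - x))
///     } else {
///         let z = x - 1.0;
///         let mut a = C[0];
///         for (i, &c) in C.iter().enumerate().skip(1) {
///             a += c / (z + i as f64);
///         }
///         let t = z + G + 0.5;
///         (2.0 * PI).sqrt() * t.powf(z + 0.5) * (-t).exp() * a
///     }
/// }
/// assert!((gamma_scalar(5.0) - 24.0).abs() < 1e-10); // Γ(5) = 4!
/// assert!((gamma_scalar(0.5) - std::f64::consts::PI.sqrt()).abs() < 1e-10);
/// ```
///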
1750/// # Edge Cases
1751///
1752/// - `gamma(NaN)` → NaN
1753/// - `gamma(0)` → ∞
1754/// - `gamma(-n)` → ∞ for negative integers (poles)
1755/// - `gamma(x)` → ∞ as x → ∞
1756/// - `gamma(x)` → 0 as x → -∞ (alternating sign)
1757///
1758/// # Performance Characteristics
1759///
1760/// - **Small arrays** (< 1000 elements): Scalar implementation
1761/// - **Large arrays** (≥ 1000 elements): SIMD-accelerated via compiler auto-vectorization
1762/// - **Time complexity**: O(n) where n is array length
1763/// - **Space complexity**: O(n) for output array
1764/// - **Accuracy**: ~15 decimal digits for typical inputs
1765///
1766/// # Examples
1767///
1768/// ```
1769/// use scirs2_core::ndarray::array;
1770/// use scirs2_core::ndarray_ext::elementwise::gamma_simd;
1771///
1772/// // Factorials: Γ(n+1) = n!
1773/// let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
1774/// let gamma_x = gamma_simd(&x.view());
1775/// // gamma_x ≈ [1, 1, 2, 6, 24] (0!, 1!, 2!, 3!, 4!)
1776///
1777/// // Half-integer values
1778/// let x_half = array![0.5];
1779/// let gamma_half = gamma_simd(&x_half.view());
1780/// // gamma_half[0] ≈ 1.772453850905516 (√π)
1781///
1782/// // Statistical applications
1784/// let alpha = array![2.0, 3.0, 5.0];
1785/// let normalization = gamma_simd(&alpha.view());
1786/// // Use in gamma distribution PDF: x^(α-1) e^(-x/β) / (β^α Γ(α))
1787/// ```
1788///
1789/// # Mathematical References
1790///
1791/// - Abramowitz & Stegun, "Handbook of Mathematical Functions", §6.1
1792/// - Lanczos, "A Precision Approximation of the Gamma Function" (1964)
1793/// - Press et al., "Numerical Recipes", §6.1
1794/// - Boost C++ Libraries: Math Toolkit Documentation
1795///
1796/// # See Also
1797///
1798/// - [`powi_simd`] - Integer power (related operation)
1799/// - [`exp_simd`], [`ln_simd`] - Component operations
/// - [`digamma_simd`] - Logarithmic derivative of gamma
1801pub fn gamma_simd<F>(x: &ArrayView1<F>) -> Array1<F>
1802where
1803    F: Float + SimdUnifiedOps,
1804{
1805    if x.is_empty() {
1806        return Array1::zeros(0);
1807    }
1808
1809    let optimizer = AutoOptimizer::new();
1810    if optimizer.should_use_simd(x.len()) {
1811        return F::simd_gamma(x);
1812    }
1813
    // Small arrays currently use the same unified Lanczos implementation
1815    F::simd_gamma(x)
1816}
1817
1818/// Compute the arctangent of each element (SIMD-accelerated).
1819///
1820/// Computes atan(x) for each element, returning values in the range (-π/2, π/2).
1821///
1822/// # Arguments
1823///
1824/// * `x` - Input 1D array
1825///
1826/// # Returns
1827///
1828/// `Array1<F>` with the same length as input, where each element is the arctangent
1829/// in radians.
1830///
1831/// # Performance
1832///
1833/// - **SIMD**: Automatically used for large arrays (1000+ elements)
1834/// - **Scalar**: Used for small arrays or when SIMD is unavailable
1835///
1836/// # Mathematical Properties
1837///
1838/// - Range: (-π/2, π/2) for all finite x
1839/// - atan(0) = 0
1840/// - atan(-x) = -atan(x) (odd function)
1841/// - atan(∞) = π/2, atan(-∞) = -π/2
1842/// - atan(tan(x)) = x for x ∈ (-π/2, π/2)
1843///
1844/// # Examples
1845///
1846/// ```
1847/// use scirs2_core::ndarray::array;
1848/// use scirs2_core::ndarray_ext::elementwise::atan_simd;
1849///
1850/// let x = array![0.0_f64, 1.0, -1.0, f64::INFINITY];
1851/// let result = atan_simd(&x.view());
1852/// // Result: [0.0, π/4, -π/4, π/2]
1853/// ```
1854///
1855/// # Applications
1856///
1857/// - **Geometry**: Calculating angles from slopes
1858/// - **Robotics**: Inverse kinematics for joint angles
1859/// - **Computer Vision**: Feature detection, edge orientation
1860/// - **Signal Processing**: Phase unwrapping, frequency analysis
1861/// - **Physics**: Trajectory analysis, projectile motion
1862///
1863/// # See Also
1864///
1865/// - [`asin_simd`]: Arcsine function
1866/// - [`acos_simd`]: Arccosine function
1867/// - [`atan2_simd`]: Two-argument arctangent for full angle range
1868pub fn atan_simd<F>(x: &ArrayView1<F>) -> Array1<F>
1869where
1870    F: Float + SimdUnifiedOps,
1871{
1872    if x.is_empty() {
1873        return Array1::zeros(0);
1874    }
1875
1876    let optimizer = AutoOptimizer::new();
1877    if optimizer.should_use_simd(x.len()) {
1878        return F::simd_atan(x);
1879    }
1880
1881    // Scalar fallback for small arrays
1882    x.mapv(|val| val.atan())
1883}
1884
1885/// Compute the arcsine of each element (SIMD-accelerated).
1886///
1887/// Computes asin(x) for each element, returning values in the range [-π/2, π/2].
1888///
1889/// # Arguments
1890///
1891/// * `x` - Input 1D array with values in [-1, 1]
1892///
1893/// # Returns
1894///
1895/// `Array1<F>` with the same length as input, where each element is the arcsine
1896/// in radians. Returns NaN for |x| > 1.
1897///
1898/// # Performance
1899///
1900/// - **SIMD**: Automatically used for large arrays (1000+ elements)
1901/// - **Scalar**: Used for small arrays or when SIMD is unavailable
1902///
1903/// # Mathematical Properties
1904///
1905/// - Domain: [-1, 1]
1906/// - Range: [-π/2, π/2]
1907/// - asin(0) = 0
1908/// - asin(-x) = -asin(x) (odd function)
1909/// - asin(1) = π/2, asin(-1) = -π/2
1910/// - asin(sin(x)) = x for x ∈ [-π/2, π/2]
1911/// - Returns NaN for |x| > 1
1912///
1913/// # Examples
1914///
1915/// ```
1916/// use scirs2_core::ndarray::array;
1917/// use scirs2_core::ndarray_ext::elementwise::asin_simd;
1918///
1919/// let x = array![0.0_f64, 0.5, 1.0, -1.0];
1920/// let result = asin_simd(&x.view());
1921/// // Result: [0.0, π/6, π/2, -π/2]
1922/// ```
1923///
1924/// # Applications
1925///
1926/// - **Navigation**: Great circle distance calculations
1927/// - **Astronomy**: Celestial coordinate transformations
1928/// - **Physics**: Wave mechanics, pendulum analysis
1929/// - **Computer Graphics**: Spherical coordinates, lighting
1930/// - **Robotics**: Inverse kinematics with constraints
1931///
1932/// # See Also
1933///
1934/// - [`atan_simd`]: Arctangent function
1935/// - [`acos_simd`]: Arccosine function
1936/// - [`sin_simd`]: Forward sine function
1937pub fn asin_simd<F>(x: &ArrayView1<F>) -> Array1<F>
1938where
1939    F: Float + SimdUnifiedOps,
1940{
1941    if x.is_empty() {
1942        return Array1::zeros(0);
1943    }
1944
1945    let optimizer = AutoOptimizer::new();
1946    if optimizer.should_use_simd(x.len()) {
1947        return F::simd_asin(x);
1948    }
1949
1950    // Scalar fallback for small arrays
1951    x.mapv(|val| val.asin())
1952}
1953
1954/// Compute the arccosine of each element (SIMD-accelerated).
1955///
1956/// Computes acos(x) for each element, returning values in the range [0, π].
1957///
1958/// # Arguments
1959///
1960/// * `x` - Input 1D array with values in [-1, 1]
1961///
1962/// # Returns
1963///
1964/// `Array1<F>` with the same length as input, where each element is the arccosine
1965/// in radians. Returns NaN for |x| > 1.
1966///
1967/// # Performance
1968///
1969/// - **SIMD**: Automatically used for large arrays (1000+ elements)
1970/// - **Scalar**: Used for small arrays or when SIMD is unavailable
1971///
1972/// # Mathematical Properties
1973///
1974/// - Domain: [-1, 1]
1975/// - Range: [0, π]
1976/// - acos(1) = 0, acos(-1) = π
1977/// - acos(0) = π/2
1978/// - acos(-x) = π - acos(x)
1979/// - acos(cos(x)) = x for x ∈ [0, π]
1980/// - acos(x) + asin(x) = π/2
1981/// - Returns NaN for |x| > 1
1982///
1983/// # Examples
1984///
1985/// ```
1986/// use scirs2_core::ndarray::array;
1987/// use scirs2_core::ndarray_ext::elementwise::acos_simd;
1988///
1989/// let x = array![1.0_f64, 0.5, 0.0, -1.0];
1990/// let result = acos_simd(&x.view());
1991/// // Result: [0.0, π/3, π/2, π]
1992/// ```
1993///
1994/// # Applications
1995///
1996/// - **3D Graphics**: Dot product angle calculations
1997/// - **Machine Learning**: Cosine similarity to angle conversion
1998/// - **Physics**: Angular momentum, rotation analysis
1999/// - **Astronomy**: Coordinate system transformations
2000/// - **Navigation**: Bearing and heading calculations
2001///
2002/// # See Also
2003///
2004/// - [`atan_simd`]: Arctangent function
2005/// - [`asin_simd`]: Arcsine function
2006/// - [`cos_simd`]: Forward cosine function
2007pub fn acos_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2008where
2009    F: Float + SimdUnifiedOps,
2010{
2011    if x.is_empty() {
2012        return Array1::zeros(0);
2013    }
2014
2015    let optimizer = AutoOptimizer::new();
2016    if optimizer.should_use_simd(x.len()) {
2017        return F::simd_acos(x);
2018    }
2019
2020    // Scalar fallback for small arrays
2021    x.mapv(|val| val.acos())
2022}
2023
2024/// Compute the two-argument arctangent element-wise (SIMD-accelerated).
2025///
2026/// Computes atan2(y, x) for each pair of elements, returning values in the range
2027/// (-π, π]. This function correctly handles the signs of both arguments to
2028/// determine the quadrant.
2029///
2030/// # Arguments
2031///
2032/// * `y` - Y-coordinates (sine component)
2033/// * `x` - X-coordinates (cosine component)
2034///
2035/// # Returns
2036///
2037/// `Array1<F>` with the same length as inputs, where each element is the angle
2038/// in radians from the positive x-axis to the point (x, y).
2039///
/// # Panics
///
/// Panics if `y` and `x` have different lengths.
///
/// # Performance
2041///
2042/// - **SIMD**: Automatically used for large arrays (1000+ elements)
2043/// - **Scalar**: Used for small arrays or when SIMD is unavailable
2044///
2045/// # Mathematical Properties
2046///
2047/// - Range: (-π, π]
2048/// - atan2(0, 0) = 0 (by convention)
2049/// - atan2(y, x) = atan(y/x) when x > 0
/// - atan2(y, 0) = π/2 * sign(y) for y ≠ 0
2051/// - atan2(-y, x) = -atan2(y, x)
2052/// - atan2(y, -x) = π - atan2(y, x) when y >= 0
2053/// - atan2(y, -x) = -π + atan2(y, x) when y < 0
2054///
2055/// # Quadrants
2056///
2057/// - Quadrant I (x > 0, y > 0): (0, π/2)
2058/// - Quadrant II (x < 0, y > 0): (π/2, π)
2059/// - Quadrant III (x < 0, y < 0): (-π, -π/2)
2060/// - Quadrant IV (x > 0, y < 0): (-π/2, 0)
2061///
2062/// # Examples
2063///
2064/// ```
2065/// use scirs2_core::ndarray::array;
2066/// use scirs2_core::ndarray_ext::elementwise::atan2_simd;
2067///
2068/// let y = array![1.0, 1.0, -1.0, -1.0];
2069/// let x = array![1.0_f64, -1.0, -1.0, 1.0];
2070/// let angles = atan2_simd(&y.view(), &x.view());
2071/// // Result: [π/4, 3π/4, -3π/4, -π/4]
2072/// ```
2073///
2074/// # Applications
2075///
2076/// - **Robotics**: Joint angle calculations, path planning
2077/// - **Computer Vision**: Feature orientation, optical flow
2078/// - **Navigation**: Heading and bearing calculations
2079/// - **Spatial Computing**: Coordinate transformations
2080/// - **Physics**: Vector angle calculations, force analysis
2081/// - **Computer Graphics**: Rotation angles, sprite orientation
2082/// - **Signal Processing**: Phase calculations, complex number arguments
2083///
2084/// # See Also
2085///
2086/// - [`atan_simd`]: Single-argument arctangent
2087/// - [`asin_simd`]: Arcsine function
2088/// - [`acos_simd`]: Arccosine function
2089pub fn atan2_simd<F>(y: &ArrayView1<F>, x: &ArrayView1<F>) -> Array1<F>
2090where
2091    F: Float + SimdUnifiedOps,
2092{
2093    assert_eq!(y.len(), x.len(), "y and x arrays must have the same length");
2094
2095    if y.is_empty() {
2096        return Array1::zeros(0);
2097    }
2098
2099    let optimizer = AutoOptimizer::new();
2100    if optimizer.should_use_simd(y.len()) {
2101        return F::simd_atan2(y, x);
2102    }
2103
2104    // Scalar fallback for small arrays
2105    y.iter()
2106        .zip(x.iter())
2107        .map(|(&y_val, &x_val)| y_val.atan2(x_val))
2108        .collect::<Vec<_>>()
2109        .into()
2110}
2111
2112/// Compute the base-10 logarithm of each element (SIMD-accelerated).
2113///
2114/// Computes log₁₀(x) for each element, where log₁₀(x) = ln(x) / ln(10).
2115///
2116/// # Arguments
2117///
2118/// * `x` - Input 1D array with positive values
2119///
2120/// # Returns
2121///
2122/// `Array1<F>` with the same length as input, where each element is the base-10
/// logarithm. Returns NaN for x < 0 and -∞ for x = 0.
2124///
2125/// # Performance
2126///
2127/// - **SIMD**: Automatically used for large arrays (1000+ elements)
2128/// - **Scalar**: Used for small arrays or when SIMD is unavailable
2129///
2130/// # Mathematical Properties
2131///
2132/// - Domain: (0, ∞)
2133/// - Range: (-∞, ∞)
2134/// - log₁₀(1) = 0
2135/// - log₁₀(10) = 1
2136/// - log₁₀(10ⁿ) = n
2137/// - log₁₀(x * y) = log₁₀(x) + log₁₀(y)
2138/// - log₁₀(x / y) = log₁₀(x) - log₁₀(y)
2139/// - log₁₀(xⁿ) = n * log₁₀(x)
/// - Returns NaN for x < 0
2141/// - Returns -∞ for x = 0
2142///
2143/// # Examples
2144///
2145/// ```
2146/// use scirs2_core::ndarray::array;
2147/// use scirs2_core::ndarray_ext::elementwise::log10_simd;
2148///
2149/// let x = array![1.0_f64, 10.0, 100.0, 1000.0];
2150/// let result = log10_simd(&x.view());
2151/// // Result: [0.0, 1.0, 2.0, 3.0]
2152/// ```
2153///
2154/// # Applications
2155///
2156/// - **Signal Processing**: Decibel scale (dB = 10 * log₁₀(P/P₀))
2157/// - **Chemistry**: pH scale (pH = -log₁₀[H⁺])
2158/// - **Astronomy**: Magnitude scale (apparent magnitude)
2159/// - **Scientific Computing**: Decades representation, log-log plots
2160/// - **Machine Learning**: Feature scaling, log-loss functions
2161/// - **Information Theory**: Hartley's entropy (base-10)
2162/// - **Physics**: Richter scale (earthquake magnitude)
2163/// - **Audio Processing**: Sound intensity level
2164///
2165/// # See Also
2166///
2167/// - [`ln_simd`]: Natural logarithm (base e)
2168/// - [`log2_simd`]: Base-2 logarithm
2169/// - [`exp_simd`]: Exponential function (inverse of ln)
2170pub fn log10_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2171where
2172    F: Float + SimdUnifiedOps,
2173{
2174    if x.is_empty() {
2175        return Array1::zeros(0);
2176    }
2177
2178    let optimizer = AutoOptimizer::new();
2179    if optimizer.should_use_simd(x.len()) {
2180        return F::simd_log10(x);
2181    }
2182
2183    // Scalar fallback for small arrays
2184    x.mapv(|val| val.log10())
2185}
2186
2187/// Compute the base-2 logarithm of each element (SIMD-accelerated).
2188///
2189/// Computes log₂(x) for each element, where log₂(x) = ln(x) / ln(2).
2190///
2191/// # Arguments
2192///
2193/// * `x` - Input 1D array with positive values
2194///
2195/// # Returns
2196///
2197/// `Array1<F>` with the same length as input, where each element is the base-2
/// logarithm. Returns NaN for x < 0 and -∞ for x = 0.
2199///
2200/// # Performance
2201///
2202/// - **SIMD**: Automatically used for large arrays (1000+ elements)
2203/// - **Scalar**: Used for small arrays or when SIMD is unavailable
2204///
2205/// # Mathematical Properties
2206///
2207/// - Domain: (0, ∞)
2208/// - Range: (-∞, ∞)
2209/// - log₂(1) = 0
2210/// - log₂(2) = 1
2211/// - log₂(2ⁿ) = n
2212/// - log₂(x * y) = log₂(x) + log₂(y)
2213/// - log₂(x / y) = log₂(x) - log₂(y)
2214/// - log₂(xⁿ) = n * log₂(x)
/// - Returns NaN for x < 0
2216/// - Returns -∞ for x = 0
2217///
2218/// # Examples
2219///
2220/// ```
2221/// use scirs2_core::ndarray::array;
2222/// use scirs2_core::ndarray_ext::elementwise::log2_simd;
2223///
2224/// let x = array![1.0_f64, 2.0, 4.0, 8.0, 16.0];
2225/// let result = log2_simd(&x.view());
2226/// // Result: [0.0, 1.0, 2.0, 3.0, 4.0]
2227/// ```
2228///
2229/// # Applications
2230///
2231/// - **Information Theory**: Shannon entropy (bits), channel capacity
2232/// - **Computer Science**: Binary tree depth, algorithm complexity analysis
2233/// - **Machine Learning**: Decision trees, information gain
2234/// - **Signal Processing**: Octave scales, frequency resolution
2235/// - **Data Compression**: Optimal coding, Huffman coding
2236/// - **Cryptography**: Key size representation, security levels
2237/// - **Scientific Computing**: Binary logarithmic plots
2238/// - **Digital Systems**: Bit depth calculations
2239///
2240/// # See Also
2241///
2242/// - [`ln_simd`]: Natural logarithm (base e)
2243/// - [`log10_simd`]: Base-10 logarithm
2244/// - [`exp_simd`]: Exponential function (inverse of ln)
2245pub fn log2_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2246where
2247    F: Float + SimdUnifiedOps,
2248{
2249    if x.is_empty() {
2250        return Array1::zeros(0);
2251    }
2252
2253    let optimizer = AutoOptimizer::new();
2254    if optimizer.should_use_simd(x.len()) {
2255        return F::simd_log2(x);
2256    }
2257
2258    // Scalar fallback for small arrays
2259    x.mapv(|val| val.log2())
2260}
2261
2262/// Clamp each element to a specified range [min, max] (SIMD-accelerated).
2263///
2264/// Constrains each element x to satisfy min ≤ x ≤ max. Values below min are set
2265/// to min, values above max are set to max, and values within range are unchanged.
2266///
2267/// # Arguments
2268///
2269/// * `x` - Input 1D array
2270/// * `min` - Minimum value (lower bound)
2271/// * `max` - Maximum value (upper bound)
2272///
2273/// # Returns
2274///
2275/// `Array1<F>` with the same length as input, where each element is clamped to [min, max].
2276///
2277/// # Panics
2278///
2279/// Panics if min > max (invalid range).
2280///
2281/// # Performance
2282///
2283/// - **SIMD**: Automatically used for large arrays (1000+ elements)
2284/// - **Scalar**: Used for small arrays or when SIMD is unavailable
2285///
2286/// # Mathematical Properties
2287///
2288/// - clamp(x, min, max) = max(min, min(x, max))
2289/// - clamp(x, min, max) ∈ [min, max] for all x
2290/// - clamp(min, min, max) = min
2291/// - clamp(max, min, max) = max
2292/// - clamp is idempotent: clamp(clamp(x, a, b), a, b) = clamp(x, a, b)
2293/// - clamp preserves monotonicity: if x₁ ≤ x₂, then clamp(x₁) ≤ clamp(x₂)
2294///
2295/// # Examples
2296///
2297/// ```
2298/// use scirs2_core::ndarray::array;
2299/// use scirs2_core::ndarray_ext::elementwise::clamp_simd;
2300///
2301/// let x = array![-1.0_f64, 0.5, 1.5, 2.5, 3.5];
2302/// let result = clamp_simd(&x.view(), 0.0, 2.0);
2303/// // Result: [0.0, 0.5, 1.5, 2.0, 2.0]
2304/// ```
2305///
2306/// # Applications
2307///
2308/// - **Image Processing**: Pixel value normalization (0-255, 0.0-1.0)
2309/// - **Neural Networks**: Gradient clipping, activation bounding
2310/// - **Computer Vision**: Color space conversions, contrast limiting
2311/// - **Signal Processing**: Dynamic range compression, amplitude limiting
2312/// - **Data Normalization**: Feature scaling, outlier handling
2313/// - **Numerical Stability**: Preventing overflow/underflow in computations
2314/// - **Game Development**: Velocity limiting, position constraints
2315/// - **Robotics**: Joint angle limits, actuator bounds
2316/// - **Audio Processing**: Volume limiting, signal clipping prevention
2317/// - **Machine Learning**: Learning rate bounds, weight constraints
2318///
2319/// # See Also
2320///
/// - [`abs_simd`]: Absolute value (maps negative values to their positive counterparts)
/// - [`floor_simd`]: Round down to the nearest integer
/// - [`ceil_simd`]: Round up to the nearest integer
2324pub fn clamp_simd<F>(x: &ArrayView1<F>, min: F, max: F) -> Array1<F>
2325where
2326    F: Float + SimdUnifiedOps + std::fmt::Debug,
2327{
2328    assert!(
2329        min <= max,
2330        "clamp_simd: min ({:?}) must be <= max ({:?})",
2331        min,
2332        max
2333    );
2334
2335    if x.is_empty() {
2336        return Array1::zeros(0);
2337    }
2338
2339    let optimizer = AutoOptimizer::new();
2340    if optimizer.should_use_simd(x.len()) {
2341        return F::simd_clamp(x, min, max);
2342    }
2343
2344    // Scalar fallback for small arrays
2345    x.mapv(|val| val.clamp(min, max))
2346}
2347
2348/// Compute 2^x for each element (SIMD-accelerated).
2349///
2350/// Computes the base-2 exponential function 2^x for each element.
2351///
2352/// # Arguments
2353///
2354/// * `x` - Input 1D array
2355///
2356/// # Returns
2357///
2358/// `Array1<F>` with the same length as input, where each element is 2 raised to the
2359/// corresponding input power.
2360///
2361/// # Performance
2362///
/// Uses the identity 2^x = exp(x * ln(2)): a SIMD-accelerated exp combined with a
/// multiply by the constant ln(2).
2364///
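/// The identity in plain f64 (illustrative):
///
/// ```
/// let x = 1.5_f64;
/// assert!(((x * std::f64::consts::LN_2).exp() - x.exp2()).abs() < 1e-12);
/// ```
///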
2365/// # Mathematical Properties
2366///
2367/// - 2^0 = 1
2368/// - 2^1 = 2
2369/// - 2^(-1) = 0.5
2370/// - 2^n for integer n is exact for small n
2371///
2372/// # Examples
2373///
2374/// ```
2375/// use scirs2_core::ndarray::array;
2376/// use scirs2_core::ndarray_ext::elementwise::exp2_simd;
2377///
2378/// let x = array![0.0_f64, 1.0, 2.0, 3.0, -1.0];
2379/// let result = exp2_simd(&x.view());
2380/// // Result: [1.0, 2.0, 4.0, 8.0, 0.5]
2381/// ```
2382///
2383/// # Applications
2384///
2385/// - **Audio Processing**: Octave/semitone calculations
2386/// - **Computer Graphics**: Level of detail (LOD) calculations
2387/// - **Floating-Point**: Exponent manipulation
2388pub fn exp2_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2389where
2390    F: Float + SimdUnifiedOps,
2391{
2392    if x.is_empty() {
2393        return Array1::zeros(0);
2394    }
2395
2396    let optimizer = AutoOptimizer::new();
2397    if optimizer.should_use_simd(x.len()) {
2398        return F::simd_exp2(x);
2399    }
2400
    // Small arrays currently use the same unified implementation
2402    F::simd_exp2(x)
2403}
2404
2405/// Compute the cube root of each element (SIMD-accelerated).
2406///
2407/// Computes cbrt(x) = x^(1/3) for each element, correctly handling negative values.
2408///
2409/// # Arguments
2410///
2411/// * `x` - Input 1D array
2412///
2413/// # Returns
2414///
2415/// `Array1<F>` with the same length as input, where each element is the cube root.
2416///
2417/// # Mathematical Properties
2418///
2419/// - cbrt(x^3) = x (exact inverse of cubing)
2420/// - cbrt(-x) = -cbrt(x) (handles negative numbers)
2421/// - cbrt(0) = 0
2422/// - cbrt(1) = 1, cbrt(8) = 2, cbrt(27) = 3
2423///
2424/// # Examples
2425///
2426/// ```
2427/// use scirs2_core::ndarray::array;
2428/// use scirs2_core::ndarray_ext::elementwise::cbrt_simd;
2429///
2430/// let x = array![0.0_f64, 1.0, 8.0, 27.0, -8.0];
2431/// let result = cbrt_simd(&x.view());
2432/// // Result: [0.0, 1.0, 2.0, 3.0, -2.0]
2433/// ```
2434///
2435/// # Applications
2436///
2437/// - **Statistics**: Transforming skewed data
2438/// - **Physics**: Volume calculations from cubic dimensions
2439/// - **Numerical Analysis**: Root-finding algorithms
2440pub fn cbrt_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2441where
2442    F: Float + SimdUnifiedOps,
2443{
2444    if x.is_empty() {
2445        return Array1::zeros(0);
2446    }
2447
2448    let optimizer = AutoOptimizer::new();
2449    if optimizer.should_use_simd(x.len()) {
2450        return F::simd_cbrt(x);
2451    }
2452
2453    // Scalar fallback for small arrays
2454    x.mapv(|val| val.cbrt())
2455}
2456
2457/// Compute ln(1+x) for each element (SIMD-accelerated, numerically stable).
2458///
2459/// Computes the natural logarithm of (1+x) with improved accuracy for small x values
2460/// where direct computation of ln(1+x) would lose precision.
2461///
2462/// # Arguments
2463///
2464/// * `x` - Input 1D array with values > -1
2465///
2466/// # Returns
2467///
2468/// `Array1<F>` with the same length as input, where each element is ln(1+x).
2469///
2470/// # Mathematical Properties
2471///
2472/// - ln_1p(0) = 0
2473/// - ln_1p(x) ≈ x for |x| << 1 (Taylor series first term)
2474/// - More accurate than ln(1+x) when |x| is small
2475///
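/// The cancellation it avoids, in plain f64 (no SIMD involved):
///
/// ```
/// let tiny = 1e-17_f64;
/// assert_eq!((1.0 + tiny).ln(), 0.0);           // 1.0 + 1e-17 rounds to 1.0
/// assert!((tiny.ln_1p() - tiny).abs() < 1e-30); // ln_1p keeps the signal
/// ```
///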
2476/// # Examples
2477///
2478/// ```
2479/// use scirs2_core::ndarray::array;
2480/// use scirs2_core::ndarray_ext::elementwise::ln_1p_simd;
2481///
2482/// let x = array![0.0_f64, 1.0, 1e-15, -0.5];
2483/// let result = ln_1p_simd(&x.view());
2484/// // Result: [0.0, ln(2), ≈1e-15, -ln(2)]
2485/// ```
2486///
2487/// # Applications
2488///
2489/// - **Finance**: Continuous compound interest rates
2490/// - **Statistics**: Log-likelihood computations
2491/// - **Numerical Analysis**: Avoiding catastrophic cancellation
2492pub fn ln_1p_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2493where
2494    F: Float + SimdUnifiedOps,
2495{
2496    if x.is_empty() {
2497        return Array1::zeros(0);
2498    }
2499
2500    let optimizer = AutoOptimizer::new();
2501    if optimizer.should_use_simd(x.len()) {
2502        return F::simd_ln_1p(x);
2503    }
2504
2505    // Scalar fallback for small arrays
2506    x.mapv(|val| val.ln_1p())
2507}
2508
2509/// Compute exp(x)-1 for each element (SIMD-accelerated, numerically stable).
2510///
2511/// Computes e^x - 1 with improved accuracy for small x values where direct
2512/// computation would lose precision.
2513///
2514/// # Arguments
2515///
2516/// * `x` - Input 1D array
2517///
2518/// # Returns
2519///
2520/// `Array1<F>` with the same length as input, where each element is exp(x)-1.
2521///
2522/// # Mathematical Properties
2523///
2524/// - exp_m1(0) = 0
2525/// - exp_m1(x) ≈ x for |x| << 1 (Taylor series first term)
2526/// - exp_m1(-x) = -exp_m1(x) / (1 + exp_m1(x))
2527/// - More accurate than exp(x)-1 when |x| is small
2528///
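/// The cancellation it avoids, in plain f64:
///
/// ```
/// let tiny = 1e-17_f64;
/// assert_eq!(tiny.exp() - 1.0, 0.0);             // exp(1e-17) rounds to 1.0
/// assert!((tiny.exp_m1() - tiny).abs() < 1e-30); // exp_m1 keeps the signal
/// ```
///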
2529/// # Examples
2530///
2531/// ```
2532/// use scirs2_core::ndarray::array;
2533/// use scirs2_core::ndarray_ext::elementwise::exp_m1_simd;
2534///
2535/// let x = array![0.0_f64, 1.0, 1e-15, -1.0];
2536/// let result = exp_m1_simd(&x.view());
2537/// // Result: [0.0, e-1, ≈1e-15, 1/e - 1]
2538/// ```
2539///
2540/// # Applications
2541///
2542/// - **Finance**: Continuous compounding calculations
2543/// - **Physics**: Small perturbation calculations
2544/// - **Numerical Analysis**: Avoiding catastrophic cancellation
2545pub fn exp_m1_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2546where
2547    F: Float + SimdUnifiedOps,
2548{
2549    if x.is_empty() {
2550        return Array1::zeros(0);
2551    }
2552
2553    let optimizer = AutoOptimizer::new();
2554    if optimizer.should_use_simd(x.len()) {
2555        return F::simd_exp_m1(x);
2556    }
2557
2558    // Scalar fallback for small arrays
2559    x.mapv(|val| val.exp_m1())
2560}
2561
2562/// Convert degrees to radians for each element (SIMD-accelerated).
2563///
2564/// Computes x * π / 180 for each element, converting angle measurements
2565/// from degrees to radians.
2566///
2567/// # Arguments
2568///
2569/// * `x` - Input 1D array of angles in degrees
2570///
2571/// # Returns
2572///
2573/// `Array1<F>` with the same length as input, where each element is the
2574/// corresponding angle in radians.
2575///
2576/// # Performance
2577///
/// Uses a SIMD multiply by the constant π/180.
2579///
2580/// # Mathematical Properties
2581///
2582/// - to_radians(0) = 0
2583/// - to_radians(90) = π/2
2584/// - to_radians(180) = π
2585/// - to_radians(360) = 2π
2586///
2587/// # Examples
2588///
2589/// ```
2590/// use scirs2_core::ndarray::array;
2591/// use scirs2_core::ndarray_ext::elementwise::to_radians_simd;
2592///
2593/// let degrees = array![0.0, 90.0, 180.0, 360.0];
2594/// let radians = to_radians_simd(&degrees.view());
2595/// // Result: [0.0, π/2, π, 2π]
2596/// ```
2597///
2598/// # Applications
2599///
2600/// - **Trigonometry**: Converting human-readable angles to radian form
2601/// - **Graphics**: Rotation transformations
2602/// - **Physics**: Angular velocity and position calculations
2603/// - **Navigation**: Bearing and heading conversions
2604pub fn to_radians_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2605where
2606    F: Float + SimdUnifiedOps,
2607{
2608    if x.is_empty() {
2609        return Array1::zeros(0);
2610    }
2611
2612    let optimizer = AutoOptimizer::new();
2613    if optimizer.should_use_simd(x.len()) {
2614        return F::simd_to_radians(x);
2615    }
2616
    // Small arrays currently use the same unified implementation
2618    F::simd_to_radians(x)
2619}
2620
2621/// Convert radians to degrees for each element (SIMD-accelerated).
2622///
2623/// Computes x * 180 / π for each element, converting angle measurements
2624/// from radians to degrees.
2625///
2626/// # Arguments
2627///
2628/// * `x` - Input 1D array of angles in radians
2629///
2630/// # Returns
2631///
2632/// `Array1<F>` with the same length as input, where each element is the
2633/// corresponding angle in degrees.
2634///
2635/// # Performance
2636///
/// Uses a SIMD multiply by the constant 180/π.
2638///
2639/// # Mathematical Properties
2640///
2641/// - to_degrees(0) = 0
2642/// - to_degrees(π/2) = 90
2643/// - to_degrees(π) = 180
2644/// - to_degrees(2π) = 360
2645///
2646/// # Examples
2647///
2648/// ```
2649/// use scirs2_core::ndarray::array;
2650/// use scirs2_core::ndarray_ext::elementwise::to_degrees_simd;
2651/// use std::f64::consts::PI;
2652///
2653/// let radians = array![0.0, PI/2.0, PI, 2.0*PI];
2654/// let degrees = to_degrees_simd(&radians.view());
2655/// // Result: [0.0, 90.0, 180.0, 360.0]
2656/// ```
2657///
2658/// # Applications
2659///
2660/// - **Trigonometry**: Converting calculation results to human-readable form
2661/// - **Graphics**: Displaying rotation angles
2662/// - **Physics**: Reporting angular measurements
2663/// - **Navigation**: Displaying compass headings
2664pub fn to_degrees_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2665where
2666    F: Float + SimdUnifiedOps,
2667{
2668    if x.is_empty() {
2669        return Array1::zeros(0);
2670    }
2671
2672    let optimizer = AutoOptimizer::new();
2673    if optimizer.should_use_simd(x.len()) {
2674        return F::simd_to_degrees(x);
2675    }
2676
    // Small arrays currently use the same unified implementation
2678    F::simd_to_degrees(x)
2679}
2680
2681/// Computes the element-wise digamma function ψ(x) = d/dx ln(Γ(x)) using SIMD acceleration.
2682///
2683/// The digamma function is the logarithmic derivative of the gamma function.
2684/// It is essential for:
2685/// - Bayesian variational inference (Dirichlet, Beta priors)
2686/// - Maximum likelihood estimation for gamma distributions
2687/// - Statistical special function computations
2688///
2689/// # Implementation Details
2690///
2691/// The implementation uses three techniques for high accuracy:
2692/// 1. **Reflection formula**: For x < 0.5, uses ψ(1-x) - π/tan(πx)
2693/// 2. **Recurrence relation**: For small x, uses ψ(x+1) = ψ(x) + 1/x
2694/// 3. **Asymptotic expansion**: For large x, uses Bernoulli number series
2695///
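/// A scalar sketch of the recurrence-plus-asymptotic scheme above, for x > 0
/// (illustrative only; the SIMD kernel's threshold and series length may differ):
///
/// ```
/// fn digamma_scalar(mut x: f64) -> f64 {
///     let mut result = 0.0;
///     // Recurrence ψ(x) = ψ(x + 1) − 1/x until x is large enough
///     while x < 6.0 {
///         result -= 1.0 / x;
///         x += 1.0;
///     }
///     // Asymptotic series: ψ(x) ≈ ln x − 1/(2x) − 1/(12x²) + 1/(120x⁴) − 1/(252x⁶)
///     let w = 1.0 / (x * x);
///     result + x.ln() - 0.5 / x
///         - w * (1.0 / 12.0 - w * (1.0 / 120.0 - w / 252.0))
/// }
/// // ψ(1) = −γ ≈ −0.5772156649
/// assert!((digamma_scalar(1.0) + 0.5772156649).abs() < 1e-8);
/// ```
///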
2696/// # Arguments
2697///
2698/// * `x` - Input array of values. Should avoid non-positive integers where digamma has poles.
2699///
2700/// # Returns
2701///
2702/// Array of ψ(x) values, same shape as input.
2703///
2704/// # Mathematical Properties
2705///
/// - ψ(1) = -γ ≈ -0.5772 (γ ≈ 0.5772 is the Euler-Mascheroni constant)
2707/// - ψ(n) = -γ + Σ(k=1 to n-1) 1/k for positive integers
2708/// - ψ(x+1) = ψ(x) + 1/x
2709/// - ψ(1-x) - ψ(x) = π·cot(πx)
2710///
2711/// # Example
2712///
2713/// ```ignore
2714/// use scirs2_core::ndarray_ext::elementwise::digamma_simd;
2715/// use scirs2_core::ndarray::array;
2716///
2717/// let x = array![1.0f64, 2.0, 3.0, 4.0];
2718/// let result = digamma_simd(&x.view());
2719/// // ψ(1) ≈ -0.5772, ψ(2) ≈ 0.4228, ψ(3) ≈ 0.9228, ψ(4) ≈ 1.2561
2720/// ```
2721pub fn digamma_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2722where
2723    F: Float + SimdUnifiedOps,
2724{
2725    if x.is_empty() {
2726        return Array1::zeros(0);
2727    }
2728
2729    let optimizer = AutoOptimizer::new();
2730    if optimizer.should_use_simd(x.len()) {
2731        return F::simd_digamma(x);
2732    }
2733
    // Small arrays currently use the same unified implementation
2735    F::simd_digamma(x)
2736}
2737
2738/// Computes the element-wise trigamma function ψ'(x) = d²/dx² ln(Γ(x)) using SIMD acceleration.
2739///
2740/// The trigamma function is the second derivative of the log-gamma function (first derivative
2741/// of digamma). It is critical for:
2742/// - Fisher information computation in Bayesian inference
2743/// - Variance of gamma and beta distribution parameters
2744/// - Maximum likelihood estimation for gamma distributions
2745/// - Sensitivity analysis in Bayesian variational inference
2746///
2747/// # Implementation Details
2748///
2749/// The implementation uses three techniques for high accuracy:
2750/// 1. **Reflection formula**: For x < 0, uses ψ'(1-x) + ψ'(x) = π²/sin²(πx)
2751/// 2. **Recurrence relation**: For small x, uses ψ'(x+1) = ψ'(x) - 1/x²
/// 3. **Asymptotic expansion**: For large x, uses ψ'(x) ≈ 1/x + 1/(2x²) + 1/(6x³) - 1/(30x⁵) + ...
2753///
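/// A scalar sketch of the same scheme for x > 0 (illustrative only):
///
/// ```
/// fn trigamma_scalar(mut x: f64) -> f64 {
///     let mut result = 0.0;
///     // Recurrence ψ'(x) = ψ'(x + 1) + 1/x² until x is large enough
///     while x < 6.0 {
///         result += 1.0 / (x * x);
///         x += 1.0;
///     }
///     // Asymptotic series: 1/x + 1/(2x²) + 1/(6x³) − 1/(30x⁵) + 1/(42x⁷)
///     let w = 1.0 / (x * x);
///     result + 1.0 / x + 0.5 * w
///         + w / x * (1.0 / 6.0 - w * (1.0 / 30.0 - w / 42.0))
/// }
/// // ψ'(1) = π²/6 ≈ 1.6449340668
/// assert!((trigamma_scalar(1.0) - 1.6449340668).abs() < 1e-7);
/// ```
///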
2754/// # Arguments
2755///
2756/// * `x` - Input array of values. Should avoid non-positive integers where trigamma has poles.
2757///
2758/// # Returns
2759///
2760/// Array of ψ'(x) values, same shape as input.
2761///
2762/// # Mathematical Properties
2763///
2764/// - ψ'(1) = π²/6 ≈ 1.6449340668 (Basel problem)
2765/// - ψ'(n) = π²/6 - Σ(k=1 to n-1) 1/k² for positive integers
2766/// - ψ'(x+1) = ψ'(x) - 1/x²
2767/// - For large x: ψ'(x) → 1/x
2768///
2769/// # Example
2770///
2771/// ```ignore
2772/// use scirs2_core::ndarray_ext::elementwise::trigamma_simd;
2773/// use scirs2_core::ndarray::array;
2774///
2775/// let x = array![1.0f64, 2.0, 3.0, 4.0];
2776/// let result = trigamma_simd(&x.view());
2777/// // ψ'(1) ≈ 1.6449, ψ'(2) ≈ 0.6449, ψ'(3) ≈ 0.3949, ψ'(4) ≈ 0.2838
2778/// ```
2779pub fn trigamma_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2780where
2781    F: Float + SimdUnifiedOps,
2782{
2783    if x.is_empty() {
2784        return Array1::zeros(0);
2785    }
2786
2787    let optimizer = AutoOptimizer::new();
2788    if optimizer.should_use_simd(x.len()) {
2789        return F::simd_trigamma(x);
2790    }
2791
    // Small arrays currently use the same unified implementation
2793    F::simd_trigamma(x)
2794}
2795
2796/// Computes the element-wise log-gamma function ln(Γ(x)) using SIMD acceleration.
2797///
2798/// The log-gamma function is more numerically stable than computing `gamma(x).ln()`
2799/// since it avoids overflow for large arguments and handles the full range of inputs.
2800/// This function is extensively used in:
2801/// - Statistical distributions (gamma, beta, binomial, Poisson)
2802/// - Bayesian inference (prior/posterior computations)
2803/// - Maximum likelihood estimation
2804/// - Combinatorics (log of binomial coefficients)
2805///
2806/// # Implementation Details
2807///
2808/// Uses the Lanczos approximation with reflection formula:
2809/// - For x >= 0.5: ln(Γ(z)) = ln(√(2π)) + (z-0.5)·ln(t) - t + ln(sum) where t = z + g - 0.5
2810/// - For x < 0.5: ln(Γ(z)) = ln(π) - ln(|sin(πz)|) - ln(Γ(1-z))
2811///
2812/// # Arguments
2813///
2814/// * `x` - Input array of values. Poles at non-positive integers (returns +∞).
2815///
2816/// # Returns
2817///
2818/// Array of ln(Γ(x)) values, same shape as input.
2819///
2820/// # Mathematical Properties
2821///
2822/// - ln(Γ(1)) = ln(Γ(2)) = 0
2823/// - ln(Γ(n)) = ln((n-1)!) for positive integers
2824/// - ln(Γ(1/2)) = ln(√π) ≈ 0.5724
2825/// - For large x: ln(Γ(x)) ≈ (x-0.5)·ln(x) - x + ln(√(2π))
2826///
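/// The Stirling-type form in the last property is easy to check numerically
/// (plain f64; no library calls assumed):
///
/// ```
/// let x = 100.0_f64;
/// let stirling = (x - 0.5) * x.ln() - x + (2.0 * std::f64::consts::PI).sqrt().ln();
/// let exact: f64 = (1..100).map(|k| (k as f64).ln()).sum(); // ln(99!) = ln Γ(100)
/// assert!((stirling - exact).abs() < 1e-2);
/// ```
///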
2827/// # Example
2828///
2829/// ```ignore
2830/// use scirs2_core::ndarray_ext::elementwise::ln_gamma_simd;
2831/// use scirs2_core::ndarray::array;
2832///
2833/// let x = array![1.0f64, 2.0, 3.0, 4.0, 5.0];
2834/// let result = ln_gamma_simd(&x.view());
2835/// // ln(Γ(1)) = 0, ln(Γ(2)) = 0, ln(Γ(3)) = ln(2!) ≈ 0.693
2836/// // ln(Γ(4)) = ln(3!) ≈ 1.792, ln(Γ(5)) = ln(4!) ≈ 3.178
2837/// ```
2838pub fn ln_gamma_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2839where
2840    F: Float + SimdUnifiedOps,
2841{
2842    if x.is_empty() {
2843        return Array1::zeros(0);
2844    }
2845
2846    let optimizer = AutoOptimizer::new();
2847    if optimizer.should_use_simd(x.len()) {
2848        return F::simd_ln_gamma(x);
2849    }
2850
    // Small arrays currently use the same unified implementation
2852    F::simd_ln_gamma(x)
2853}
2854
2855/// Element-wise error function erf(x) = (2/√π) ∫₀ˣ e^(-t²) dt
2856///
2857/// Uses SIMD acceleration when available for optimal performance.
2858/// Critical for normal distribution CDF: Φ(x) = 0.5 * (1 + erf(x/√2))
2859///
2860/// # Properties
2861/// - erf(0) = 0
2862/// - erf(∞) = 1, erf(-∞) = -1
2863/// - erf(-x) = -erf(x) (odd function)
2864/// - erf(1) ≈ 0.8427007929497148
2865///
2866/// # Examples
2867///
2868/// ```
2869/// use scirs2_core::ndarray_ext::elementwise::erf_simd;
2870/// use scirs2_core::ndarray::array;
2871///
2872/// let x = array![0.0_f64, 1.0, 2.0, -1.0];
2873/// let result = erf_simd(&x.view());
2874/// assert!((result[0] - 0.0_f64).abs() < 1e-10);  // erf(0) = 0
2875/// assert!((result[1] - 0.8427007929497148_f64).abs() < 1e-6);  // erf(1)
2876/// assert!((result[3] + 0.8427007929497148_f64).abs() < 1e-6);  // erf(-1) = -erf(1)
2877/// ```
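///
/// Normal CDF via the relation above (a usage sketch built only on `erf_simd`):
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::erf_simd;
///
/// let z = array![0.0_f64, 1.0];
/// let scaled = z.mapv(|v| v / 2.0_f64.sqrt());
/// let phi = erf_simd(&scaled.view()).mapv(|e| 0.5 * (1.0 + e));
/// assert!((phi[0] - 0.5).abs() < 1e-10);          // Φ(0) = 0.5
/// assert!((phi[1] - 0.8413447461).abs() < 1e-5);  // Φ(1) ≈ 0.8413
/// ```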
2878pub fn erf_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2879where
2880    F: Float + SimdUnifiedOps,
2881{
2882    if x.is_empty() {
2883        return Array1::zeros(0);
2884    }
2885
2886    let optimizer = AutoOptimizer::new();
2887    if optimizer.should_use_simd(x.len()) {
2888        return F::simd_erf(x);
2889    }
2890
    // Small arrays currently use the same unified implementation
2892    F::simd_erf(x)
2893}
2894
2895/// Element-wise complementary error function erfc(x) = 1 - erf(x)
2896///
2897/// More numerically stable than computing 1 - erf(x) directly for large x.
2898/// Uses SIMD acceleration when available for optimal performance.
2899///
2900/// # Properties
2901/// - erfc(0) = 1
2902/// - erfc(∞) = 0, erfc(-∞) = 2
2903/// - erfc(x) = 1 - erf(x)
2904/// - erfc(-x) = 2 - erfc(x)
2905///
2906/// # Examples
2907///
2908/// ```
2909/// use scirs2_core::ndarray_ext::elementwise::erfc_simd;
2910/// use scirs2_core::ndarray::array;
2911///
2912/// let x = array![0.0f64, 1.0, 2.0, 3.0];
2913/// let result = erfc_simd(&x.view());
2914/// assert!((result[0] - 1.0).abs() < 1e-10);  // erfc(0) = 1
2915/// assert!((result[1] - 0.1572992070502852).abs() < 1e-6);  // erfc(1)
2916/// ```
2917pub fn erfc_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2918where
2919    F: Float + SimdUnifiedOps,
2920{
2921    if x.is_empty() {
2922        return Array1::zeros(0);
2923    }
2924
2925    let optimizer = AutoOptimizer::new();
2926    if optimizer.should_use_simd(x.len()) {
2927        return F::simd_erfc(x);
2928    }
2929
    // Small arrays currently use the same unified implementation
2931    F::simd_erfc(x)
2932}
2933
2934/// Element-wise inverse error function erfinv(y) = x such that erf(x) = y
2935///
2936/// Uses SIMD acceleration when available for optimal performance.
2937/// Critical for inverse normal CDF (probit function): Φ⁻¹(p) = √2 * erfinv(2p - 1)
2938///
2939/// # Domain and Range
2940/// - Domain: (-1, 1)
2941/// - Range: (-∞, ∞)
2942/// - erfinv(-1) = -∞, erfinv(1) = ∞
2943///
2944/// # Properties
2945/// - erfinv(0) = 0
2946/// - erfinv(-y) = -erfinv(y) (odd function)
2947/// - erf(erfinv(y)) = y
2948///
2949/// # Examples
2950///
2951/// ```
2952/// use scirs2_core::ndarray_ext::elementwise::erfinv_simd;
2953/// use scirs2_core::ndarray::array;
2954///
2955/// let y = array![0.0f64, 0.5, -0.5, 0.9];
2956/// let result = erfinv_simd(&y.view());
2957/// assert!((result[0] - 0.0).abs() < 1e-10);  // erfinv(0) = 0
2958/// ```
2959pub fn erfinv_simd<F>(x: &ArrayView1<F>) -> Array1<F>
2960where
2961    F: Float + SimdUnifiedOps,
2962{
2963    if x.is_empty() {
2964        return Array1::zeros(0);
2965    }
2966
2967    let optimizer = AutoOptimizer::new();
2968    if optimizer.should_use_simd(x.len()) {
2969        return F::simd_erfinv(x);
2970    }
2971
    // Small arrays currently use the same unified implementation
2973    F::simd_erfinv(x)
2974}
2975
2976/// Element-wise inverse complementary error function erfcinv(y) = x such that erfc(x) = y
2977///
2978/// More numerically stable than erfinv(1 - y) for y close to 0.
2979/// Uses SIMD acceleration when available for optimal performance.
2980///
2981/// # Domain and Range
2982/// - Domain: (0, 2)
2983/// - Range: (-∞, ∞)
2984/// - erfcinv(0) = ∞, erfcinv(2) = -∞
2985///
2986/// # Properties
2987/// - erfcinv(1) = 0
2988/// - erfcinv(y) = erfinv(1 - y)
2989/// - erfc(erfcinv(y)) = y
2990///
2991/// # Examples
2992///
2993/// ```
2994/// use scirs2_core::ndarray_ext::elementwise::erfcinv_simd;
2995/// use scirs2_core::ndarray::array;
2996///
2997/// let y = array![1.0f64, 0.5, 1.5];
2998/// let result = erfcinv_simd(&y.view());
2999/// assert!((result[0] - 0.0).abs() < 1e-10);  // erfcinv(1) = 0
3000/// ```
3001pub fn erfcinv_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3002where
3003    F: Float + SimdUnifiedOps,
3004{
3005    if x.is_empty() {
3006        return Array1::zeros(0);
3007    }
3008
3009    let optimizer = AutoOptimizer::new();
3010    if optimizer.should_use_simd(x.len()) {
3011        return F::simd_erfcinv(x);
3012    }
3013
    // Small arrays currently use the same unified implementation
3015    F::simd_erfcinv(x)
3016}
3017
3018/// Compute the element-wise sigmoid (logistic) function of an array.
3019///
3020/// The sigmoid function is defined as:
3021/// σ(x) = 1 / (1 + exp(-x))
3022///
3023/// This is critical for neural networks, logistic regression, and probability modeling.
3024/// The implementation is numerically stable, avoiding overflow for large |x|.
3025///
3026/// # Properties
3027///
3028/// - Range: (0, 1)
3029/// - σ(0) = 0.5
3030/// - σ(-x) = 1 - σ(x)
3031/// - Derivative: σ'(x) = σ(x)(1 - σ(x))
3032///
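/// One common overflow-safe formulation (a sketch; not necessarily the exact
/// branching the SIMD kernel uses):
///
/// ```
/// fn sigmoid_stable(x: f64) -> f64 {
///     if x >= 0.0 {
///         1.0 / (1.0 + (-x).exp())
///     } else {
///         let e = x.exp(); // e ≤ 1 here, so no overflow
///         e / (1.0 + e)
///     }
/// }
/// assert!((sigmoid_stable(0.0) - 0.5).abs() < 1e-12);
/// assert!(sigmoid_stable(-1000.0).is_finite()); // no overflow for large |x|
/// ```
///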
3033/// # Examples
3034///
3035/// ```
3036/// use scirs2_core::ndarray_ext::elementwise::sigmoid_simd;
3037/// use scirs2_core::ndarray::array;
3038///
3039/// let x = array![0.0f64, 1.0, -1.0];
3040/// let result = sigmoid_simd(&x.view());
3041/// assert!((result[0] - 0.5).abs() < 1e-10);  // sigmoid(0) = 0.5
3042/// ```
3043pub fn sigmoid_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3044where
3045    F: Float + SimdUnifiedOps,
3046{
3047    if x.is_empty() {
3048        return Array1::zeros(0);
3049    }
3050
3051    let optimizer = AutoOptimizer::new();
3052    if optimizer.should_use_simd(x.len()) {
3053        return F::simd_sigmoid(x);
3054    }
3055
    // Small arrays currently use the same unified implementation
3057    F::simd_sigmoid(x)
3058}
3059
3060/// Compute the element-wise GELU (Gaussian Error Linear Unit) of an array.
3061///
3062/// The GELU function is defined as:
3063/// GELU(x) = x * Φ(x) = x * 0.5 * (1 + erf(x / √2))
3064///
3065/// Where Φ(x) is the cumulative distribution function of the standard normal distribution.
3066/// GELU is critical for Transformer models (BERT, GPT, etc.) and provides a smooth
3067/// approximation of ReLU.
3068///
3069/// # Properties
3070///
3071/// - GELU(0) = 0
3072/// - GELU(x) ≈ x for large positive x
3073/// - GELU(x) ≈ 0 for large negative x
3074/// - Smooth and differentiable everywhere
3075///
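/// For reference, the widely used tanh approximation of GELU (a sketch; this
/// function computes the exact erf-based form):
///
/// ```
/// fn gelu_tanh_approx(x: f64) -> f64 {
///     let c = (2.0 / std::f64::consts::PI).sqrt();
///     0.5 * x * (1.0 + (c * (x + 0.044715 * x.powi(3))).tanh())
/// }
/// assert!(gelu_tanh_approx(0.0).abs() < 1e-12);
/// assert!((gelu_tanh_approx(1.0) - 0.8412).abs() < 1e-3);
/// ```
///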
3076/// # Examples
3077///
3078/// ```
3079/// use scirs2_core::ndarray_ext::elementwise::gelu_simd;
3080/// use scirs2_core::ndarray::array;
3081///
3082/// let x = array![0.0f64, 1.0, -1.0];
3083/// let result = gelu_simd(&x.view());
3084/// assert!(result[0].abs() < 1e-10);  // GELU(0) = 0
3085/// ```
3086pub fn gelu_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3087where
3088    F: Float + SimdUnifiedOps,
3089{
3090    if x.is_empty() {
3091        return Array1::zeros(0);
3092    }
3093
3094    let optimizer = AutoOptimizer::new();
3095    if optimizer.should_use_simd(x.len()) {
3096        return F::simd_gelu(x);
3097    }
3098
    // Small arrays currently use the same unified implementation
3100    F::simd_gelu(x)
3101}
3102
3103/// SIMD-accelerated Swish (SiLU - Sigmoid Linear Unit) activation function
3104///
3105/// Computes the Swish activation function element-wise:
3106/// `Swish(x) = x * sigmoid(x) = x / (1 + exp(-x))`
3107///
3108/// Swish is a self-gated activation function discovered through neural architecture
3109/// search that has become a popular choice for modern neural networks.
3110///
3111/// # Key Properties
3112///
3113/// - Swish(0) = 0
3114/// - Swish is smooth and non-monotonic
3115/// - Has a small negative region (unlike ReLU)
3116/// - Self-gating: x modulates its own activation via sigmoid
3117/// - Unbounded above, bounded below (minimum ≈ -0.278 at x ≈ -1.278)
3118///
3119/// # Usage in Deep Learning
3120///
3121/// Swish is used in:
3122/// - EfficientNet and EfficientNetV2
3123/// - GPT-NeoX and other large language models
3124/// - MobileNetV3
3125/// - Many modern vision and NLP architectures
3126///
3127/// # Examples
3128///
3129/// ```
3130/// use scirs2_core::ndarray_ext::elementwise::swish_simd;
3131/// use scirs2_core::ndarray::array;
3132///
3133/// let x = array![0.0f64, 1.0, -1.0];
3134/// let result = swish_simd(&x.view());
3135/// assert!(result[0].abs() < 1e-10);  // Swish(0) = 0
3136/// // Swish(1) ≈ 0.7311
3137/// assert!((result[1] - 0.7310585786).abs() < 1e-6);
3138/// // Swish(-1) ≈ -0.2689
3139/// assert!((result[2] - (-0.2689414214)).abs() < 1e-6);
3140/// ```
3141pub fn swish_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3142where
3143    F: Float + SimdUnifiedOps,
3144{
3145    if x.is_empty() {
3146        return Array1::zeros(0);
3147    }
3148
3149    let optimizer = AutoOptimizer::new();
3150    if optimizer.should_use_simd(x.len()) {
3151        return F::simd_swish(x);
3152    }
3153
    // Small arrays currently use the same unified implementation
3155    F::simd_swish(x)
3156}
3157
3158/// SIMD-accelerated Softplus activation function
3159///
3160/// Computes the Softplus activation function element-wise:
3161/// `Softplus(x) = ln(1 + exp(x))`
3162///
3163/// Softplus is a smooth approximation of ReLU and is commonly used in
3164/// probabilistic models, Bayesian deep learning, and smooth counting.
3165///
3166/// # Key Properties
3167///
3168/// - Softplus(0) = ln(2) ≈ 0.693
3169/// - Always positive (> 0 for all x)
3170/// - Derivative: softplus'(x) = sigmoid(x)
3171/// - Approaches ReLU for x → +∞: softplus(x) ≈ x
3172/// - Approaches 0 for x → -∞: softplus(x) ≈ exp(x) ≈ 0
3173///
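/// An overflow-safe scalar formulation (a sketch; exp(x) alone would overflow
/// for large x):
///
/// ```
/// fn softplus_stable(x: f64) -> f64 {
///     // ln(1 + eˣ) = max(x, 0) + ln(1 + e^(−|x|))
///     x.max(0.0) + (-x.abs()).exp().ln_1p()
/// }
/// assert!((softplus_stable(0.0) - std::f64::consts::LN_2).abs() < 1e-12);
/// assert!((softplus_stable(1000.0) - 1000.0).abs() < 1e-12);
/// ```
///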
3174/// # Usage
3175///
3176/// Softplus is used in:
3177/// - Probabilistic models (output layer for positive quantities)
3178/// - Bayesian deep learning
3179/// - Smooth counting applications
3180/// - As a smooth ReLU replacement
3181///
3182/// # Examples
3183///
3184/// ```
3185/// use scirs2_core::ndarray_ext::elementwise::softplus_simd;
3186/// use scirs2_core::ndarray::array;
3187///
3188/// let x = array![0.0f64, 1.0, -1.0];
3189/// let result = softplus_simd(&x.view());
3190/// // Softplus(0) = ln(2) ≈ 0.693
3191/// assert!((result[0] - 0.6931471805599453).abs() < 1e-10);
3192/// // Softplus(1) = ln(1 + e) ≈ 1.3133
3193/// assert!((result[1] - 1.3132616875182228).abs() < 1e-10);
3194/// // Softplus(-1) = ln(1 + 1/e) ≈ 0.3133
3195/// assert!((result[2] - 0.31326168751822286).abs() < 1e-10);
3196/// ```
3197pub fn softplus_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3198where
3199    F: Float + SimdUnifiedOps,
3200{
3201    if x.is_empty() {
3202        return Array1::zeros(0);
3203    }
3204
3205    let optimizer = AutoOptimizer::new();
3206    if optimizer.should_use_simd(x.len()) {
3207        return F::simd_softplus(x);
3208    }
3209
    // Small arrays currently use the same unified implementation
3211    F::simd_softplus(x)
3212}
3213
3214/// SIMD-accelerated Mish activation function
3215///
3216/// Computes the Mish activation function element-wise:
3217/// `Mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x)))`
3218///
3219/// Mish is a self-regularized non-monotonic activation function that combines
3220/// the benefits of ReLU, Swish, and other modern activations.
3221///
3222/// # Key Properties
3223///
3224/// - Mish(0) = 0
3225/// - Smooth and non-monotonic
3226/// - Self-regularizing (bounded negative region)
3227/// - Unbounded above, bounded below (minimum ≈ -0.31 at x ≈ -1.2)
3228/// - Derivative: complex but well-behaved
3229///
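/// The definition in scalar form (illustrative):
///
/// ```
/// fn mish_scalar(x: f64) -> f64 {
///     x * x.exp().ln_1p().tanh() // x · tanh(ln(1 + eˣ))
/// }
/// assert!((mish_scalar(1.0) - 0.8650983882673103).abs() < 1e-12);
/// ```
///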
3230/// # Usage in Deep Learning
3231///
3232/// Mish is used in:
3233/// - YOLOv4 and YOLOv5 object detection
3234/// - Modern convolutional neural networks
3235/// - Image classification and segmentation tasks
3236///
3237/// # Examples
3238///
3239/// ```
3240/// use scirs2_core::ndarray_ext::elementwise::mish_simd;
3241/// use scirs2_core::ndarray::array;
3242///
3243/// let x = array![0.0f64, 1.0, -1.0];
3244/// let result = mish_simd(&x.view());
3245/// // Mish(0) = 0
3246/// assert!(result[0].abs() < 1e-10);
3247/// // Mish(1) = 1 * tanh(ln(1 + e)) ≈ 0.8651
3248/// assert!((result[1] - 0.8650983882673103).abs() < 1e-10);
3249/// ```
3250pub fn mish_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3251where
3252    F: Float + SimdUnifiedOps,
3253{
3254    if x.is_empty() {
3255        return Array1::zeros(0);
3256    }
3257
3258    let optimizer = AutoOptimizer::new();
3259    if optimizer.should_use_simd(x.len()) {
3260        return F::simd_mish(x);
3261    }
3262
    // Small arrays currently use the same unified implementation
3264    F::simd_mish(x)
3265}
3266
3267/// Apply ELU (Exponential Linear Unit) activation using SIMD operations
3268///
3269/// ELU is defined as:
3270/// - f(x) = x, if x >= 0
3271/// - f(x) = α * (exp(x) - 1), if x < 0
3272///
3273/// ELU is used in deep neural networks to:
3274/// - Push mean activations closer to zero (faster learning)
3275/// - Have negative values (unlike ReLU) for better gradient flow
3276/// - Have a smooth curve everywhere (unlike Leaky ReLU)
3277///
3278/// # Arguments
3279/// * `x` - Input array
3280/// * `alpha` - Scaling factor for negative inputs (commonly 1.0)
3281///
3282/// # Returns
3283/// * Array with ELU applied elementwise
3284///
3285/// # Example
3286/// ```
3287/// use scirs2_core::ndarray_ext::elementwise::elu_simd;
/// use scirs2_core::ndarray::array;
3289///
3290/// let x = array![1.0_f32, 0.0, -1.0, -2.0];
3291/// let result = elu_simd(&x.view(), 1.0);
3292/// assert!((result[0] - 1.0).abs() < 1e-6);  // Positive: unchanged
3293/// assert!((result[1] - 0.0).abs() < 1e-6);  // Zero: unchanged
3294/// assert!(result[2] < 0.0);  // Negative: α * (exp(x) - 1) < 0
3295/// ```
3296pub fn elu_simd<F>(x: &ArrayView1<F>, alpha: F) -> Array1<F>
3297where
3298    F: Float + SimdUnifiedOps,
3299{
3300    if x.is_empty() {
3301        return Array1::zeros(0);
3302    }
3303
3304    let optimizer = AutoOptimizer::new();
3305    if optimizer.should_use_simd(x.len()) {
3306        return F::simd_elu(x, alpha);
3307    }
3308
    // Small arrays currently use the same unified implementation
3310    F::simd_elu(x, alpha)
3311}
3312
3313/// Apply Leaky ReLU / PReLU activation using SIMD operations
3314///
3315/// Leaky ReLU (Parametric ReLU when alpha is learned) is defined as:
3316/// - f(x) = x, if x >= 0
3317/// - f(x) = alpha * x, if x < 0
3318///
3319/// Leaky ReLU addresses the "dying ReLU" problem by allowing
3320/// a small gradient for negative inputs, preventing neurons from
3321/// becoming permanently inactive.
3322///
3323/// # Arguments
3324/// * `x` - Input array
3325/// * `alpha` - Slope for negative inputs (commonly 0.01 for Leaky ReLU, learned for PReLU)
3326///
3327/// # Returns
3328/// * Array with Leaky ReLU applied elementwise
3329///
3330/// # Example
3331/// ```
3332/// use scirs2_core::ndarray_ext::elementwise::leaky_relu_simd;
/// use scirs2_core::ndarray::array;
3334///
3335/// let x = array![1.0_f32, 0.0, -1.0, -2.0];
3336/// let result = leaky_relu_simd(&x.view(), 0.01);
3337/// assert!((result[0] - 1.0).abs() < 1e-6);    // Positive: unchanged
3338/// assert!((result[1] - 0.0).abs() < 1e-6);    // Zero: unchanged
3339/// assert!((result[2] - (-0.01)).abs() < 1e-6); // Negative: alpha * x
3340/// ```
3341pub fn leaky_relu_simd<F>(x: &ArrayView1<F>, alpha: F) -> Array1<F>
3342where
3343    F: Float + SimdUnifiedOps,
3344{
3345    if x.is_empty() {
3346        return Array1::zeros(0);
3347    }
3348
3349    let optimizer = AutoOptimizer::new();
3350    if optimizer.should_use_simd(x.len()) {
3351        return F::simd_leaky_relu(x, alpha);
3352    }
3353
    // Small arrays currently use the same unified implementation
3355    F::simd_leaky_relu(x, alpha)
3356}
3357
3358/// Alias for leaky_relu_simd - PReLU (Parametric ReLU)
3359///
3360/// PReLU is mathematically identical to Leaky ReLU, but the alpha
3361/// parameter is learned during training rather than being fixed.
3362/// This function provides a convenience alias for neural network code
3363/// that uses PReLU terminology.
3364#[inline]
3365pub fn prelu_simd<F>(x: &ArrayView1<F>, alpha: F) -> Array1<F>
3366where
3367    F: Float + SimdUnifiedOps,
3368{
3369    leaky_relu_simd(x, alpha)
3370}
3371
3372/// Apply SELU (Scaled Exponential Linear Unit) activation using SIMD operations
3373///
3374/// SELU is defined as:
3375/// - f(x) = λ * x, if x > 0
3376/// - f(x) = λ * α * (exp(x) - 1), if x <= 0
3377///
3378/// where λ ≈ 1.0507 and α ≈ 1.6733 are fixed constants.
3379///
3380/// SELU is the key activation for Self-Normalizing Neural Networks (SNNs):
3381/// - Automatically maintains mean ≈ 0 and variance ≈ 1 through layers
3382/// - Eliminates the need for Batch Normalization
3383/// - Requires LeCun Normal initialization for weights
3384/// - Works best with fully-connected networks
3385///
3386/// # Arguments
3387/// * `x` - Input array
3388///
3389/// # Returns
3390/// * Array with SELU applied elementwise
3391///
3392/// # Example
3393/// ```
3394/// use scirs2_core::ndarray_ext::elementwise::selu_simd;
/// use scirs2_core::ndarray::array;
3396///
3397/// let x = array![1.0_f32, 0.0, -1.0, -2.0];
3398/// let result = selu_simd(&x.view());
3399/// assert!(result[0] > 1.0);  // Positive: scaled by λ ≈ 1.0507
3400/// assert!((result[1] - 0.0).abs() < 1e-6);  // Zero: unchanged
3401/// assert!(result[2] < 0.0);  // Negative: λ * α * (exp(x) - 1) < 0
3402/// ```
3403pub fn selu_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3404where
3405    F: Float + SimdUnifiedOps,
3406{
3407    if x.is_empty() {
3408        return Array1::zeros(0);
3409    }
3410
3411    let optimizer = AutoOptimizer::new();
3412    if optimizer.should_use_simd(x.len()) {
3413        return F::simd_selu(x);
3414    }
3415
    // Small arrays currently use the same unified implementation
3417    F::simd_selu(x)
3418}
3419
3420/// Apply Hardsigmoid activation using SIMD operations
3421///
3422/// Hardsigmoid is defined as:
3423/// - f(x) = 0, if x <= -3
3424/// - f(x) = 1, if x >= 3
3425/// - f(x) = (x + 3) / 6, otherwise
3426///
3427/// Hardsigmoid is a piecewise linear approximation of sigmoid:
3428/// - Used in MobileNetV3 for efficient mobile inference
3429/// - Avoids expensive exp() computation
3430/// - Output always in range [0, 1]
3431///
3432/// # Arguments
3433/// * `x` - Input array
3434///
3435/// # Returns
3436/// * Array with Hardsigmoid applied elementwise
3437///
3438/// # Example
3439/// ```
3440/// use scirs2_core::ndarray_ext::elementwise::hardsigmoid_simd;
/// use scirs2_core::ndarray::array;
3442///
3443/// let x = array![-4.0_f32, -3.0, 0.0, 3.0, 4.0];
3444/// let result = hardsigmoid_simd(&x.view());
3445/// assert!((result[0] - 0.0).abs() < 1e-6);   // Saturated at 0
3446/// assert!((result[1] - 0.0).abs() < 1e-6);   // Boundary
3447/// assert!((result[2] - 0.5).abs() < 1e-6);   // Linear region: (0+3)/6 = 0.5
3448/// assert!((result[3] - 1.0).abs() < 1e-6);   // Boundary
3449/// assert!((result[4] - 1.0).abs() < 1e-6);   // Saturated at 1
3450/// ```
3451pub fn hardsigmoid_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3452where
3453    F: Float + SimdUnifiedOps,
3454{
3455    if x.is_empty() {
3456        return Array1::zeros(0);
3457    }
3458
3459    let optimizer = AutoOptimizer::new();
3460    if optimizer.should_use_simd(x.len()) {
3461        return F::simd_hardsigmoid(x);
3462    }
3463
    // Small arrays currently use the same unified implementation
3465    F::simd_hardsigmoid(x)
3466}
3467
3468/// Apply Hardswish activation using SIMD operations
3469///
3470/// Hardswish is defined as:
3471/// - f(x) = 0, if x <= -3
3472/// - f(x) = x, if x >= 3
3473/// - f(x) = x * (x + 3) / 6, otherwise
3474///
3475/// Hardswish is a piecewise approximation of Swish (quadratic in the transition region):
3476/// - Used in MobileNetV3 for efficient mobile inference
3477/// - Self-gating like Swish but without exp() computation
3478/// - Continuous everywhere, though its first derivative jumps at x = ±3
3479///
3480/// # Arguments
3481/// * `x` - Input array
3482///
3483/// # Returns
3484/// * Array with Hardswish applied elementwise
3485///
3486/// # Examples
3487/// ```
3488/// use scirs2_core::ndarray_ext::elementwise::hardswish_simd;
3489/// use scirs2_core::ndarray::{array, ArrayView1};
3490///
3491/// let x = array![-4.0_f32, -3.0, 0.0, 3.0, 4.0];
3492/// let result = hardswish_simd(&x.view());
3493/// assert!((result[0] - 0.0).abs() < 1e-6);   // Saturated at 0
3494/// assert!((result[1] - 0.0).abs() < 1e-6);   // Boundary: -3 * 0 / 6 = 0
3495/// assert!((result[2] - 0.0).abs() < 1e-6);   // Linear: 0 * 3 / 6 = 0
3496/// assert!((result[3] - 3.0).abs() < 1e-6);   // Boundary: identity
3497/// assert!((result[4] - 4.0).abs() < 1e-6);   // Identity region
3498/// ```
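///
/// A scalar sketch of the same piecewise definition (reference only, not the
/// backend's code path):
///
/// ```
/// // x times the hardsigmoid gate ((x + 3) / 6, clamped to [0, 1]).
/// fn hardswish_ref(x: f64) -> f64 {
///     x * ((x + 3.0) / 6.0).clamp(0.0, 1.0)
/// }
///
/// assert!((hardswish_ref(4.0) - 4.0).abs() < 1e-12); // identity region
/// assert!(hardswish_ref(-4.0).abs() < 1e-12);        // saturated at 0
/// ```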
3499pub fn hardswish_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3500where
3501    F: Float + SimdUnifiedOps,
3502{
3503    if x.is_empty() {
3504        return Array1::zeros(0);
3505    }
3506
3507    let optimizer = AutoOptimizer::new();
3508    if optimizer.should_use_simd(x.len()) {
3509        return F::simd_hardswish(x);
3510    }
3511
3512    // Small arrays: simd_hardswish falls back to scalar internally
3513    F::simd_hardswish(x)
3514}
3515
3516/// Apply Sinc function using SIMD operations
3517///
3518/// The normalized sinc function is defined as:
3519/// - sinc(x) = sin(πx) / (πx) for x ≠ 0
3520/// - sinc(0) = 1 (by L'Hôpital's rule)
3521///
3522/// The sinc function is fundamental in signal processing:
3523/// - Ideal low-pass filter impulse response
3524/// - Whittaker-Shannon interpolation formula
3525/// - Windowing functions (Lanczos kernel)
3526/// - Sampling theory and Nyquist theorem
3527///
3528/// # Arguments
3529/// * `x` - Input array
3530///
3531/// # Returns
3532/// * Array with sinc applied elementwise
3533///
3534/// # Examples
3535/// ```
3536/// use scirs2_core::ndarray_ext::elementwise::sinc_simd;
3537/// use scirs2_core::ndarray::{array, ArrayView1};
3538///
3539/// let x = array![0.0_f64, 0.5, 1.0, 2.0];
3540/// let result = sinc_simd(&x.view());
3541/// assert!((result[0] - 1.0).abs() < 1e-10);   // sinc(0) = 1
3542/// assert!((result[2] - 0.0).abs() < 1e-10);   // sinc(1) = 0
3543/// assert!((result[3] - 0.0).abs() < 1e-10);   // sinc(2) = 0
3544/// ```
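///
/// A scalar sketch showing how the removable singularity at zero is handled
/// (reference only):
///
/// ```
/// fn sinc_ref(x: f64) -> f64 {
///     if x == 0.0 {
///         1.0 // limit of sin(πx)/(πx) as x → 0
///     } else {
///         let px = std::f64::consts::PI * x;
///         px.sin() / px
///     }
/// }
///
/// assert!(sinc_ref(1.0).abs() < 1e-15); // sinc(1) = 0
/// ```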
3545pub fn sinc_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3546where
3547    F: Float + SimdUnifiedOps,
3548{
3549    if x.is_empty() {
3550        return Array1::zeros(0);
3551    }
3552
3553    let optimizer = AutoOptimizer::new();
3554    if optimizer.should_use_simd(x.len()) {
3555        return F::simd_sinc(x);
3556    }
3557
3558    // Small arrays: simd_sinc falls back to scalar internally
3559    F::simd_sinc(x)
3560}
3561
3562/// Apply Log-Softmax function using SIMD operations
3563///
3564/// The log-softmax function is defined as:
3565/// log_softmax(x_i) = x_i - log(Σ_j exp(x_j))
3566///
3567/// This is more numerically stable than computing log(softmax(x)) directly,
3568/// and is critical for neural network training:
3569/// - Cross-entropy loss computation
3570/// - Classification networks (output layer)
3571/// - Transformer language models
3572/// - Large language models (LLMs)
3573///
3574/// # Mathematical Properties
3575///
3576/// - log_softmax(x) = x - logsumexp(x)
3577/// - Σ exp(log_softmax(x)) = 1 (outputs are log-probabilities)
3578/// - log_softmax(x + c) = log_softmax(x) (invariant to constant shift)
3579/// - Maximum output element approaches 0 for peaked distributions
3580///
3581/// # Arguments
3582/// * `x` - Input array of logits (unnormalized log-probabilities)
3583///
3584/// # Returns
3585/// * Array with log-softmax applied elementwise (log-probabilities whose exponentials sum to 1)
3586///
3587/// # Examples
3588/// ```
3589/// use scirs2_core::ndarray_ext::elementwise::log_softmax_simd;
3590/// use scirs2_core::ndarray::{array, ArrayView1};
3591///
3592/// let logits = array![1.0_f64, 2.0, 3.0];
3593/// let log_probs = log_softmax_simd(&logits.view());
3594///
3595/// // Verify: exp(log_probs) sums to 1
3596/// let probs: f64 = log_probs.mapv(|lp| lp.exp()).sum();
3597/// assert!((probs - 1.0).abs() < 1e-10);
3598///
3599/// // log_softmax values are always <= 0
3600/// for &lp in log_probs.iter() {
3601///     assert!(lp <= 0.0);
3602/// }
3603/// ```
3604///
3605/// # Applications
3606///
3607/// - **Cross-Entropy Loss**: -Σ target * log_softmax(logits)
3608/// - **Classification**: Converting logits to log-probabilities
3609/// - **Transformers**: Final output layer for token prediction
3610/// - **Language Models**: Computing perplexity and token probabilities
3611/// - **Numerical Stability**: Avoiding underflow in softmax computation
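///
/// # Reference Sketch
///
/// A scalar version using the max-shift identity above (the SIMD path should
/// agree up to rounding; reference only):
///
/// ```
/// fn log_softmax_ref(x: &[f64]) -> Vec<f64> {
///     // logsumexp(x) = max(x) + ln(Σ exp(x - max(x)))
///     let m = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
///     let lse = m + x.iter().map(|&v| (v - m).exp()).sum::<f64>().ln();
///     x.iter().map(|&v| v - lse).collect()
/// }
///
/// let lp = log_softmax_ref(&[1.0, 2.0, 3.0]);
/// assert!((lp.iter().map(|v| v.exp()).sum::<f64>() - 1.0).abs() < 1e-12);
/// ```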
3612pub fn log_softmax_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3613where
3614    F: Float + SimdUnifiedOps,
3615{
3616    if x.is_empty() {
3617        return Array1::zeros(0);
3618    }
3619
3620    let optimizer = AutoOptimizer::new();
3621    if optimizer.should_use_simd(x.len()) {
3622        return F::simd_log_softmax(x);
3623    }
3624
3625    // Small arrays: simd_log_softmax falls back to scalar internally
3626    F::simd_log_softmax(x)
3627}
3628
3629/// Apply inverse hyperbolic sine (asinh) using SIMD operations
3630///
3631/// The inverse hyperbolic sine is defined as:
3632/// asinh(x) = ln(x + √(x² + 1))
3633///
3634/// Domain: (-∞, +∞), Range: (-∞, +∞)
3635/// This is the inverse function of sinh.
3636///
3637/// # Mathematical Properties
3638///
3639/// - asinh(0) = 0
3640/// - asinh(-x) = -asinh(x) (odd function)
3641/// - asinh'(x) = 1/√(x² + 1)
3642/// - For large x: asinh(x) ≈ ln(2x)
3643///
3644/// # Arguments
3645/// * `x` - Input array
3646///
3647/// # Returns
3648/// * Array with asinh applied elementwise
3649///
3650/// # Examples
3651/// ```
3652/// use scirs2_core::ndarray_ext::elementwise::asinh_simd;
3653/// use scirs2_core::ndarray::{array, ArrayView1};
3654///
3655/// let x = array![0.0_f64, 1.0, -1.0, 10.0];
3656/// let result = asinh_simd(&x.view());
3657/// assert!((result[0] - 0.0).abs() < 1e-10);
3658/// assert!((result[1] - 0.881373587).abs() < 1e-6);  // asinh(1)
3659/// assert!((result[1] + result[2]).abs() < 1e-10);  // odd function
3660/// ```
3661///
3662/// # Applications
3663///
3664/// - **Hyperbolic Geometry**: Distance calculations in hyperbolic space
3665/// - **Special Relativity**: Rapidity in Lorentz transformations
3666/// - **Signal Processing**: Parametric representation of signals
3667/// - **Statistics**: Transformations for variance stabilization
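///
/// Numerical note (a sketch using std's scalar `f64::asinh` for comparison):
/// evaluating ln(x + √(x² + 1)) directly cancels catastrophically for large
/// negative x, while a dedicated asinh does not:
///
/// ```
/// let x = -1.0e8_f64;
/// let naive = (x + (x * x + 1.0).sqrt()).ln(); // -1e8 + 1e8 rounds to 0
/// assert!(naive.is_infinite());                // ln(0) = -∞
///
/// // asinh(x) ≈ -ln(2|x|) for large negative x
/// assert!((x.asinh() + (2.0e8_f64).ln()).abs() < 1e-6);
/// ```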
3668pub fn asinh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3669where
3670    F: Float + SimdUnifiedOps,
3671{
3672    if x.is_empty() {
3673        return Array1::zeros(0);
3674    }
3675
3676    let optimizer = AutoOptimizer::new();
3677    if optimizer.should_use_simd(x.len()) {
3678        return F::simd_asinh(x);
3679    }
3680
3681    // Small arrays: simd_asinh falls back to scalar internally
3682    F::simd_asinh(x)
3683}
3684
3685/// Apply inverse hyperbolic cosine (acosh) using SIMD operations
3686///
3687/// The inverse hyperbolic cosine is defined as:
3688/// acosh(x) = ln(x + √(x² - 1))
3689///
3690/// Domain: [1, +∞), Range: [0, +∞)
3691/// Returns NaN for x < 1.
3692/// This is the inverse function of cosh.
3693///
3694/// # Mathematical Properties
3695///
3696/// - acosh(1) = 0
3697/// - acosh(x) is monotonically increasing for x ≥ 1
3698/// - acosh'(x) = 1/√(x² - 1)
3699/// - For large x: acosh(x) ≈ ln(2x)
3700///
3701/// # Arguments
3702/// * `x` - Input array (values should be ≥ 1 for valid results)
3703///
3704/// # Returns
3705/// * Array with acosh applied elementwise (NaN for values < 1)
3706///
3707/// # Examples
3708/// ```
3709/// use scirs2_core::ndarray_ext::elementwise::acosh_simd;
3710/// use scirs2_core::ndarray::{array, ArrayView1};
3711///
3712/// let x = array![1.0_f64, 2.0, 10.0];
3713/// let result = acosh_simd(&x.view());
3714/// assert!((result[0] - 0.0).abs() < 1e-10);         // acosh(1) = 0
3715/// assert!((result[1] - 1.316957897).abs() < 1e-6);  // acosh(2)
3716///
3717/// // Out of domain returns NaN
3718/// let x_invalid = array![0.5_f64];
3719/// let result_invalid = acosh_simd(&x_invalid.view());
3720/// assert!(result_invalid[0].is_nan());
3721/// ```
3722///
3723/// # Applications
3724///
3725/// - **Hyperbolic Geometry**: Distance in Poincaré disk model
3726/// - **Physics**: Catenary curves, suspension bridges
3727/// - **Electronics**: Transmission line analysis
3728/// - **Computer Graphics**: Hyperbolic tessellations
3729pub fn acosh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3730where
3731    F: Float + SimdUnifiedOps,
3732{
3733    if x.is_empty() {
3734        return Array1::zeros(0);
3735    }
3736
3737    let optimizer = AutoOptimizer::new();
3738    if optimizer.should_use_simd(x.len()) {
3739        return F::simd_acosh(x);
3740    }
3741
3742    // Small arrays: simd_acosh falls back to scalar internally
3743    F::simd_acosh(x)
3744}
3745
3746/// Apply inverse hyperbolic tangent (atanh) using SIMD operations
3747///
3748/// The inverse hyperbolic tangent is defined as:
3749/// atanh(x) = 0.5 * ln((1+x)/(1-x))
3750///
3751/// Domain: (-1, 1), Range: (-∞, +∞)
3752/// Returns ±∞ at x = ±1, NaN for |x| > 1.
3753/// This is the inverse function of tanh.
3754///
3755/// # Mathematical Properties
3756///
3757/// - atanh(0) = 0
3758/// - atanh(-x) = -atanh(x) (odd function)
3759/// - atanh(±1) = ±∞
3760/// - atanh'(x) = 1/(1 - x²)
3761///
3762/// # Arguments
3763/// * `x` - Input array (values should be in (-1, 1) for finite results)
3764///
3765/// # Returns
3766/// * Array with atanh applied elementwise
3767///
3768/// # Examples
3769/// ```
3770/// use scirs2_core::ndarray_ext::elementwise::atanh_simd;
3771/// use scirs2_core::ndarray::{array, ArrayView1};
3772///
3773/// let x = array![0.0_f64, 0.5, -0.5, 0.99];
3774/// let result = atanh_simd(&x.view());
3775/// assert!((result[0] - 0.0).abs() < 1e-10);          // atanh(0) = 0
3776/// assert!((result[1] - 0.5493061).abs() < 1e-6);     // atanh(0.5)
3777/// assert!((result[1] + result[2]).abs() < 1e-10);   // odd function
3778///
3779/// // Boundaries
3780/// let x_boundary = array![1.0_f64, -1.0];
3781/// let result_boundary = atanh_simd(&x_boundary.view());
3782/// assert!(result_boundary[0].is_infinite() && result_boundary[0] > 0.0);
3783/// assert!(result_boundary[1].is_infinite() && result_boundary[1] < 0.0);
3784/// ```
3785///
3786/// # Applications
3787///
3788/// - **Statistics**: Fisher's z-transformation for correlation coefficients
3789/// - **Signal Processing**: Parametric signal representation
3790/// - **Probability**: Logit function relationship (atanh(x) = 0.5*logit((1+x)/2))
3791/// - **Machine Learning**: Activation function transformations
3792pub fn atanh_simd<F>(x: &ArrayView1<F>) -> Array1<F>
3793where
3794    F: Float + SimdUnifiedOps,
3795{
3796    if x.is_empty() {
3797        return Array1::zeros(0);
3798    }
3799
3800    let optimizer = AutoOptimizer::new();
3801    if optimizer.should_use_simd(x.len()) {
3802        return F::simd_atanh(x);
3803    }
3804
3805    // Small arrays: simd_atanh falls back to scalar internally
3806    F::simd_atanh(x)
3807}
3808
3809/// Compute the Beta function B(a, b) using SIMD operations
3810///
3811/// The Beta function is defined as:
3812/// B(a, b) = Γ(a)Γ(b) / Γ(a+b)
3813///
3814/// This function computes `B(a[i], b[i])` for each pair of elements.
3815/// The Beta function is fundamental in:
3816/// - Beta distribution (Bayesian priors for probabilities)
3817/// - Binomial coefficients: C(n,k) = 1/((n+1)·B(n-k+1, k+1))
3818/// - Statistical hypothesis testing
3819/// - Machine learning (Dirichlet processes)
3820///
3821/// # Mathematical Properties
3822///
3823/// - B(a, b) = B(b, a) (symmetric)
3824/// - B(1, 1) = 1
3825/// - B(a, 1) = 1/a
3826/// - B(1, b) = 1/b
3827/// - B(a, b) = B(a+1, b) + B(a, b+1)
3828///
3829/// # Arguments
3830/// * `a` - First parameter array (must be > 0)
3831/// * `b` - Second parameter array (must be > 0, same length as `a`)
3832///
3833/// # Returns
3834/// * Array with `B(a[i], b[i])` for each pair
3835///
3836/// # Examples
3837/// ```
3838/// use scirs2_core::ndarray_ext::elementwise::beta_simd;
3839/// use scirs2_core::ndarray::{array, ArrayView1};
3840///
3841/// let a = array![1.0_f64, 2.0, 0.5];
3842/// let b = array![1.0_f64, 2.0, 0.5];
3843/// let result = beta_simd(&a.view(), &b.view());
3844/// assert!((result[0] - 1.0).abs() < 1e-10);       // B(1,1) = 1
3845/// assert!((result[1] - 1.0/6.0).abs() < 1e-10);  // B(2,2) = 1/6
3846/// assert!((result[2] - std::f64::consts::PI).abs() < 1e-8);  // B(0.5,0.5) = π
3847/// ```
3848///
3849/// # Applications
3850///
3851/// - **Beta Distribution**: PDF = x^(a-1)(1-x)^(b-1) / B(a,b)
3852/// - **Bayesian Statistics**: Prior/posterior for probability parameters
3853/// - **A/B Testing**: Conversion rate analysis
3854/// - **Machine Learning**: Dirichlet processes, topic modeling
3855pub fn beta_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
3856where
3857    F: Float + SimdUnifiedOps,
3858{
3859    if a.is_empty() || b.is_empty() {
3860        return Array1::zeros(0);
3861    }
3862
3863    let optimizer = AutoOptimizer::new();
3864    if optimizer.should_use_simd(a.len()) {
3865        return F::simd_beta(a, b);
3866    }
3867
3868    // Small arrays: simd_beta falls back to scalar internally
3869    F::simd_beta(a, b)
3870}
3871
3872/// Compute the Log-Beta function ln(B(a, b)) using SIMD operations
3873///
3874/// The Log-Beta function is defined as:
3875/// ln(B(a, b)) = ln(Γ(a)) + ln(Γ(b)) - ln(Γ(a+b))
3876///
3877/// This is more numerically stable than computing B(a,b) directly,
3878/// especially for large arguments where Γ would overflow.
3879///
3880/// # Mathematical Properties
3881///
3882/// - ln(B(a, b)) = ln(B(b, a)) (symmetric)
3883/// - ln(B(1, 1)) = 0
3884/// - ln(B(a, 1)) = -ln(a)
3885/// - For large a,b: ln(B(a,b)) ≈ 0.5*ln(2π) - (a+b-0.5)*ln(a+b) + (a-0.5)*ln(a) + (b-0.5)*ln(b)
3886///
3887/// # Arguments
3888/// * `a` - First parameter array (must be > 0)
3889/// * `b` - Second parameter array (must be > 0, same length as `a`)
3890///
3891/// # Returns
3892/// * Array with `ln(B(a[i], b[i]))` for each pair
3893///
3894/// # Examples
3895/// ```ignore
3896/// use scirs2_core::ndarray_ext::elementwise::ln_beta_simd;
3897/// use scirs2_core::ndarray::{array, ArrayView1};
3898///
3899/// let a = array![1.0_f64, 2.0, 10.0];
3900/// let b = array![1.0_f64, 2.0, 10.0];
3901/// let result = ln_beta_simd(&a.view(), &b.view());
3902/// assert!((result[0] - 0.0_f64).abs() < 1e-10);  // ln(B(1,1)) = ln(1) = 0
3903/// assert!((result[1] - (1.0_f64 / 6.0).ln()).abs() < 1e-10);  // ln(B(2,2)) = ln(1/6)
3904/// ```
3905///
3906/// # Applications
3907///
3908/// - **Numerical Stability**: Avoiding overflow in Beta distribution computations
3909/// - **Log-likelihood**: Direct computation of log-probabilities
3910/// - **Monte Carlo Methods**: Log-probability computations
3911/// - **Variational Inference**: KL divergence between Beta distributions
3912pub fn ln_beta_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
3913where
3914    F: Float + SimdUnifiedOps,
3915{
3916    if a.is_empty() || b.is_empty() {
3917        return Array1::zeros(0);
3918    }
3919
3920    let optimizer = AutoOptimizer::new();
3921    if optimizer.should_use_simd(a.len()) {
3922        return F::simd_ln_beta(a, b);
3923    }
3924
3925    // Small arrays: simd_ln_beta falls back to scalar internally
3926    F::simd_ln_beta(a, b)
3927}
3928
3929/// SIMD-accelerated linear interpolation
3930///
3931/// Computes element-wise linear interpolation: lerp(a, b, t) = a + t * (b - a)
3932/// When t=0, returns a; when t=1, returns b.
3933///
3934/// # Arguments
3935/// * `a` - Start values
3936/// * `b` - End values
3937/// * `t` - Interpolation parameter (typically in [0, 1])
3938///
3939/// # Returns
3940/// Array of interpolated values
3941///
3942/// # Examples
3943/// ```
3944/// use scirs2_core::ndarray::{array, Array1};
3945/// use scirs2_core::ndarray_ext::elementwise::lerp_simd;
3946///
3947/// let a = array![0.0_f32, 0.0, 0.0];
3948/// let b = array![10.0_f32, 20.0, 30.0];
3949///
3950/// // t = 0: returns a
3951/// let result = lerp_simd::<f32>(&a.view(), &b.view(), 0.0);
3952/// assert!((result[0] - 0.0).abs() < 1e-6);
3953///
3954/// // t = 1: returns b
3955/// let result = lerp_simd::<f32>(&a.view(), &b.view(), 1.0);
3956/// assert!((result[0] - 10.0).abs() < 1e-6);
3957///
3958/// // t = 0.5: midpoint
3959/// let result = lerp_simd::<f32>(&a.view(), &b.view(), 0.5);
3960/// assert!((result[0] - 5.0).abs() < 1e-6);
3961/// ```
3962///
3963/// # Use Cases
3964/// - Animation blending
3965/// - Color interpolation
3966/// - Smooth parameter transitions
3967/// - Gradient computation in neural networks
3968pub fn lerp_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>, t: F) -> Array1<F>
3969where
3970    F: Float + SimdUnifiedOps,
3971{
3972    if a.is_empty() || b.is_empty() {
3973        return Array1::zeros(0);
3974    }
3975
3976    let optimizer = AutoOptimizer::new();
3977    if optimizer.should_use_simd(a.len()) {
3978        return F::simd_lerp(a, b, t);
3979    }
3980
3981    // Small arrays: simd_lerp falls back to scalar internally
3982    F::simd_lerp(a, b, t)
3983}
3984
3985/// SIMD-accelerated smoothstep interpolation
3986///
3987/// Returns smooth Hermite interpolation between 0 and 1 when edge0 < x < edge1:
3988/// - Returns 0 if x <= edge0
3989/// - Returns 1 if x >= edge1
3990/// - Returns smooth curve: 3t² - 2t³ where t = (x - edge0) / (edge1 - edge0)
3991///
3992/// # Arguments
3993/// * `edge0` - Lower edge of the transition
3994/// * `edge1` - Upper edge of the transition
3995/// * `x` - Input values
3996///
3997/// # Returns
3998/// Array of smoothstep values in [0, 1]
3999///
4000/// # Examples
4001/// ```
4002/// use scirs2_core::ndarray::{array, Array1};
4003/// use scirs2_core::ndarray_ext::elementwise::smoothstep_simd;
4004///
4005/// let x = array![0.0_f32, 0.25, 0.5, 0.75, 1.0];
4006///
4007/// let result = smoothstep_simd::<f32>(0.0, 1.0, &x.view());
4008/// assert!((result[0] - 0.0).abs() < 1e-6);  // x=0 -> 0
4009/// assert!((result[2] - 0.5).abs() < 1e-6);  // x=0.5 -> 0.5
4010/// assert!((result[4] - 1.0).abs() < 1e-6);  // x=1 -> 1
4011/// ```
4012///
4013/// # Use Cases
4014/// - Shader programming (smooth transitions)
4015/// - Activation function variants
4016/// - Anti-aliasing
4017/// - Soft thresholding
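///
/// A scalar sketch (t is clamped before the Hermite polynomial; reference
/// only):
///
/// ```
/// fn smoothstep_ref(e0: f64, e1: f64, x: f64) -> f64 {
///     let t = ((x - e0) / (e1 - e0)).clamp(0.0, 1.0);
///     t * t * (3.0 - 2.0 * t) // 3t² - 2t³
/// }
///
/// assert!((smoothstep_ref(0.0, 1.0, 0.25) - 0.15625).abs() < 1e-12);
/// ```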
4018pub fn smoothstep_simd<F>(edge0: F, edge1: F, x: &ArrayView1<F>) -> Array1<F>
4019where
4020    F: Float + SimdUnifiedOps,
4021{
4022    if x.is_empty() {
4023        return Array1::zeros(0);
4024    }
4025
4026    let optimizer = AutoOptimizer::new();
4027    if optimizer.should_use_simd(x.len()) {
4028        return F::simd_smoothstep(edge0, edge1, x);
4029    }
4030
4031    // Small arrays: simd_smoothstep falls back to scalar internally
4032    F::simd_smoothstep(edge0, edge1, x)
4033}
4034
4035/// SIMD-accelerated hypotenuse calculation
4036///
4037/// Computes element-wise hypotenuse: hypot(x, y) = sqrt(x² + y²)
4038/// Uses the standard library implementation which handles overflow/underflow correctly.
4039///
4040/// # Arguments
4041/// * `x` - First coordinate values
4042/// * `y` - Second coordinate values
4043///
4044/// # Returns
4045/// Array of hypotenuse values
4046///
4047/// # Examples
4048/// ```
4049/// use scirs2_core::ndarray::{array, Array1};
4050/// use scirs2_core::ndarray_ext::elementwise::hypot_simd;
4051///
4052/// let x = array![3.0_f64, 5.0, 8.0];
4053/// let y = array![4.0_f64, 12.0, 15.0];
4054///
4055/// let result = hypot_simd::<f64>(&x.view(), &y.view());
4056/// assert!((result[0] - 5.0).abs() < 1e-14);   // 3-4-5 triangle
4057/// assert!((result[1] - 13.0).abs() < 1e-14);  // 5-12-13 triangle
4058/// assert!((result[2] - 17.0).abs() < 1e-14);  // 8-15-17 triangle
4059/// ```
4060///
4061/// # Use Cases
4062/// - Distance calculations in 2D
4063/// - Computing vector magnitudes
4064/// - Complex number modulus: |a+bi| = hypot(a, b)
4065/// - Graphics and physics simulations
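///
/// Why the dedicated routine matters (a sketch with std's scalar
/// `f64::hypot`): the naive formula overflows long before the true result
/// does:
///
/// ```
/// let x = 1e200_f64;
/// assert!((x * x + x * x).sqrt().is_infinite()); // x² = 1e400 overflows
/// assert!(x.hypot(x).is_finite());               // ≈ 1.414e200
/// ```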
4066pub fn hypot_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> Array1<F>
4067where
4068    F: Float + SimdUnifiedOps,
4069{
4070    if x.is_empty() || y.is_empty() {
4071        return Array1::zeros(0);
4072    }
4073
4074    let optimizer = AutoOptimizer::new();
4075    if optimizer.should_use_simd(x.len()) {
4076        return F::simd_hypot(x, y);
4077    }
4078
4079    // Small arrays: simd_hypot falls back to scalar internally
4080    F::simd_hypot(x, y)
4081}
4082
4083/// SIMD-accelerated copysign operation
4084///
4085/// Returns element-wise magnitude of x with the sign of y.
4086///
4087/// # Arguments
4088/// * `x` - Magnitude source values
4089/// * `y` - Sign source values
4090///
4091/// # Returns
4092/// Array where each element has the magnitude of x and sign of y
4093///
4094/// # Examples
4095/// ```
4096/// use scirs2_core::ndarray::{array, Array1};
4097/// use scirs2_core::ndarray_ext::elementwise::copysign_simd;
4098///
4099/// let x = array![1.0_f64, -2.0, 3.0, -4.0];
4100/// let y = array![-1.0_f64, 1.0, 1.0, -1.0];
4101///
4102/// let result = copysign_simd::<f64>(&x.view(), &y.view());
4103/// assert!((result[0] - (-1.0)).abs() < 1e-14);  // 1 with sign of -1 = -1
4104/// assert!((result[1] - 2.0).abs() < 1e-14);     // -2 with sign of 1 = 2
4105/// assert!((result[2] - 3.0).abs() < 1e-14);     // 3 with sign of 1 = 3
4106/// assert!((result[3] - (-4.0)).abs() < 1e-14); // -4 with sign of -1 = -4
4107/// ```
4108///
4109/// # Use Cases
4110/// - Sign manipulation in numerical algorithms
4111/// - Implementing special functions (reflection formula)
4112/// - Gradient sign propagation
4113pub fn copysign_simd<F>(x: &ArrayView1<F>, y: &ArrayView1<F>) -> Array1<F>
4114where
4115    F: Float + SimdUnifiedOps,
4116{
4117    if x.is_empty() || y.is_empty() {
4118        return Array1::zeros(0);
4119    }
4120
4121    let optimizer = AutoOptimizer::new();
4122    if optimizer.should_use_simd(x.len()) {
4123        return F::simd_copysign(x, y);
4124    }
4125
4126    // Small arrays: simd_copysign falls back to scalar internally
4127    F::simd_copysign(x, y)
4128}
4129
4130/// SIMD-accelerated smootherstep interpolation (Ken Perlin's improved version)
4131///
4132/// Returns smooth Hermite interpolation with second-order continuity.
4133/// Formula: 6t⁵ - 15t⁴ + 10t³ where t = (x - edge0) / (edge1 - edge0)
4134///
4135/// Unlike smoothstep, both the first AND second derivatives are zero at the boundaries,
4136/// making it ideal for procedural generation and high-quality animations.
4137///
4138/// # Arguments
4139/// * `edge0` - Lower edge of the transition
4140/// * `edge1` - Upper edge of the transition
4141/// * `x` - Input values
4142///
4143/// # Returns
4144/// Array of smootherstep values in [0, 1]
4145///
4146/// # Examples
4147/// ```
4148/// use scirs2_core::ndarray::{array, Array1};
4149/// use scirs2_core::ndarray_ext::elementwise::smootherstep_simd;
4150///
4151/// let x = array![0.0_f64, 0.25, 0.5, 0.75, 1.0];
4152///
4153/// let result = smootherstep_simd::<f64>(0.0, 1.0, &x.view());
4154/// assert!((result[0] - 0.0).abs() < 1e-14);  // x=0 -> 0
4155/// assert!((result[2] - 0.5).abs() < 1e-14);  // x=0.5 -> 0.5
4156/// assert!((result[4] - 1.0).abs() < 1e-14);  // x=1 -> 1
4157/// ```
4158///
4159/// # Use Cases
4160/// - Perlin noise and procedural generation
4161/// - High-quality animation easing
4162/// - Shader programming (smooth lighting transitions)
4163/// - Gradient-based optimization (smoother loss landscapes)
4164pub fn smootherstep_simd<F>(edge0: F, edge1: F, x: &ArrayView1<F>) -> Array1<F>
4165where
4166    F: Float + SimdUnifiedOps,
4167{
4168    if x.is_empty() {
4169        return Array1::zeros(0);
4170    }
4171
4172    let optimizer = AutoOptimizer::new();
4173    if optimizer.should_use_simd(x.len()) {
4174        return F::simd_smootherstep(edge0, edge1, x);
4175    }
4176
4177    // Small arrays: simd_smootherstep falls back to scalar internally
4178    F::simd_smootherstep(edge0, edge1, x)
4179}
4180
4181/// SIMD-accelerated logaddexp: log(exp(a) + exp(b))
4182///
4183/// Computes the logarithm of the sum of exponentials in a numerically stable way.
4184/// Uses the identity: log(exp(a) + exp(b)) = max(a,b) + log(1 + exp(-|a-b|))
4185///
4186/// # Arguments
4187/// * `a` - First input values
4188/// * `b` - Second input values
4189///
4190/// # Returns
4191/// Array of log(exp(a) + exp(b)) values
4192///
4193/// # Examples
4194/// ```
4195/// use scirs2_core::ndarray::{array, Array1};
4196/// use scirs2_core::ndarray_ext::elementwise::logaddexp_simd;
4197///
4198/// let a = array![0.0_f64, 1.0, 2.0];
4199/// let b = array![0.0_f64, 1.0, 2.0];
4200///
4201/// let result = logaddexp_simd::<f64>(&a.view(), &b.view());
4202/// // log(exp(0) + exp(0)) = log(2) ≈ 0.693
4203/// assert!((result[0] - 2.0_f64.ln()).abs() < 1e-14);
4204/// ```
4205///
4206/// # Use Cases
4207/// - Log-probability computations (Bayesian inference)
4208/// - Log-likelihood calculations in ML
4209/// - Hidden Markov Model algorithms
4210/// - Cross-entropy loss functions
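///
/// A scalar sketch of the stable identity (reference only):
///
/// ```
/// fn logaddexp_ref(a: f64, b: f64) -> f64 {
///     let (hi, lo) = if a > b { (a, b) } else { (b, a) };
///     hi + (lo - hi).exp().ln_1p() // max + log(1 + exp(-|a-b|))
/// }
///
/// // Works where the naive form exp(1000) would overflow:
/// assert!((logaddexp_ref(1000.0, 1000.0) - (1000.0 + 2.0_f64.ln())).abs() < 1e-9);
/// ```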
4211pub fn logaddexp_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
4212where
4213    F: Float + SimdUnifiedOps,
4214{
4215    if a.is_empty() || b.is_empty() {
4216        return Array1::zeros(0);
4217    }
4218
4219    let optimizer = AutoOptimizer::new();
4220    if optimizer.should_use_simd(a.len()) {
4221        return F::simd_logaddexp(a, b);
4222    }
4223
4224    // Small arrays: simd_logaddexp falls back to scalar internally
4225    F::simd_logaddexp(a, b)
4226}
4227
4228/// SIMD-accelerated logit function: log(p / (1-p))
4229///
4230/// The logit function maps probabilities in (0, 1) to log-odds in (-∞, +∞).
4231/// It is the inverse of the sigmoid (logistic) function.
4232///
4233/// # Arguments
4234/// * `a` - Probability values in (0, 1)
4235///
4236/// # Returns
4237/// Array of log-odds values
4238///
4239/// # Examples
4240/// ```
4241/// use scirs2_core::ndarray::{array, Array1};
4242/// use scirs2_core::ndarray_ext::elementwise::logit_simd;
4243///
4244/// let p = array![0.5_f64, 0.1, 0.9];
4245///
4246/// let result = logit_simd::<f64>(&p.view());
4247/// // logit(0.5) = log(1) = 0
4248/// assert!((result[0] - 0.0).abs() < 1e-14);
4249/// // logit(0.1) = log(0.1/0.9) ≈ -2.197
4250/// assert!((result[1] - (0.1_f64 / 0.9).ln()).abs() < 1e-14);
4251/// ```
4252///
4253/// # Use Cases
4254/// - Logistic regression
4255/// - Probability calibration
4256/// - Converting probabilities to unbounded space
4257/// - Statistical modeling (link functions)
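///
/// Round-trip with the sigmoid, its inverse (scalar sketch, reference only):
///
/// ```
/// fn sigmoid(z: f64) -> f64 { 1.0 / (1.0 + (-z).exp()) }
/// fn logit_ref(p: f64) -> f64 { (p / (1.0 - p)).ln() }
///
/// let p = 0.3_f64;
/// assert!((sigmoid(logit_ref(p)) - p).abs() < 1e-12);
/// ```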
4258pub fn logit_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4259where
4260    F: Float + SimdUnifiedOps,
4261{
4262    if a.is_empty() {
4263        return Array1::zeros(0);
4264    }
4265
4266    let optimizer = AutoOptimizer::new();
4267    if optimizer.should_use_simd(a.len()) {
4268        return F::simd_logit(a);
4269    }
4270
4271    // Small arrays: simd_logit falls back to scalar internally
4272    F::simd_logit(a)
4273}
4274
4275/// SIMD-accelerated element-wise square: x²
4276///
4277/// Computes the square of each element. This is more efficient than `pow(x, 2)`
4278/// since it avoids the overhead of general exponentiation.
4279///
4280/// # Arguments
4281/// * `a` - Input values
4282///
4283/// # Returns
4284/// Array of squared values
4285///
4286/// # Examples
4287/// ```
4288/// use scirs2_core::ndarray::{array, Array1};
4289/// use scirs2_core::ndarray_ext::elementwise::square_simd;
4290///
4291/// let x = array![1.0_f64, 2.0, 3.0, -4.0];
4292///
4293/// let result = square_simd::<f64>(&x.view());
4294/// assert!((result[0] - 1.0).abs() < 1e-14);   // 1² = 1
4295/// assert!((result[1] - 4.0).abs() < 1e-14);   // 2² = 4
4296/// assert!((result[2] - 9.0).abs() < 1e-14);   // 3² = 9
4297/// assert!((result[3] - 16.0).abs() < 1e-14);  // (-4)² = 16
4298/// ```
4299///
4300/// # Use Cases
4301/// - Computing squared distances
4302/// - Variance calculations (sum of squared deviations)
4303/// - Loss functions (MSE, L2 regularization)
4304/// - Polynomial evaluation
4305pub fn square_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4306where
4307    F: Float + SimdUnifiedOps,
4308{
4309    if a.is_empty() {
4310        return Array1::zeros(0);
4311    }
4312
4313    let optimizer = AutoOptimizer::new();
4314    if optimizer.should_use_simd(a.len()) {
4315        return F::simd_square(a);
4316    }
4317
4318    // Small arrays: simd_square falls back to scalar internally
4319    F::simd_square(a)
4320}
4321
4322/// SIMD-accelerated inverse square root: 1/sqrt(x)
4323///
4324/// Computes the reciprocal of the square root for each element.
4325/// This operation is fundamental in graphics and physics simulations.
4326///
4327/// # Arguments
4328/// * `a` - Input values (should be positive)
4329///
4330/// # Returns
4331/// Array of 1/sqrt(x) values
4332/// - Returns INFINITY for x = 0
4333/// - Returns NaN for x < 0
4334///
4335/// # Examples
4336/// ```
4337/// use scirs2_core::ndarray::{array, Array1};
4338/// use scirs2_core::ndarray_ext::elementwise::rsqrt_simd;
4339///
4340/// let x = array![1.0_f64, 4.0, 9.0, 16.0];
4341///
4342/// let result = rsqrt_simd::<f64>(&x.view());
4343/// assert!((result[0] - 1.0).abs() < 1e-14);     // 1/sqrt(1) = 1
4344/// assert!((result[1] - 0.5).abs() < 1e-14);     // 1/sqrt(4) = 0.5
4345/// assert!((result[2] - 1.0/3.0).abs() < 1e-14); // 1/sqrt(9) = 1/3
4346/// assert!((result[3] - 0.25).abs() < 1e-14);    // 1/sqrt(16) = 0.25
4347/// ```
4348///
4349/// # Use Cases
4350/// - Vector normalization: v_normalized = v * rsqrt(dot(v, v))
4351/// - Quaternion normalization in 3D graphics
4352/// - Inverse distance weighting
4353/// - Fast approximate physics simulations
4354pub fn rsqrt_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4355where
4356    F: Float + SimdUnifiedOps,
4357{
4358    if a.is_empty() {
4359        return Array1::zeros(0);
4360    }
4361
4362    let optimizer = AutoOptimizer::new();
4363    if optimizer.should_use_simd(a.len()) {
4364        return F::simd_rsqrt(a);
4365    }
4366
4367    // Small arrays: simd_rsqrt falls back to scalar internally
4368    F::simd_rsqrt(a)
4369}
4370
4371/// SIMD-accelerated simultaneous sin and cos computation
4372///
4373/// Computes both sine and cosine of each element in a single pass.
4374/// This is more efficient than calling sin and cos separately because
4375/// many implementations can compute both with minimal additional overhead.
4376///
4377/// # Arguments
4378/// * `a` - Input angles in radians
4379///
4380/// # Returns
4381/// Tuple of (sin_array, cos_array)
4382///
4383/// # Examples
4384/// ```
4385/// use scirs2_core::ndarray::{array, Array1};
4386/// use scirs2_core::ndarray_ext::elementwise::sincos_simd;
4387/// use std::f64::consts::PI;
4388///
4389/// let x = array![0.0_f64, PI/6.0, PI/4.0, PI/2.0];
4390///
4391/// let (sin_result, cos_result) = sincos_simd::<f64>(&x.view());
4392/// assert!((sin_result[0] - 0.0).abs() < 1e-14);        // sin(0) = 0
4393/// assert!((cos_result[0] - 1.0).abs() < 1e-14);        // cos(0) = 1
4394/// assert!((sin_result[1] - 0.5).abs() < 1e-14);        // sin(π/6) = 0.5
4395/// assert!((sin_result[3] - 1.0).abs() < 1e-14);        // sin(π/2) = 1
4396/// assert!(cos_result[3].abs() < 1e-14);                // cos(π/2) ≈ 0
4397/// ```
4398///
4399/// # Use Cases
4400/// - Rotation matrices (need both sin and cos)
4401/// - Complex number operations: e^(iθ) = cos(θ) + i·sin(θ)
4402/// - Fourier transform calculations
4403/// - Wave simulations
4404/// - Animation and interpolation (circular motion)
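///
/// Typical usage, building a 2-D rotation from a single call (sketch):
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sincos_simd;
///
/// let angles = array![std::f64::consts::FRAC_PI_2];
/// let (s, c) = sincos_simd::<f64>(&angles.view());
///
/// // Rotate the point (1, 0) by 90°: (x·c - y·s, x·s + y·c) = (0, 1)
/// let (x, y) = (1.0, 0.0);
/// let rotated = (x * c[0] - y * s[0], x * s[0] + y * c[0]);
/// assert!(rotated.0.abs() < 1e-12 && (rotated.1 - 1.0).abs() < 1e-12);
/// ```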
4405pub fn sincos_simd<F>(a: &ArrayView1<F>) -> (Array1<F>, Array1<F>)
4406where
4407    F: Float + SimdUnifiedOps,
4408{
4409    if a.is_empty() {
4410        return (Array1::zeros(0), Array1::zeros(0));
4411    }
4412
4413    let optimizer = AutoOptimizer::new();
4414    if optimizer.should_use_simd(a.len()) {
4415        return F::simd_sincos(a);
4416    }
4417
4418    // Small arrays: simd_sincos falls back to scalar internally
4419    F::simd_sincos(a)
4420}
4421
4422/// SIMD-accelerated numerically stable exp(x) - 1
4423///
4424/// Computes exp(x) - 1 accurately for small x values where the direct
4425/// calculation `exp(x) - 1` would suffer from catastrophic cancellation.
4426/// For |x| < 1e-10, the result is approximately x (Taylor expansion).
4427///
4428/// # Arguments
4429/// * `a` - Input values
4430///
4431/// # Returns
4432/// Array of exp(x) - 1 values
4433///
4434/// # Examples
4435/// ```
4436/// use scirs2_core::ndarray::{array, Array1};
4437/// use scirs2_core::ndarray_ext::elementwise::expm1_simd;
4438///
4439/// let x = array![0.0_f64, 1e-15, 1.0, -1.0];
4440///
4441/// let result = expm1_simd::<f64>(&x.view());
4442/// // exp(0) - 1 = 0
4443/// assert!((result[0] - 0.0).abs() < 1e-14);
4444/// // For small x: exp(x) - 1 ≈ x
4445/// assert!((result[1] - 1e-15).abs() < 1e-29);
4446/// // exp(1) - 1 ≈ 1.718
4447/// assert!((result[2] - (1.0_f64.exp() - 1.0)).abs() < 1e-14);
4448/// ```
4449///
4450/// # Use Cases
4451/// - Financial calculations (compound interest for small rates)
4452/// - Numerical integration (avoiding cancellation errors)
4453/// - Statistical distributions (Poisson, exponential)
4454/// - Machine learning (softplus: log(1 + exp(x)))
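///
/// Why the naive form fails, sketched with std's scalar `f64::exp_m1`:
///
/// ```
/// let x = 1e-15_f64;
/// let naive = x.exp() - 1.0; // catastrophic cancellation near 1.0
/// let stable = x.exp_m1();
///
/// assert!((stable - x).abs() < 1e-29); // essentially exact
/// assert!((naive - x).abs() > 1e-17);  // off by roughly 10%
/// ```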
4455pub fn expm1_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4456where
4457    F: Float + SimdUnifiedOps,
4458{
4459    if a.is_empty() {
4460        return Array1::zeros(0);
4461    }
4462
4463    let optimizer = AutoOptimizer::new();
4464    if optimizer.should_use_simd(a.len()) {
4465        return F::simd_expm1(a);
4466    }
4467
4468    // Small arrays: simd_expm1 falls back to scalar internally
4469    F::simd_expm1(a)
4470}
4471
4472/// SIMD-accelerated numerically stable ln(1 + x)
4473///
4474/// Computes ln(1 + x) accurately for small x values where the direct
4475/// calculation `(1 + x).ln()` would suffer from catastrophic cancellation.
4476/// For |x| < 1e-10, the result is approximately x - x²/2 (Taylor expansion).
4477///
4478/// # Arguments
4479/// * `a` - Input values (should be > -1)
4480///
4481/// # Returns
4482/// Array of ln(1 + x) values
4483/// - Returns -∞ for x = -1
4484/// - Returns NaN for x < -1
4485///
4486/// # Examples
4487/// ```
4488/// use scirs2_core::ndarray::{array, Array1};
4489/// use scirs2_core::ndarray_ext::elementwise::log1p_simd;
4490///
4491/// let x = array![0.0_f64, 1e-15, 1.0, -0.5];
4492///
4493/// let result = log1p_simd::<f64>(&x.view());
4494/// // ln(1 + 0) = 0
4495/// assert!((result[0] - 0.0).abs() < 1e-14);
4496/// // For small x: ln(1 + x) ≈ x
4497/// assert!((result[1] - 1e-15).abs() < 1e-29);
4498/// // ln(2) ≈ 0.693
4499/// assert!((result[2] - 2.0_f64.ln()).abs() < 1e-14);
4500/// ```
4501///
4502/// # Use Cases
4503/// - Log-probability calculations (log(1 - p) for small p)
4504/// - Numerical integration
4505/// - Statistical distributions
4506/// - Machine learning (binary cross-entropy: -y·log(p) - (1-y)·log(1-p))
4507pub fn log1p_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4508where
4509    F: Float + SimdUnifiedOps,
4510{
4511    if a.is_empty() {
4512        return Array1::zeros(0);
4513    }
4514
4515    let optimizer = AutoOptimizer::new();
4516    if optimizer.should_use_simd(a.len()) {
4517        return F::simd_log1p(a);
4518    }
4519
4520    // Small arrays: simd_log1p falls back to scalar internally
4521    F::simd_log1p(a)
4522}
4523
4524/// SIMD-accelerated element-wise clipping (clamping)
4525///
4526/// Clips (limits) each element to be within [min_val, max_val].
4527/// Values below min_val become min_val, values above max_val become max_val.
4528///
4529/// # Arguments
4530/// * `a` - Input array
4531/// * `min_val` - Minimum value (lower bound)
4532/// * `max_val` - Maximum value (upper bound)
4533///
4534/// # Returns
4535/// Array with values clipped to [min_val, max_val]
4536///
4537/// # Examples
4538/// ```
4539/// use scirs2_core::ndarray::{array, Array1};
4540/// use scirs2_core::ndarray_ext::elementwise::clip_simd;
4541///
4542/// let x = array![-2.0_f64, -1.0, 0.0, 1.0, 2.0, 3.0];
4543///
4544/// let result = clip_simd::<f64>(&x.view(), 0.0, 1.0);
4545/// assert!((result[0] - 0.0).abs() < 1e-14);  // -2 clipped to 0
4546/// assert!((result[2] - 0.0).abs() < 1e-14);  // 0 unchanged
4547/// assert!((result[3] - 1.0).abs() < 1e-14);  // 1 unchanged
4548/// assert!((result[5] - 1.0).abs() < 1e-14);  // 3 clipped to 1
4549/// ```
4550///
4551/// # Use Cases
4552/// - Gradient clipping in neural networks
4553/// - Image processing (pixel value clamping)
4554/// - Bounded optimization
4555/// - Data normalization preprocessing
4556pub fn clip_simd<F>(a: &ArrayView1<F>, min_val: F, max_val: F) -> Array1<F>
4557where
4558    F: Float + SimdUnifiedOps,
4559{
4560    if a.is_empty() {
4561        return Array1::zeros(0);
4562    }
4563
4564    let optimizer = AutoOptimizer::new();
4565    if optimizer.should_use_simd(a.len()) {
4566        return F::simd_clip(a, min_val, max_val);
4567    }
4568
4569    // Small arrays: simd_clip falls back to scalar internally
4570    F::simd_clip(a, min_val, max_val)
4571}
4572
4573/// SIMD-accelerated cumulative sum (prefix sum)
4574///
4575/// Computes the cumulative sum of elements. `result[i] = sum(a[0..=i])`
4576///
4577/// # Arguments
4578/// * `a` - Input array
4579///
4580/// # Returns
4581/// Array of cumulative sums
4582///
4583/// # Examples
4584/// ```
4585/// use scirs2_core::ndarray::{array, Array1};
4586/// use scirs2_core::ndarray_ext::elementwise::cumsum_simd;
4587///
4588/// let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
4589///
4590/// let result = cumsum_simd::<f64>(&x.view());
4591/// assert!((result[0] - 1.0).abs() < 1e-14);   // 1
4592/// assert!((result[1] - 3.0).abs() < 1e-14);   // 1+2
4593/// assert!((result[2] - 6.0).abs() < 1e-14);   // 1+2+3
4594/// assert!((result[3] - 10.0).abs() < 1e-14);  // 1+2+3+4
4595/// assert!((result[4] - 15.0).abs() < 1e-14);  // 1+2+3+4+5
4596/// ```
4597///
4598/// # Use Cases
4599/// - Computing CDF from PDF
4600/// - Running totals and statistics
4601/// - Parallel prefix algorithms
4602/// - Integral image computation
4603pub fn cumsum_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4604where
4605    F: Float + SimdUnifiedOps,
4606{
4607    if a.is_empty() {
4608        return Array1::zeros(0);
4609    }
4610
4611    let optimizer = AutoOptimizer::new();
4612    if optimizer.should_use_simd(a.len()) {
4613        return F::simd_cumsum(a);
4614    }
4615
4616    // Small arrays: simd_cumsum falls back to scalar internally
4617    F::simd_cumsum(a)
4618}
4619
4620/// SIMD-accelerated cumulative product
4621///
4622/// Computes the cumulative product of elements. `result[i] = product(a[0..=i])`
4623///
4624/// # Arguments
4625/// * `a` - Input array
4626///
4627/// # Returns
4628/// Array of cumulative products
4629///
4630/// # Examples
4631/// ```
4632/// use scirs2_core::ndarray::{array, Array1};
4633/// use scirs2_core::ndarray_ext::elementwise::cumprod_simd;
4634///
4635/// let x = array![1.0_f64, 2.0, 3.0, 4.0];
4636///
4637/// let result = cumprod_simd::<f64>(&x.view());
4638/// assert!((result[0] - 1.0).abs() < 1e-14);   // 1
4639/// assert!((result[1] - 2.0).abs() < 1e-14);   // 1*2
4640/// assert!((result[2] - 6.0).abs() < 1e-14);   // 1*2*3
4641/// assert!((result[3] - 24.0).abs() < 1e-14);  // 1*2*3*4 = 4!
4642/// ```
4643///
4644/// # Use Cases
4645/// - Computing factorials and permutations
4646/// - Product of survival probabilities
4647/// - Chain rule in automatic differentiation
4648/// - Compound interest calculations
4649pub fn cumprod_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4650where
4651    F: Float + SimdUnifiedOps,
4652{
4653    if a.is_empty() {
4654        return Array1::zeros(0);
4655    }
4656
4657    let optimizer = AutoOptimizer::new();
4658    if optimizer.should_use_simd(a.len()) {
4659        return F::simd_cumprod(a);
4660    }
4661
4662    // Small arrays: simd_cumprod falls back to scalar internally
4663    F::simd_cumprod(a)
4664}
4665
4666/// SIMD-accelerated first-order difference
4667///
4668/// Computes the first-order finite difference: `result[i] = a[i+1] - a[i]`
4669/// The output has length n-1 for input of length n.
4670///
4671/// # Arguments
4672/// * `a` - Input array (length >= 2)
4673///
4674/// # Returns
4675/// Array of differences (length n-1)
4676///
4677/// # Examples
4678/// ```
4679/// use scirs2_core::ndarray::{array, Array1};
4680/// use scirs2_core::ndarray_ext::elementwise::diff_simd;
4681///
4682/// let x = array![1.0_f64, 3.0, 6.0, 10.0, 15.0];
4683///
4684/// let result = diff_simd::<f64>(&x.view());
4685/// assert_eq!(result.len(), 4);  // n-1 elements
4686/// assert!((result[0] - 2.0).abs() < 1e-14);  // 3-1
4687/// assert!((result[1] - 3.0).abs() < 1e-14);  // 6-3
4688/// assert!((result[2] - 4.0).abs() < 1e-14);  // 10-6
4689/// assert!((result[3] - 5.0).abs() < 1e-14);  // 15-10
4690/// ```
4691///
4692/// # Use Cases
4693/// - Numerical differentiation
4694/// - Detecting changes in time series
4695/// - Computing gradients
4696/// - Edge detection in signal processing
4697pub fn diff_simd<F>(a: &ArrayView1<F>) -> Array1<F>
4698where
4699    F: Float + SimdUnifiedOps,
4700{
4701    if a.len() < 2 {
4702        return Array1::zeros(0);
4703    }
4704
4705    let optimizer = AutoOptimizer::new();
4706    if optimizer.should_use_simd(a.len()) {
4707        return F::simd_diff(a);
4708    }
4709
4710    // Small arrays: simd_diff falls back to scalar internally
4711    F::simd_diff(a)
4712}
4713
4714// =============================================================================
4715// Phase 70: Statistical Functions
4716// =============================================================================
4717
4718/// SIMD-accelerated variance computation
4719///
4720/// Computes the sample variance: Var(x) = sum((x - mean)²) / (n-1)
4721/// Uses Bessel's correction for unbiased estimation.
4722///
4723/// # Arguments
4724/// * `a` - Input array
4725///
4726/// # Returns
4727/// Sample variance of the array
4728///
4729/// # Examples
4730/// ```
4731/// use scirs2_core::ndarray::array;
4732/// use scirs2_core::ndarray_ext::elementwise::variance_simd;
4733///
4734/// let x = array![2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
4735/// let var = variance_simd::<f64>(&x.view());
4736/// // Sample variance: sum of squared deviations = 32, n = 8
4737/// // var = 32 / (8-1) = 32/7 ≈ 4.571
4738/// let expected = 32.0 / 7.0;
4739/// assert!((var - expected).abs() < 1e-10);
4740/// ```
4741///
4742/// # Use Cases
4743/// - Statistical analysis
4744/// - Quality control (process capability)
4745/// - Risk assessment (financial variance)
4746/// - Feature scaling (standardization)
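///
/// A scalar two-pass reference sketch of the same definition (assumes
/// n ≥ 2; reference only):
///
/// ```
/// fn variance_ref(a: &[f64]) -> f64 {
///     let n = a.len() as f64;
///     let mean = a.iter().sum::<f64>() / n;
///     a.iter().map(|x| (x - mean).powi(2)).sum::<f64>() / (n - 1.0)
/// }
///
/// let v = variance_ref(&[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]);
/// assert!((v - 32.0 / 7.0).abs() < 1e-12);
/// ```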
4747pub fn variance_simd<F>(a: &ArrayView1<F>) -> F
4748where
4749    F: Float + SimdUnifiedOps,
4750{
4751    if a.is_empty() {
4752        return F::zero();
4753    }
4754
4755    F::simd_variance(a)
4756}
4757
4758/// SIMD-accelerated standard deviation computation
4759///
4760/// Computes the sample standard deviation: std = sqrt(sample_variance)
4761///
4762/// # Arguments
4763/// * `a` - Input array
4764///
4765/// # Returns
4766/// Sample standard deviation
4767///
4768/// # Examples
4769/// ```
4770/// use scirs2_core::ndarray::array;
4771/// use scirs2_core::ndarray_ext::elementwise::std_simd;
4772///
4773/// let x = array![2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
4774/// let s = std_simd::<f64>(&x.view());
4775/// // Sample std = sqrt(sample variance) = sqrt(32/7) ≈ 2.138
4776/// let expected = (32.0_f64 / 7.0).sqrt();
4777/// assert!((s - expected).abs() < 1e-10);
4778/// ```
4779///
4780/// # Use Cases
4781/// - Volatility measurement in finance
4782/// - Error analysis in experiments
4783/// - Gaussian distribution parameters
4784/// - Confidence interval calculations
4785pub fn std_simd<F>(a: &ArrayView1<F>) -> F
4786where
4787    F: Float + SimdUnifiedOps,
4788{
4789    if a.is_empty() {
4790        return F::zero();
4791    }
4792
4793    F::simd_std(a)
4794}
4795
4796/// SIMD-accelerated array sum
4797///
4798/// Computes the sum of all elements in the array.
4799/// Uses Kahan summation for improved numerical accuracy.
4800///
4801/// # Arguments
4802/// * `a` - Input array
4803///
4804/// # Returns
4805/// Sum of all elements
4806///
4807/// # Examples
4808/// ```
4809/// use scirs2_core::ndarray::array;
4810/// use scirs2_core::ndarray_ext::elementwise::sum_simd;
4811///
4812/// let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
4813/// let s = sum_simd::<f64>(&x.view());
4814/// assert!((s - 15.0).abs() < 1e-14);
4815/// ```
4816///
4817/// # Use Cases
4818/// - Computing totals
4819/// - Mean calculation component
4820/// - Probability mass functions
4821/// - Integration approximation
4822pub fn sum_simd<F>(a: &ArrayView1<F>) -> F
4823where
4824    F: Float + SimdUnifiedOps,
4825{
4826    if a.is_empty() {
4827        return F::zero();
4828    }
4829
4830    F::simd_sum(a)
4831}
4832
4833/// SIMD-accelerated array mean
4834///
4835/// Computes the arithmetic mean: mean = sum(x) / n
4836///
4837/// # Arguments
4838/// * `a` - Input array
4839///
4840/// # Returns
4841/// Arithmetic mean of elements
4842///
4843/// # Examples
4844/// ```
4845/// use scirs2_core::ndarray::array;
4846/// use scirs2_core::ndarray_ext::elementwise::mean_simd;
4847///
4848/// let x = array![1.0_f64, 2.0, 3.0, 4.0, 5.0];
4849/// let m = mean_simd::<f64>(&x.view());
4850/// assert!((m - 3.0).abs() < 1e-14);
4851/// ```
4852///
4853/// # Use Cases
4854/// - Central tendency measurement
4855/// - Batch normalization (compute running mean)
4856/// - Signal averaging
4857/// - Expected value estimation
4858pub fn mean_simd<F>(a: &ArrayView1<F>) -> F
4859where
4860    F: Float + SimdUnifiedOps,
4861{
4862    if a.is_empty() {
4863        return F::zero();
4864    }
4865
4866    F::simd_mean(a)
4867}
4868
4869/// SIMD-accelerated weighted sum
4870///
4871/// Computes sum(values * weights).
4872///
4873/// # Arguments
4874/// * `values` - Values array
4875/// * `weights` - Weights array (same length as values)
4876///
4877/// # Returns
4878/// Weighted sum
4879///
4880/// # Examples
4881/// ```
4882/// use scirs2_core::ndarray::array;
4883/// use scirs2_core::ndarray_ext::elementwise::weighted_sum_simd;
4884///
4885/// let values = array![10.0_f64, 20.0, 30.0];
4886/// let weights = array![0.2_f64, 0.3, 0.5];
4887/// let ws = weighted_sum_simd::<f64>(&values.view(), &weights.view());
4888/// // 10*0.2 + 20*0.3 + 30*0.5 = 2 + 6 + 15 = 23
4889/// assert!((ws - 23.0).abs() < 1e-10);
4890/// ```
4891///
4892/// # Use Cases
4893/// - Portfolio valuation
4894/// - Weighted regression
4895/// - Attention mechanism scores
4896/// - Signal filtering
4897pub fn weighted_sum_simd<F>(values: &ArrayView1<F>, weights: &ArrayView1<F>) -> F
4898where
4899    F: Float + SimdUnifiedOps,
4900{
4901    if values.is_empty() || weights.is_empty() {
4902        return F::zero();
4903    }
4904
4905    F::simd_weighted_sum(values, weights)
4906}
4907
4908/// SIMD-accelerated weighted mean
4909///
4910/// Computes sum(values * weights) / sum(weights).
4911///
4912/// # Arguments
4913/// * `values` - Values array
4914/// * `weights` - Weights array (same length as values)
4915///
4916/// # Returns
4917/// Weighted mean
4918///
4919/// # Examples
4920/// ```
4921/// use scirs2_core::ndarray::array;
4922/// use scirs2_core::ndarray_ext::elementwise::weighted_mean_simd;
4923///
4924/// let values = array![10.0_f64, 20.0, 30.0];
4925/// let weights = array![1.0_f64, 2.0, 2.0];
4926/// let wm = weighted_mean_simd::<f64>(&values.view(), &weights.view());
4927/// // (10*1 + 20*2 + 30*2) / (1+2+2) = (10+40+60)/5 = 22
4928/// assert!((wm - 22.0).abs() < 1e-10);
4929/// ```
4930///
4931/// # Use Cases
4932/// - Grade point average calculation
4933/// - Portfolio weighted return
4934/// - Weighted sentiment analysis
4935/// - Attention-weighted representations
4936pub fn weighted_mean_simd<F>(values: &ArrayView1<F>, weights: &ArrayView1<F>) -> F
4937where
4938    F: Float + SimdUnifiedOps,
4939{
4940    if values.is_empty() || weights.is_empty() {
4941        return F::zero();
4942    }
4943
4944    F::simd_weighted_mean(values, weights)
4945}
4946
4947// =============================================================================
4948// Phase 71: Reduction Functions
4949// =============================================================================
4950
4951/// SIMD-accelerated maximum element
4952///
4953/// Finds the maximum value in the array.
4954///
4955/// # Arguments
4956/// * `a` - Input array
4957///
4958/// # Returns
4959/// Maximum element value (returns -∞ for an empty array)
4960///
4961/// # Examples
4962/// ```
4963/// use scirs2_core::ndarray::array;
4964/// use scirs2_core::ndarray_ext::elementwise::max_element_simd;
4965///
4966/// let x = array![3.0_f64, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
4967/// let max = max_element_simd::<f64>(&x.view());
4968/// assert!((max - 9.0).abs() < 1e-14);
4969/// ```
4970///
4971/// # Use Cases
4972/// - Finding peak values
4973/// - Range calculation
4974/// - Normalization (min-max scaling)
4975/// - Softmax stabilization (subtracting the max before exponentiating)
4976pub fn max_element_simd<F>(a: &ArrayView1<F>) -> F
4977where
4978    F: Float + SimdUnifiedOps,
4979{
4980    if a.is_empty() {
4981        return F::neg_infinity();
4982    }
4983
4984    F::simd_max_element(a)
4985}
4986
4987/// SIMD-accelerated minimum element
4988///
4989/// Finds the minimum value in the array.
4990///
4991/// # Arguments
4992/// * `a` - Input array
4993///
4994/// # Returns
4995/// Minimum element value (returns +∞ for an empty array)
4996///
4997/// # Examples
4998/// ```
4999/// use scirs2_core::ndarray::array;
5000/// use scirs2_core::ndarray_ext::elementwise::min_element_simd;
5001///
5002/// let x = array![3.0_f64, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
5003/// let min = min_element_simd::<f64>(&x.view());
5004/// assert!((min - 1.0).abs() < 1e-14);
5005/// ```
5006///
5007/// # Use Cases
5008/// - Finding minimum values
5009/// - Range calculation
5010/// - Clipping lower bounds
5011/// - Outlier detection
5012pub fn min_element_simd<F>(a: &ArrayView1<F>) -> F
5013where
5014    F: Float + SimdUnifiedOps,
5015{
5016    if a.is_empty() {
5017        return F::infinity();
5018    }
5019
5020    F::simd_min_element(a)
5021}
5022
5023/// SIMD-accelerated argmax (index of maximum)
5024///
5025/// Returns the index of the maximum element.
5026///
5027/// # Arguments
5028/// * `a` - Input array
5029///
5030/// # Returns
5031/// Some(index) of maximum element, or None if array is empty
5032///
5033/// # Examples
5034/// ```
5035/// use scirs2_core::ndarray::array;
5036/// use scirs2_core::ndarray_ext::elementwise::argmax_simd;
5037///
5038/// let x = array![3.0_f64, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
5039/// let idx = argmax_simd::<f64>(&x.view());
5040/// assert_eq!(idx, Some(5));  // x[5] = 9.0 is the maximum
5041/// ```
5042///
5043/// # Use Cases
5044/// - Classification prediction (class with highest probability)
5045/// - Finding optimal parameters
5046/// - Greedy selection algorithms
5047/// - Winner-take-all networks
5048pub fn argmax_simd<F>(a: &ArrayView1<F>) -> Option<usize>
5049where
5050    F: Float + SimdUnifiedOps,
5051{
5052    if a.is_empty() {
5053        return None;
5054    }
5055
5056    F::simd_argmax(a)
5057}
5058
5059/// SIMD-accelerated argmin (index of minimum)
5060///
5061/// Returns the index of the minimum element.
5062///
5063/// # Arguments
5064/// * `a` - Input array
5065///
5066/// # Returns
5067/// Some(index) of minimum element, or None if array is empty
5068///
5069/// # Examples
5070/// ```
5071/// use scirs2_core::ndarray::array;
5072/// use scirs2_core::ndarray_ext::elementwise::argmin_simd;
5073///
5074/// let x = array![3.0_f64, 1.0, 4.0, 1.0, 5.0, 9.0, 2.0, 6.0];
5075/// let idx = argmin_simd::<f64>(&x.view());
5076/// assert_eq!(idx, Some(1));  // x[1] = 1.0 is the minimum (first occurrence)
5077/// ```
5078///
5079/// # Use Cases
5080/// - Finding nearest neighbors
5081/// - Minimum loss selection
5082/// - Optimal path finding
5083/// - Resource allocation
5084pub fn argmin_simd<F>(a: &ArrayView1<F>) -> Option<usize>
5085where
5086    F: Float + SimdUnifiedOps,
5087{
5088    if a.is_empty() {
5089        return None;
5090    }
5091
5092    F::simd_argmin(a)
5093}
5094
5095/// SIMD-accelerated sum of squares
5096///
5097/// Computes sum(x²) efficiently.
5098///
5099/// # Arguments
5100/// * `a` - Input array
5101///
5102/// # Returns
5103/// Sum of squared elements
5104///
5105/// # Examples
5106/// ```
5107/// use scirs2_core::ndarray::array;
5108/// use scirs2_core::ndarray_ext::elementwise::sum_squares_simd;
5109///
5110/// let x = array![1.0_f64, 2.0, 3.0, 4.0];
5111/// let ss = sum_squares_simd::<f64>(&x.view());
5112/// // 1 + 4 + 9 + 16 = 30
5113/// assert!((ss - 30.0).abs() < 1e-14);
5114/// ```
5115///
5116/// # Use Cases
5117/// - L2 norm calculation (sqrt of sum of squares)
5118/// - Mean squared error
5119/// - Variance computation
5120/// - Energy of signal
5121pub fn sum_squares_simd<F>(a: &ArrayView1<F>) -> F
5122where
5123    F: Float + SimdUnifiedOps,
5124{
5125    if a.is_empty() {
5126        return F::zero();
5127    }
5128
5129    F::simd_sum_squares(a)
5130}
5131
5132/// SIMD-accelerated sum of cubes
5133///
5134/// Computes sum(x³) efficiently.
5135///
5136/// # Arguments
5137/// * `a` - Input array
5138///
5139/// # Returns
5140/// Sum of cubed elements
5141///
5142/// # Examples
5143/// ```
5144/// use scirs2_core::ndarray::array;
5145/// use scirs2_core::ndarray_ext::elementwise::sum_cubes_simd;
5146///
5147/// let x = array![1.0_f64, 2.0, 3.0];
5148/// let sc = sum_cubes_simd::<f64>(&x.view());
5149/// // 1 + 8 + 27 = 36
5150/// assert!((sc - 36.0).abs() < 1e-14);
5151/// ```
5152///
5153/// # Use Cases
5154/// - Higher-order moment calculation
5155/// - Skewness estimation
5156/// - Cubic interpolation
5157/// - Power sum computation
5158pub fn sum_cubes_simd<F>(a: &ArrayView1<F>) -> F
5159where
5160    F: Float + SimdUnifiedOps,
5161{
5162    if a.is_empty() {
5163        return F::zero();
5164    }
5165
5166    F::simd_sum_cubes(a)
5167}
5168
5169/// SIMD-accelerated log-sum-exp
5170///
5171/// Computes log(sum(exp(x))) in a numerically stable way.
5172/// Uses the identity: log(sum(exp(x))) = max(x) + log(sum(exp(x - max(x))))
5173///
5174/// # Arguments
5175/// * `a` - Input array
5176///
5177/// # Returns
5178/// Log of sum of exponentials (returns -∞ for an empty array)
5179///
5180/// # Examples
5181/// ```
5182/// use scirs2_core::ndarray::array;
5183/// use scirs2_core::ndarray_ext::elementwise::log_sum_exp_simd;
5184///
5185/// let x = array![1.0_f64, 2.0, 3.0];
5186/// let lse = log_sum_exp_simd::<f64>(&x.view());
5187/// // log(e^1 + e^2 + e^3) ≈ 3.407
5188/// let expected = (1.0_f64.exp() + 2.0_f64.exp() + 3.0_f64.exp()).ln();
5189/// assert!((lse - expected).abs() < 1e-10);
5190/// ```
///
/// # Use Cases
/// - Softmax denominator
/// - Log-probability computations
/// - Partition function in statistical mechanics
/// - Evidence lower bound (ELBO) in VAEs
pub fn log_sum_exp_simd<F>(a: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return F::neg_infinity();
    }

    F::simd_log_sum_exp(a)
}

// =============================================================================
// Phase 72: Norm and Distance Functions
// =============================================================================

/// SIMD-accelerated L2 norm (Euclidean norm)
///
/// Computes ||x||₂ = sqrt(sum(x²))
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// L2 (Euclidean) norm
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::norm_simd;
///
/// let x = array![3.0_f64, 4.0];
/// let n = norm_simd::<f64>(&x.view());
/// // sqrt(9 + 16) = 5
/// assert!((n - 5.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Vector magnitude
/// - Normalization preprocessing
/// - Regularization penalty
/// - Gradient clipping
pub fn norm_simd<F>(a: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return F::zero();
    }

    F::simd_norm(a)
}

/// SIMD-accelerated L1 norm (Manhattan norm)
///
/// Computes ||x||₁ = sum(|x|)
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// L1 (Manhattan) norm
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::norm_l1_simd;
///
/// let x = array![1.0_f64, -2.0, 3.0, -4.0];
/// let n = norm_l1_simd::<f64>(&x.view());
/// // |1| + |-2| + |3| + |-4| = 10
/// assert!((n - 10.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Sparse regularization (LASSO)
/// - Robust statistics
/// - Compressed sensing
/// - Feature selection
pub fn norm_l1_simd<F>(a: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return F::zero();
    }

    F::simd_norm_l1(a)
}

/// SIMD-accelerated L-infinity norm (Chebyshev/max norm)
///
/// Computes ||x||∞ = max(|x|)
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// L-infinity (max) norm
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::norm_linf_simd;
///
/// let x = array![1.0_f64, -5.0, 3.0, -2.0];
/// let n = norm_linf_simd::<f64>(&x.view());
/// // max(|1|, |-5|, |3|, |-2|) = 5
/// assert!((n - 5.0).abs() < 1e-14);
/// ```
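///
/// The three norms in this module obey ||x||∞ ≤ ||x||₂ ≤ ||x||₁ for any
/// vector; a quick consistency check:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{norm_simd, norm_l1_simd, norm_linf_simd};
///
/// let x = array![1.0_f64, -5.0, 3.0, -2.0];
/// let linf = norm_linf_simd::<f64>(&x.view()); // 5
/// let l2 = norm_simd::<f64>(&x.view());        // sqrt(39) ≈ 6.245
/// let l1 = norm_l1_simd::<f64>(&x.view());     // 11
/// assert!(linf <= l2 && l2 <= l1);
/// ```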
///
/// # Use Cases
/// - Gradient clipping by max norm
/// - Adversarial robustness (L∞ perturbations)
/// - Convergence criteria
/// - Worst-case analysis
pub fn norm_linf_simd<F>(a: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return F::zero();
    }

    F::simd_norm_linf(a)
}

/// SIMD-accelerated Euclidean distance
///
/// Computes ||a - b||₂ = sqrt(sum((a - b)²))
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Euclidean distance between vectors
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::distance_euclidean_simd;
///
/// let a = array![0.0_f64, 0.0];
/// let b = array![3.0_f64, 4.0];
/// let d = distance_euclidean_simd::<f64>(&a.view(), &b.view());
/// assert!((d - 5.0).abs() < 1e-14);
/// ```
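///
/// Equivalently, the distance is the L2 norm of the difference vector;
/// a sketch composing `sub_simd` and `norm_simd` from this module:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{distance_euclidean_simd, norm_simd, sub_simd};
///
/// let a = array![0.0_f64, 0.0];
/// let b = array![3.0_f64, 4.0];
/// let diff = sub_simd::<f64>(&a.view(), &b.view());
/// let d = distance_euclidean_simd::<f64>(&a.view(), &b.view());
/// assert!((d - norm_simd::<f64>(&diff.view())).abs() < 1e-10);
/// ```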
///
/// # Use Cases
/// - K-means clustering
/// - Nearest neighbor search
/// - Physical distance measurement
/// - Embedding similarity
pub fn distance_euclidean_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::zero();
    }

    F::simd_distance_euclidean(a, b)
}

/// SIMD-accelerated Manhattan distance
///
/// Computes sum(|a - b|)
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Manhattan (L1) distance between vectors
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::distance_manhattan_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 0.0, 3.0];
/// let d = distance_manhattan_simd::<f64>(&a.view(), &b.view());
/// // |1-4| + |2-0| + |3-3| = 3 + 2 + 0 = 5
/// assert!((d - 5.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Grid-based path finding
/// - Robust distance metric
/// - Taxicab geometry
/// - Feature comparison
pub fn distance_manhattan_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::zero();
    }

    F::simd_distance_manhattan(a, b)
}

/// SIMD-accelerated Chebyshev distance
///
/// Computes max(|a - b|)
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Chebyshev (L∞) distance between vectors
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::distance_chebyshev_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 0.0, 3.0];
/// let d = distance_chebyshev_simd::<f64>(&a.view(), &b.view());
/// // max(|1-4|, |2-0|, |3-3|) = max(3, 2, 0) = 3
/// assert!((d - 3.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Chess king distance
/// - Maximum deviation
/// - Robust outlier detection
/// - Image processing
pub fn distance_chebyshev_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::zero();
    }

    F::simd_distance_chebyshev(a, b)
}

/// SIMD-accelerated cosine distance
///
/// Computes 1 - cosine_similarity(a, b)
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Cosine distance (0 = identical direction, 2 = opposite direction)
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::distance_cosine_simd;
///
/// let a = array![1.0_f64, 0.0, 0.0];
/// let b = array![0.0_f64, 1.0, 0.0];
/// let d = distance_cosine_simd::<f64>(&a.view(), &b.view());
/// // Orthogonal vectors have cosine similarity 0, distance 1
/// assert!((d - 1.0).abs() < 1e-10);
/// ```
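///
/// Anti-parallel vectors reach the maximum distance of 2:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::distance_cosine_simd;
///
/// let a = array![1.0_f64, 2.0];
/// let b = array![-1.0_f64, -2.0];
/// // cosine similarity is -1, so the distance is 1 - (-1) = 2
/// let d = distance_cosine_simd::<f64>(&a.view(), &b.view());
/// assert!((d - 2.0).abs() < 1e-10);
/// ```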
///
/// # Use Cases
/// - Text similarity (TF-IDF vectors)
/// - Recommendation systems
/// - Document clustering
/// - Image retrieval
pub fn distance_cosine_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::one();
    }

    F::simd_distance_cosine(a, b)
}

/// SIMD-accelerated cosine similarity
///
/// Computes (a · b) / (||a|| * ||b||)
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Cosine similarity (-1 to 1, where 1 = identical direction)
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::cosine_similarity_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![2.0_f64, 4.0, 6.0];
/// let sim = cosine_similarity_simd::<f64>(&a.view(), &b.view());
/// // Parallel vectors have cosine similarity 1
/// assert!((sim - 1.0).abs() < 1e-10);
/// ```
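///
/// The definition above can be assembled by hand from `dot_simd` and
/// `norm_simd` (both in this module); the results agree:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{cosine_similarity_simd, dot_simd, norm_simd};
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 5.0, 6.0];
/// let manual = dot_simd::<f64>(&a.view(), &b.view())
///     / (norm_simd::<f64>(&a.view()) * norm_simd::<f64>(&b.view()));
/// let sim = cosine_similarity_simd::<f64>(&a.view(), &b.view());
/// assert!((sim - manual).abs() < 1e-10);
/// ```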
///
/// # Use Cases
/// - Word embedding similarity
/// - Document comparison
/// - Recommendation scoring
/// - Semantic search
pub fn cosine_similarity_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::zero();
    }

    F::simd_cosine_similarity(a, b)
}

// =============================================================================
// Phase 73: Binary Operations
// =============================================================================

/// SIMD-accelerated element-wise addition
///
/// Computes a + b element-wise.
///
/// # Arguments
/// * `a` - First array
/// * `b` - Second array
///
/// # Returns
/// Element-wise sum
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::add_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 5.0, 6.0];
/// let c = add_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 5.0).abs() < 1e-14);
/// assert!((c[1] - 7.0).abs() < 1e-14);
/// assert!((c[2] - 9.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Vector arithmetic
/// - Residual connections
/// - Bias addition
/// - Signal combination
pub fn add_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_add(a, b)
}

/// SIMD-accelerated element-wise subtraction
///
/// Computes a - b element-wise.
///
/// # Arguments
/// * `a` - First array
/// * `b` - Second array
///
/// # Returns
/// Element-wise difference
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::sub_simd;
///
/// let a = array![5.0_f64, 7.0, 9.0];
/// let b = array![1.0_f64, 2.0, 3.0];
/// let c = sub_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 4.0).abs() < 1e-14);
/// assert!((c[1] - 5.0).abs() < 1e-14);
/// assert!((c[2] - 6.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Gradient computation
/// - Error calculation
/// - Differencing signals
/// - Relative positioning
pub fn sub_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_sub(a, b)
}

/// SIMD-accelerated element-wise multiplication
///
/// Computes a * b element-wise (Hadamard product).
///
/// # Arguments
/// * `a` - First array
/// * `b` - Second array
///
/// # Returns
/// Element-wise product
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::mul_simd;
///
/// let a = array![2.0_f64, 3.0, 4.0];
/// let b = array![5.0_f64, 6.0, 7.0];
/// let c = mul_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 10.0).abs() < 1e-14);
/// assert!((c[1] - 18.0).abs() < 1e-14);
/// assert!((c[2] - 28.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Attention weights application
/// - Feature gating
/// - Gradient scaling
/// - Signal modulation
pub fn mul_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_mul(a, b)
}

/// SIMD-accelerated element-wise division
///
/// Computes a / b element-wise.
///
/// # Arguments
/// * `a` - Numerator array
/// * `b` - Denominator array
///
/// # Returns
/// Element-wise quotient
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::div_simd;
///
/// let a = array![10.0_f64, 20.0, 30.0];
/// let b = array![2.0_f64, 4.0, 5.0];
/// let c = div_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 5.0).abs() < 1e-14);
/// assert!((c[1] - 5.0).abs() < 1e-14);
/// assert!((c[2] - 6.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Normalization
/// - Ratio computation
/// - Scaling by variable factors
/// - Probability normalization
pub fn div_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_div(a, b)
}

/// SIMD-accelerated element-wise maximum
///
/// Computes max(a, b) element-wise.
///
/// # Arguments
/// * `a` - First array
/// * `b` - Second array
///
/// # Returns
/// Element-wise maximum
///
/// # Examples
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::max_simd;
///
/// let a = array![1.0_f64, 5.0, 3.0];
/// let b = array![4.0_f64, 2.0, 6.0];
/// let c = max_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 4.0_f64).abs() < 1e-14);
/// assert!((c[1] - 5.0_f64).abs() < 1e-14);
/// assert!((c[2] - 6.0_f64).abs() < 1e-14);
/// ```
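///
/// Taking the maximum against a constant array enforces a lower bound;
/// against zeros this is exactly ReLU (compare `relu_simd`):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::max_simd;
///
/// let x = array![-2.0_f64, 0.5, 3.0];
/// let zeros = array![0.0_f64, 0.0, 0.0];
/// let relu = max_simd::<f64>(&x.view(), &zeros.view());
/// // [-2.0, 0.5, 3.0] -> [0.0, 0.5, 3.0]
/// assert!((relu[0] - 0.0_f64).abs() < 1e-14);
/// assert!((relu[1] - 0.5_f64).abs() < 1e-14);
/// assert!((relu[2] - 3.0_f64).abs() < 1e-14);
/// ```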
///
/// # Use Cases
/// - ReLU activation: max(0, x)
/// - Hard clipping from below
/// - Envelope detection
/// - Lower bound enforcement
pub fn max_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_max(a, b)
}

/// SIMD-accelerated element-wise minimum
///
/// Computes min(a, b) element-wise.
///
/// # Arguments
/// * `a` - First array
/// * `b` - Second array
///
/// # Returns
/// Element-wise minimum
///
/// # Examples
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::min_simd;
///
/// let a = array![1.0_f64, 5.0, 3.0];
/// let b = array![4.0_f64, 2.0, 6.0];
/// let c = min_simd::<f64>(&a.view(), &b.view());
/// assert!((c[0] - 1.0_f64).abs() < 1e-14);
/// assert!((c[1] - 2.0_f64).abs() < 1e-14);
/// assert!((c[2] - 3.0_f64).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Gradient clipping
/// - Hard clipping from above
/// - Upper bound enforcement
/// - Minimum filter in signal processing
pub fn min_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_min(a, b)
}

/// SIMD-accelerated scalar multiplication
///
/// Computes a * scalar for all elements.
///
/// # Arguments
/// * `a` - Input array
/// * `scalar` - Scalar multiplier
///
/// # Returns
/// Scaled array
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::scalar_mul_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let c = scalar_mul_simd::<f64>(&a.view(), 2.5);
/// assert!((c[0] - 2.5).abs() < 1e-14);
/// assert!((c[1] - 5.0).abs() < 1e-14);
/// assert!((c[2] - 7.5).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Learning rate scaling
/// - Normalization
/// - Unit conversion
/// - Signal amplification
pub fn scalar_mul_simd<F>(a: &ArrayView1<F>, scalar: F) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_scalar_mul(a, scalar)
}

/// SIMD-accelerated fused multiply-add
///
/// Computes a * b + c element-wise in a single operation. The fused form
/// rounds once instead of twice, so it is more accurate than a separate
/// multiply followed by an add, and typically faster on hardware with
/// FMA support.
///
/// # Arguments
/// * `a` - First multiplicand
/// * `b` - Second multiplicand
/// * `c` - Addend
///
/// # Returns
/// Element-wise fused multiply-add result
///
/// # Examples
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::fma_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 5.0, 6.0];
/// let c = array![7.0_f64, 8.0, 9.0];
/// let result = fma_simd::<f64>(&a.view(), &b.view(), &c.view());
/// // 1*4+7=11, 2*5+8=18, 3*6+9=27
/// assert!((result[0] - 11.0_f64).abs() < 1e-14);
/// assert!((result[1] - 18.0_f64).abs() < 1e-14);
/// assert!((result[2] - 27.0_f64).abs() < 1e-14);
/// ```
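///
/// The Horner's-method use case below chains FMA steps, carrying the
/// accumulator through one fused operation per coefficient; a minimal
/// sketch evaluating p(x) = 2x² + 3x + 4 element-wise:
///
/// ```ignore
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::fma_simd;
///
/// let x = array![0.0_f64, 1.0, 2.0];
/// let c2 = array![2.0_f64, 2.0, 2.0];
/// let c1 = array![3.0_f64, 3.0, 3.0];
/// let c0 = array![4.0_f64, 4.0, 4.0];
/// let acc = fma_simd::<f64>(&c2.view(), &x.view(), &c1.view()); // 2x + 3
/// let p = fma_simd::<f64>(&acc.view(), &x.view(), &c0.view());  // (2x + 3)x + 4
/// // p(0) = 4, p(1) = 9, p(2) = 18
/// assert!((p[0] - 4.0_f64).abs() < 1e-14);
/// assert!((p[1] - 9.0_f64).abs() < 1e-14);
/// assert!((p[2] - 18.0_f64).abs() < 1e-14);
/// ```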
///
/// # Use Cases
/// - Matrix multiplication inner loop
/// - Polynomial evaluation (Horner's method)
/// - Linear combinations
/// - Neural network forward pass
pub fn fma_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>, c: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() || c.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_fma(a, b, c)
}

/// SIMD-accelerated dot product
///
/// Computes sum(a * b).
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Dot product (scalar)
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::dot_simd;
///
/// let a = array![1.0_f64, 2.0, 3.0];
/// let b = array![4.0_f64, 5.0, 6.0];
/// let d = dot_simd::<f64>(&a.view(), &b.view());
/// // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32
/// assert!((d - 32.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Projection calculations
/// - Cosine similarity numerator
/// - Attention scores
/// - Linear layer computation
pub fn dot_simd<F>(a: &ArrayView1<F>, b: &ArrayView1<F>) -> F
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() || b.is_empty() {
        return F::zero();
    }

    F::simd_dot(a, b)
}

// =============================================================================
// Phase 74: Normalization and Activation Functions
// =============================================================================

/// SIMD-accelerated ReLU (Rectified Linear Unit)
///
/// Computes max(0, x) element-wise.
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// ReLU-activated array
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::relu_simd;
///
/// let x = array![-2.0_f64, -1.0, 0.0, 1.0, 2.0];
/// let result = relu_simd::<f64>(&x.view());
/// assert!((result[0] - 0.0).abs() < 1e-14);
/// assert!((result[1] - 0.0).abs() < 1e-14);
/// assert!((result[2] - 0.0).abs() < 1e-14);
/// assert!((result[3] - 1.0).abs() < 1e-14);
/// assert!((result[4] - 2.0).abs() < 1e-14);
/// ```
///
/// # Use Cases
/// - Neural network activation
/// - Sparse representations
/// - Thresholding signals
/// - Feature rectification
pub fn relu_simd<F>(a: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_relu(a)
}

/// SIMD-accelerated L2 normalization
///
/// Normalizes the vector to unit length: x / ||x||₂
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// Unit-normalized array (or zero if input is zero)
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::normalize_simd;
///
/// let x = array![3.0_f64, 4.0];
/// let result = normalize_simd::<f64>(&x.view());
/// // ||result|| = 1
/// let norm = (result[0]*result[0] + result[1]*result[1]).sqrt();
/// assert!((norm - 1.0).abs() < 1e-10);
/// // x/5 = [0.6, 0.8]
/// assert!((result[0] - 0.6).abs() < 1e-10);
/// assert!((result[1] - 0.8).abs() < 1e-10);
/// ```
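///
/// Per the contract above, a zero vector maps to zero rather than NaN:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::normalize_simd;
///
/// let z = array![0.0_f64, 0.0];
/// let result = normalize_simd::<f64>(&z.view());
/// assert!(result.iter().all(|v| v.abs() < 1e-14));
/// ```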
///
/// # Use Cases
/// - Unit vector computation
/// - Cosine similarity preparation
/// - Gradient normalization
/// - Direction extraction
pub fn normalize_simd<F>(a: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_normalize(a)
}

/// SIMD-accelerated standardization (z-score normalization)
///
/// Transforms to zero mean and unit variance: (x - mean) / std
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// Standardized array
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::standardize_simd;
///
/// let x = array![2.0_f64, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
/// let result = standardize_simd::<f64>(&x.view());
/// // Mean should be ~0, std should be ~1
/// let mean: f64 = result.iter().sum::<f64>() / result.len() as f64;
/// assert!(mean.abs() < 1e-10);
/// ```
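///
/// The z-score is invariant to positive rescaling of the input
/// (whichever variance convention the backend uses), since the scale
/// factor cancels between the numerator and the standard deviation:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::standardize_simd;
///
/// let x = array![1.0_f64, 2.0, 4.0];
/// let y = array![10.0_f64, 20.0, 40.0]; // y = 10 * x
/// let sx = standardize_simd::<f64>(&x.view());
/// let sy = standardize_simd::<f64>(&y.view());
/// for (a, b) in sx.iter().zip(sy.iter()) {
///     assert!((a - b).abs() < 1e-10);
/// }
/// ```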
///
/// # Use Cases
/// - Feature scaling for ML
/// - Batch normalization
/// - Statistical preprocessing
/// - Anomaly scoring
pub fn standardize_simd<F>(a: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_standardize(a)
}

/// SIMD-accelerated softmax
///
/// Computes exp(x - max(x)) / sum(exp(x - max(x))) for numerical stability.
/// Output probabilities sum to 1.
///
/// # Arguments
/// * `a` - Input array (logits)
///
/// # Returns
/// Probability distribution (sums to 1)
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::softmax_simd;
///
/// let x = array![1.0_f64, 2.0, 3.0];
/// let result = softmax_simd::<f64>(&x.view());
/// // Probabilities should sum to 1
/// let sum: f64 = result.iter().sum();
/// assert!((sum - 1.0).abs() < 1e-10);
/// // Higher logits should have higher probabilities
/// assert!(result[2] > result[1]);
/// assert!(result[1] > result[0]);
/// ```
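///
/// Because the maximum is subtracted first, softmax is invariant to a
/// constant shift of the logits, which is what keeps it stable for
/// large inputs:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::softmax_simd;
///
/// let small = array![1.0_f64, 2.0, 3.0];
/// let shifted = array![1001.0_f64, 1002.0, 1003.0];
/// let p = softmax_simd::<f64>(&small.view());
/// let q = softmax_simd::<f64>(&shifted.view());
/// for (pi, qi) in p.iter().zip(q.iter()) {
///     assert!((pi - qi).abs() < 1e-10);
/// }
/// ```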
///
/// # Use Cases
/// - Classification output layer
/// - Attention weights
/// - Policy probabilities in RL
/// - Mixture model weights
pub fn softmax_simd<F>(a: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_softmax(a)
}

/// SIMD-accelerated truncation (round towards zero)
///
/// Removes the fractional part, rounding towards zero.
///
/// # Arguments
/// * `a` - Input array
///
/// # Returns
/// Truncated array
///
/// # Examples
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::trunc_simd;
///
/// let x = array![2.7_f64, -2.7, 0.9, -0.9];
/// let result = trunc_simd::<f64>(&x.view());
/// assert!((result[0] - 2.0).abs() < 1e-14);
/// assert!((result[1] - (-2.0)).abs() < 1e-14);
/// assert!((result[2] - 0.0).abs() < 1e-14);
/// assert!((result[3] - 0.0).abs() < 1e-14);
/// ```
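///
/// Unlike `floor_simd`, truncation rounds negative values toward zero
/// instead of downward:
///
/// ```
/// use scirs2_core::ndarray::array;
/// use scirs2_core::ndarray_ext::elementwise::{floor_simd, trunc_simd};
///
/// let x = array![-2.7_f64];
/// // trunc(-2.7) = -2, floor(-2.7) = -3
/// assert!((trunc_simd::<f64>(&x.view())[0] - (-2.0)).abs() < 1e-14);
/// assert!((floor_simd::<f64>(&x.view())[0] - (-3.0)).abs() < 1e-14);
/// ```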
///
/// # Use Cases
/// - Integer conversion
/// - Discretization
/// - Quantization
/// - Index calculation
pub fn trunc_simd<F>(a: &ArrayView1<F>) -> Array1<F>
where
    F: Float + SimdUnifiedOps,
{
    if a.is_empty() {
        return Array1::zeros(0);
    }

    F::simd_trunc(a)
}

#[cfg(test)]
#[path = "elementwise_tests.rs"]
mod tests;