
trueno/vector/ops/activations/advanced/parametric.rs

//! Parametric activation functions: leaky_relu, elu, selu
//!
//! These activations accept parameters (negative slope, alpha) or use
//! exponential-linear formulas with fixed scaling constants.

use crate::vector::Vector;
use crate::{Result, TruenoError};

impl Vector<f32> {
    /// Leaky ReLU activation function
    ///
    /// Computes the element-wise Leaky ReLU with a configurable negative slope.
    /// Leaky ReLU addresses the "dying ReLU" problem by allowing small negative values.
    ///
    /// # Formula
    ///
    /// ```text
    /// leaky_relu(x, α)[i] = max(αx[i], x[i])
    ///                     = x[i]    if x[i] > 0
    ///                     = αx[i]   if x[i] ≤ 0
    /// ```
    ///
    /// # Parameters
    ///
    /// - `negative_slope`: The slope for negative values (typically 0.01)
    ///   - Must be in range [0.0, 1.0)
    ///   - Common values: 0.01 (default), 0.1, 0.2
    ///   - α = 0 reduces to standard ReLU (see the second example below)
    ///   - As α → 1, the function approaches the identity (α = 1 itself is rejected by validation)
    ///
    /// # Properties
    ///
    /// - **Fixes dying ReLU**: Neurons can't completely die (always has gradient)
    /// - **Non-zero gradient**: Gradient is α for negative inputs (not zero)
    /// - **Unbounded positive**: No saturation for positive values
    /// - **Parameterized**: Negative slope can be tuned or learned (PReLU)
    ///
    /// # Applications
    ///
    /// - **Deep networks**: Prevents dying neurons in very deep networks
    /// - **GANs**: Often used in generator and discriminator networks
    /// - **Better gradient flow**: Helps with the vanishing gradient problem
    /// - **Empirical improvements**: Often outperforms ReLU in practice
    ///
    /// # Performance
    ///
    /// This operation is memory-bound (simple multiplication and comparison).
    /// SIMD provides modest speedups.
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    /// Returns `InvalidInput` if `negative_slope` is not in [0.0, 1.0).
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.leaky_relu(0.01)?;
    ///
    /// // Negative values multiplied by 0.01, positive unchanged
    /// assert_eq!(result.as_slice(), &[-0.02, -0.01, 0.0, 1.0, 2.0]);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
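    ///
    /// Two more cases follow directly from the parameter rules above (a small
    /// illustrative sketch): α = 0 behaves like standard ReLU, and an
    /// out-of-range slope is rejected.
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-1.0, 2.0]);
    ///
    /// // α = 0: negative inputs clamp to 0.0, matching standard ReLU
    /// assert_eq!(v.leaky_relu(0.0)?.as_slice(), &[0.0, 2.0]);
    ///
    /// // Slopes outside [0.0, 1.0) return InvalidInput
    /// assert!(v.leaky_relu(1.5).is_err());
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```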
    pub fn leaky_relu(&self, negative_slope: f32) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // Validate the negative_slope parameter
        if !(0.0..1.0).contains(&negative_slope) {
            return Err(TruenoError::InvalidInput(format!(
                "negative_slope must be in [0.0, 1.0), got {}",
                negative_slope
            )));
        }

        // OpComplexity::Low. GPU dispatch is currently disabled (threshold set to
        // usize::MAX): the GPU path measured 2-800x slower for this element-wise
        // op, see docs/performance-analysis.md.
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        const GPU_THRESHOLD: usize = usize::MAX;

        // Try GPU first for large vectors
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            if self.data.len() >= GPU_THRESHOLD {
                use crate::backends::gpu::GpuDevice;
                if GpuDevice::is_available() {
                    let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                    let mut result = vec![0.0; self.data.len()];
                    if gpu.leaky_relu(&self.data, &mut result, negative_slope).is_ok() {
                        return Ok(Vector::from_vec(result));
                    }
                }
            }
        }

        // Scalar fallback: leaky_relu(x, α) = x if x > 0, αx otherwise
        let data: Vec<f32> =
            self.data.iter().map(|&x| if x > 0.0 { x } else { negative_slope * x }).collect();

        Ok(Vector::from_vec(data))
    }

    /// ELU (Exponential Linear Unit) activation function
    ///
    /// Computes the element-wise ELU with a configurable alpha parameter.
    /// ELU pushes mean activations closer to zero, improving learning.
    ///
    /// # Formula
    ///
    /// ```text
    /// elu(x, α)[i] = x[i]            if x[i] > 0
    ///              = α(e^x[i] - 1)   if x[i] ≤ 0
    /// ```
    ///
    /// # Parameters
    ///
    /// - `alpha`: Controls the saturation value for negative inputs (typically 1.0)
    ///   - Must be > 0
    ///   - Common value: 1.0 (original ELU paper)
    ///   - Larger α → lower saturation floor (outputs approach -α for very negative inputs)
    ///
    /// # Properties
    ///
    /// - **Smooth**: Gradient is continuous at 0 for α = 1, unlike ReLU/Leaky ReLU
    /// - **Negative values**: Allows negative outputs (pushes mean closer to zero)
    /// - **Bounded below**: Saturates to -α for very negative inputs
    /// - **Unbounded above**: No saturation for positive values
    /// - **Non-zero gradient**: Has gradient everywhere (no dead neurons)
    ///
    /// # Applications
    ///
    /// - **Deep networks**: Better gradient flow than ReLU
    /// - **Mean activation near zero**: Reduces internal covariate shift
    /// - **Noise robustness**: Smooth activation helps with noisy gradients
    /// - **Empirical improvements**: Often outperforms ReLU and Leaky ReLU
    ///
    /// # Performance
    ///
    /// This operation is compute-bound due to exp() for negative values.
    /// More expensive than ReLU/Leaky ReLU but provides better properties.
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    /// Returns `InvalidInput` if `alpha` <= 0.
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.elu(1.0)?;
    ///
    /// // Negative values: α(e^x - 1), positive unchanged
    /// // elu(-2, 1) ≈ -0.865, elu(-1, 1) ≈ -0.632
    /// assert!((result.as_slice()[0] - (-0.865)).abs() < 0.01);
    /// assert!((result.as_slice()[1] - (-0.632)).abs() < 0.01);
    /// assert_eq!(result.as_slice()[2], 0.0);
    /// assert_eq!(result.as_slice()[3], 1.0);
    /// assert_eq!(result.as_slice()[4], 2.0);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
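    ///
    /// Two more cases implied by the rules above (a brief illustrative sketch):
    /// saturation toward -α for very negative inputs, and rejection of a
    /// non-positive alpha.
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-10.0]);
    ///
    /// // e^(-10) ≈ 4.5e-5, so the output sits near the floor -α = -1
    /// assert!((v.elu(1.0)?.as_slice()[0] + 1.0).abs() < 1e-3);
    ///
    /// // alpha must be strictly positive
    /// assert!(v.elu(0.0).is_err());
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```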
    pub fn elu(&self, alpha: f32) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // Validate the alpha parameter
        if alpha <= 0.0 {
            return Err(TruenoError::InvalidInput(format!("alpha must be > 0, got {}", alpha)));
        }

        // OpComplexity::Low. GPU dispatch is currently disabled (threshold set to
        // usize::MAX): the GPU path measured 2-800x slower for this element-wise
        // op, see docs/performance-analysis.md.
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        const GPU_THRESHOLD: usize = usize::MAX;

        // Try GPU first for large vectors
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            if self.data.len() >= GPU_THRESHOLD {
                use crate::backends::gpu::GpuDevice;
                if GpuDevice::is_available() {
                    let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                    let mut result = vec![0.0; self.data.len()];
                    if gpu.elu(&self.data, &mut result, alpha).is_ok() {
                        return Ok(Vector::from_vec(result));
                    }
                }
            }
        }

        // Scalar fallback: elu(x, α) = x if x > 0, α(e^x - 1) otherwise
        let data: Vec<f32> =
            self.data.iter().map(|&x| if x > 0.0 { x } else { alpha * (x.exp() - 1.0) }).collect();

        Ok(Vector::from_vec(data))
    }

    /// SELU (Scaled Exponential Linear Unit) activation function
    ///
    /// Computes the element-wise SELU, an ELU-style formula with fixed scaling
    /// constants chosen so that activations are self-normalizing.
    ///
    /// # Formula
    ///
    /// ```text
    /// selu(x)[i] = λ · x[i]            if x[i] > 0
    ///            = λ · α(e^x[i] - 1)   if x[i] ≤ 0
    ///
    /// where λ ≈ 1.0507 and α ≈ 1.6733
    /// ```
    ///
    /// # Properties
    ///
    /// - **Self-normalizing**: Activations converge to zero mean and unit variance
    /// - **Vanishing gradient prevention**: Non-zero gradient for negative inputs
    /// - **Automatic normalization**: Reduces need for batch normalization
    ///
    /// # Performance
    ///
    /// Uses the scalar implementation (GPU is disabled for element-wise ops).
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.selu()?;
    ///
    /// // Positive values scaled by λ ≈ 1.0507
    /// assert!((result.as_slice()[3] - 1.0507).abs() < 0.001);
    /// assert!((result.as_slice()[4] - 2.1014).abs() < 0.001);
    ///
    /// // Zero stays zero
    /// assert!(result.as_slice()[2].abs() < 1e-5);
    ///
    /// // Negative values use the ELU-like formula
    /// assert!(result.as_slice()[0] < 0.0);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
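    ///
    /// Since SELU is λ · elu(x, α) with the fixed SELU constants, the two methods
    /// can be cross-checked (constants restated here, rounded, for illustration):
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// const LAMBDA: f32 = 1.0507;
    /// const ALPHA: f32 = 1.6733;
    ///
    /// let v = Vector::from_slice(&[-1.5, 0.5]);
    /// let selu = v.selu()?;
    /// let elu = v.elu(ALPHA)?;
    /// for (s, e) in selu.as_slice().iter().zip(elu.as_slice()) {
    ///     assert!((s - LAMBDA * e).abs() < 1e-3);
    /// }
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```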
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    ///
    /// # References
    ///
    /// - Klambauer et al. (2017): "Self-Normalizing Neural Networks"
    pub fn selu(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // SELU constants from Klambauer et al. (2017)
        // These specific values ensure the self-normalizing property
        const LAMBDA: f32 = 1.0507009873554804934193349852946;
        const ALPHA: f32 = 1.6732632423543772848170429916717;

        // Scalar implementation: selu(x) = λ * (x if x > 0 else α * (exp(x) - 1))
        let data: Vec<f32> = self
            .data
            .iter()
            .map(|&x| if x > 0.0 { LAMBDA * x } else { LAMBDA * ALPHA * (x.exp() - 1.0) })
            .collect();

        Ok(Vector::from_vec(data))
    }
}
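
// The invariants documented above lend themselves to unit tests. Below is a small
// illustrative sketch (the module name is hypothetical, not the crate's actual
// test suite): SELU's saturation floor and the empty-input error path.
#[cfg(test)]
mod parametric_sketch_tests {
    use crate::vector::Vector;

    #[test]
    fn selu_saturates_to_negative_lambda_alpha() {
        // For very negative inputs, selu(x) → -λα ≈ -1.7581
        const LAMBDA: f32 = 1.0507009873554804934193349852946;
        const ALPHA: f32 = 1.6732632423543772848170429916717;
        let v = Vector::from_slice(&[-100.0]);
        let out = v.selu().unwrap();
        assert!((out.as_slice()[0] + LAMBDA * ALPHA).abs() < 1e-3);
    }

    #[test]
    fn empty_inputs_are_rejected() {
        let empty: Vector<f32> = Vector::from_slice(&[]);
        assert!(empty.leaky_relu(0.01).is_err());
        assert!(empty.elu(1.0).is_err());
        assert!(empty.selu().is_err());
    }
}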