trueno/vector/ops/activations/advanced/parametric.rs
//! Parametric activation functions: leaky_relu, elu, selu
//!
//! These activations accept parameters (negative slope, alpha) or use
//! exponential-linear formulas with fixed scaling constants.

use crate::vector::Vector;
use crate::{Result, TruenoError};

impl Vector<f32> {
    /// Leaky ReLU activation function
    ///
    /// Computes the element-wise Leaky ReLU with a configurable negative slope.
    /// Leaky ReLU addresses the "dying ReLU" problem by allowing small negative values.
    ///
    /// # Formula
    ///
    /// ```text
    /// leaky_relu(x, α)[i] = max(αx[i], x[i])
    ///                     = x[i]   if x[i] > 0
    ///                     = αx[i]  if x[i] ≤ 0
    /// ```
    ///
    /// # Parameters
    ///
    /// - `negative_slope`: The slope for negative values (typically 0.01)
    ///   - Must be in range [0.0, 1.0)
    ///   - Common values: 0.01 (default), 0.1, 0.2
    ///   - α = 0 reduces to standard ReLU (see the second example below)
    ///   - As α → 1 the function approaches the identity (α = 1.0 itself is rejected as out of range)
    ///
    /// # Properties
    ///
    /// - **Fixes dying ReLU**: Neurons can't completely die (there is always a gradient)
    /// - **Non-zero gradient**: Gradient is α for negative inputs (not zero)
    /// - **Unbounded positive**: No saturation for positive values
    /// - **Parameterized**: Negative slope can be tuned or learned (PReLU)
    ///
    /// # Applications
    ///
    /// - **Deep networks**: Prevents dying neurons in very deep networks
    /// - **GANs**: Often used in generator and discriminator networks
    /// - **Better gradient flow**: Helps with the vanishing gradient problem
    /// - **Empirical improvements**: Often outperforms ReLU in practice
    ///
    /// # Performance
    ///
    /// This operation is memory-bound (simple multiplication and comparison).
    /// SIMD provides modest speedups.
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    /// Returns `InvalidInput` if `negative_slope` is not in [0.0, 1.0).
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.leaky_relu(0.01)?;
    ///
    /// // Negative values multiplied by 0.01, positive unchanged
    /// assert_eq!(result.as_slice(), &[-0.02, -0.01, 0.0, 1.0, 2.0]);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
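    ///
    /// A slope of `0.0` is accepted and reduces Leaky ReLU to standard ReLU
    /// (an illustrative check; negative inputs simply map to zero):
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-3.0, -0.5, 2.0]);
    /// // With α = 0, negative inputs are scaled to zero
    /// let relu_like = v.leaky_relu(0.0)?;
    /// assert_eq!(relu_like.as_slice(), &[0.0, 0.0, 2.0]);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```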
    pub fn leaky_relu(&self, negative_slope: f32) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // Validate negative_slope parameter
        if !(0.0..1.0).contains(&negative_slope) {
            return Err(TruenoError::InvalidInput(format!(
                "negative_slope must be in [0.0, 1.0), got {}",
                negative_slope
            )));
        }

        // OpComplexity::Low - GPU threshold: >100K elements
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        const GPU_THRESHOLD: usize = usize::MAX; // GPU DISABLED - 2-800x slower, see docs/performance-analysis.md

        // Try GPU first for large vectors
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            if self.data.len() >= GPU_THRESHOLD {
                use crate::backends::gpu::GpuDevice;
                if GpuDevice::is_available() {
                    let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                    let mut result = vec![0.0; self.data.len()];
                    if gpu.leaky_relu(&self.data, &mut result, negative_slope).is_ok() {
                        return Ok(Vector::from_vec(result));
                    }
                }
            }
        }

        // Scalar fallback: leaky_relu(x, α) = x if x > 0, αx otherwise
        let data: Vec<f32> =
            self.data.iter().map(|&x| if x > 0.0 { x } else { negative_slope * x }).collect();

        Ok(Vector::from_vec(data))
    }

    /// ELU (Exponential Linear Unit) activation function
    ///
    /// Computes the element-wise ELU with a configurable alpha parameter.
    /// ELU pushes mean activations closer to zero, improving learning.
    ///
    /// # Formula
    ///
    /// ```text
    /// elu(x, α)[i] = x[i]            if x[i] > 0
    ///              = α(e^x[i] - 1)   if x[i] ≤ 0
    /// ```
    ///
    /// # Parameters
    ///
    /// - `alpha`: Controls the saturation value for negative inputs (typically 1.0)
    ///   - Must be > 0
    ///   - Common value: 1.0 (original ELU paper)
    ///   - Larger α → slower saturation for negative inputs
    ///
    /// # Properties
    ///
    /// - **Smooth**: Unlike ReLU/Leaky ReLU, has smooth gradients everywhere
    /// - **Negative values**: Allows negative outputs (pushes mean closer to zero)
    /// - **Bounded below**: Saturates to -α for very negative inputs (see the second example below)
    /// - **Unbounded above**: No saturation for positive values
    /// - **Non-zero gradient**: Has a gradient everywhere (no dead neurons)
    ///
    /// # Applications
    ///
    /// - **Deep networks**: Better gradient flow than ReLU
    /// - **Mean activation near zero**: Reduces internal covariate shift
    /// - **Noise robustness**: Smooth activation helps with noisy gradients
    /// - **Empirical improvements**: Often outperforms ReLU and Leaky ReLU
    ///
    /// # Performance
    ///
    /// This operation is compute-bound due to exp() for negative values.
    /// More expensive than ReLU/Leaky ReLU but provides better properties.
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    /// Returns `InvalidInput` if `alpha` <= 0.
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.elu(1.0)?;
    ///
    /// // Negative values: α(e^x - 1), positive unchanged
    /// // elu(-2, 1) ≈ -0.865, elu(-1, 1) ≈ -0.632
    /// assert!((result.as_slice()[0] - (-0.865)).abs() < 0.01);
    /// assert!((result.as_slice()[1] - (-0.632)).abs() < 0.01);
    /// assert_eq!(result.as_slice()[2], 0.0);
    /// assert_eq!(result.as_slice()[3], 1.0);
    /// assert_eq!(result.as_slice()[4], 2.0);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
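    ///
    /// For strongly negative inputs the output saturates toward -α, as noted in
    /// the properties above (illustrative check with α = 1):
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-10.0]);
    /// let result = v.elu(1.0)?;
    /// // elu(-10, 1) = e^(-10) - 1 ≈ -0.99995, close to the -α bound
    /// assert!((result.as_slice()[0] + 1.0).abs() < 1e-3);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```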
    pub fn elu(&self, alpha: f32) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // Validate alpha parameter
        if alpha <= 0.0 {
            return Err(TruenoError::InvalidInput(format!("alpha must be > 0, got {}", alpha)));
        }

        // OpComplexity::Low - GPU threshold: >100K elements
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        const GPU_THRESHOLD: usize = usize::MAX; // GPU DISABLED - 2-800x slower, see docs/performance-analysis.md

        // Try GPU first for large vectors
        #[cfg(all(feature = "gpu", not(target_arch = "wasm32")))]
        {
            if self.data.len() >= GPU_THRESHOLD {
                use crate::backends::gpu::GpuDevice;
                if GpuDevice::is_available() {
                    let gpu = GpuDevice::new().map_err(TruenoError::InvalidInput)?;
                    let mut result = vec![0.0; self.data.len()];
                    if gpu.elu(&self.data, &mut result, alpha).is_ok() {
                        return Ok(Vector::from_vec(result));
                    }
                }
            }
        }

        // Scalar fallback: elu(x, α) = x if x > 0, α(e^x - 1) otherwise
        let data: Vec<f32> =
            self.data.iter().map(|&x| if x > 0.0 { x } else { alpha * (x.exp() - 1.0) }).collect();

        Ok(Vector::from_vec(data))
    }

    /// SELU (Scaled Exponential Linear Unit) activation function
    ///
    /// Computes selu(x) = λ * (x if x > 0 else α * (exp(x) - 1))
    /// where λ ≈ 1.0507 and α ≈ 1.6733
    ///
    /// # Properties
    ///
    /// - **Self-normalizing**: Activations converge to zero mean and unit variance
    /// - **Vanishing gradient prevention**: Non-zero gradient for negative inputs
    /// - **Automatic normalization**: Reduces need for batch normalization
    ///
    /// # Performance
    ///
    /// Uses scalar implementation (GPU disabled for element-wise ops).
    ///
    /// # Examples
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
    /// let result = v.selu()?;
    ///
    /// // Positive values scaled by λ ≈ 1.0507
    /// assert!((result.as_slice()[3] - 1.0507).abs() < 0.001);
    /// assert!((result.as_slice()[4] - 2.1014).abs() < 0.001);
    ///
    /// // Zero stays zero
    /// assert!(result.as_slice()[2].abs() < 1e-5);
    ///
    /// // Negative values use ELU-like formula
    /// assert!(result.as_slice()[0] < 0.0);
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```
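    ///
    /// SELU is `elu` scaled by λ with α ≈ 1.6733; an illustrative sanity check
    /// (not an API guarantee) of that relationship:
    ///
    /// ```
    /// use trueno::Vector;
    ///
    /// let v = Vector::from_slice(&[-1.5, 0.5]);
    /// let selu = v.selu()?;
    /// let elu = v.elu(1.6732632)?;
    /// // selu(x) ≈ λ · elu(x, α) for both negative and positive inputs
    /// for (s, e) in selu.as_slice().iter().zip(elu.as_slice()) {
    ///     assert!((s - 1.0507010 * e).abs() < 1e-4);
    /// }
    /// # Ok::<(), trueno::TruenoError>(())
    /// ```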
    ///
    /// # Errors
    ///
    /// Returns `EmptyVector` if the input vector is empty.
    ///
    /// # References
    ///
    /// - Klambauer et al. (2017): "Self-Normalizing Neural Networks"
    pub fn selu(&self) -> Result<Self> {
        if self.data.is_empty() {
            return Err(TruenoError::EmptyVector);
        }

        // SELU constants from Klambauer et al. (2017)
        // These specific values ensure the self-normalizing property
        const LAMBDA: f32 = 1.0507009873554804934193349852946;
        const ALPHA: f32 = 1.6732632423543772848170429916717;

        // Scalar implementation: selu(x) = λ * (x if x > 0 else α * (exp(x) - 1))
        let data: Vec<f32> = self
            .data
            .iter()
            .map(|&x| if x > 0.0 { LAMBDA * x } else { LAMBDA * ALPHA * (x.exp() - 1.0) })
            .collect();

        Ok(Vector::from_vec(data))
    }
}