//! Activation functions for neural networks
//!
//! This module provides common activation functions used in neural networks.
//! Activation functions introduce non-linearity into neural networks, enabling
//! them to learn complex patterns and relationships.
//! # Overview
//! Activation functions determine whether a neuron should be activated based on its
//! input. They introduce non-linearity into the network, allowing it to learn complex
//! mappings between inputs and outputs.
//! # Available Activation Functions
//! - **ReLU** (Rectified Linear Unit): Most commonly used, simple and effective
//! - **Sigmoid**: Maps input to (0,1), useful for binary classification output layers
//! - **Tanh**: Maps input to (-1,1), often better than sigmoid for hidden layers
//! - **Softmax**: Converts logits to probability distribution, used in multi-class classification
//! - **GELU** (Gaussian Error Linear Unit): Smooth alternative to ReLU, used in transformers
//! - **Swish/SiLU**: Self-gated activation, often outperforms ReLU
//! - **Mish**: Smooth, non-monotonic activation function
//! - **Leaky ReLU**: Variant of ReLU that allows small negative values
//! - **ELU** (Exponential Linear Unit): Smooth variant of ReLU
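//!
//! As a quick reference, the underlying scalar formulas can be sketched in plain Rust.
//! This is a standalone sketch using `f64` helpers, independent of this crate's
//! implementations; GELU is shown with its common tanh approximation, and tanh itself is
//! simply `f64::tanh`:
//! ```rust
//! fn relu(x: f64) -> f64 { x.max(0.0) }
//! fn leaky_relu(x: f64, alpha: f64) -> f64 { if x > 0.0 { x } else { alpha * x } }
//! fn elu(x: f64, alpha: f64) -> f64 { if x > 0.0 { x } else { alpha * (x.exp() - 1.0) } }
//! fn sigmoid(x: f64) -> f64 { 1.0 / (1.0 + (-x).exp()) }
//! fn swish(x: f64) -> f64 { x * sigmoid(x) } // also called SiLU
//! fn mish(x: f64) -> f64 { x * x.exp().ln_1p().tanh() } // x * tanh(softplus(x))
//! fn gelu(x: f64) -> f64 {
//!     // tanh approximation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
//!     let inner = (2.0 / std::f64::consts::PI).sqrt() * (x + 0.044715 * x.powi(3));
//!     0.5 * x * (1.0 + inner.tanh())
//! }
//! // Softmax normalizes a whole vector of logits rather than a single scalar.
//! fn softmax(logits: &[f64]) -> Vec<f64> {
//!     // Subtract the maximum logit for numerical stability before exponentiating.
//!     let max = logits.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
//!     let exps: Vec<f64> = logits.iter().map(|&x| (x - max).exp()).collect();
//!     let sum: f64 = exps.iter().sum();
//!     exps.iter().map(|&e| e / sum).collect()
//! }
//! # fn main() { assert!((softmax(&[1.0, 2.0, 3.0]).iter().sum::<f64>() - 1.0).abs() < 1e-9); }
//! ```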
//! # Examples
//! ## Basic Usage
//! ```rust
//! use scirs2_neural::activations::{Activation, ReLU, Sigmoid, Softmax};
//! use scirs2_core::ndarray::Array;
//! # fn example() -> scirs2_neural::error::Result<()> {
//! // Create activation functions
//! let relu = ReLU::new();
//! let sigmoid = Sigmoid::new();
//! let softmax = Softmax::new(1); // Apply softmax along axis 1 (the class axis)
//! // Create input data
//! let input = Array::from_vec(vec![-2.0, -1.0, 0.0, 1.0, 2.0]).into_dyn();
//! // Apply ReLU activation
//! let relu_output = relu.forward(&input)?;
//! // Output: [0.0, 0.0, 0.0, 1.0, 2.0]
//! // Apply Sigmoid activation
//! let sigmoid_output = sigmoid.forward(&input)?;
//! // Output: [0.119, 0.269, 0.5, 0.731, 0.881] (approximately)
//! // For softmax, typically used with 2D input (batch_size, num_classes)
//! let logits = Array::from_shape_vec((1, 3), vec![1.0, 2.0, 3.0])?.into_dyn();
//! let probabilities = softmax.forward(&logits)?;
//! // Output: [[0.090, 0.245, 0.665]] (approximately, sums to 1.0)
//! # Ok(())
//! # }
//! ```
//! ## Using in Forward and Backward Pass
//! ```rust
//! use scirs2_neural::activations::{Activation, ReLU};
//! use scirs2_core::ndarray::Array;
//! # fn example() -> scirs2_neural::error::Result<()> {
//! let relu = ReLU::new();
//! let input = Array::from_vec(vec![-1.0, 0.5, 2.0]).into_dyn();
//! // Forward pass
//! let output = relu.forward(&input)?;
//! println!("ReLU output: {:?}", output);
//! // Output: [0.0, 0.5, 2.0]
//! // Backward pass (computing gradients)
//! let grad_output = Array::from_vec(vec![1.0, 1.0, 1.0]).into_dyn();
//! let grad_input = relu.backward(&grad_output, &output)?;
//! println!("ReLU gradient: {:?}", grad_input);
//! // Output: [0.0, 1.0, 1.0] (gradient is 0 for negative inputs, 1 for positive)
//! # Ok(())
//! # }
//! ```
//! ## Choosing the Right Activation Function
//! ### For Hidden Layers:
//! - **ReLU**: Default choice, computationally efficient, helps mitigate vanishing gradients
//! - **GELU**: Good for transformer architectures
//! - **Swish**: Often outperforms ReLU, especially in deep networks
//! - **Tanh**: When you need outputs centered around zero
//! ### For Output Layers:
//! - **Sigmoid**: Binary classification (single output)
//! - **Softmax**: Multi-class classification (multiple outputs that sum to 1)
//! - **Linear (no activation)**: Regression tasks
//! - **Tanh**: When output should be in range (-1, 1)
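//!
//! A brief sketch of wiring these output-layer choices together, reusing the constructors
//! shown in the examples above (the `Softmax` axis convention is assumed to match the
//! earlier example):
//! ```rust
//! use scirs2_neural::activations::{Activation, Sigmoid, Softmax};
//! use scirs2_core::ndarray::Array;
//! # fn example() -> scirs2_neural::error::Result<()> {
//! // Binary classification: one logit per sample, squashed into a probability in (0, 1).
//! let binary_logits = Array::from_vec(vec![0.3, -1.2, 2.0]).into_dyn();
//! let binary_probs = Sigmoid::new().forward(&binary_logits)?;
//! // Multi-class classification: one row of logits per sample,
//! // normalized to a probability distribution along axis 1.
//! let class_logits = Array::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 0.5, 0.5, 0.5])?.into_dyn();
//! let class_probs = Softmax::new(1).forward(&class_logits)?;
//! # Ok(())
//! # }
//! ```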
//! # Performance Considerations
//! - **ReLU** and **Leaky ReLU**: Fastest to compute
//! - **Sigmoid** and **Tanh**: Require expensive exponential operations
//! - **Softmax**: Most expensive, but typically applied only to the output layer
//! - **GELU** and **Swish**: More expensive than ReLU but can provide better results
use crate::error::Result;
use scirs2_core::ndarray::{Array, IxDyn};
/// Trait for activation functions
///
/// This trait defines the interface for all activation functions in the neural network.
/// Activation functions must implement both forward and backward pass methods to support
/// automatic differentiation during training.
/// # Examples
/// ```rust
/// use scirs2_neural::activations::{Activation, ReLU};
/// use scirs2_core::ndarray::Array;
/// # fn example() -> scirs2_neural::error::Result<()> {
/// let activation = ReLU::new();
/// let input = Array::from_vec(vec![-1.0, 0.0, 1.0]).into_dyn();
/// // Forward pass
/// let output = activation.forward(&input)?;
/// assert_eq!(output.as_slice().expect("Operation failed"), &[0.0, 0.0, 1.0]);
/// // Backward pass
/// let grad_output = Array::from_vec(vec![1.0, 1.0, 1.0]).into_dyn();
/// let grad_input = activation.backward(&grad_output, &output)?;
/// assert_eq!(grad_input.as_slice().expect("Operation failed"), &[0.0, 0.0, 1.0]);
/// # Ok(())
/// # }
/// ```
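// A minimal sketch of the trait interface implied by the examples above; the concrete
// generic bounds and array types used by the shipped implementations may differ.
pub trait Activation<F> {
    /// Apply the activation function to `input` and return the result.
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>>;

    /// Propagate `grad_output` backwards through the activation, given the
    /// output produced by the corresponding forward pass.
    fn backward(
        &self,
        grad_output: &Array<F, IxDyn>,
        output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>>;
}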

// Re-export the concrete activation types from their submodules
// (submodule names here are assumed to mirror the type names).
pub use gelu::GELU;
pub use mish::Mish;
pub use relu::ReLU;
pub use sigmoid::Sigmoid;
pub use softmax::Softmax;
pub use swish::Swish;
pub use tanh::Tanh;