use super::module::Module;
use crate::autograd::Tensor;
/// Rectified linear unit activation layer.
///
/// A unit struct: the layer carries no parameters or state, so it is freely
/// `Copy`-able and its `Default` value is the same as [`ReLU::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct ReLU;

impl ReLU {
    /// Creates a new `ReLU` layer.
    #[must_use]
    pub fn new() -> Self {
        ReLU
    }
}
impl Module for ReLU {
    /// Applies the activation to `input` by delegating to `Tensor::relu`.
    ///
    /// The input is only borrowed; the result is returned as a new `Tensor`
    /// value. This module's tests expect negative entries to become `0.0`
    /// and non-negative entries to pass through unchanged.
    fn forward(&self, input: &Tensor) -> Tensor {
        input.relu()
    }
}
/// Leaky rectified linear unit activation layer.
///
/// Stores the slope that is handed to `Tensor::leaky_relu` on every forward
/// pass.
#[derive(Debug, Clone, Copy)]
pub struct LeakyReLU {
    /// Slope forwarded to `Tensor::leaky_relu`.
    negative_slope: f32,
}

impl LeakyReLU {
    /// Slope used by [`LeakyReLU::new`] and `Default`.
    const DEFAULT_NEGATIVE_SLOPE: f32 = 0.01;

    /// Creates a layer with the default negative slope of `0.01`.
    #[must_use]
    pub fn new() -> Self {
        Self::with_slope(Self::DEFAULT_NEGATIVE_SLOPE)
    }

    /// Creates a layer with a caller-chosen `negative_slope`.
    ///
    /// The value is stored as given; no range validation is performed.
    #[must_use]
    pub fn with_slope(negative_slope: f32) -> Self {
        Self { negative_slope }
    }
}

impl Default for LeakyReLU {
    /// Equivalent to [`LeakyReLU::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl Module for LeakyReLU {
    /// Applies the activation to `input` by delegating to
    /// `Tensor::leaky_relu` with this layer's stored slope.
    ///
    /// This module's tests expect negative entries to be scaled by the slope
    /// (e.g. `-2.0` becomes `-0.2` for a slope of `0.1`) and non-negative
    /// entries to pass through unchanged.
    fn forward(&self, input: &Tensor) -> Tensor {
        // The struct is `Copy`, so destructuring a copy of `self` is free.
        let Self { negative_slope } = *self;
        input.leaky_relu(negative_slope)
    }
}
/// Sigmoid activation layer.
///
/// A unit struct: the layer carries no parameters or state, so its `Default`
/// value is the same as [`Sigmoid::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct Sigmoid;

impl Sigmoid {
    /// Creates a new `Sigmoid` layer.
    #[must_use]
    pub fn new() -> Self {
        Sigmoid
    }
}
impl Module for Sigmoid {
    /// Applies the activation to `input` by delegating to `Tensor::sigmoid`.
    ///
    /// This module's tests expect an input of `0.0` to map to `0.5` and
    /// every output to lie strictly between `0.0` and `1.0` for inputs in
    /// `[-10, 10]`.
    fn forward(&self, input: &Tensor) -> Tensor {
        input.sigmoid()
    }
}
/// Hyperbolic tangent activation layer.
///
/// A unit struct: the layer carries no parameters or state, so its `Default`
/// value is the same as [`Tanh::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct Tanh;

impl Tanh {
    /// Creates a new `Tanh` layer.
    #[must_use]
    pub fn new() -> Self {
        Tanh
    }
}
impl Module for Tanh {
    /// Applies the activation to `input` by delegating to `Tensor::tanh_`.
    ///
    /// The input is only borrowed and the result is returned as a `Tensor`
    /// value. This module's tests expect `0.0` to map to `0.0` and all
    /// outputs to stay within `[-1, 1]`.
    fn forward(&self, input: &Tensor) -> Tensor {
        input.tanh_()
    }
}
/// Gaussian error linear unit activation layer.
///
/// A unit struct: the layer carries no parameters or state, so its `Default`
/// value is the same as [`GELU::new`].
#[derive(Debug, Clone, Copy, Default)]
pub struct GELU;

impl GELU {
    /// Creates a new `GELU` layer.
    #[must_use]
    pub fn new() -> Self {
        GELU
    }
}
impl Module for GELU {
    /// Applies the activation to `input` by delegating to `Tensor::gelu`.
    ///
    /// This module's tests expect `0.0` to map to `0.0` and `1.0` to map to
    /// roughly `0.841`.
    fn forward(&self, input: &Tensor) -> Tensor {
        input.gelu()
    }
}
/// Softmax activation layer normalizing along a configurable dimension.
#[derive(Debug, Clone, Copy)]
pub struct Softmax {
    /// Dimension to normalize over. Negative values are resolved relative to
    /// the input's number of dimensions at forward time (`-1` is the last
    /// dimension).
    dim: i32,
}

impl Softmax {
    /// Creates a layer that normalizes along `dim`.
    ///
    /// The value is stored as given; it is only checked against the input's
    /// dimensionality when the layer is applied.
    #[must_use]
    pub fn new(dim: i32) -> Self {
        Self { dim }
    }
}

impl Default for Softmax {
    /// Normalizes along the last dimension (`dim = -1`).
    fn default() -> Self {
        Self { dim: -1 }
    }
}
impl Module for Softmax {
    /// Applies softmax to `input` along the configured dimension.
    ///
    /// A negative `dim` is resolved by adding the input's number of
    /// dimensions. When the resolved axis is the last one, the call is
    /// forwarded directly to `Tensor::softmax`. Otherwise the only supported
    /// case is axis `0` of a 2-D tensor, which is computed as
    /// `transpose -> softmax -> transpose`.
    ///
    /// # Panics
    ///
    /// Panics when the resolved axis is neither the last axis nor axis `0`
    /// of a 2-D tensor (this includes out-of-range `dim` values).
    fn forward(&self, input: &Tensor) -> Tensor {
        let ndim = input.ndim() as i32;

        // Negative dims count back from the end of the shape.
        let axis = match self.dim {
            d if d < 0 => ndim + d,
            d => d,
        };

        let is_last_axis = axis == ndim - 1;
        if !is_last_axis {
            assert!(
                ndim == 2 && axis == 0,
                "Softmax: dim={} resolved to axis={} is unsupported for a \
                 {ndim}-D tensor; only the last dim or dim=0 on a 2D tensor are \
                 implemented",
                self.dim,
                axis,
            );
            // Move axis 0 into the last position, normalize there, then
            // restore the original layout.
            return input.transpose().softmax().transpose();
        }

        input.softmax()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---------------------------------------------------------------------
    // Forward-pass value checks
    // ---------------------------------------------------------------------

    /// Negative inputs are clamped to zero; non-negative inputs are kept.
    #[test]
    fn test_relu() {
        let layer = ReLU::new();
        let input = Tensor::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
        let output = layer.forward(&input);
        assert_eq!(output.data(), &[0.0, 0.0, 0.0, 1.0, 2.0]);
    }

    /// Negative inputs are scaled by the configured slope.
    #[test]
    fn test_leaky_relu() {
        let layer = LeakyReLU::with_slope(0.1);
        let input = Tensor::from_slice(&[-2.0, -1.0, 0.0, 1.0, 2.0]);
        let output = layer.forward(&input);
        assert_eq!(output.data(), &[-0.2, -0.1, 0.0, 1.0, 2.0]);
    }

    /// sigmoid(0) == 0.5.
    #[test]
    fn test_sigmoid() {
        let layer = Sigmoid::new();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0] - 0.5).abs() < 1e-5);
    }

    /// Sigmoid outputs stay strictly inside (0, 1).
    #[test]
    fn test_sigmoid_bounds() {
        let layer = Sigmoid::new();
        let input = Tensor::from_slice(&[-10.0, 0.0, 10.0]);
        let output = layer.forward(&input);
        for &value in output.data() {
            assert!(value > 0.0 && value < 1.0);
        }
    }

    /// tanh(0) == 0.
    #[test]
    fn test_tanh() {
        let layer = Tanh::new();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0]).abs() < 1e-5);
    }

    /// Tanh outputs stay inside [-1, 1] and approach the bounds at +/-2.
    #[test]
    fn test_tanh_bounds() {
        let layer = Tanh::new();
        let input = Tensor::from_slice(&[-2.0, 0.0, 2.0]);
        let output = layer.forward(&input);
        for &value in output.data() {
            assert!((-1.0..=1.0).contains(&value));
        }
        let lowest = output.data()[0];
        let highest = output.data()[2];
        assert!(lowest > -1.0 && lowest < -0.9);
        assert!(highest > 0.9 && highest < 1.0);
    }

    /// gelu(0) == 0.
    #[test]
    fn test_gelu() {
        let layer = GELU::new();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0]).abs() < 1e-5);
    }

    /// gelu(1) is approximately 0.841.
    #[test]
    fn test_gelu_positive() {
        let layer = GELU::new();
        let input = Tensor::from_slice(&[1.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0] - 0.841).abs() < 0.01);
    }

    /// Every row of a [2, 3] input sums to one after last-dim softmax.
    #[test]
    fn test_softmax_sums_to_one() {
        let layer = Softmax::new(-1);
        let input = Tensor::new(&[1.0, 2.0, 3.0, 1.0, 2.0, 3.0], &[2, 3]);
        let output = layer.forward(&input);
        let (batch, features) = (2, 3);
        for b in 0..batch {
            // Rows are read out of the flat buffer, `features` values each.
            let row_start = b * features;
            let sum: f32 = (row_start..row_start + features)
                .map(|idx| output.data()[idx])
                .sum();
            assert!((sum - 1.0).abs() < 1e-5, "Row {b} sums to {sum}");
        }
    }

    /// Large logits must not overflow into NaN/inf.
    #[test]
    fn test_softmax_numerical_stability() {
        let layer = Softmax::new(-1);
        let input = Tensor::new(&[1000.0, 1001.0, 1002.0], &[1, 3]);
        let output = layer.forward(&input);
        for &prob in output.data() {
            assert!(prob.is_finite());
            assert!((0.0..=1.0).contains(&prob));
        }
        let total: f32 = output.data().iter().sum();
        assert!((total - 1.0).abs() < 1e-5);
    }

    // ---------------------------------------------------------------------
    // Default / Debug / Clone / Copy coverage per layer
    // ---------------------------------------------------------------------

    #[test]
    fn test_relu_default() {
        let layer = ReLU::default();
        let input = Tensor::from_slice(&[-1.0, 1.0]);
        let output = layer.forward(&input);
        assert_eq!(output.data(), &[0.0, 1.0]);
    }

    #[test]
    fn test_relu_debug_clone_copy() {
        let layer = ReLU::new();
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("ReLU"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::from_slice(&[1.0]));
        let _ = copied.forward(&Tensor::from_slice(&[1.0]));
    }

    /// The default slope is 0.01, so -100 maps to roughly -1.
    #[test]
    fn test_leaky_relu_default() {
        let layer = LeakyReLU::default();
        let input = Tensor::from_slice(&[-100.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0] + 1.0).abs() < 0.001);
    }

    #[test]
    fn test_leaky_relu_debug_clone_copy() {
        let layer = LeakyReLU::new();
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("LeakyReLU"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::from_slice(&[1.0]));
        let _ = copied.forward(&Tensor::from_slice(&[1.0]));
    }

    #[test]
    fn test_sigmoid_default() {
        let layer = Sigmoid::default();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0] - 0.5).abs() < 1e-5);
    }

    #[test]
    fn test_sigmoid_debug_clone_copy() {
        let layer = Sigmoid::new();
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("Sigmoid"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::from_slice(&[0.0]));
        let _ = copied.forward(&Tensor::from_slice(&[0.0]));
    }

    #[test]
    fn test_tanh_default() {
        let layer = Tanh::default();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0]).abs() < 1e-5);
    }

    #[test]
    fn test_tanh_debug_clone_copy() {
        let layer = Tanh::new();
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("Tanh"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::from_slice(&[0.0]));
        let _ = copied.forward(&Tensor::from_slice(&[0.0]));
    }

    #[test]
    fn test_gelu_default() {
        let layer = GELU::default();
        let input = Tensor::from_slice(&[0.0]);
        let output = layer.forward(&input);
        assert!((output.data()[0]).abs() < 1e-5);
    }

    #[test]
    fn test_gelu_debug_clone_copy() {
        let layer = GELU::new();
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("GELU"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::from_slice(&[1.0]));
        let _ = copied.forward(&Tensor::from_slice(&[1.0]));
    }

    /// `Softmax::default()` normalizes over the last dimension.
    #[test]
    fn test_softmax_default() {
        let layer = Softmax::default();
        let input = Tensor::new(&[1.0, 2.0, 3.0], &[1, 3]);
        let output = layer.forward(&input);
        let total: f32 = output.data().iter().sum();
        assert!((total - 1.0).abs() < 1e-5);
    }

    #[test]
    fn test_softmax_debug_clone_copy() {
        let layer = Softmax::new(-1);
        let rendered = format!("{:?}", layer);
        assert!(rendered.contains("Softmax"));
        let cloned = layer.clone();
        let copied = layer;
        let _ = cloned.forward(&Tensor::new(&[1.0, 2.0], &[1, 2]));
        let _ = copied.forward(&Tensor::new(&[1.0, 2.0], &[1, 2]));
    }

    // ---------------------------------------------------------------------
    // Regression tests: `dim` must be honoured (pmat867)
    // ---------------------------------------------------------------------

    /// `dim = 0` on a 2x2 input normalizes down each column:
    /// columns are [1, 3] and [2, 4], both giving (0.1192, 0.8808).
    #[test]
    fn test_softmax_dim0_column_pmat867() {
        let layer = Softmax::new(0);
        let input = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]);
        let output = layer.forward(&input);
        let d = output.data();
        assert!(
            (d[0] - 0.1192).abs() < 1e-3,
            "Softmax(0)[0][0] = {} (expected 0.1192 column-softmax, \
             0.2689 means dim was ignored)",
            d[0]
        );
        assert!((d[1] - 0.1192).abs() < 1e-3, "[0][1] = {}", d[1]);
        assert!((d[2] - 0.8808).abs() < 1e-3, "[1][0] = {}", d[2]);
        assert!((d[3] - 0.8808).abs() < 1e-3, "[1][1] = {}", d[3]);

        // Each column (stride 2 in the flat buffer) must sum to one.
        let col0 = d[0] + d[2];
        let col1 = d[1] + d[3];
        assert!((col0 - 1.0).abs() < 1e-5, "column 0 sums to {col0}");
        assert!((col1 - 1.0).abs() < 1e-5, "column 1 sums to {col1}");
    }

    /// `dim = 1` and `dim = -1` both normalize across each row of a 2x2
    /// input: row [1, 2] gives (0.2689, 0.7311).
    #[test]
    fn test_softmax_dim1_and_neg1_row_unchanged_pmat867() {
        let input = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]);
        for dim in [1, -1] {
            let output = Softmax::new(dim).forward(&input);
            let d = output.data();
            assert!(
                (d[0] - 0.2689).abs() < 1e-3,
                "Softmax({dim})[0][0] = {} (expected 0.2689 row-softmax)",
                d[0]
            );
            assert!((d[1] - 0.7311).abs() < 1e-3, "[0][1] = {}", d[1]);

            // Each row (contiguous pair in the flat buffer) must sum to one.
            let row0 = d[0] + d[1];
            let row1 = d[2] + d[3];
            assert!((row0 - 1.0).abs() < 1e-5, "row 0 sums to {row0}");
            assert!((row1 - 1.0).abs() < 1e-5, "row 1 sums to {row1}");
        }
    }

    /// The `dim = 0` path must stay differentiable. The loss is the sum of
    /// all softmax outputs, which the assertion below expects to have a
    /// gradient of approximately zero with respect to every input element.
    #[test]
    fn test_softmax_dim0_preserves_gradients_pmat867() {
        use crate::autograd::{clear_graph, get_grad};

        // Start from a clean autograd graph before building this one.
        clear_graph();

        let input = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]).requires_grad();
        let input_id = input.id();
        let loss = Softmax::new(0).forward(&input).sum();
        loss.backward();

        let grad =
            get_grad(input_id).expect("Softmax(0) forward must keep the input differentiable");
        assert_eq!(grad.numel(), 4, "gradient must cover every input element");
        for &g in grad.data() {
            assert!(g.abs() < 1e-4, "grad {g} should be ~0 for sum-of-softmax");
        }
    }
}