use super::*;
pub(crate) use crate::autograd::{clear_graph, no_grad};
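/// Finite-difference gradient estimate of `f` at `x`, using the central difference
/// `(f(x + eps*e_i) - f(x - eps*e_i)) / (2*eps)` for each element `i`.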
#[allow(dead_code)]
pub(super) fn numerical_gradient<F>(f: F, x: &Tensor, eps: f32) -> Tensor
where
    F: Fn(&Tensor) -> Tensor,
{
    let mut grad_data = vec![0.0; x.numel()];
    for i in 0..x.numel() {
        let mut x_plus = x.data().to_vec();
        let mut x_minus = x.data().to_vec();
        x_plus[i] += eps;
        x_minus[i] -= eps;
        let y_plus = no_grad(|| f(&Tensor::new(&x_plus, x.shape())).item());
        let y_minus = no_grad(|| f(&Tensor::new(&x_minus, x.shape())).item());
        grad_data[i] = (y_plus - y_minus) / (2.0 * eps);
    }
    Tensor::new(&grad_data, x.shape())
}
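/// Runs `f` through the autograd graph and compares the analytical gradient of `x`
/// against the central-difference estimate from `numerical_gradient`; returns `true`
/// when the largest absolute element-wise difference is below `tol`.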
#[allow(dead_code)]
pub(super) fn check_gradient<F>(f: F, x: &Tensor, eps: f32, tol: f32) -> bool
where
    F: Fn(&Tensor) -> Tensor,
{
    clear_graph();
    let x_grad = x.clone().requires_grad();
    let x_id = x_grad.id();
    let y = f(&x_grad);
    y.backward();
    let analytical = crate::autograd::get_grad(x_id).expect("No gradient computed");
    let numerical = numerical_gradient(&f, x, eps);
    let max_diff: f32 = analytical
        .data()
        .iter()
        .zip(numerical.data().iter())
        .map(|(a, n)| (a - n).abs())
        .fold(0.0, f32::max);
    max_diff < tol
}
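// Example sketch of using the helpers above on a composite expression; the test name
// and the eps/tol values are assumptions and may need tuning for f32 precision.
#[test]
fn test_check_gradient_composite() {
    // f(x) = sum((3x)^2) = sum(9x^2), so the analytical gradient is 18x.
    let x = Tensor::from_slice(&[0.5, 1.5, -2.0]);
    assert!(check_gradient(
        |t| t.clone().mul_scalar(3.0).pow(2.0).sum(),
        &x,
        1e-2,
        1e-2
    ));
}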
#[test]
fn test_simple_sum_gradient() {
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0]).requires_grad();
    let x_id = x.id();
    let y = x.sum();
    y.backward();
    let grad = crate::autograd::get_grad(x_id).expect("Gradient should exist");
    assert_eq!(grad.data(), &[1.0, 1.0, 1.0]);
}
#[test]
fn test_add_gradient() {
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0]).requires_grad();
    let y = Tensor::from_slice(&[4.0, 5.0, 6.0]);
    let x_id = x.id();
    let z = x.add(&y).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("Should have gradient");
    assert_eq!(grad.data(), &[1.0, 1.0, 1.0]);
}
#[test]
fn test_mul_gradient() {
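    // d/dx_i sum(x * y) = y_i, so the gradient of x equals y.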
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0]).requires_grad();
    let y = Tensor::from_slice(&[4.0, 5.0, 6.0]);
    let x_id = x.id();
    let z = x.mul(&y).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[4.0, 5.0, 6.0]);
}
#[test]
fn test_exp_gradient() {
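    // d/dx exp(x) = exp(x).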
    clear_graph();
    let x = Tensor::from_slice(&[0.0, 1.0, -1.0]).requires_grad();
    let x_id = x.id();
    let z = x.exp().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    for (g, &v) in grad.data().iter().zip(&[0.0_f32, 1.0, -1.0]) {
        assert!((g - v.exp()).abs() < 1e-5);
    }
}
#[test]
fn test_log_gradient() {
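    // d/dx ln(x) = 1 / x, giving [1.0, 0.5, 0.25].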
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 4.0]).requires_grad();
    let x_id = x.id();
    let z = x.log().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[1.0, 0.5, 0.25]);
}
#[test]
fn test_pow_gradient() {
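    // d/dx x^2 = 2x.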
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0]).requires_grad();
    let x_id = x.id();
    let z = x.pow(2.0).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[2.0, 4.0, 6.0]);
}
#[test]
fn test_relu_gradient() {
    clear_graph();
    let x = Tensor::from_slice(&[-1.0, 0.5, 2.0]).requires_grad();
    let x_id = x.id();
    let z = x.relu().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[0.0, 1.0, 1.0]);
}
#[test]
fn test_sigmoid_gradient() {
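    // sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)), which is 0.25 at x = 0.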
    clear_graph();
    let x = Tensor::from_slice(&[0.0]).requires_grad();
    let x_id = x.id();
    let z = x.sigmoid().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 0.25).abs() < 1e-5);
}
#[test]
fn test_mean_gradient() {
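    // d/dx_i mean(x) = 1 / n; here n = 4.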
    clear_graph();
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0]).requires_grad();
    let x_id = x.id();
    let z = x.mean();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[0.25, 0.25, 0.25, 0.25]);
}
#[test]
fn test_chain_gradient() {
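    // y = sum((2x)^2) = sum(4x^2), so dy/dx = 8x = [8, 16, 24].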
    let x = Tensor::from_slice(&[1.0, 2.0, 3.0]);
    clear_graph();
    let x_grad = x.clone().requires_grad();
    let x_id = x_grad.id();
    let y = x_grad.mul_scalar(2.0).pow(2.0).sum();
    y.backward();
    let grad = crate::autograd::get_grad(x_id).expect("No gradient");
    let expected = [8.0, 16.0, 24.0];
    for (g, e) in grad.data().iter().zip(expected.iter()) {
        assert!((g - e).abs() < 1e-3, "Expected {e}, got {g}");
    }
}
#[test]
fn test_matmul_forward() {
    let a = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]);
    let b = Tensor::new(&[5.0, 6.0, 7.0, 8.0], &[2, 2]);
    let c = a.matmul(&b);
    assert_eq!(c.shape(), &[2, 2]);
    assert_eq!(c.data(), &[19.0, 22.0, 43.0, 50.0]);
}
#[test]
fn test_tanh_gradient() {
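    // tanh'(x) = 1 - tanh^2(x), which is 1 at x = 0.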
    clear_graph();
    let x = Tensor::from_slice(&[0.0]).requires_grad();
    let x_id = x.id();
    let z = x.tanh_().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 1.0).abs() < 1e-5);
}
#[test]
fn test_sqrt_gradient() {
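    // d/dx sqrt(x) = 1 / (2 * sqrt(x)), which is 0.25 at x = 4.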
    clear_graph();
    let x = Tensor::from_slice(&[4.0]).requires_grad();
    let x_id = x.id();
    let z = x.sqrt().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 0.25).abs() < 1e-5);
}
#[test]
fn test_matmul_backward() {
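    // For L = sum(A * B): dL/dA = ones * B^T and dL/dB = A^T * ones, where ones is the all-ones matrix.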
    clear_graph();
    let a = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]).requires_grad();
    let b = Tensor::new(&[1.0, 0.0, 0.0, 1.0], &[2, 2]).requires_grad();
    let a_id = a.id();
    let b_id = b.id();
    let c = a.matmul(&b);
    let loss = c.sum();
    loss.backward();
    let grad_a = crate::autograd::get_grad(a_id).expect("grad_a");
    let grad_b = crate::autograd::get_grad(b_id).expect("grad_b");
    assert_eq!(grad_a.data(), &[1.0, 1.0, 1.0, 1.0]);
    assert_eq!(grad_b.data(), &[4.0, 4.0, 6.0, 6.0]);
}
#[test]
fn test_div_gradient() {
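    // d/dx (x / y) = 1 / y and d/dy (x / y) = -x / y^2; at (6, 2) these are 0.5 and -1.5.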
    clear_graph();
    let x = Tensor::from_slice(&[6.0]).requires_grad();
    let y = Tensor::from_slice(&[2.0]).requires_grad();
    let x_id = x.id();
    let y_id = y.id();
    let z = x.div(&y).sum();
    z.backward();
    let grad_x = crate::autograd::get_grad(x_id).expect("grad_x");
    let grad_y = crate::autograd::get_grad(y_id).expect("grad_y");
    assert!((grad_x.data()[0] - 0.5).abs() < 1e-5);
    assert!((grad_y.data()[0] - (-1.5)).abs() < 1e-5);
}
#[test]
fn test_neg_gradient() {
    clear_graph();
    let x = Tensor::from_slice(&[3.0]).requires_grad();
    let x_id = x.id();
    let z = x.neg().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data()[0], -1.0);
}
#[test]
fn test_pow_gradient_cubic() {
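    // d/dx x^3 = 3x^2, which is 12 at x = 2.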
    clear_graph();
    let x = Tensor::from_slice(&[2.0]).requires_grad();
    let x_id = x.id();
    let z = x.pow(3.0).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 12.0).abs() < 1e-5);
}
#[test]
fn test_exp_gradient_e() {
    clear_graph();
    let x = Tensor::from_slice(&[1.0]).requires_grad();
    let x_id = x.id();
    let z = x.exp().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - std::f32::consts::E).abs() < 1e-4);
}
#[test]
fn test_log_gradient_half() {
    clear_graph();
    let x = Tensor::from_slice(&[2.0]).requires_grad();
    let x_id = x.id();
    let z = x.log().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 0.5).abs() < 1e-5);
}
#[test]
fn test_sub_gradient() {
    clear_graph();
    let x = Tensor::from_slice(&[5.0, 3.0]).requires_grad();
    let y = Tensor::from_slice(&[2.0, 1.0]).requires_grad();
    let x_id = x.id();
    let y_id = y.id();
    let z = x.sub(&y).sum();
    z.backward();
    let grad_x = crate::autograd::get_grad(x_id).expect("grad_x");
    let grad_y = crate::autograd::get_grad(y_id).expect("grad_y");
    assert_eq!(grad_x.data(), &[1.0, 1.0]);
    assert_eq!(grad_y.data(), &[-1.0, -1.0]);
}
#[test]
fn test_leaky_relu_gradient_positive() {
    clear_graph();
    let x = Tensor::from_slice(&[2.0, 3.0]).requires_grad();
    let x_id = x.id();
    let z = x.leaky_relu(0.01).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.data(), &[1.0, 1.0]);
}
#[test]
fn test_leaky_relu_gradient_negative() {
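    // leaky_relu'(x) = negative_slope for x < 0.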
    clear_graph();
    let x = Tensor::from_slice(&[-2.0, -3.0]).requires_grad();
    let x_id = x.id();
    let negative_slope = 0.1;
    let z = x.leaky_relu(negative_slope).sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    for &g in grad.data() {
        assert!((g - negative_slope).abs() < 1e-5);
    }
}
#[test]
fn test_gelu_forward() {
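    // Reference values: GELU(0) = 0, GELU(1) ≈ 0.841, GELU(-1) ≈ -0.159.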
    let x = Tensor::from_slice(&[0.0, 1.0, -1.0]);
    let y = x.gelu();
    assert!((y.data()[0] - 0.0).abs() < 1e-3);
    assert!((y.data()[1] - 0.841).abs() < 0.01);
    assert!((y.data()[2] - (-0.159)).abs() < 0.01);
}
#[test]
fn test_gelu_gradient() {
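    // GELU'(0) = 0.5.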
    clear_graph();
    let x = Tensor::from_slice(&[0.0]).requires_grad();
    let x_id = x.id();
    let z = x.gelu().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert!((grad.data()[0] - 0.5).abs() < 0.01);
}
#[test]
fn test_softmax_forward() {
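    // Softmax is applied row-wise, so each row of the 2x2 output sums to 1.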
    let x = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]);
    let y = x.softmax();
    let row1_sum: f32 = y.data()[0..2].iter().sum();
    let row2_sum: f32 = y.data()[2..4].iter().sum();
    assert!((row1_sum - 1.0).abs() < 1e-5);
    assert!((row2_sum - 1.0).abs() < 1e-5);
}
#[test]
fn test_softmax_gradient() {
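    // Softmax rows always sum to 1, so d/dx sum(softmax(x)) is zero up to floating-point
    // error; this test only checks that the gradient has the right number of elements.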
    clear_graph();
    let x = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]).requires_grad();
    let x_id = x.id();
    let z = x.softmax().sum();
    z.backward();
    let grad = crate::autograd::get_grad(x_id).expect("grad");
    assert_eq!(grad.numel(), 4);
}
#[test]
fn test_transpose_forward() {
    let a = Tensor::new(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3]);
    let a_t = a.transpose();
    assert_eq!(a_t.shape(), &[3, 2]);
    assert_eq!(a_t.data(), &[1.0, 4.0, 2.0, 5.0, 3.0, 6.0]);
}
#[test]
fn test_transpose_gradient() {
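    // Transpose only permutes elements, so d/dA sum(A^T) is all ones.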
    clear_graph();
    let a = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]).requires_grad();
    let a_id = a.id();
    let z = a.transpose().sum();
    z.backward();
    let grad = crate::autograd::get_grad(a_id).expect("grad");
    assert_eq!(grad.data(), &[1.0, 1.0, 1.0, 1.0]);
}
#[test]
fn test_broadcast_add_forward() {
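    // The [2]-shaped bias is broadcast across each row of the [2, 2] matrix: every row gains [10, 20].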
    let matrix = Tensor::new(&[1.0, 2.0, 3.0, 4.0], &[2, 2]);
    let bias = Tensor::new(&[10.0, 20.0], &[2]);
    let result = matrix.broadcast_add(&bias);
    assert_eq!(result.data(), &[11.0, 22.0, 13.0, 24.0]);
}
#[path = "tests_broadcast_ops.rs"]
mod tests_broadcast_ops;