#![cfg(feature = "gpu-cuda")]
use echidna::gpu::{CudaContext, GpuBackend, GpuTapeData};
use echidna::{record, BReverse};
use num_traits::Float;
/// Returns a CUDA context, or `None` when one cannot be created
/// (presumably no CUDA device/driver present — depends on
/// `CudaContext::new`, which is defined outside this file).
/// Every test below treats `None` as "skip silently".
fn cuda_context() -> Option<CudaContext> {
CudaContext::new()
}
/// Gradient of atan2(y, x) at y = x = 1e20 must stay finite and nonzero.
///
/// A naive d/dy = x / (x^2 + y^2) overflows x^2 + y^2 to inf in f32 at
/// these magnitudes, driving the gradient to 0 (or NaN); the true value
/// is ~5e-21, which f32 can represent. Skips when no CUDA device exists.
#[test]
fn cuda_atan2_large_magnitudes_gradient_finite() {
    let ctx = match cuda_context() {
        Some(c) => c,
        None => return,
    };

    // Record the op at a benign point; the large inputs are only used at
    // evaluation time on the GPU.
    let x0 = [1.0_f32, 1.0_f32];
    let (tape, _) = record(|v: &[BReverse<f32>]| v[0].atan2(v[1]), &x0);

    // One statement per line (the original crammed two onto one).
    let large = 1e20_f32;
    let gpu_data = GpuTapeData::from_tape(&tape).unwrap();
    let gpu_tape = ctx.upload_tape(&gpu_data);
    let (_, g) = ctx.gradient_batch(&gpu_tape, &[large, large], 1).unwrap();

    assert!(
        g[0].is_finite(),
        "d/dy atan2 should be finite; got {}",
        g[0]
    );
    assert!(
        g[1].is_finite(),
        "d/dx atan2 should be finite; got {}",
        g[1]
    );
    assert!(g[0] != 0.0, "d/dy atan2 underflowed to zero");
    assert!(g[1] != 0.0, "d/dx atan2 underflowed to zero");
}
/// d/dx asinh(x) = 1/sqrt(x^2 + 1) ≈ 1/x for x >> 1, so at x = 1e20 the
/// derivative should come back as ≈ 1e-20 — finite and not overflowed by
/// an x^2 intermediate. Skips silently when no CUDA device is available.
#[test]
fn cuda_asinh_large_derivative_finite() {
    let ctx = if let Some(c) = cuda_context() { c } else { return };

    // Record asinh at a benign point, then replay on the GPU at 1e20.
    let seed = [1.0_f32];
    let (tape, _) = record(|v: &[BReverse<f32>]| v[0].asinh(), &seed);
    let gpu_data = GpuTapeData::from_tape(&tape).unwrap();
    let gpu_tape = ctx.upload_tape(&gpu_data);

    let (_, g) = ctx.gradient_batch(&gpu_tape, &[1e20_f32], 1).unwrap();

    assert!(g[0].is_finite(), "asinh derivative should be finite");
    let rel_err = (g[0] as f64 - 1e-20).abs() / 1e-20;
    assert!(rel_err < 1e-5, "g[0] = {}, expected ≈ 1e-20", g[0]);
}
/// d/dx acosh(x) = 1/sqrt(x^2 - 1) ≈ 1/x for x >> 1, so at x = 1e20 the
/// derivative should be ≈ 1e-20 rather than 0/NaN from an overflowing
/// x^2 intermediate. Skips silently when no CUDA device is available.
#[test]
fn cuda_acosh_large_derivative_finite() {
    let ctx = if let Some(c) = cuda_context() { c } else { return };

    // Record at a valid point in acosh's domain (x > 1).
    let seed = [2.0_f32];
    let (tape, _) = record(|v: &[BReverse<f32>]| v[0].acosh(), &seed);
    let gpu_tape = ctx.upload_tape(&GpuTapeData::from_tape(&tape).unwrap());

    let (_, g) = ctx.gradient_batch(&gpu_tape, &[1e20_f32], 1).unwrap();

    assert!(g[0].is_finite(), "acosh derivative should be finite");
    let rel_err = (g[0] as f64 - 1e-20).abs() / 1e-20;
    assert!(rel_err < 1e-5, "g[0] = {}, expected ≈ 1e-20", g[0]);
}
/// powf with a negative base and an exact integer exponent: (-2)^3 has a
/// well-defined real value, and d/dx x^3 = 3x^2 = 12 at x = -2. A kernel
/// that computes powf via exp(y * ln(x)) would produce NaN here. Skips
/// silently when no CUDA device is available.
#[test]
fn cuda_powf_negative_base_integer_exponent() {
    let ctx = if let Some(c) = cuda_context() { c } else { return };

    let base = [-2.0_f32];
    let cube = |v: &[BReverse<f32>]| v[0].powf(BReverse::constant(3.0));
    let (tape, _) = record(cube, &base);

    let gpu_tape = ctx.upload_tape(&GpuTapeData::from_tape(&tape).unwrap());
    let (_, g) = ctx.gradient_batch(&gpu_tape, &base, 1).unwrap();

    assert!(g[0].is_finite(), "gradient must be finite (not NaN)");
    let rel_err = (g[0] as f64 - 12.0).abs() / 12.0;
    assert!(rel_err < 1e-5, "g[0] = {}, expected 12", g[0]);
}
/// max(1.5, NaN): the test expects the full adjoint (1.0) to flow to the
/// non-NaN operand and none (0.0) to the NaN one — i.e. NaN-ignoring
/// max semantics in the backward pass. Skips silently without a GPU.
#[test]
fn cuda_max_with_nan_operand() {
    let ctx = if let Some(c) = cuda_context() { c } else { return };

    // Record and evaluate at the same (value, NaN) pair.
    let inputs = [1.5_f32, f32::NAN];
    let (tape, _) = record(|v: &[BReverse<f32>]| v[0].max(v[1]), &inputs);
    let gpu_tape = ctx.upload_tape(&GpuTapeData::from_tape(&tape).unwrap());

    let (_, g) = ctx.gradient_batch(&gpu_tape, &inputs, 1).unwrap();

    assert_eq!(g[0], 1.0, "adjoint should route to non-NaN operand");
    assert_eq!(g[1], 0.0, "adjoint to NaN operand should be 0");
}
/// GPU fract must match Rust's CPU semantics for negative inputs:
/// `fract` truncates toward zero, so (-1.3).fract() == -0.3 (negative),
/// unlike a floor-based fract which would yield +0.7. Skips silently
/// when no CUDA device is available.
#[test]
fn cuda_fract_negative_input_matches_cpu() {
    let ctx = match cuda_context() {
        Some(c) => c,
        None => return,
    };

    let x0 = [-1.3_f32];
    let (tape, _) = record(|v: &[BReverse<f32>]| v[0].fract(), &x0);
    let gpu_data = GpuTapeData::from_tape(&tape).unwrap();
    let gpu_tape = ctx.upload_tape(&gpu_data);
    let out = ctx.forward_batch(&gpu_tape, &[-1.3_f32], 1).unwrap();

    // Parenthesized on purpose: the original `-1.3_f32.fract()` parses as
    // `-(1.3_f32.fract())` (unary minus binds looser than a method call).
    // The value coincides only because fract truncates toward zero; this
    // form states the actual intent of "fract of a negative number".
    let expected = (-1.3_f32).fract();
    assert!(
        (out[0] - expected).abs() < 1e-6,
        "fract(-1.3) on CUDA = {}, expected ≈ {}",
        out[0],
        expected,
    );
    assert!(
        out[0] < 0.0,
        "GPU fract should be negative for negative input"
    );
}