use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::Float;
use std::fmt;
use std::ops::{Add, Div, Mul, Neg, Sub};
use super::{Dual, Tape, Var};
fn float_const<T: Float>(val: f64) -> Result<T> {
T::from(val).ok_or_else(|| {
NumRs2Error::NumericalError(format!("Cannot represent {} in target float type", val))
})
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HyperDual<T> {
real: T,
eps1: T,
eps2: T,
eps1eps2: T,
}
impl<T: Float> HyperDual<T> {
#[inline]
pub fn new(real: T, eps1: T, eps2: T, eps1eps2: T) -> Self {
Self {
real,
eps1,
eps2,
eps1eps2,
}
}
#[inline]
pub fn constant(value: T) -> Self {
Self::new(value, T::zero(), T::zero(), T::zero())
}
#[inline]
pub fn make_variable(value: T, is_dir_i: bool, is_dir_j: bool) -> Self {
Self::new(
value,
if is_dir_i { T::one() } else { T::zero() },
if is_dir_j { T::one() } else { T::zero() },
T::zero(),
)
}
#[inline]
pub fn real(&self) -> T {
self.real
}
#[inline]
pub fn eps1(&self) -> T {
self.eps1
}
#[inline]
pub fn eps2(&self) -> T {
self.eps2
}
#[inline]
pub fn eps1eps2(&self) -> T {
self.eps1eps2
}
#[inline]
pub fn scale(self, s: T) -> Self {
Self::new(
self.real * s,
self.eps1 * s,
self.eps2 * s,
self.eps1eps2 * s,
)
}
pub fn powf(self, n: T) -> Self {
let a = self.real;
let gp = n * a.powf(n - T::one());
let gpp = n * (n - T::one()) * a.powf(n - T::one() - T::one());
Self::new(
a.powf(n),
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn powi(self, n: i32) -> Self {
match n {
0 => Self::constant(T::one()),
1 => self,
_ if n < 0 => Self::constant(T::one()) / self.powi(-n),
_ => {
let half = self.powi(n / 2);
if n % 2 == 0 {
half * half
} else {
half * half * self
}
}
}
}
pub fn exp(self) -> Self {
let ea = self.real.exp();
Self::new(
ea,
ea * self.eps1,
ea * self.eps2,
ea * (self.eps1 * self.eps2 + self.eps1eps2),
)
}
pub fn ln(self) -> Self {
let a = self.real;
let gp = T::one() / a;
let gpp = -T::one() / (a * a);
Self::new(
a.ln(),
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn sin(self) -> Self {
let sin_a = self.real.sin();
let cos_a = self.real.cos();
Self::new(
sin_a,
cos_a * self.eps1,
cos_a * self.eps2,
-sin_a * self.eps1 * self.eps2 + cos_a * self.eps1eps2,
)
}
pub fn cos(self) -> Self {
let sin_a = self.real.sin();
let cos_a = self.real.cos();
Self::new(
cos_a,
-sin_a * self.eps1,
-sin_a * self.eps2,
-cos_a * self.eps1 * self.eps2 - sin_a * self.eps1eps2,
)
}
pub fn tan(self) -> Self {
let tan_a = self.real.tan();
let cos_a = self.real.cos();
let sec2 = T::one() / (cos_a * cos_a);
let two = T::one() + T::one();
let gp = sec2;
let gpp = two * tan_a * sec2;
Self::new(
tan_a,
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn sinh(self) -> Self {
let sinh_a = self.real.sinh();
let cosh_a = self.real.cosh();
Self::new(
sinh_a,
cosh_a * self.eps1,
cosh_a * self.eps2,
sinh_a * self.eps1 * self.eps2 + cosh_a * self.eps1eps2,
)
}
pub fn cosh(self) -> Self {
let sinh_a = self.real.sinh();
let cosh_a = self.real.cosh();
Self::new(
cosh_a,
sinh_a * self.eps1,
sinh_a * self.eps2,
cosh_a * self.eps1 * self.eps2 + sinh_a * self.eps1eps2,
)
}
pub fn tanh(self) -> Self {
let tanh_a = self.real.tanh();
let sech2 = T::one() - tanh_a * tanh_a;
let two = T::one() + T::one();
let gp = sech2;
let gpp = -two * tanh_a * sech2;
Self::new(
tanh_a,
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn sqrt(self) -> Self {
let a = self.real;
let sqrt_a = a.sqrt();
let two = T::one() + T::one();
let four = two * two;
let gp = T::one() / (two * sqrt_a);
let gpp = -T::one() / (four * a * sqrt_a);
Self::new(
sqrt_a,
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn abs(self) -> Self {
if self.real >= T::zero() {
self
} else {
-self
}
}
pub fn sigmoid(self) -> Self {
let a = self.real;
let s = T::one() / (T::one() + (-a).exp());
let two = T::one() + T::one();
let gp = s * (T::one() - s);
let gpp = gp * (T::one() - two * s);
Self::new(
s,
gp * self.eps1,
gp * self.eps2,
gpp * self.eps1 * self.eps2 + gp * self.eps1eps2,
)
}
pub fn relu(self) -> Self {
if self.real > T::zero() {
self
} else {
Self::constant(T::zero())
}
}
}
impl<T: Float> Add for HyperDual<T> {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self::Output {
Self::new(
self.real + rhs.real,
self.eps1 + rhs.eps1,
self.eps2 + rhs.eps2,
self.eps1eps2 + rhs.eps1eps2,
)
}
}
impl<T: Float> Sub for HyperDual<T> {
type Output = Self;
#[inline]
fn sub(self, rhs: Self) -> Self::Output {
Self::new(
self.real - rhs.real,
self.eps1 - rhs.eps1,
self.eps2 - rhs.eps2,
self.eps1eps2 - rhs.eps1eps2,
)
}
}
impl<T: Float> Mul for HyperDual<T> {
type Output = Self;
#[inline]
fn mul(self, rhs: Self) -> Self::Output {
Self::new(
self.real * rhs.real,
self.real * rhs.eps1 + self.eps1 * rhs.real,
self.real * rhs.eps2 + self.eps2 * rhs.real,
self.real * rhs.eps1eps2
+ self.eps1 * rhs.eps2
+ self.eps2 * rhs.eps1
+ self.eps1eps2 * rhs.real,
)
}
}
impl<T: Float> Div for HyperDual<T> {
type Output = Self;
#[inline]
fn div(self, rhs: Self) -> Self::Output {
let (a, b, c, d) = (self.real, self.eps1, self.eps2, self.eps1eps2);
let (e, f, g, h) = (rhs.real, rhs.eps1, rhs.eps2, rhs.eps1eps2);
let e2 = e * e;
let e3 = e2 * e;
let two = T::one() + T::one();
Self::new(
a / e,
(b * e - a * f) / e2,
(c * e - a * g) / e2,
(d * e2 - a * h * e - b * g * e - c * f * e + two * a * f * g) / e3,
)
}
}
impl<T: Float> Neg for HyperDual<T> {
type Output = Self;
#[inline]
fn neg(self) -> Self::Output {
Self::new(-self.real, -self.eps1, -self.eps2, -self.eps1eps2)
}
}
impl<T: Float + fmt::Display> fmt::Display for HyperDual<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"{} + {}*e1 + {}*e2 + {}*e1e2",
self.real, self.eps1, self.eps2, self.eps1eps2
)
}
}
pub fn forward_jacobian<F, T>(f: F, x: &[T]) -> Result<Array<T>>
where
F: Fn(&[Dual<T>]) -> Vec<Dual<T>>,
T: Float,
{
let n = x.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Input vector must be non-empty".to_string(),
));
}
let probe_input: Vec<Dual<T>> = x.iter().map(|&v| Dual::variable(v)).collect();
let probe_output = f(&probe_input);
let m = probe_output.len();
if m == 0 {
return Err(NumRs2Error::InvalidInput(
"Output vector must be non-empty".to_string(),
));
}
let mut jac_data = vec![T::zero(); m * n];
for j in 0..n {
let dual_input: Vec<Dual<T>> = (0..n)
.map(|k| {
let deriv = if k == j { T::one() } else { T::zero() };
Dual::new(x[k], deriv)
})
.collect();
let output = f(&dual_input);
for i in 0..m {
jac_data[i * n + j] = output[i].deriv();
}
}
Ok(Array::from_vec(jac_data).reshape(&[m, n]))
}
pub fn reverse_jacobian<F, T>(f: F, x: &[T]) -> Result<Array<T>>
where
F: Fn(&mut Tape<T>, &[Var]) -> Vec<Var>,
T: Float,
{
let n = x.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Input vector must be non-empty".to_string(),
));
}
let mut tape = Tape::new();
let vars: Vec<Var> = x.iter().map(|&v| tape.var(v)).collect();
let outputs = f(&mut tape, &vars);
let m = outputs.len();
if m == 0 {
return Err(NumRs2Error::InvalidInput(
"Output vector must be non-empty".to_string(),
));
}
let mut jac_data = vec![T::zero(); m * n];
for i in 0..m {
tape.zero_grad();
tape.backward(outputs[i]);
for j in 0..n {
jac_data[i * n + j] = tape.grad(vars[j]);
}
}
Ok(Array::from_vec(jac_data).reshape(&[m, n]))
}
pub fn jacobian_auto<F, T>(f: F, x: &[T]) -> Result<Array<T>>
where
F: Fn(&[Dual<T>]) -> Vec<Dual<T>>,
T: Float,
{
forward_jacobian(f, x)
}
pub fn hessian_exact<F, T>(f: F, x: &[T]) -> Result<Array<T>>
where
F: Fn(&[HyperDual<T>]) -> HyperDual<T>,
T: Float,
{
let n = x.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Input vector must be non-empty for Hessian computation".to_string(),
));
}
let mut hess_data = vec![T::zero(); n * n];
for i in 0..n {
for j in i..n {
let inputs: Vec<HyperDual<T>> = (0..n)
.map(|k| HyperDual::make_variable(x[k], k == i, k == j))
.collect();
let result = f(&inputs);
let h_ij = result.eps1eps2();
hess_data[i * n + j] = h_ij;
hess_data[j * n + i] = h_ij;
}
}
Ok(Array::from_vec(hess_data).reshape(&[n, n]))
}
pub fn hessian_vector_product<F, T>(f: F, x: &[T], v: &[T]) -> Result<Vec<T>>
where
F: Fn(&[HyperDual<T>]) -> HyperDual<T>,
T: Float,
{
let n = x.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Input vector must be non-empty".to_string(),
));
}
if v.len() != n {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![n],
actual: vec![v.len()],
});
}
let mut hv = Vec::with_capacity(n);
for i in 0..n {
let inputs: Vec<HyperDual<T>> = (0..n)
.map(|k| {
HyperDual::new(
x[k],
if k == i { T::one() } else { T::zero() },
v[k],
T::zero(),
)
})
.collect();
let result = f(&inputs);
hv.push(result.eps1eps2());
}
Ok(hv)
}
pub fn hessian_forward_over_reverse<F, T>(f: F, x: &[T]) -> Result<Array<T>>
where
F: Fn(&mut Tape<T>, &[Var]) -> Var,
T: Float,
{
let n = x.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Input vector must be non-empty for Hessian computation".to_string(),
));
}
let eps = T::epsilon().sqrt();
let two = T::one() + T::one();
let compute_gradient = |point: &[T]| -> Vec<T> {
let mut tape = Tape::new();
let vars: Vec<Var> = point.iter().map(|&v| tape.var(v)).collect();
let output = f(&mut tape, &vars);
tape.backward(output);
vars.iter().map(|&v| tape.grad(v)).collect()
};
let mut hess_data = Vec::with_capacity(n * n);
for i in 0..n {
let mut x_plus = x.to_vec();
let mut x_minus = x.to_vec();
x_plus[i] = x_plus[i] + eps;
x_minus[i] = x_minus[i] - eps;
let grad_plus = compute_gradient(&x_plus);
let grad_minus = compute_gradient(&x_minus);
for j in 0..n {
let h_ij = (grad_plus[j] - grad_minus[j]) / (two * eps);
hess_data.push(h_ij);
}
}
Ok(Array::from_vec(hess_data).reshape(&[n, n]))
}
pub fn compute_gradient_ad<F, T>(f: F, x: &[T]) -> Result<Vec<T>>
where
F: Fn(&[Dual<T>]) -> Dual<T>,
T: Float,
{
let n = x.len();
let mut grad = Vec::with_capacity(n);
for i in 0..n {
let dual_input: Vec<Dual<T>> = (0..n)
.map(|k| {
let deriv = if k == i { T::one() } else { T::zero() };
Dual::new(x[k], deriv)
})
.collect();
let result = f(&dual_input);
grad.push(result.deriv());
}
Ok(grad)
}
pub fn compute_gradient_hyperdual<F, T>(f: &F, x: &[T]) -> Result<Vec<T>>
where
F: Fn(&[HyperDual<T>]) -> HyperDual<T>,
T: Float,
{
let n = x.len();
let mut grad = Vec::with_capacity(n);
for i in 0..n {
let inputs: Vec<HyperDual<T>> = (0..n)
.map(|k| {
HyperDual::new(
x[k],
if k == i { T::one() } else { T::zero() },
T::zero(),
T::zero(),
)
})
.collect();
let result = f(&inputs);
grad.push(result.eps1());
}
Ok(grad)
}
fn numerical_gradient_central<T: Float>(f: &dyn Fn(&[T]) -> T, x: &[T]) -> Result<Vec<T>> {
let n = x.len();
let eps = T::epsilon().sqrt();
let two = T::one() + T::one();
let mut grad = Vec::with_capacity(n);
for i in 0..n {
let mut x_plus = x.to_vec();
let mut x_minus = x.to_vec();
x_plus[i] = x_plus[i] + eps;
x_minus[i] = x_minus[i] - eps;
let f_plus = f(&x_plus);
let f_minus = f(&x_minus);
grad.push((f_plus - f_minus) / (two * eps));
}
Ok(grad)
}
#[derive(Debug, Clone)]
pub struct GradientCheckResult<T> {
pub max_abs_error: T,
pub max_rel_error: T,
pub passed: bool,
pub component_errors: Vec<T>,
pub analytical: Vec<T>,
pub numerical: Vec<T>,
}
pub fn gradient_check<F, T>(
f: F,
x: &[T],
analytical_gradient: &[T],
tolerance: T,
) -> Result<GradientCheckResult<T>>
where
F: Fn(&[T]) -> T,
T: Float,
{
let n = x.len();
if analytical_gradient.len() != n {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![n],
actual: vec![analytical_gradient.len()],
});
}
let numerical = numerical_gradient_central(&f, x)?;
let mut max_abs = T::zero();
let mut max_rel = T::zero();
let mut component_errors = Vec::with_capacity(n);
let threshold = T::epsilon() * float_const::<T>(100.0)?;
for i in 0..n {
let abs_err = (analytical_gradient[i] - numerical[i]).abs();
component_errors.push(abs_err);
if abs_err > max_abs {
max_abs = abs_err;
}
let max_mag = analytical_gradient[i].abs().max(numerical[i].abs());
let rel_err = if max_mag < threshold {
abs_err
} else {
abs_err / max_mag
};
if rel_err > max_rel {
max_rel = rel_err;
}
}
Ok(GradientCheckResult {
max_abs_error: max_abs,
max_rel_error: max_rel,
passed: max_rel < tolerance,
component_errors,
analytical: analytical_gradient.to_vec(),
numerical,
})
}
pub fn gradient_check_ad<F1, F2, T>(
f_ad: F1,
f_numerical: F2,
x: &[T],
tolerance: T,
) -> Result<GradientCheckResult<T>>
where
F1: Fn(&[Dual<T>]) -> Dual<T>,
F2: Fn(&[T]) -> T,
T: Float,
{
let ad_grad = compute_gradient_ad(f_ad, x)?;
gradient_check(f_numerical, x, &ad_grad, tolerance)
}
#[derive(Debug, Clone)]
pub struct TaylorExpansion2<T> {
pub center: Vec<T>,
pub value: T,
pub gradient: Vec<T>,
pub hessian_flat: Vec<T>,
pub dim: usize,
}
impl<T: Float> TaylorExpansion2<T> {
pub fn evaluate(&self, x: &[T]) -> Result<T> {
let n = self.dim;
if x.len() != n {
return Err(NumRs2Error::ShapeMismatch {
expected: vec![n],
actual: vec![x.len()],
});
}
let h: Vec<T> = (0..n).map(|i| x[i] - self.center[i]).collect();
let mut result = self.value;
for i in 0..n {
result = result + self.gradient[i] * h[i];
}
let two = T::one() + T::one();
for i in 0..n {
for j in 0..n {
result = result + self.hessian_flat[i * n + j] * h[i] * h[j] / two;
}
}
Ok(result)
}
pub fn hessian_element(&self, i: usize, j: usize) -> T {
self.hessian_flat[i * self.dim + j]
}
}
pub fn multivariate_taylor<F, T>(f: F, center: &[T]) -> Result<TaylorExpansion2<T>>
where
F: Fn(&[HyperDual<T>]) -> HyperDual<T>,
T: Float,
{
let n = center.len();
if n == 0 {
return Err(NumRs2Error::InvalidInput(
"Center point must be non-empty".to_string(),
));
}
let mut value = T::zero();
let mut gradient = vec![T::zero(); n];
let mut hessian_flat = vec![T::zero(); n * n];
for i in 0..n {
for j in i..n {
let inputs: Vec<HyperDual<T>> = (0..n)
.map(|k| HyperDual::make_variable(center[k], k == i, k == j))
.collect();
let result = f(&inputs);
if i == 0 && j == 0 {
value = result.real();
}
if i == j {
gradient[i] = result.eps1();
}
let h_ij = result.eps1eps2();
hessian_flat[i * n + j] = h_ij;
hessian_flat[j * n + i] = h_ij;
}
}
Ok(TaylorExpansion2 {
center: center.to_vec(),
value,
gradient,
hessian_flat,
dim: n,
})
}
#[cfg(test)]
mod tests {
use super::*;
const TOL: f64 = 1e-10;
const NUMERICAL_TOL: f64 = 1e-5;
#[test]
fn test_hyperdual_basic_arithmetic() {
let x = HyperDual::new(3.0, 1.0, 0.0, 0.0);
let y = HyperDual::new(4.0, 0.0, 1.0, 0.0);
let sum = x + y;
assert!((sum.real() - 7.0).abs() < TOL);
assert!((sum.eps1() - 1.0).abs() < TOL);
assert!((sum.eps2() - 1.0).abs() < TOL);
assert!((sum.eps1eps2() - 0.0).abs() < TOL);
let diff = x - y;
assert!((diff.real() - (-1.0)).abs() < TOL);
let prod = x * y;
assert!((prod.real() - 12.0).abs() < TOL);
assert!((prod.eps1() - 4.0).abs() < TOL);
assert!((prod.eps2() - 3.0).abs() < TOL);
assert!((prod.eps1eps2() - 1.0).abs() < TOL);
let neg = -x;
assert!((neg.real() - (-3.0)).abs() < TOL);
assert!((neg.eps1() - (-1.0)).abs() < TOL);
}
#[test]
fn test_hyperdual_division() {
let x = HyperDual::new(6.0, 1.0, 0.0, 0.0);
let y = HyperDual::new(3.0, 0.0, 1.0, 0.0);
let quot = x / y;
assert!((quot.real() - 2.0).abs() < TOL);
assert!((quot.eps1() - 1.0 / 3.0).abs() < TOL);
assert!((quot.eps2() - (-2.0 / 3.0)).abs() < TOL);
assert!((quot.eps1eps2() - (-1.0 / 9.0)).abs() < TOL);
}
#[test]
fn test_hyperdual_exp() {
let x = HyperDual::new(2.0, 1.0, 1.0, 0.0);
let result = x.exp();
let e2 = 2.0_f64.exp();
assert!((result.real() - e2).abs() < TOL);
assert!((result.eps1() - e2).abs() < TOL);
assert!((result.eps2() - e2).abs() < TOL);
assert!((result.eps1eps2() - e2).abs() < TOL);
}
#[test]
fn test_hyperdual_sin_cos() {
let val = 1.0_f64;
let x = HyperDual::new(val, 1.0, 1.0, 0.0);
let sin_result = x.sin();
assert!((sin_result.real() - val.sin()).abs() < TOL);
assert!((sin_result.eps1() - val.cos()).abs() < TOL);
assert!((sin_result.eps1eps2() - (-val.sin())).abs() < TOL);
let cos_result = x.cos();
assert!((cos_result.real() - val.cos()).abs() < TOL);
assert!((cos_result.eps1() - (-val.sin())).abs() < TOL);
assert!((cos_result.eps1eps2() - (-val.cos())).abs() < TOL);
}
#[test]
fn test_hyperdual_chain_rule() {
let val = 1.5_f64;
let x = HyperDual::new(val, 1.0, 1.0, 0.0);
let x_sq = x * x;
let result = x_sq.sin();
let x2 = val * val;
let expected_val = x2.sin();
let expected_deriv = x2.cos() * 2.0 * val;
let expected_second = -4.0 * x2 * x2.sin() + 2.0 * x2.cos();
assert!((result.real() - expected_val).abs() < TOL);
assert!((result.eps1() - expected_deriv).abs() < TOL);
assert!((result.eps1eps2() - expected_second).abs() < TOL);
}
#[test]
fn test_hyperdual_powi() {
let x = HyperDual::new(-2.0, 1.0, 1.0, 0.0);
let result = x.powi(3);
assert!((result.real() - (-8.0)).abs() < TOL);
assert!((result.eps1() - 12.0).abs() < TOL);
assert!((result.eps1eps2() - (-12.0)).abs() < TOL);
}
#[test]
fn test_forward_jacobian_linear() {
let f = |vars: &[Dual<f64>]| -> Vec<Dual<f64>> {
let two = Dual::constant(2.0);
let three = Dual::constant(3.0);
let four = Dual::constant(4.0);
vec![vars[0] + two * vars[1], three * vars[0] + four * vars[1]]
};
let jac = forward_jacobian(f, &[1.0, 1.0]).expect("forward_jacobian should succeed");
let jac_vec = jac.to_vec();
assert!((jac_vec[0] - 1.0).abs() < TOL); assert!((jac_vec[1] - 2.0).abs() < TOL); assert!((jac_vec[2] - 3.0).abs() < TOL); assert!((jac_vec[3] - 4.0).abs() < TOL); }
#[test]
fn test_forward_jacobian_quadratic() {
let f =
|vars: &[Dual<f64>]| -> Vec<Dual<f64>> { vec![vars[0] * vars[0], vars[0] * vars[1]] };
let jac = forward_jacobian(f, &[2.0, 3.0]).expect("forward_jacobian should succeed");
let jac_vec = jac.to_vec();
assert!((jac_vec[0] - 4.0).abs() < TOL); assert!((jac_vec[1] - 0.0).abs() < TOL); assert!((jac_vec[2] - 3.0).abs() < TOL); assert!((jac_vec[3] - 2.0).abs() < TOL); }
#[test]
fn test_reverse_jacobian_linear() {
let jac = reverse_jacobian(
|tape: &mut Tape<f64>, vars: &[Var]| {
let two = tape.var(2.0);
let three = tape.var(3.0);
let four = tape.var(4.0);
let two_y = tape.mul(two, vars[1]);
let out1 = tape.add(vars[0], two_y);
let three_x = tape.mul(three, vars[0]);
let four_y = tape.mul(four, vars[1]);
let out2 = tape.add(three_x, four_y);
vec![out1, out2]
},
&[1.0, 1.0],
)
.expect("reverse_jacobian should succeed");
let jac_vec = jac.to_vec();
assert!((jac_vec[0] - 1.0).abs() < TOL);
assert!((jac_vec[1] - 2.0).abs() < TOL);
assert!((jac_vec[2] - 3.0).abs() < TOL);
assert!((jac_vec[3] - 4.0).abs() < TOL);
}
#[test]
fn test_reverse_jacobian_multi_output() {
let jac = reverse_jacobian(
|tape: &mut Tape<f64>, vars: &[Var]| {
let x_sq = tape.mul(vars[0], vars[0]);
let x_cube = tape.mul(x_sq, vars[0]);
vec![x_sq, x_cube]
},
&[2.0],
)
.expect("reverse_jacobian should succeed");
let jac_vec = jac.to_vec();
assert!((jac_vec[0] - 4.0).abs() < TOL); assert!((jac_vec[1] - 12.0).abs() < TOL); }
#[test]
fn test_jacobian_forward_reverse_agree() {
let fwd_jac = forward_jacobian(
|vars: &[Dual<f64>]| -> Vec<Dual<f64>> {
let one = Dual::constant(1.0);
vec![vars[0] * vars[1] + vars[0], vars[1] * vars[1] - vars[0]]
},
&[2.0, 3.0],
)
.expect("forward_jacobian should succeed");
let rev_jac = reverse_jacobian(
|tape: &mut Tape<f64>, vars: &[Var]| {
let xy = tape.mul(vars[0], vars[1]);
let out1 = tape.add(xy, vars[0]);
let y_sq = tape.mul(vars[1], vars[1]);
let out2 = tape.sub(y_sq, vars[0]);
vec![out1, out2]
},
&[2.0, 3.0],
)
.expect("reverse_jacobian should succeed");
let fwd_vec = fwd_jac.to_vec();
let rev_vec = rev_jac.to_vec();
for k in 0..fwd_vec.len() {
assert!(
(fwd_vec[k] - rev_vec[k]).abs() < TOL,
"Jacobian mismatch at index {}: forward={}, reverse={}",
k,
fwd_vec[k],
rev_vec[k]
);
}
}
#[test]
fn test_hessian_exact_quadratic() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
let three = HyperDual::constant(3.0);
let two = HyperDual::constant(2.0);
vars[0] * vars[0] + three * vars[0] * vars[1] + two * vars[1] * vars[1]
};
let hess = hessian_exact(f, &[5.0, 7.0]).expect("hessian_exact should succeed");
let hess_vec = hess.to_vec();
assert!((hess_vec[0] - 2.0).abs() < TOL); assert!((hess_vec[1] - 3.0).abs() < TOL); assert!((hess_vec[2] - 3.0).abs() < TOL); assert!((hess_vec[3] - 4.0).abs() < TOL); }
#[test]
fn test_hessian_exact_rosenbrock() {
let rosenbrock = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
let x = vars[0];
let y = vars[1];
let one = HyperDual::constant(1.0);
let hundred = HyperDual::constant(100.0);
let diff1 = one - x;
let diff2 = y - x * x;
diff1 * diff1 + hundred * diff2 * diff2
};
let hess = hessian_exact(rosenbrock, &[1.0, 1.0]).expect("hessian_exact should succeed");
let hess_vec = hess.to_vec();
assert!((hess_vec[0] - 802.0).abs() < TOL);
assert!((hess_vec[1] - (-400.0)).abs() < TOL);
assert!((hess_vec[2] - (-400.0)).abs() < TOL);
assert!((hess_vec[3] - 200.0).abs() < TOL);
}
#[test]
fn test_hessian_vector_product_simple() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
let two = HyperDual::constant(2.0);
let three = HyperDual::constant(3.0);
vars[0] * vars[0] + two * vars[0] * vars[1] + three * vars[1] * vars[1]
};
let hv = hessian_vector_product(f, &[1.0, 1.0], &[1.0, 2.0])
.expect("hessian_vector_product should succeed");
assert!((hv[0] - 6.0).abs() < TOL);
assert!((hv[1] - 14.0).abs() < TOL);
}
#[test]
fn test_hessian_forward_over_reverse() {
let f = |tape: &mut Tape<f64>, vars: &[Var]| -> Var {
let x_sq = tape.mul(vars[0], vars[0]);
let y_sq = tape.mul(vars[1], vars[1]);
tape.add(x_sq, y_sq)
};
let hess = hessian_forward_over_reverse(f, &[3.0, 4.0])
.expect("hessian_forward_over_reverse should succeed");
let hess_vec = hess.to_vec();
assert!((hess_vec[0] - 2.0).abs() < NUMERICAL_TOL); assert!(hess_vec[1].abs() < NUMERICAL_TOL); assert!(hess_vec[2].abs() < NUMERICAL_TOL); assert!((hess_vec[3] - 2.0).abs() < NUMERICAL_TOL); }
#[test]
fn test_gradient_check_passes() {
let f_numerical = |vars: &[f64]| -> f64 { vars[0] * vars[0] + vars[1] * vars[1] };
let analytical = vec![6.0, 8.0];
let result = gradient_check(f_numerical, &[3.0, 4.0], &analytical, 1e-4)
.expect("gradient_check should succeed");
assert!(result.passed, "Correct gradient should pass check");
assert!(result.max_rel_error < 1e-4);
}
#[test]
fn test_gradient_check_detects_error() {
let f_numerical = |vars: &[f64]| -> f64 { vars[0] * vars[0] + vars[1] * vars[1] };
let wrong_analytical = vec![6.0, 100.0];
let result = gradient_check(f_numerical, &[3.0, 4.0], &wrong_analytical, 1e-4)
.expect("gradient_check should succeed");
assert!(!result.passed, "Wrong gradient should fail check");
assert!(result.max_abs_error > 90.0);
}
#[test]
fn test_gradient_check_ad_integration() {
let f_ad = |vars: &[Dual<f64>]| -> Dual<f64> { vars[0] * vars[0] + vars[1] * vars[1] };
let f_num = |vars: &[f64]| -> f64 { vars[0] * vars[0] + vars[1] * vars[1] };
let result = gradient_check_ad(f_ad, f_num, &[3.0, 4.0], 1e-4)
.expect("gradient_check_ad should succeed");
assert!(result.passed);
}
#[test]
fn test_taylor_quadratic_approximation() {
let f =
|vars: &[HyperDual<f64>]| -> HyperDual<f64> { vars[0] * vars[0] + vars[1] * vars[1] };
let taylor =
multivariate_taylor(f, &[1.0, 1.0]).expect("multivariate_taylor should succeed");
assert!((taylor.value - 2.0).abs() < TOL);
assert!((taylor.gradient[0] - 2.0).abs() < TOL);
assert!((taylor.gradient[1] - 2.0).abs() < TOL);
let approx = taylor
.evaluate(&[2.0, 3.0])
.expect("evaluate should succeed");
let exact = 4.0 + 9.0; assert!((approx - exact).abs() < TOL);
}
#[test]
fn test_taylor_approximation_accuracy() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> { vars[0].exp() * vars[1].sin() };
let taylor =
multivariate_taylor(f, &[0.0, 0.0]).expect("multivariate_taylor should succeed");
assert!((taylor.value - 0.0).abs() < TOL);
let approx = taylor
.evaluate(&[0.1, 0.1])
.expect("evaluate should succeed");
let exact = 0.1_f64.exp() * 0.1_f64.sin();
let error = (approx - exact).abs();
assert!(
error < 1e-3,
"Taylor approximation error {} should be small near center",
error
);
}
#[test]
fn test_hessian_1d_function() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> { vars[0].powi(3) };
let hess = hessian_exact(f, &[2.0]).expect("hessian_exact should succeed for 1D");
let hess_vec = hess.to_vec();
assert!((hess_vec[0] - 12.0).abs() < TOL);
}
#[test]
fn test_hessian_identity_function() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> { vars[0] + vars[1] };
let hess = hessian_exact(f, &[3.0, 4.0]).expect("hessian_exact should succeed");
let hess_vec = hess.to_vec();
for val in &hess_vec {
assert!(val.abs() < TOL, "Hessian of linear function should be zero");
}
}
#[test]
fn test_hyperdual_ln() {
let x = HyperDual::new(2.0, 1.0, 1.0, 0.0);
let result = x.ln();
assert!((result.real() - 2.0_f64.ln()).abs() < TOL);
assert!((result.eps1() - 0.5).abs() < TOL);
assert!((result.eps1eps2() - (-0.25)).abs() < TOL);
}
#[test]
fn test_hyperdual_tanh() {
let x = HyperDual::new(0.0, 1.0, 1.0, 0.0);
let result = x.tanh();
assert!((result.real() - 0.0).abs() < TOL);
assert!((result.eps1() - 1.0).abs() < TOL);
assert!((result.eps1eps2() - 0.0).abs() < TOL);
}
#[test]
fn test_hyperdual_sigmoid() {
let x = HyperDual::new(0.0, 1.0, 1.0, 0.0);
let result = x.sigmoid();
assert!((result.real() - 0.5).abs() < TOL);
assert!((result.eps1() - 0.25).abs() < TOL);
assert!((result.eps1eps2() - 0.0).abs() < TOL);
}
#[test]
fn test_hessian_3d_function() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
vars[0] * vars[1] + vars[1] * vars[2] + vars[0] * vars[2]
};
let hess = hessian_exact(f, &[1.0, 2.0, 3.0]).expect("hessian_exact should succeed for 3D");
let hess_vec = hess.to_vec();
assert!(hess_vec[0].abs() < TOL);
assert!((hess_vec[1] - 1.0).abs() < TOL);
assert!((hess_vec[2] - 1.0).abs() < TOL);
assert!((hess_vec[3] - 1.0).abs() < TOL);
assert!(hess_vec[4].abs() < TOL);
assert!((hess_vec[5] - 1.0).abs() < TOL);
assert!((hess_vec[6] - 1.0).abs() < TOL);
assert!((hess_vec[7] - 1.0).abs() < TOL);
assert!(hess_vec[8].abs() < TOL);
}
#[test]
fn test_compute_gradient_ad_matches_hyperdual() {
let f_dual = |vars: &[Dual<f64>]| -> Dual<f64> {
vars[0] * vars[0] + vars[0] * vars[1] + vars[1] * vars[1]
};
let f_hyper = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
vars[0] * vars[0] + vars[0] * vars[1] + vars[1] * vars[1]
};
let point = &[2.0, 3.0];
let grad_dual =
compute_gradient_ad(f_dual, point).expect("compute_gradient_ad should succeed");
let grad_hyper = compute_gradient_hyperdual(&f_hyper, point)
.expect("compute_gradient_hyperdual should succeed");
for i in 0..grad_dual.len() {
assert!(
(grad_dual[i] - grad_hyper[i]).abs() < TOL,
"Gradient component {} disagrees: dual={}, hyper={}",
i,
grad_dual[i],
grad_hyper[i]
);
}
}
#[test]
fn test_hessian_vector_product_matches_full_hessian() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
let three = HyperDual::constant(3.0);
vars[0] * vars[0] + three * vars[0] * vars[1] + vars[1] * vars[1]
};
let x = &[2.0, 3.0];
let v = &[1.0, -1.0];
let hess = hessian_exact(f, x).expect("hessian_exact should succeed");
let hess_vec = hess.to_vec();
let n = x.len();
let mut hv_full = vec![0.0; n];
for i in 0..n {
for j in 0..n {
hv_full[i] += hess_vec[i * n + j] * v[j];
}
}
let hv_efficient =
hessian_vector_product(f, x, v).expect("hessian_vector_product should succeed");
for i in 0..n {
assert!(
(hv_full[i] - hv_efficient[i]).abs() < TOL,
"Hv component {} disagrees: full={}, efficient={}",
i,
hv_full[i],
hv_efficient[i]
);
}
}
#[test]
fn test_hessian_exact_vs_forward_over_reverse() {
let f_hyper = |vars: &[HyperDual<f64>]| -> HyperDual<f64> {
vars[0] * vars[0] + vars[0] * vars[1] + vars[1] * vars[1]
};
let f_tape = |tape: &mut Tape<f64>, vars: &[Var]| -> Var {
let x_sq = tape.mul(vars[0], vars[0]);
let xy = tape.mul(vars[0], vars[1]);
let y_sq = tape.mul(vars[1], vars[1]);
let tmp = tape.add(x_sq, xy);
tape.add(tmp, y_sq)
};
let x = &[2.0, 3.0];
let hess_exact = hessian_exact(f_hyper, x).expect("hessian_exact should succeed");
let hess_for = hessian_forward_over_reverse(f_tape, x)
.expect("hessian_forward_over_reverse should succeed");
let exact_vec = hess_exact.to_vec();
let for_vec = hess_for.to_vec();
for k in 0..exact_vec.len() {
assert!(
(exact_vec[k] - for_vec[k]).abs() < NUMERICAL_TOL,
"Hessian element {} disagrees: exact={}, forward_over_reverse={}",
k,
exact_vec[k],
for_vec[k]
);
}
}
#[test]
fn test_empty_input_errors() {
let f_dual = |_vars: &[Dual<f64>]| -> Vec<Dual<f64>> { vec![] };
assert!(forward_jacobian(f_dual, &[]).is_err());
let f_hyper = |vars: &[HyperDual<f64>]| -> HyperDual<f64> { HyperDual::constant(0.0) };
assert!(hessian_exact(f_hyper, &[]).is_err());
assert!(hessian_vector_product(f_hyper, &[], &[]).is_err());
assert!(multivariate_taylor(f_hyper, &[]).is_err());
}
#[test]
fn test_hessian_vector_product_dimension_mismatch() {
let f = |vars: &[HyperDual<f64>]| -> HyperDual<f64> { vars[0] * vars[0] };
let result = hessian_vector_product(f, &[1.0, 2.0], &[1.0]);
assert!(result.is_err());
}
}