pub mod higher_order;
pub use higher_order::{
compute_gradient_ad, compute_gradient_hyperdual, forward_jacobian, gradient_check,
gradient_check_ad, hessian_exact, hessian_forward_over_reverse, hessian_vector_product,
jacobian_auto, multivariate_taylor, reverse_jacobian, GradientCheckResult, HyperDual,
TaylorExpansion2,
};
use crate::array::Array;
use crate::error::{NumRs2Error, Result};
use num_traits::{Float, One, Zero};
use std::fmt;
use std::ops::{Add, Div, Mul, Neg, Sub};
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Dual<T> {
value: T,
deriv: T,
}
impl<T: Float> Dual<T> {
pub fn new(value: T, deriv: T) -> Self {
Self { value, deriv }
}
pub fn variable(value: T) -> Self {
Self::new(value, T::one())
}
pub fn constant(value: T) -> Self {
Self::new(value, T::zero())
}
pub fn value(&self) -> T {
self.value
}
pub fn deriv(&self) -> T {
self.deriv
}
pub fn pow(&self, n: T) -> Self {
let value = self.value.powf(n);
let deriv = n * self.value.powf(n - T::one()) * self.deriv;
Self::new(value, deriv)
}
pub fn exp(&self) -> Self {
let exp_val = self.value.exp();
Self::new(exp_val, exp_val * self.deriv)
}
pub fn ln(&self) -> Self {
Self::new(self.value.ln(), self.deriv / self.value)
}
pub fn sin(&self) -> Self {
Self::new(self.value.sin(), self.value.cos() * self.deriv)
}
pub fn cos(&self) -> Self {
Self::new(self.value.cos(), -self.value.sin() * self.deriv)
}
pub fn tan(&self) -> Self {
let tan_val = self.value.tan();
Self::new(tan_val, self.deriv / self.value.cos().powi(2))
}
pub fn sinh(&self) -> Self {
Self::new(self.value.sinh(), self.value.cosh() * self.deriv)
}
pub fn cosh(&self) -> Self {
Self::new(self.value.cosh(), self.value.sinh() * self.deriv)
}
pub fn tanh(&self) -> Self {
let tanh_val = self.value.tanh();
Self::new(tanh_val, (T::one() - tanh_val * tanh_val) * self.deriv)
}
pub fn sqrt(&self) -> Self {
let sqrt_val = self.value.sqrt();
Self::new(sqrt_val, self.deriv / (T::one() + T::one()) / sqrt_val)
}
pub fn abs(&self) -> Self {
if self.value >= T::zero() {
*self
} else {
-(*self)
}
}
pub fn sigmoid(&self) -> Self {
let exp_neg_x = (-self.value).exp();
let sigmoid_val = T::one() / (T::one() + exp_neg_x);
Self::new(
sigmoid_val,
sigmoid_val * (T::one() - sigmoid_val) * self.deriv,
)
}
pub fn relu(&self) -> Self {
if self.value > T::zero() {
*self
} else {
Self::constant(T::zero())
}
}
}
impl<T: Float> Add for Dual<T> {
type Output = Self;
fn add(self, rhs: Self) -> Self::Output {
Self::new(self.value + rhs.value, self.deriv + rhs.deriv)
}
}
impl<T: Float> Sub for Dual<T> {
type Output = Self;
fn sub(self, rhs: Self) -> Self::Output {
Self::new(self.value - rhs.value, self.deriv - rhs.deriv)
}
}
impl<T: Float> Mul for Dual<T> {
type Output = Self;
fn mul(self, rhs: Self) -> Self::Output {
Self::new(
self.value * rhs.value,
self.deriv * rhs.value + self.value * rhs.deriv,
)
}
}
impl<T: Float> Div for Dual<T> {
type Output = Self;
fn div(self, rhs: Self) -> Self::Output {
Self::new(
self.value / rhs.value,
(self.deriv * rhs.value - self.value * rhs.deriv) / (rhs.value * rhs.value),
)
}
}
impl<T: Float> Neg for Dual<T> {
type Output = Self;
fn neg(self) -> Self::Output {
Self::new(-self.value, -self.deriv)
}
}
impl<T: Float + fmt::Display> fmt::Display for Dual<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}+{}ε", self.value, self.deriv)
}
}
pub fn gradient<F, T>(f: F, x: &Array<T>) -> Result<Array<T>>
where
F: Fn(&Array<Dual<T>>) -> Dual<T>,
T: Float,
{
let n = x.size();
let x_vec = x.to_vec();
let mut grad = Vec::with_capacity(n);
for i in 0..n {
let mut dual_vec = Vec::with_capacity(n);
for j in 0..n {
let deriv = if i == j { T::one() } else { T::zero() };
dual_vec.push(Dual::new(x_vec[j], deriv));
}
let dual_array = Array::from_vec(dual_vec);
let result = f(&dual_array);
grad.push(result.deriv());
}
Ok(Array::from_vec(grad))
}
pub fn jacobian<F, T>(f: F, x: &Array<T>) -> Result<Array<T>>
where
F: Fn(&Array<Dual<T>>) -> Array<Dual<T>>,
T: Float,
{
let n = x.size();
let x_vec = x.to_vec();
let dual_vec_test: Vec<Dual<T>> = x_vec.iter().map(|&v| Dual::variable(v)).collect();
let output_test = f(&Array::from_vec(dual_vec_test));
let m = output_test.size();
let mut jac = Vec::with_capacity(m * n);
for i in 0..n {
let mut dual_vec = Vec::with_capacity(n);
for j in 0..n {
let deriv = if i == j { T::one() } else { T::zero() };
dual_vec.push(Dual::new(x_vec[j], deriv));
}
let dual_array = Array::from_vec(dual_vec);
let result = f(&dual_array);
let result_vec = result.to_vec();
for elem in result_vec {
jac.push(elem.deriv());
}
}
Ok(Array::from_vec(jac).reshape(&[m, n]))
}
#[derive(Debug, Clone)]
struct TapeNode<T> {
value: T,
grad: T,
parents: Vec<(usize, T)>,
}
pub struct Tape<T> {
nodes: Vec<TapeNode<T>>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Var(usize);
impl<T: Float> Tape<T> {
pub fn new() -> Self {
Self { nodes: Vec::new() }
}
pub fn var(&mut self, value: T) -> Var {
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: Vec::new(),
});
Var(idx)
}
pub fn value(&self, var: Var) -> T {
self.nodes[var.0].value
}
pub fn grad(&self, var: Var) -> T {
self.nodes[var.0].grad
}
pub fn add(&mut self, x: Var, y: Var) -> Var {
let value = self.nodes[x.0].value + self.nodes[y.0].value;
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, T::one()), (y.0, T::one())],
});
Var(idx)
}
pub fn sub(&mut self, x: Var, y: Var) -> Var {
let value = self.nodes[x.0].value - self.nodes[y.0].value;
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, T::one()), (y.0, -T::one())],
});
Var(idx)
}
pub fn mul(&mut self, x: Var, y: Var) -> Var {
let x_val = self.nodes[x.0].value;
let y_val = self.nodes[y.0].value;
let value = x_val * y_val;
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, y_val), (y.0, x_val)],
});
Var(idx)
}
pub fn div(&mut self, x: Var, y: Var) -> Var {
let x_val = self.nodes[x.0].value;
let y_val = self.nodes[y.0].value;
let value = x_val / y_val;
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, T::one() / y_val), (y.0, -x_val / (y_val * y_val))],
});
Var(idx)
}
pub fn pow(&mut self, x: Var, n: T) -> Var {
let x_val = self.nodes[x.0].value;
let value = x_val.powf(n);
let idx = self.nodes.len();
let grad_coeff = n * x_val.powf(n - T::one());
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, grad_coeff)],
});
Var(idx)
}
pub fn exp(&mut self, x: Var) -> Var {
let x_val = self.nodes[x.0].value;
let value = x_val.exp();
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, value)],
});
Var(idx)
}
pub fn ln(&mut self, x: Var) -> Var {
let x_val = self.nodes[x.0].value;
let value = x_val.ln();
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, T::one() / x_val)],
});
Var(idx)
}
pub fn sin(&mut self, x: Var) -> Var {
let x_val = self.nodes[x.0].value;
let value = x_val.sin();
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, x_val.cos())],
});
Var(idx)
}
pub fn cos(&mut self, x: Var) -> Var {
let x_val = self.nodes[x.0].value;
let value = x_val.cos();
let idx = self.nodes.len();
self.nodes.push(TapeNode {
value,
grad: T::zero(),
parents: vec![(x.0, -x_val.sin())],
});
Var(idx)
}
pub fn backward(&mut self, output: Var) {
for node in &mut self.nodes {
node.grad = T::zero();
}
self.nodes[output.0].grad = T::one();
for i in (0..self.nodes.len()).rev() {
let grad = self.nodes[i].grad;
let parents = self.nodes[i].parents.clone();
for (parent_idx, coeff) in parents {
self.nodes[parent_idx].grad = self.nodes[parent_idx].grad + grad * coeff;
}
}
}
pub fn zero_grad(&mut self) {
for node in &mut self.nodes {
node.grad = T::zero();
}
}
pub fn len(&self) -> usize {
self.nodes.len()
}
pub fn is_empty(&self) -> bool {
self.nodes.is_empty()
}
}
impl<T: Float> Default for Tape<T> {
fn default() -> Self {
Self::new()
}
}
pub fn hessian<F, T>(f: F, x: &Array<T>) -> Result<Array<T>>
where
F: Fn(&[T]) -> T,
T: Float,
{
let n = x.size();
let x_vec = x.to_vec();
let mut hess = Vec::with_capacity(n * n);
for i in 0..n {
let eps = T::from(1e-7).expect("1e-7 is representable as Float");
let grad_at = |point: &[T]| -> Vec<T> {
let mut grad = Vec::with_capacity(n);
for j in 0..n {
let mut point_plus = point.to_vec();
let mut point_minus = point.to_vec();
point_plus[j] = point_plus[j] + eps;
point_minus[j] = point_minus[j] - eps;
let f_plus = f(&point_plus);
let f_minus = f(&point_minus);
grad.push((f_plus - f_minus) / (eps + eps));
}
grad
};
let grad = grad_at(&x_vec);
for j in 0..n {
let mut x_plus = x_vec.clone();
let mut x_minus = x_vec.clone();
x_plus[i] = x_plus[i] + eps;
x_minus[i] = x_minus[i] - eps;
let grad_plus = grad_at(&x_plus);
let grad_minus = grad_at(&x_minus);
let second_deriv = (grad_plus[j] - grad_minus[j]) / (eps + eps);
hess.push(second_deriv);
}
}
Ok(Array::from_vec(hess).reshape(&[n, n]))
}
pub fn directional_derivative<F, T>(f: F, x: &Array<T>, v: &Array<T>) -> Result<T>
where
F: Fn(&Array<Dual<T>>) -> Dual<T>,
T: Float,
{
if x.size() != v.size() {
return Err(NumRs2Error::ShapeMismatch {
expected: x.shape(),
actual: v.shape(),
});
}
let v_vec = v.to_vec();
let v_norm_sq: T = v_vec
.iter()
.map(|&vi| vi * vi)
.fold(T::zero(), |acc, x| acc + x);
let v_norm = v_norm_sq.sqrt();
let x_vec = x.to_vec();
let mut dual_vec = Vec::with_capacity(x.size());
for i in 0..x.size() {
dual_vec.push(Dual::new(x_vec[i], v_vec[i] / v_norm));
}
let dual_array = Array::from_vec(dual_vec);
let result = f(&dual_array);
Ok(result.deriv())
}
pub fn nth_derivative<F, T>(f: F, x: T, n: usize) -> T
where
F: Fn(Dual<T>) -> Dual<T> + Copy,
T: Float,
{
nth_derivative_impl(&f, x, n)
}
fn nth_derivative_impl<F, T>(f: &F, x: T, n: usize) -> T
where
F: Fn(Dual<T>) -> Dual<T>,
T: Float,
{
if n == 0 {
let result = f(Dual::constant(x));
return result.value();
}
if n == 1 {
let result = f(Dual::variable(x));
return result.deriv();
}
let eps = T::from(1e-7).expect("1e-7 is representable as Float");
let deriv_prev_plus = nth_derivative_impl(f, x + eps, n - 1);
let deriv_prev_minus = nth_derivative_impl(f, x - eps, n - 1);
(deriv_prev_plus - deriv_prev_minus) / (eps + eps)
}
pub fn taylor_series<F, T>(f: F, a: T, order: usize) -> Vec<T>
where
F: Fn(Dual<T>) -> Dual<T> + Copy,
T: Float,
{
let mut coeffs = Vec::with_capacity(order + 1);
let factorial = |n: usize| -> T {
let mut result = T::one();
for i in 1..=n {
result = result * T::from(i).expect("factorial index is representable as Float");
}
result
};
for n in 0..=order {
let deriv_n = nth_derivative(f, a, n);
coeffs.push(deriv_n / factorial(n));
}
coeffs
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_dual_arithmetic() {
let x = Dual::new(3.0, 1.0);
let y = Dual::new(4.0, 0.0);
let sum = x + y;
assert_eq!(sum.value(), 7.0);
assert_eq!(sum.deriv(), 1.0);
let diff = x - y;
assert_eq!(diff.value(), -1.0);
assert_eq!(diff.deriv(), 1.0);
let prod = x * y;
assert_eq!(prod.value(), 12.0);
assert_eq!(prod.deriv(), 4.0);
let quot = x / y;
assert_eq!(quot.value(), 0.75);
assert_eq!(quot.deriv(), 0.25);
}
#[test]
fn test_dual_functions() {
let x = Dual::variable(2.0);
let square = x.pow(2.0);
assert_eq!(square.value(), 4.0);
assert_eq!(square.deriv(), 4.0);
let exp_x = x.exp();
assert!((exp_x.value() - 2.0_f64.exp()).abs() < 1e-10);
assert!((exp_x.deriv() - 2.0_f64.exp()).abs() < 1e-10);
let ln_x = x.ln();
assert!((ln_x.value() - 2.0_f64.ln()).abs() < 1e-10);
assert!((ln_x.deriv() - 0.5).abs() < 1e-10);
let sin_x = x.sin();
assert!((sin_x.value() - 2.0_f64.sin()).abs() < 1e-10);
assert!((sin_x.deriv() - 2.0_f64.cos()).abs() < 1e-10);
}
#[test]
fn test_dual_composition() {
let x = Dual::variable(2.0);
let x_sq = x * x;
let x_sq_plus_1 = x_sq + Dual::constant(1.0);
let result = x_sq_plus_1.pow(3.0);
assert_eq!(result.value(), 125.0); assert_eq!(result.deriv(), 300.0); }
#[test]
fn test_gradient_simple() {
fn f(vars: &Array<Dual<f64>>) -> Dual<f64> {
let v = vars.to_vec();
v[0] * v[0] + v[1] * v[1]
}
let x = Array::from_vec(vec![3.0, 4.0]);
let grad = gradient(f, &x).expect("gradient computation should succeed");
let grad_vec = grad.to_vec();
assert!((grad_vec[0] - 6.0).abs() < 1e-10);
assert!((grad_vec[1] - 8.0).abs() < 1e-10);
}
#[test]
fn test_tape_basic() {
let mut tape = Tape::new();
let x = tape.var(3.0);
let y = tape.var(4.0);
let z = tape.mul(x, x); let w = tape.mul(y, y); let result = tape.add(z, w);
assert_eq!(tape.value(result), 25.0);
tape.backward(result);
assert_eq!(tape.grad(x), 6.0); assert_eq!(tape.grad(y), 8.0); }
#[test]
fn test_tape_division() {
let mut tape = Tape::new();
let x = tape.var(4.0);
let y = tape.var(2.0);
let z = tape.div(x, y);
assert_eq!(tape.value(z), 2.0);
tape.backward(z);
assert_eq!(tape.grad(x), 0.5); assert_eq!(tape.grad(y), -1.0); }
#[test]
fn test_tape_exp_ln() {
let mut tape = Tape::new();
let x = tape.var(2.0);
let y = tape.exp(x); let z = tape.ln(y);
assert!((tape.value(z) - 2.0).abs() < 1e-10);
tape.backward(z);
assert!((tape.grad(x) - 1.0).abs() < 1e-10);
}
#[test]
fn test_tape_trigonometric() {
let mut tape = Tape::new();
let x = tape.var(1.0);
let s = tape.sin(x);
let c = tape.cos(x);
assert!((tape.value(s) - 1.0_f64.sin()).abs() < 1e-10);
assert!((tape.value(c) - 1.0_f64.cos()).abs() < 1e-10);
tape.backward(s);
assert!((tape.grad(x) - 1.0_f64.cos()).abs() < 1e-10);
tape.zero_grad();
tape.backward(c);
assert!((tape.grad(x) + 1.0_f64.sin()).abs() < 1e-10);
}
#[test]
fn test_tape_chain_rule() {
let mut tape = Tape::new();
let x = tape.var(2.0);
let x_sq = tape.pow(x, 2.0);
let result = tape.sin(x_sq);
let expected_value = (4.0_f64).sin();
assert!((tape.value(result) - expected_value).abs() < 1e-10);
tape.backward(result);
let expected_grad = (4.0_f64).cos() * 4.0; assert!((tape.grad(x) - expected_grad).abs() < 1e-10);
}
#[test]
fn test_directional_derivative() {
fn f(vars: &Array<Dual<f64>>) -> Dual<f64> {
let v = vars.to_vec();
v[0] * v[0] + v[1] * v[1]
}
let x = Array::from_vec(vec![3.0, 4.0]);
let v = Array::from_vec(vec![1.0, 0.0]);
let dir_deriv = directional_derivative(f, &x, &v)
.expect("directional derivative computation should succeed");
assert!((dir_deriv - 6.0).abs() < 1e-10);
}
#[test]
fn test_nth_derivative_simple() {
fn f(x: Dual<f64>) -> Dual<f64> {
x.pow(3.0)
}
let x = 2.0;
let f0 = nth_derivative(f, x, 0);
assert!((f0 - 8.0).abs() < 1e-6);
let f1 = nth_derivative(f, x, 1);
assert!((f1 - 12.0).abs() < 1e-6);
let f2 = nth_derivative(f, x, 2);
assert!((f2 - 12.0).abs() < 0.1);
let f3 = nth_derivative(f, x, 3);
assert!((f3 - 6.0).abs() < 1.0); }
#[test]
fn test_taylor_series_exp() {
fn f(x: Dual<f64>) -> Dual<f64> {
x.exp()
}
let coeffs = taylor_series(f, 0.0, 4);
assert!((coeffs[0] - 1.0).abs() < 1e-6); assert!((coeffs[1] - 1.0).abs() < 1e-6); assert!((coeffs[2] - 0.5).abs() < 1e-4); assert!((coeffs[3] - 1.0 / 6.0).abs() < 1e-3); }
#[test]
fn test_hessian_simple() {
fn f(vars: &[f64]) -> f64 {
vars[0] * vars[0] + vars[1] * vars[1]
}
let x = Array::from_vec(vec![3.0, 4.0]);
let hess = hessian(f, &x).expect("hessian computation should succeed");
let hess_vec = hess.to_vec();
assert!((hess_vec[0] - 2.0).abs() < 1.0);
assert!(hess_vec[1].abs() < 1.0);
assert!(hess_vec[2].abs() < 1.0);
assert!((hess_vec[3] - 2.0).abs() < 1.0);
}
#[test]
fn test_dual_sigmoid() {
let x = Dual::variable(0.0);
let y = x.sigmoid();
assert!((y.value() - 0.5).abs() < 1e-10); assert!((y.deriv() - 0.25).abs() < 1e-10); }
#[test]
fn test_dual_relu() {
let x_pos = Dual::variable(2.0);
let y_pos = x_pos.relu();
assert_eq!(y_pos.value(), 2.0);
assert_eq!(y_pos.deriv(), 1.0);
let x_neg = Dual::variable(-2.0);
let y_neg = x_neg.relu();
assert_eq!(y_neg.value(), 0.0);
assert_eq!(y_neg.deriv(), 0.0);
}
}