use scirs2_core::ndarray::{s, Array1, Array2, ArrayView1, ArrayView2, ScalarOperand};
use scirs2_core::numeric::{Float, NumAssign};
use std::iter::Sum;
use crate::error::{LinalgError, LinalgResult};
/// Gradient of the mean-squared-error loss with respect to the predictions.
///
/// For n samples, MSE = (1/n) * Σ (p_i - t_i)^2, so the element-wise
/// gradient is (2/n) * (p - t).
///
/// # Errors
///
/// Returns `LinalgError::ShapeError` when `predictions` and `targets`
/// have different shapes.
#[allow(dead_code)]
pub fn mse_gradient<F>(
    predictions: &ArrayView1<F>,
    targets: &ArrayView1<F>,
) -> LinalgResult<Array1<F>>
where
    F: Float + NumAssign + Sum + ScalarOperand,
{
    if predictions.shape() != targets.shape() {
        return Err(LinalgError::ShapeError(format!(
            "Shape mismatch for mse_gradient: predictions has shape {:?} but targets has shape {:?}",
            predictions.shape(),
            targets.shape()
        )));
    }
    // 2/n scaling factor. For empty input this is infinite, but it is never
    // applied to any element, so the result is simply an empty array.
    let n = F::from(predictions.len()).expect("Operation failed");
    let two = F::from(2.0).expect("Operation failed");
    let scale = two / n;
    // (p - t) already yields an owned Array1; scaling it directly avoids the
    // redundant extra allocation the previous `.to_owned()` incurred.
    Ok((predictions - targets) * scale)
}
/// Gradient of the binary cross-entropy loss with respect to the predictions.
///
/// For L = -t*ln(p) - (1-t)*ln(1-p), the per-element gradient is
/// -t/p + (1-t)/(1-p). A small `eps` stabilizes the divisions near the
/// open-interval boundary. Note the result is NOT averaged over samples.
///
/// # Errors
///
/// Returns `LinalgError::ShapeError` on mismatched shapes, or
/// `LinalgError::InvalidInputError` when predictions fall outside (0, 1)
/// or targets are not (numerically) 0 or 1.
#[allow(dead_code)]
pub fn binary_crossentropy_gradient<F>(
    predictions: &ArrayView1<F>,
    targets: &ArrayView1<F>,
) -> LinalgResult<Array1<F>>
where
    F: Float + NumAssign + Sum + ScalarOperand,
{
    if predictions.shape() != targets.shape() {
        return Err(LinalgError::ShapeError(format!(
            "Shape mismatch for binary_crossentropy_gradient: predictions has shape {:?} but targets has shape {:?}",
            predictions.shape(),
            targets.shape()
        )));
    }
    // Predictions must lie strictly inside the open interval (0, 1).
    if predictions
        .iter()
        .any(|&p| p <= F::zero() || p >= F::one())
    {
        return Err(LinalgError::InvalidInputError(
            "Predictions must be between 0 and 1 for binary cross-entropy".to_string(),
        ));
    }
    // Targets must be 0 or 1 to within machine epsilon.
    if targets
        .iter()
        .any(|&t| (t - F::zero()).abs() > F::epsilon() && (t - F::one()).abs() > F::epsilon())
    {
        return Err(LinalgError::InvalidInputError(
            "Targets must be 0 or 1 for binary cross-entropy".to_string(),
        ));
    }
    let one = F::one();
    let eps = F::from(1e-15).expect("Operation failed");
    // Each term is skipped entirely when its target coefficient is zero,
    // so only the relevant branch of the loss contributes.
    let gradient: Array1<F> = predictions
        .iter()
        .zip(targets.iter())
        .map(|(&p, &t)| {
            let positive_term = if t > F::epsilon() {
                -t / (p + eps)
            } else {
                F::zero()
            };
            let negative_term = if (one - t) > F::epsilon() {
                (one - t) / (one - p + eps)
            } else {
                F::zero()
            };
            positive_term + negative_term
        })
        .collect();
    Ok(gradient)
}
/// Gradient of the combined softmax + cross-entropy loss with respect to
/// the pre-softmax logits.
///
/// For one-hot targets this simplifies to
/// `(softmax_output - targets) / batch_size`.
///
/// # Errors
///
/// Returns `LinalgError::ShapeError` on mismatched shapes, or
/// `LinalgError::InvalidInputError` when a `softmax_output` row does not
/// sum to 1, or a `targets` row is not a valid one-hot vector.
#[allow(dead_code)]
pub fn softmax_crossentropy_gradient<F>(
    softmax_output: &ArrayView2<F>,
    targets: &ArrayView2<F>,
) -> LinalgResult<Array2<F>>
where
    F: Float + NumAssign + Sum + ScalarOperand + std::fmt::Display,
{
    if softmax_output.shape() != targets.shape() {
        return Err(LinalgError::ShapeError(format!(
            "Shape mismatch for softmax_crossentropy_gradient: softmax_output has shape {:?} but targets has shape {:?}",
            softmax_output.shape(),
            targets.shape()
        )));
    }
    let (batchsize, _num_classes) = softmax_output.dim();
    // Loop-invariant tolerances, hoisted so the F::from conversion (and its
    // expect) is not repeated on every row/element. The softmax check is
    // looser (1e-5) than the one-hot check (1e-6) to tolerate accumulated
    // rounding across classes.
    let softmax_tol = F::from(1e-5).expect("Operation failed");
    let one_hot_tol = F::from(1e-6).expect("Operation failed");
    // Each softmax row must be a probability distribution (sum to 1).
    for i in 0..batchsize {
        let row_sum = softmax_output.slice(s![i, ..]).sum();
        if (row_sum - F::one()).abs() > softmax_tol {
            return Err(LinalgError::InvalidInputError(format!(
                "softmax_output row {i} does not sum to 1: sum is {row_sum}"
            )));
        }
    }
    // Each target row must be one-hot: sums to 1, exactly one entry near 1,
    // all remaining entries near 0.
    for i in 0..batchsize {
        let row_sum = targets.slice(s![i, ..]).sum();
        if (row_sum - F::one()).abs() > one_hot_tol {
            return Err(LinalgError::InvalidInputError(format!(
                "targets row {i} is not a valid one-hot vector: sum is {row_sum}"
            )));
        }
        let mut has_one = false;
        for val in targets.slice(s![i, ..]).iter() {
            if (*val - F::one()).abs() < one_hot_tol {
                if has_one {
                    return Err(LinalgError::InvalidInputError(format!(
                        "targets row {i} is not a valid one-hot vector: multiple entries close to 1"
                    )));
                }
                has_one = true;
            } else if *val > one_hot_tol {
                return Err(LinalgError::InvalidInputError(format!(
                    "targets row {i} is not a valid one-hot vector: contains value {} not close to 0 or 1", *val
                )));
            }
        }
        if !has_one {
            return Err(LinalgError::InvalidInputError(format!(
                "targets row {i} is not a valid one-hot vector: no entry close to 1"
            )));
        }
    }
    // Average over the batch so the gradient scale is batch-size independent.
    let batchsize_f = F::from(batchsize).expect("Operation failed");
    let mut gradient = softmax_output.to_owned() - targets;
    gradient /= batchsize_f;
    Ok(gradient)
}
/// Numerically approximates the Jacobian of `f` at `x` using central
/// differences: J[i][j] ≈ (f(x + ε·e_j)[i] - f(x - ε·e_j)[i]) / (2ε),
/// where `e_j` is the j-th standard basis vector.
///
/// Returns an m×n matrix where m = len(f(x)) and n = len(x).
#[allow(dead_code)]
pub fn jacobian<F, G>(f: &G, x: &Array1<F>, epsilon: F) -> LinalgResult<Array2<F>>
where
    F: Float + NumAssign + Sum + ScalarOperand,
    G: Fn(&Array1<F>) -> Array1<F>,
{
    let n_inputs = x.len();
    let n_outputs = f(x).len();
    let denom = F::from(2.0).expect("Operation failed") * epsilon;
    let mut jac = Array2::zeros((n_outputs, n_inputs));
    for col in 0..n_inputs {
        // Perturb only the col-th coordinate, once in each direction.
        let mut ahead = x.clone();
        ahead[col] += epsilon;
        let mut behind = x.clone();
        behind[col] -= epsilon;
        let delta = f(&ahead) - f(&behind);
        for row in 0..n_outputs {
            jac[[row, col]] = delta[row] / denom;
        }
    }
    Ok(jac)
}
/// Numerically approximates the Hessian of scalar function `f` at `x`
/// using central differences.
///
/// Diagonal entries use the second-order stencil
/// (f(x+ε·e_i) - 2f(x) + f(x-ε·e_i)) / ε²; off-diagonal entries use the
/// four-point mixed-partial stencil divided by 4ε². Only the lower
/// triangle is computed and mirrored, since the Hessian is symmetric.
#[allow(dead_code)]
pub fn hessian<F, G>(f: &G, x: &Array1<F>, epsilon: F) -> LinalgResult<Array2<F>>
where
    F: Float + NumAssign + Sum + ScalarOperand,
    G: Fn(&Array1<F>) -> F,
{
    let n = x.len();
    let mut hessian = Array2::zeros((n, n));
    // Loop-invariant constants, hoisted out of the (i, j) loops: `four`
    // was previously reconstructed for every off-diagonal pair.
    let two = F::from(2.0).expect("Operation failed");
    let four = F::from(4.0).expect("Operation failed");
    let epsilon_squared = epsilon * epsilon;
    let f_x = f(x);
    for i in 0..n {
        for j in 0..=i {
            if i == j {
                // Second derivative along coordinate i.
                let mut x_plus = x.clone();
                let mut x_minus = x.clone();
                x_plus[i] = x[i] + epsilon;
                x_minus[i] = x[i] - epsilon;
                let f_plus = f(&x_plus);
                let f_minus = f(&x_minus);
                hessian[[i, i]] = (f_plus - two * f_x + f_minus) / epsilon_squared;
            } else {
                // Mixed partial ∂²f/∂x_i∂x_j via the four corner evaluations.
                let mut x_plus_plus = x.clone();
                let mut x_plus_minus = x.clone();
                let mut x_minus_plus = x.clone();
                let mut x_minus_minus = x.clone();
                x_plus_plus[i] = x[i] + epsilon;
                x_plus_plus[j] = x[j] + epsilon;
                x_plus_minus[i] = x[i] + epsilon;
                x_plus_minus[j] = x[j] - epsilon;
                x_minus_plus[i] = x[i] - epsilon;
                x_minus_plus[j] = x[j] + epsilon;
                x_minus_minus[i] = x[i] - epsilon;
                x_minus_minus[j] = x[j] - epsilon;
                let f_plus_plus = f(&x_plus_plus);
                let f_plus_minus = f(&x_plus_minus);
                let f_minus_plus = f(&x_minus_plus);
                let f_minus_minus = f(&x_minus_minus);
                let h_ij = (f_plus_plus - f_plus_minus - f_minus_plus + f_minus_minus)
                    / (four * epsilon_squared);
                // Exploit symmetry: fill both triangles from one computation.
                hessian[[i, j]] = h_ij;
                hessian[[j, i]] = h_ij;
            }
        }
    }
    Ok(hessian)
}
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::array;
    // MSE gradient: (2/3) * (p - t) for each element.
    // diffs are [0.5, 0.5, 0.0] -> gradient [1/3, 1/3, 0].
#[test]
fn test_mse_gradient() {
let predictions = array![3.0, 1.0, 2.0];
let targets = array![2.5, 0.5, 2.0];
let gradient =
mse_gradient(&predictions.view(), &targets.view()).expect("Operation failed");
assert_relative_eq!(gradient[0], 1.0 / 3.0, epsilon = 1e-10);
assert_relative_eq!(gradient[1], 1.0 / 3.0, epsilon = 1e-10);
assert_relative_eq!(gradient[2], 0.0, epsilon = 1e-10);
}
    // BCE gradient is not averaged: -t/p + (1-t)/(1-p).
    // e.g. element 0: -1/0.7 ≈ -1.428571; element 1: 1/0.7 ≈ 1.428571.
#[test]
fn test_binary_crossentropy_gradient() {
let predictions = array![0.7, 0.3, 0.9];
let targets = array![1.0, 0.0, 1.0];
let gradient = binary_crossentropy_gradient(&predictions.view(), &targets.view())
.expect("Operation failed");
assert_relative_eq!(gradient[0], -1.428571, epsilon = 1e-6);
assert_relative_eq!(gradient[1], 1.428571, epsilon = 1e-6);
assert_relative_eq!(gradient[2], -1.111111, epsilon = 1e-6);
}
    // Softmax/CE gradient: (softmax - one_hot) / batch_size, batch_size = 2.
    // e.g. [0,0]: (0.7 - 1.0) / 2 = -0.15.
#[test]
fn test_softmax_crossentropy_gradient() {
let softmax_output = array![[0.7, 0.2, 0.1], [0.3, 0.6, 0.1]];
let targets = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];
let gradient = softmax_crossentropy_gradient(&softmax_output.view(), &targets.view())
.expect("Operation failed");
assert_relative_eq!(gradient[[0, 0]], -0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 1]], 0.1, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 2]], 0.05, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 0]], 0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 1]], -0.2, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 2]], 0.05, epsilon = 1e-10);
}
    // Jacobian of [x²+y, 2x+y², xy] at (2, 3): analytic partials are
    // [[2x, 1], [2, 2y], [y, x]] = [[4, 1], [2, 6], [3, 2]].
#[test]
fn test_jacobian() {
let f = |v: &Array1<f64>| -> Array1<f64> {
let x = v[0];
let y = v[1];
array![x * x + y, 2.0 * x + y * y, x * y]
};
let x = array![2.0, 3.0]; let epsilon = 1e-5;
let jac = jacobian(&f, &x, epsilon).expect("Operation failed");
assert_relative_eq!(jac[[0, 0]], 4.0, epsilon = 1e-4);
assert_relative_eq!(jac[[0, 1]], 1.0, epsilon = 1e-4);
assert_relative_eq!(jac[[1, 0]], 2.0, epsilon = 1e-4);
assert_relative_eq!(jac[[1, 1]], 6.0, epsilon = 1e-4);
assert_relative_eq!(jac[[2, 0]], 3.0, epsilon = 1e-4);
assert_relative_eq!(jac[[2, 1]], 2.0, epsilon = 1e-4);
}
    // Hessian of f(x) = 2x² is the constant 4, independent of x.
#[test]
fn test_hessian() {
let f = |v: &Array1<f64>| -> f64 {
let x = v[0];
2.0 * x * x
};
let x = array![0.5];
let epsilon = 1e-4;
let hess = hessian(&f, &x, epsilon).expect("Operation failed");
assert_relative_eq!(hess[[0, 0]], 4.0, epsilon = 1e-2);
}
    // Hessian of x²y + y²z + z²x at (1,1,1): every second partial equals 2,
    // which also exercises the symmetric off-diagonal fill.
#[test]
fn test_hessian_multidimensional() {
let f = |v: &Array1<f64>| -> f64 {
let x = v[0];
let y = v[1];
let z = v[2];
x * x * y + y * y * z + z * z * x
};
let x = array![1.0, 1.0, 1.0];
let epsilon = 1e-4;
let hess = hessian(&f, &x, epsilon).expect("Operation failed");
assert_relative_eq!(hess[[0, 0]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[1, 1]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[2, 2]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[0, 1]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[1, 0]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[1, 2]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[2, 1]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[0, 2]], 2.0, epsilon = 1e-2);
assert_relative_eq!(hess[[2, 0]], 2.0, epsilon = 1e-2);
}
    // Hessian of the quadratic form x² + xy + 2y² is the constant matrix
    // [[2, 1], [1, 4]], so tighter tolerances are used here.
#[test]
fn test_hessian_quadratic_form() {
let f = |v: &Array1<f64>| -> f64 {
let x = v[0];
let y = v[1];
x * x + x * y + 2.0 * y * y
};
let x = array![1.0, 2.0];
let epsilon = 1e-5;
let hess = hessian(&f, &x, epsilon).expect("Operation failed");
assert_relative_eq!(hess[[0, 0]], 2.0, epsilon = 1e-4);
assert_relative_eq!(hess[[0, 1]], 1.0, epsilon = 1e-4);
assert_relative_eq!(hess[[1, 0]], 1.0, epsilon = 1e-4);
assert_relative_eq!(hess[[1, 1]], 4.0, epsilon = 1e-4);
}
    // Mismatched lengths must be rejected with a ShapeError.
#[test]
fn test_mse_gradient_dimension_error() {
let predictions = array![1.0, 2.0, 3.0];
let targets = array![1.0, 2.0];
let result = mse_gradient(&predictions.view(), &targets.view());
assert!(result.is_err());
}
    // Predictions outside the open interval (0, 1) — above and below —
    // must each be rejected.
#[test]
fn test_binary_crossentropy_gradient_invalid_predictions() {
let predictions = array![0.5, 1.2, 0.3]; let targets = array![1.0, 0.0, 1.0];
let result = binary_crossentropy_gradient(&predictions.view(), &targets.view());
assert!(result.is_err());
let predictions = array![0.5, -0.1, 0.3]; let targets = array![1.0, 0.0, 1.0];
let result = binary_crossentropy_gradient(&predictions.view(), &targets.view());
assert!(result.is_err());
}
    // A target of 0.5 is neither 0 nor 1 and must be rejected.
#[test]
fn test_binary_crossentropy_gradient_invalid_targets() {
let predictions = array![0.5, 0.7, 0.3];
let targets = array![1.0, 0.5, 1.0];
let result = binary_crossentropy_gradient(&predictions.view(), &targets.view());
assert!(result.is_err());
}
    // First softmax row sums to 1.1, outside the 1e-5 tolerance.
#[test]
fn test_softmax_crossentropy_gradient_invalid_softmax() {
let softmax_output = array![[0.7, 0.2, 0.2], [0.3, 0.6, 0.1]]; let targets = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];
let result = softmax_crossentropy_gradient(&softmax_output.view(), &targets.view());
assert!(result.is_err());
}
    // Second target row sums to 1 but is not one-hot (no entry close to 1).
#[test]
fn test_softmax_crossentropy_gradient_invalid_targets() {
let softmax_output = array![[0.7, 0.2, 0.1], [0.3, 0.6, 0.1]];
let targets = array![[1.0, 0.0, 0.0], [0.3, 0.3, 0.4]];
let result = softmax_crossentropy_gradient(&softmax_output.view(), &targets.view());
assert!(result.is_err());
}
}