Function softmax_crossentropy_gradient

Source
pub fn softmax_crossentropy_gradient<F>(
    softmax_output: &ArrayView2<'_, F>,
    targets: &ArrayView2<'_, F>,
) -> LinalgResult<Array2<F>>
Expand description

Calculate the gradient of softmax cross-entropy with respect to logits

Computes the gradient of softmax + cross-entropy loss with respect to logits (pre-softmax). This is a common gradient calculation in multi-class classification tasks.

§Arguments

  • softmax_output - The output of the softmax function (probabilities that sum to 1)
  • targets - Target one-hot encoded vectors

§Returns

  • The gradient of softmax cross-entropy with respect to logits

§Examples

use ndarray::array;
use scirs2_linalg::gradient::softmax_crossentropy_gradient;
use approx::assert_relative_eq;

// Softmax outputs (probabilities)
let softmax_output = array![[0.7, 0.2, 0.1], [0.3, 0.6, 0.1]];
// One-hot encoded targets
let targets = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];

let gradient = softmax_crossentropy_gradient(&softmax_output.view(), &targets.view()).unwrap();

// For each example, gradient = (softmax_output - targets) / batch_size
// For the first example:
// = ([0.7, 0.2, 0.1] - [1.0, 0.0, 0.0]) / 2
// = [-0.15, 0.1, 0.05]
// For the second example:
// = ([0.3, 0.6, 0.1] - [0.0, 1.0, 0.0]) / 2
// = [0.15, -0.2, 0.05]

assert_relative_eq!(gradient[[0, 0]], -0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 1]], 0.1, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 2]], 0.05, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 0]], 0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 1]], -0.2, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 2]], 0.05, epsilon = 1e-10);