pub fn softmax_crossentropy_gradient<F>(
softmax_output: &ArrayView2<'_, F>,
targets: &ArrayView2<'_, F>,
) -> LinalgResult<Array2<F>>
§Description
Calculates the gradient of softmax cross-entropy loss with respect to the logits (the pre-softmax inputs). This is a common gradient computation in multi-class classification tasks; the combined softmax + cross-entropy gradient simplifies to `(softmax_output - targets) / batch_size`.
§Arguments
`softmax_output` - The output of the softmax function (probabilities that sum to 1 along each row)
`targets` - Target one-hot encoded vectors
§Returns
- The gradient of softmax cross-entropy with respect to logits
§Examples
use ndarray::array;
use scirs2_linalg::gradient::softmax_crossentropy_gradient;
use approx::assert_relative_eq;
// Softmax outputs (probabilities)
let softmax_output = array![[0.7, 0.2, 0.1], [0.3, 0.6, 0.1]];
// One-hot encoded targets
let targets = array![[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]];
let gradient = softmax_crossentropy_gradient(&softmax_output.view(), &targets.view()).unwrap();
// For each example, gradient = (softmax_output - targets) / batch_size
// For the first example:
// = ([0.7, 0.2, 0.1] - [1.0, 0.0, 0.0]) / 2
// = [-0.15, 0.1, 0.05]
// For the second example:
// = ([0.3, 0.6, 0.1] - [0.0, 1.0, 0.0]) / 2
// = [0.15, -0.2, 0.05]
assert_relative_eq!(gradient[[0, 0]], -0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 1]], 0.1, epsilon = 1e-10);
assert_relative_eq!(gradient[[0, 2]], 0.05, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 0]], 0.15, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 1]], -0.2, epsilon = 1e-10);
assert_relative_eq!(gradient[[1, 2]], 0.05, epsilon = 1e-10);