use std::sync::Arc;
use ferrotorch_core::Float;
use ferrotorch_core::autograd::autocast_ops::autocast_guard;
use ferrotorch_core::autograd::no_grad::is_grad_enabled;
use ferrotorch_core::error::{FerrotorchError, FerrotorchResult};
use ferrotorch_core::ops::elementwise::{binary_map, mean, sum, unary_map};
use ferrotorch_core::storage::TensorStorage;
use ferrotorch_core::tensor::{GradFn, Tensor};
use num_traits::{One, Zero};
use crate::module::Reduction;
fn apply_reduction<T: Float>(
unreduced: &Tensor<T>,
reduction: Reduction,
) -> FerrotorchResult<Tensor<T>> {
match reduction {
Reduction::None => Ok(unreduced.clone()),
Reduction::Mean => mean(unreduced),
Reduction::Sum => sum(unreduced),
}
}
#[derive(Debug, Clone)]
pub struct MSELoss {
pub reduction: Reduction,
}
impl MSELoss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
pred: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("mse_loss");
if pred.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"MSELoss: pred shape {:?} != target shape {:?}",
pred.shape(),
target.shape()
),
});
}
let diff = binary_map(pred, target, |p, t| p - t)?;
let sq = unary_map(&diff, |x| x * x)?;
let reduced = apply_reduction(&sq, self.reduction)?;
if is_grad_enabled() && pred.requires_grad() {
let grad_fn = Arc::new(MSEBackward {
pred: pred.clone(),
target: target.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for MSELoss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct MSEBackward<T: Float> {
pred: Tensor<T>,
target: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for MSEBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
use ferrotorch_core::autograd::no_grad::no_grad;
use ferrotorch_core::grad_fns::arithmetic::{mul, sub};
let grad_input = no_grad(|| {
let diff = sub(&self.pred, &self.target)?;
let two = ferrotorch_core::creation::scalar(T::from(2.0).unwrap())?
.to(self.pred.device())?;
let scaled = mul(&diff, &two)?;
let result = mul(&scaled, grad_output)?;
match self.reduction {
Reduction::Mean => {
let n = ferrotorch_core::creation::scalar(
T::from(self.pred.shape().iter().product::<usize>()).unwrap(),
)?
.to(self.pred.device())?;
ferrotorch_core::grad_fns::arithmetic::div(&result, &n)
}
_ => Ok(result),
}
})?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.pred]
}
fn name(&self) -> &'static str {
"MSEBackward"
}
}
#[derive(Debug, Clone)]
pub struct CrossEntropyLoss {
pub reduction: Reduction,
pub label_smoothing: f64,
}
impl CrossEntropyLoss {
pub fn new(reduction: Reduction, label_smoothing: f64) -> Self {
Self {
reduction,
label_smoothing,
}
}
pub fn forward<T: Float>(
&self,
logits: &Tensor<T>,
targets: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("cross_entropy");
let shape = logits.shape();
if shape.len() != 2 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"CrossEntropyLoss: expected 2D logits [B, C], got shape {:?}",
shape
),
});
}
let batch = shape[0];
let classes = shape[1];
if targets.shape() != [batch] {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"CrossEntropyLoss: target shape {:?} does not match batch size {}",
targets.shape(),
batch,
),
});
}
let logits_data = logits.data_vec()?;
let targets_data = targets.data_vec()?;
let ls = T::from(self.label_smoothing).unwrap();
let one = <T as One>::one();
let mut log_probs = vec![<T as Zero>::zero(); batch * classes];
let mut softmax_out = vec![<T as Zero>::zero(); batch * classes];
for b in 0..batch {
let base = b * classes;
let mut max_val = logits_data[base];
for c in 1..classes {
if logits_data[base + c] > max_val {
max_val = logits_data[base + c];
}
}
let mut sum_exp = <T as Zero>::zero();
for c in 0..classes {
let e = (logits_data[base + c] - max_val).exp();
softmax_out[base + c] = e;
sum_exp += e;
}
let log_sum = sum_exp.ln();
for c in 0..classes {
softmax_out[base + c] = softmax_out[base + c] / sum_exp;
log_probs[base + c] = logits_data[base + c] - max_val - log_sum;
}
}
let mut losses = vec![<T as Zero>::zero(); batch];
for b in 0..batch {
let base = b * classes;
let target_class = targets_data[b].to_usize().unwrap_or(0);
let nll = -log_probs[base + target_class];
if self.label_smoothing > 0.0 {
let mut sum_lp = <T as Zero>::zero();
for c in 0..classes {
sum_lp += log_probs[base + c];
}
let smooth = -sum_lp / T::from(classes).unwrap();
losses[b] = (one - ls) * nll + ls * smooth;
} else {
losses[b] = nll;
}
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && logits.requires_grad() {
let softmax_tensor =
Tensor::from_storage(TensorStorage::cpu(softmax_out), vec![batch, classes], false)?;
let grad_fn = Arc::new(CrossEntropyBackward {
logits: logits.clone(),
targets: targets.clone(),
softmax: softmax_tensor,
label_smoothing: self.label_smoothing,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for CrossEntropyLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 0.0)
}
}
#[derive(Debug)]
struct CrossEntropyBackward<T: Float> {
logits: Tensor<T>,
targets: Tensor<T>,
softmax: Tensor<T>,
label_smoothing: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for CrossEntropyBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.logits.shape();
let batch = shape[0];
let classes = shape[1];
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "CrossEntropy backward" });
}
let sm_data = self.softmax.data()?;
let targets_data = self.targets.data()?;
let grad_data = grad_output.data()?;
let ls = T::from(self.label_smoothing).unwrap();
let one = <T as One>::one();
let inv_c = T::from(1.0).unwrap() / T::from(classes).unwrap();
let mut result = vec![<T as Zero>::zero(); batch * classes];
for b in 0..batch {
let base = b * classes;
let target_class = targets_data[b].to_usize().unwrap_or(0);
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
for c in 0..classes {
let sm = sm_data[base + c];
let one_hot = if c == target_class {
one
} else {
<T as Zero>::zero()
};
let target_dist = (one - ls) * one_hot + ls * inv_c;
result[base + c] = (sm - target_dist) * scale;
}
}
let grad_input = Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.logits]
}
fn name(&self) -> &'static str {
"CrossEntropyBackward"
}
}
#[derive(Debug, Clone)]
pub struct BCEWithLogitsLoss {
pub reduction: Reduction,
}
impl BCEWithLogitsLoss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
logits: &Tensor<T>,
targets: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("bce_with_logits");
if logits.shape() != targets.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"BCEWithLogitsLoss: logits shape {:?} != targets shape {:?}",
logits.shape(),
targets.shape()
),
});
}
let logits_data = logits.data_vec()?;
let targets_data = targets.data_vec()?;
let zero = <T as Zero>::zero();
let one = <T as One>::one();
let loss_data: Vec<T> = logits_data
.iter()
.zip(targets_data.iter())
.map(|(&x, &y)| {
let relu_x = if x > zero { x } else { zero };
let abs_x = if x > zero { x } else { -x };
relu_x - x * y + (one + (-abs_x).exp()).ln()
})
.collect();
let unreduced = Tensor::from_storage(
TensorStorage::cpu(loss_data),
logits.shape().to_vec(),
false,
)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && logits.requires_grad() {
let grad_fn = Arc::new(BCEWithLogitsBackward {
logits: logits.clone(),
targets: targets.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for BCEWithLogitsLoss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct BCEWithLogitsBackward<T: Float> {
logits: Tensor<T>,
targets: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for BCEWithLogitsBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
use ferrotorch_core::autograd::no_grad::no_grad;
use ferrotorch_core::grad_fns::activation::sigmoid;
use ferrotorch_core::grad_fns::arithmetic::{div, mul, sub};
let grad_input = no_grad(|| {
let sig = sigmoid(&self.logits)?;
let diff = sub(&sig, &self.targets)?;
let result = mul(&diff, grad_output)?;
match self.reduction {
Reduction::Mean => {
let n = ferrotorch_core::creation::scalar(
T::from(self.logits.shape().iter().product::<usize>()).unwrap(),
)?
.to(self.logits.device())?;
div(&result, &n)
}
_ => Ok(result),
}
})?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.logits]
}
fn name(&self) -> &'static str {
"BCEWithLogitsBackward"
}
}
#[derive(Debug, Clone)]
pub struct HuberLoss {
pub reduction: Reduction,
pub delta: f64,
}
impl HuberLoss {
pub fn new(reduction: Reduction, delta: f64) -> Self {
Self { reduction, delta }
}
pub fn forward<T: Float>(
&self,
pred: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if pred.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"HuberLoss: pred shape {:?} != target shape {:?}",
pred.shape(),
target.shape()
),
});
}
let pred_data = pred.data_vec()?;
let target_data = target.data_vec()?;
let delta = T::from(self.delta).unwrap();
let half = T::from(0.5).unwrap();
let loss_data: Vec<T> = pred_data
.iter()
.zip(target_data.iter())
.map(|(&p, &t)| {
let error = p - t;
let abs_error = error.abs();
if abs_error < delta {
half * error * error
} else {
delta * (abs_error - half * delta)
}
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), pred.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && pred.requires_grad() {
let grad_fn = Arc::new(HuberBackward {
pred: pred.clone(),
target: target.clone(),
delta: self.delta,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for HuberLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 1.0)
}
}
#[derive(Debug)]
struct HuberBackward<T: Float> {
pred: Tensor<T>,
target: Tensor<T>,
delta: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for HuberBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
use ferrotorch_core::autograd::no_grad::no_grad;
use ferrotorch_core::grad_fns::arithmetic::{div, mul, sub};
use ferrotorch_core::grad_fns::transcendental::clamp;
let delta_t = T::from(self.delta).unwrap();
let grad_input = no_grad(|| {
let error = sub(&self.pred, &self.target)?;
let clamped = clamp(&error, -delta_t, delta_t)?;
let result = mul(&clamped, grad_output)?;
match self.reduction {
Reduction::Mean => {
let n = ferrotorch_core::creation::scalar(
T::from(self.pred.shape().iter().product::<usize>()).unwrap(),
)?
.to(self.pred.device())?;
div(&result, &n)
}
_ => Ok(result),
}
})?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.pred]
}
fn name(&self) -> &'static str {
"HuberBackward"
}
}
#[derive(Debug, Clone)]
pub struct KLDivLoss {
pub reduction: Reduction,
}
impl KLDivLoss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if input.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"KLDivLoss: input shape {:?} != target shape {:?}",
input.shape(),
target.shape()
),
});
}
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let zero = <T as Zero>::zero();
let loss_data: Vec<T> = input_data
.iter()
.zip(target_data.iter())
.map(|(&inp, &tgt)| {
if tgt > zero {
tgt * (tgt.ln() - inp)
} else {
zero
}
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), input.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(KLDivBackward {
input: input.clone(),
target: target.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for KLDivLoss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct KLDivBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for KLDivBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
use ferrotorch_core::autograd::no_grad::no_grad;
use ferrotorch_core::grad_fns::arithmetic::{div, mul, neg};
let grad_input = no_grad(|| {
let neg_target = neg(&self.target)?;
let result = mul(&neg_target, grad_output)?;
match self.reduction {
Reduction::Mean => {
let n = ferrotorch_core::creation::scalar(
T::from(self.input.shape().iter().product::<usize>()).unwrap(),
)?
.to(self.input.device())?;
div(&result, &n)
}
_ => Ok(result),
}
})?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"KLDivBackward"
}
}
#[derive(Debug, Clone)]
pub struct CosineEmbeddingLoss {
pub reduction: Reduction,
pub margin: f64,
}
impl CosineEmbeddingLoss {
pub fn new(reduction: Reduction, margin: f64) -> Self {
Self { reduction, margin }
}
pub fn forward_pair<T: Float>(
&self,
x1: &Tensor<T>,
x2: &Tensor<T>,
y: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if x1.shape() != x2.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"CosineEmbeddingLoss: x1 shape {:?} != x2 shape {:?}",
x1.shape(),
x2.shape()
),
});
}
let x1_data = x1.data_vec()?;
let x2_data = x2.data_vec()?;
let y_data = y.data_vec()?;
let zero = <T as Zero>::zero();
let one = <T as One>::one();
let margin_t = T::from(self.margin).unwrap();
let shape = x1.shape();
let (batch, feat) = if shape.len() == 1 {
(1, shape[0])
} else if shape.len() == 2 {
(shape[0], shape[1])
} else {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"CosineEmbeddingLoss: expected 1D or 2D input, got shape {:?}",
shape
),
});
};
if y_data.len() != batch {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"CosineEmbeddingLoss: y length {} != batch size {}",
y_data.len(),
batch
),
});
}
let mut losses = vec![zero; batch];
for b in 0..batch {
let base = b * feat;
let mut dot = zero;
let mut norm1_sq = zero;
let mut norm2_sq = zero;
for f in 0..feat {
let a = x1_data[base + f];
let bv = x2_data[base + f];
dot += a * bv;
norm1_sq += a * a;
norm2_sq += bv * bv;
}
let denom = norm1_sq.sqrt() * norm2_sq.sqrt();
let cos_sim = if denom > zero { dot / denom } else { zero };
if y_data[b] > zero {
losses[b] = one - cos_sim;
} else {
let v = cos_sim - margin_t;
losses[b] = if v > zero { v } else { zero };
}
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && (x1.requires_grad() || x2.requires_grad()) {
let grad_fn = Arc::new(CosineEmbeddingBackward {
x1: x1.clone(),
x2: x2.clone(),
y: y.clone(),
margin: self.margin,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for CosineEmbeddingLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 0.0)
}
}
#[derive(Debug)]
struct CosineEmbeddingBackward<T: Float> {
x1: Tensor<T>,
x2: Tensor<T>,
y: Tensor<T>,
margin: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for CosineEmbeddingBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.x1.shape();
let (batch, feat) = if shape.len() == 1 {
(1, shape[0])
} else {
(shape[0], shape[1])
};
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda {
op: "CosineEmbedding backward",
});
}
let x1_data = self.x1.data()?;
let x2_data = self.x2.data()?;
let y_data = self.y.data()?;
let grad_data = grad_output.data()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let mut grad_x1 = vec![zero; batch * feat];
let mut grad_x2 = vec![zero; batch * feat];
for b in 0..batch {
let base = b * feat;
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
let mut dot = zero;
let mut norm1_sq = zero;
let mut norm2_sq = zero;
for f in 0..feat {
let a = x1_data[base + f];
let bv = x2_data[base + f];
dot += a * bv;
norm1_sq += a * a;
norm2_sq += bv * bv;
}
let norm1 = norm1_sq.sqrt();
let norm2 = norm2_sq.sqrt();
let denom = norm1 * norm2;
if denom <= zero {
continue;
}
let cos_sim = dot / denom;
let is_positive = y_data[b] > zero;
let is_active = if is_positive {
true
} else {
cos_sim - margin_t > zero
};
if !is_active {
continue;
}
let sign = if is_positive { -<T as One>::one() } else { <T as One>::one() };
for f in 0..feat {
let a = x1_data[base + f];
let bv = x2_data[base + f];
let d_cos_x1 = bv / denom - cos_sim * a / norm1_sq;
let d_cos_x2 = a / denom - cos_sim * bv / norm2_sq;
grad_x1[base + f] = sign * d_cos_x1 * scale;
grad_x2[base + f] = sign * d_cos_x2 * scale;
}
}
let grad_x1_tensor =
Tensor::from_storage(TensorStorage::cpu(grad_x1), shape.to_vec(), false)?;
let grad_x2_tensor =
Tensor::from_storage(TensorStorage::cpu(grad_x2), shape.to_vec(), false)?;
Ok(vec![Some(grad_x1_tensor), Some(grad_x2_tensor)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.x1, &self.x2]
}
fn name(&self) -> &'static str {
"CosineEmbeddingBackward"
}
}
#[derive(Debug, Clone)]
pub struct L1Loss {
pub reduction: Reduction,
}
impl L1Loss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
pred: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("l1_loss");
if pred.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"L1Loss: pred shape {:?} != target shape {:?}",
pred.shape(),
target.shape()
),
});
}
let diff = binary_map(pred, target, |p, t| p - t)?;
let abs_diff = unary_map(&diff, |x| x.abs())?;
let reduced = apply_reduction(&abs_diff, self.reduction)?;
if is_grad_enabled() && pred.requires_grad() {
let grad_fn = Arc::new(L1Backward {
pred: pred.clone(),
target: target.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for L1Loss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct L1Backward<T: Float> {
pred: Tensor<T>,
target: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for L1Backward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "L1 backward" });
}
let pred_data = self.pred.data()?;
let target_data = self.target.data()?;
let grad_data = grad_output.data()?;
let n = T::from(pred_data.len()).unwrap();
let sign = |x: T| -> T {
let zero = <T as Zero>::zero();
if x > zero {
<T as One>::one()
} else if x < zero {
-<T as One>::one()
} else {
zero
}
};
let result: Vec<T> = match self.reduction {
Reduction::Mean => {
let go = grad_data[0];
pred_data
.iter()
.zip(target_data.iter())
.map(|(&p, &t)| sign(p - t) * go / n)
.collect()
}
Reduction::Sum => {
let go = grad_data[0];
pred_data
.iter()
.zip(target_data.iter())
.map(|(&p, &t)| sign(p - t) * go)
.collect()
}
Reduction::None => pred_data
.iter()
.zip(target_data.iter())
.zip(grad_data.iter())
.map(|((&p, &t), &g)| sign(p - t) * g)
.collect(),
};
let grad_input = Tensor::from_storage(
TensorStorage::cpu(result),
self.pred.shape().to_vec(),
false,
)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.pred]
}
fn name(&self) -> &'static str {
"L1Backward"
}
}
#[derive(Debug, Clone)]
pub struct NLLLoss {
pub reduction: Reduction,
pub ignore_index: Option<isize>,
}
impl NLLLoss {
pub fn new(reduction: Reduction, ignore_index: Option<isize>) -> Self {
Self {
reduction,
ignore_index,
}
}
pub fn forward<T: Float>(
&self,
log_probs: &Tensor<T>,
targets: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("nll_loss");
let shape = log_probs.shape();
if shape.len() != 2 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"NLLLoss: expected 2D log_probs [B, C], got shape {:?}",
shape
),
});
}
let batch = shape[0];
let classes = shape[1];
if targets.shape() != [batch] {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"NLLLoss: target shape {:?} does not match batch size {}",
targets.shape(),
batch,
),
});
}
if batch == 0 {
return match self.reduction {
Reduction::None => Tensor::from_storage(TensorStorage::cpu(vec![]), vec![0], false),
_ => Tensor::from_storage(
TensorStorage::cpu(vec![<T as Zero>::zero()]),
vec![],
false,
),
};
}
let lp_data = log_probs.data_vec()?;
let targets_data = targets.data_vec()?;
let mut losses = vec![<T as Zero>::zero(); batch];
let mut valid_count: usize = 0;
for b in 0..batch {
let target_idx = targets_data[b].to_isize().unwrap_or(0);
if let Some(ignore) = self.ignore_index {
if target_idx == ignore {
continue;
}
}
let target_class = target_idx as usize;
if target_class >= classes {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"NLLLoss: target index {} is out of range for {} classes at batch element {}",
target_class, classes, b
),
});
}
losses[b] = -lp_data[b * classes + target_class];
valid_count += 1;
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = match self.reduction {
Reduction::None => unreduced.clone(),
Reduction::Sum => sum(&unreduced)?,
Reduction::Mean => {
if valid_count == 0 {
Tensor::from_storage(
TensorStorage::cpu(vec![<T as Zero>::zero()]),
vec![],
false,
)?
} else {
let s = sum(&unreduced)?;
let s_data = s.data_vec()?;
let mean_val = s_data[0] / T::from(valid_count).unwrap();
Tensor::from_storage(TensorStorage::cpu(vec![mean_val]), vec![], false)?
}
}
};
if is_grad_enabled() && log_probs.requires_grad() {
let grad_fn = Arc::new(NLLBackward {
log_probs: log_probs.clone(),
targets: targets.clone(),
reduction: self.reduction,
ignore_index: self.ignore_index,
valid_count,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for NLLLoss {
fn default() -> Self {
Self::new(Reduction::Mean, None)
}
}
#[derive(Debug)]
struct NLLBackward<T: Float> {
log_probs: Tensor<T>,
targets: Tensor<T>,
reduction: Reduction,
ignore_index: Option<isize>,
valid_count: usize,
}
impl<T: Float> GradFn<T> for NLLBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.log_probs.shape();
let batch = shape[0];
let classes = shape[1];
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "NLL backward" });
}
let targets_data = self.targets.data()?;
let grad_data = grad_output.data()?;
let mut result = vec![<T as Zero>::zero(); batch * classes];
for b in 0..batch {
let target_idx = targets_data[b].to_isize().unwrap_or(0);
if let Some(ignore) = self.ignore_index {
if target_idx == ignore {
continue;
}
}
let target_class = target_idx as usize;
let scale = match self.reduction {
Reduction::Mean => {
if self.valid_count > 0 {
grad_data[0] / T::from(self.valid_count).unwrap()
} else {
<T as Zero>::zero()
}
}
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
result[b * classes + target_class] = -scale;
}
let grad_input = Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.log_probs]
}
fn name(&self) -> &'static str {
"NLLBackward"
}
}
#[derive(Debug, Clone)]
pub struct SmoothL1Loss {
pub reduction: Reduction,
}
impl SmoothL1Loss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
pred: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
let huber = HuberLoss::new(self.reduction, 1.0);
huber.forward(pred, target)
}
}
impl Default for SmoothL1Loss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug, Clone)]
pub struct BCELoss {
pub reduction: Reduction,
}
impl BCELoss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("bce_loss");
if input.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"BCELoss: input shape {:?} != target shape {:?}",
input.shape(),
target.shape()
),
});
}
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let one = <T as One>::one();
let eps = T::from(1e-12).unwrap();
let one_m_eps = one - eps;
let loss_data: Vec<T> = input_data
.iter()
.zip(target_data.iter())
.map(|(&x, &y)| {
let xc = if x < eps {
eps
} else if x > one_m_eps {
one_m_eps
} else {
x
};
-(y * xc.ln() + (one - y) * (one - xc).ln())
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), input.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(BCEBackward {
input: input.clone(),
target: target.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for BCELoss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct BCEBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for BCEBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "BCE backward" });
}
let input_data = self.input.data()?;
let target_data = self.target.data()?;
let grad_data = grad_output.data()?;
let one = <T as One>::one();
let eps = T::from(1e-12).unwrap();
let one_m_eps = one - eps;
let n = T::from(input_data.len()).unwrap();
let result: Vec<T> = match self.reduction {
Reduction::Mean => {
let go = grad_data[0];
input_data
.iter()
.zip(target_data.iter())
.map(|(&x, &y)| {
let xc = if x < eps {
eps
} else if x > one_m_eps {
one_m_eps
} else {
x
};
(-y / xc + (one - y) / (one - xc)) * go / n
})
.collect()
}
Reduction::Sum => {
let go = grad_data[0];
input_data
.iter()
.zip(target_data.iter())
.map(|(&x, &y)| {
let xc = if x < eps {
eps
} else if x > one_m_eps {
one_m_eps
} else {
x
};
(-y / xc + (one - y) / (one - xc)) * go
})
.collect()
}
Reduction::None => input_data
.iter()
.zip(target_data.iter())
.zip(grad_data.iter())
.map(|((&x, &y), &g)| {
let xc = if x < eps {
eps
} else if x > one_m_eps {
one_m_eps
} else {
x
};
(-y / xc + (one - y) / (one - xc)) * g
})
.collect(),
};
let grad_input = Tensor::from_storage(
TensorStorage::cpu(result),
self.input.shape().to_vec(),
false,
)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"BCEBackward"
}
}
#[derive(Debug, Clone)]
pub struct TripletMarginLoss {
pub reduction: Reduction,
pub margin: f64,
pub p: f64,
}
impl TripletMarginLoss {
pub fn new(reduction: Reduction, margin: f64, p: f64) -> Self {
Self {
reduction,
margin,
p,
}
}
pub fn forward<T: Float>(
&self,
anchor: &Tensor<T>,
positive: &Tensor<T>,
negative: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if anchor.shape() != positive.shape() || anchor.shape() != negative.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"TripletMarginLoss: shape mismatch: anchor {:?}, positive {:?}, negative {:?}",
anchor.shape(),
positive.shape(),
negative.shape()
),
});
}
let shape = anchor.shape();
if shape.len() != 2 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"TripletMarginLoss: expected 2D input [B, D], got shape {:?}",
shape
),
});
}
let batch = shape[0];
let feat = shape[1];
let anchor_data = anchor.data_vec()?;
let positive_data = positive.data_vec()?;
let negative_data = negative.data_vec()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let p_val = T::from(self.p).unwrap();
let inv_p = T::from(1.0 / self.p).unwrap();
let mut losses = vec![zero; batch];
for (b, loss) in losses.iter_mut().enumerate() {
let base = b * feat;
let mut dist_pos = zero;
let mut dist_neg = zero;
for f in 0..feat {
let dp = (anchor_data[base + f] - positive_data[base + f]).abs();
let dn = (anchor_data[base + f] - negative_data[base + f]).abs();
dist_pos += dp.powf(p_val);
dist_neg += dn.powf(p_val);
}
dist_pos = dist_pos.powf(inv_p);
dist_neg = dist_neg.powf(inv_p);
let val = dist_pos - dist_neg + margin_t;
*loss = if val > zero { val } else { zero };
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && anchor.requires_grad() {
let grad_fn = Arc::new(TripletMarginBackward {
anchor: anchor.clone(),
positive: positive.clone(),
negative: negative.clone(),
margin: self.margin,
p: self.p,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for TripletMarginLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 1.0, 2.0)
}
}
#[derive(Debug)]
struct TripletMarginBackward<T: Float> {
anchor: Tensor<T>,
positive: Tensor<T>,
negative: Tensor<T>,
margin: f64,
p: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for TripletMarginBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.anchor.shape();
let batch = shape[0];
let feat = shape[1];
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "TripletMargin backward" });
}
let anchor_data = self.anchor.data()?;
let positive_data = self.positive.data()?;
let negative_data = self.negative.data()?;
let grad_data = grad_output.data()?;
let zero = <T as Zero>::zero();
let p_val = T::from(self.p).unwrap();
let margin_t = T::from(self.margin).unwrap();
let inv_p = T::from(1.0 / self.p).unwrap();
let p_m1 = p_val - <T as One>::one();
let mut result = vec![zero; batch * feat];
for b in 0..batch {
let base = b * feat;
let mut dist_pos = zero;
let mut dist_neg = zero;
for f in 0..feat {
let dp = (anchor_data[base + f] - positive_data[base + f]).abs();
let dn = (anchor_data[base + f] - negative_data[base + f]).abs();
dist_pos += dp.powf(p_val);
dist_neg += dn.powf(p_val);
}
dist_pos = dist_pos.powf(inv_p);
dist_neg = dist_neg.powf(inv_p);
let triplet_val = dist_pos - dist_neg + margin_t;
if triplet_val <= zero {
continue;
}
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
let eps = T::from(1e-12).unwrap();
for f in 0..feat {
let diff_pos = anchor_data[base + f] - positive_data[base + f];
let diff_neg = anchor_data[base + f] - negative_data[base + f];
let grad_pos = if dist_pos > eps {
diff_pos.signum() * diff_pos.abs().powf(p_m1) / dist_pos.powf(p_m1)
} else {
zero
};
let grad_neg = if dist_neg > eps {
diff_neg.signum() * diff_neg.abs().powf(p_m1) / dist_neg.powf(p_m1)
} else {
zero
};
result[base + f] = (grad_pos - grad_neg) * scale;
}
}
let grad_input = Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.anchor]
}
fn name(&self) -> &'static str {
"TripletMarginBackward"
}
}
#[derive(Debug, Clone)]
pub struct MarginRankingLoss {
pub reduction: Reduction,
pub margin: f64,
}
impl MarginRankingLoss {
pub fn new(reduction: Reduction, margin: f64) -> Self {
Self { reduction, margin }
}
pub fn forward<T: Float>(
&self,
x1: &Tensor<T>,
x2: &Tensor<T>,
y: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if x1.shape() != x2.shape() || x1.shape() != y.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"MarginRankingLoss: shape mismatch: x1 {:?}, x2 {:?}, y {:?}",
x1.shape(),
x2.shape(),
y.shape()
),
});
}
let x1_data = x1.data_vec()?;
let x2_data = x2.data_vec()?;
let y_data = y.data_vec()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let loss_data: Vec<T> = x1_data
.iter()
.zip(x2_data.iter())
.zip(y_data.iter())
.map(|((&a, &b), &yi)| {
let val = -yi * (a - b) + margin_t;
if val > zero { val } else { zero }
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), x1.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && x1.requires_grad() {
let grad_fn = Arc::new(MarginRankingBackward {
x1: x1.clone(),
x2: x2.clone(),
y: y.clone(),
margin: self.margin,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for MarginRankingLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 0.0)
}
}
#[derive(Debug)]
struct MarginRankingBackward<T: Float> {
x1: Tensor<T>,
x2: Tensor<T>,
y: Tensor<T>,
margin: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for MarginRankingBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "MarginRanking backward" });
}
let x1_data = self.x1.data()?;
let x2_data = self.x2.data()?;
let y_data = self.y.data()?;
let grad_data = grad_output.data()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let n = T::from(x1_data.len()).unwrap();
let result: Vec<T> = match self.reduction {
Reduction::Mean => {
let go = grad_data[0];
x1_data
.iter()
.zip(x2_data.iter())
.zip(y_data.iter())
.map(|((&a, &b), &yi)| {
let val = -yi * (a - b) + margin_t;
if val > zero { -yi * go / n } else { zero }
})
.collect()
}
Reduction::Sum => {
let go = grad_data[0];
x1_data
.iter()
.zip(x2_data.iter())
.zip(y_data.iter())
.map(|((&a, &b), &yi)| {
let val = -yi * (a - b) + margin_t;
if val > zero { -yi * go } else { zero }
})
.collect()
}
Reduction::None => x1_data
.iter()
.zip(x2_data.iter())
.zip(y_data.iter())
.zip(grad_data.iter())
.map(|(((&a, &b), &yi), &g)| {
let val = -yi * (a - b) + margin_t;
if val > zero { -yi * g } else { zero }
})
.collect(),
};
let grad_input =
Tensor::from_storage(TensorStorage::cpu(result), self.x1.shape().to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.x1]
}
fn name(&self) -> &'static str {
"MarginRankingBackward"
}
}
#[derive(Debug, Clone)]
pub struct CTCLoss {
pub reduction: Reduction,
pub blank: usize,
pub zero_infinity: bool,
}
impl CTCLoss {
pub fn new(reduction: Reduction, blank: usize, zero_infinity: bool) -> Self {
Self {
reduction,
blank,
zero_infinity,
}
}
pub fn forward<T: Float>(
&self,
log_probs: &Tensor<T>,
targets: &Tensor<T>,
input_lengths: &[usize],
target_lengths: &[usize],
) -> FerrotorchResult<Tensor<T>> {
let shape = log_probs.shape();
if shape.len() != 3 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"CTCLoss: expected 3D log_probs [T, B, C], got shape {:?}",
shape
),
});
}
let max_t = shape[0];
let batch = shape[1];
let num_classes = shape[2];
if input_lengths.len() != batch || target_lengths.len() != batch {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"CTCLoss: batch size {} but input_lengths.len()={}, target_lengths.len()={}",
batch,
input_lengths.len(),
target_lengths.len()
),
});
}
let lp_data = log_probs.data_vec()?;
let targets_data = targets.data_vec()?;
let neg_inf = T::from(-1e30).unwrap();
let zero = <T as Zero>::zero();
let mut losses = vec![zero; batch];
let mut target_offset = 0usize;
for b in 0..batch {
let t_len = input_lengths[b].min(max_t);
let s_len = target_lengths[b];
let tgt: Vec<usize> = (0..s_len)
.map(|i| targets_data[target_offset + i].to_usize().unwrap_or(0))
.collect();
target_offset += s_len;
if s_len == 0 {
let mut log_prob_blank = zero;
for t in 0..t_len {
let idx = t * batch * num_classes + b * num_classes + self.blank;
log_prob_blank += lp_data[idx];
}
losses[b] = -log_prob_blank;
continue;
}
let ext_len = 2 * s_len + 1;
let mut ext_labels = vec![self.blank; ext_len];
for i in 0..s_len {
ext_labels[2 * i + 1] = tgt[i];
}
let mut alpha = vec![vec![neg_inf; ext_len]; t_len];
let lp_blank_0 = lp_data[b * num_classes + ext_labels[0]];
alpha[0][0] = lp_blank_0;
if ext_len > 1 {
let lp_first = lp_data[b * num_classes + ext_labels[1]];
alpha[0][1] = lp_first;
}
for t in 1..t_len {
for s in 0..ext_len {
let lp_val = lp_data[t * batch * num_classes + b * num_classes + ext_labels[s]];
let mut log_sum = alpha[t - 1][s];
if s >= 1 {
log_sum = log_add_exp(log_sum, alpha[t - 1][s - 1]);
}
if s >= 2 && ext_labels[s] != self.blank && ext_labels[s] != ext_labels[s - 2] {
log_sum = log_add_exp(log_sum, alpha[t - 1][s - 2]);
}
alpha[t][s] = log_sum + lp_val;
}
}
let log_prob =
log_add_exp(alpha[t_len - 1][ext_len - 1], alpha[t_len - 1][ext_len - 2]);
let loss_val = -log_prob;
if self.zero_infinity && (loss_val == T::infinity() || loss_val.is_nan()) {
losses[b] = zero;
} else {
losses[b] = loss_val;
}
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && log_probs.requires_grad() {
let grad_fn = Arc::new(CTCBackward {
log_probs: log_probs.clone(),
targets: targets.clone(),
input_lengths: input_lengths.to_vec(),
target_lengths: target_lengths.to_vec(),
blank: self.blank,
zero_infinity: self.zero_infinity,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for CTCLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 0, false)
}
}
fn log_add_exp<T: Float>(a: T, b: T) -> T {
let max = if a > b { a } else { b };
let min = if a > b { b } else { a };
let threshold = T::from(-1e29).unwrap();
if max < threshold {
max
} else {
max + (min - max).exp().ln_1p()
}
}
#[derive(Debug)]
struct CTCBackward<T: Float> {
log_probs: Tensor<T>,
targets: Tensor<T>,
input_lengths: Vec<usize>,
target_lengths: Vec<usize>,
blank: usize,
zero_infinity: bool,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for CTCBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda {
op: "CTC backward",
});
}
let shape = self.log_probs.shape();
let max_t = shape[0];
let batch = shape[1];
let num_classes = shape[2];
let total_size = max_t * batch * num_classes;
let lp_data = self.log_probs.data()?;
let targets_data = self.targets.data()?;
let grad_data = grad_output.data()?;
let neg_inf = T::from(-1e30).unwrap();
let zero = <T as Zero>::zero();
let mut result = vec![zero; total_size];
let mut target_offset = 0usize;
for b in 0..batch {
let t_len = self.input_lengths[b].min(max_t);
let s_len = self.target_lengths[b];
let tgt: Vec<usize> = (0..s_len)
.map(|i| targets_data[target_offset + i].to_usize().unwrap_or(0))
.collect();
target_offset += s_len;
let go_scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
if s_len == 0 {
for t in 0..t_len {
let idx = t * batch * num_classes + b * num_classes + self.blank;
result[idx] = -go_scale;
}
continue;
}
let ext_len = 2 * s_len + 1;
let mut ext_labels = vec![self.blank; ext_len];
for i in 0..s_len {
ext_labels[2 * i + 1] = tgt[i];
}
let mut alpha = vec![vec![neg_inf; ext_len]; t_len];
alpha[0][0] = lp_data[b * num_classes + ext_labels[0]];
if ext_len > 1 {
alpha[0][1] = lp_data[b * num_classes + ext_labels[1]];
}
for t in 1..t_len {
for s in 0..ext_len {
let lp_val =
lp_data[t * batch * num_classes + b * num_classes + ext_labels[s]];
let mut log_sum = alpha[t - 1][s];
if s >= 1 {
log_sum = log_add_exp(log_sum, alpha[t - 1][s - 1]);
}
if s >= 2
&& ext_labels[s] != self.blank
&& ext_labels[s] != ext_labels[s - 2]
{
log_sum = log_add_exp(log_sum, alpha[t - 1][s - 2]);
}
alpha[t][s] = log_sum + lp_val;
}
}
let log_prob =
log_add_exp(alpha[t_len - 1][ext_len - 1], alpha[t_len - 1][ext_len - 2]);
if self.zero_infinity && ((-log_prob) == T::infinity() || (-log_prob).is_nan()) {
continue;
}
let mut beta = vec![vec![neg_inf; ext_len]; t_len];
beta[t_len - 1][ext_len - 1] = zero;
if ext_len > 1 {
beta[t_len - 1][ext_len - 2] = zero;
}
for t in (0..t_len.saturating_sub(1)).rev() {
for s in (0..ext_len).rev() {
let lp_s =
lp_data[(t + 1) * batch * num_classes + b * num_classes + ext_labels[s]];
let mut log_sum = lp_s + beta[t + 1][s];
if s + 1 < ext_len {
let lp_s1 = lp_data
[(t + 1) * batch * num_classes + b * num_classes + ext_labels[s + 1]];
log_sum = log_add_exp(log_sum, lp_s1 + beta[t + 1][s + 1]);
}
if s + 2 < ext_len
&& ext_labels[s] != self.blank
&& ext_labels[s] != ext_labels[s + 2]
{
let lp_s2 = lp_data
[(t + 1) * batch * num_classes + b * num_classes + ext_labels[s + 2]];
log_sum = log_add_exp(log_sum, lp_s2 + beta[t + 1][s + 2]);
}
beta[t][s] = log_sum;
}
}
for t in 0..t_len {
let mut log_ab_per_class = vec![neg_inf; num_classes];
for s in 0..ext_len {
let c = ext_labels[s];
let ab = alpha[t][s] + beta[t][s];
log_ab_per_class[c] = log_add_exp(log_ab_per_class[c], ab);
}
for c in 0..num_classes {
let idx = t * batch * num_classes + b * num_classes + c;
let threshold = T::from(-1e29).unwrap();
let occupation = if log_ab_per_class[c] > threshold {
(log_ab_per_class[c] - log_prob).exp()
} else {
zero
};
result[idx] = -occupation * go_scale;
}
}
}
let grad_input =
Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.log_probs]
}
fn name(&self) -> &'static str {
"CTCBackward"
}
}
#[derive(Debug, Clone)]
pub struct PoissonNLLLoss {
pub reduction: Reduction,
pub log_input: bool,
pub eps: f64,
}
impl PoissonNLLLoss {
pub fn new(reduction: Reduction, log_input: bool, eps: f64) -> Self {
Self {
reduction,
log_input,
eps,
}
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if input.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"PoissonNLLLoss: input shape {:?} != target shape {:?}",
input.shape(),
target.shape()
),
});
}
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let eps_t = T::from(self.eps).unwrap();
let loss_data: Vec<T> = input_data
.iter()
.zip(target_data.iter())
.map(|(&x, &y)| {
if self.log_input {
x.exp() - y * x
} else {
x - y * (x + eps_t).ln()
}
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), input.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(PoissonNLLBackward {
input: input.clone(),
target: target.clone(),
log_input: self.log_input,
eps: self.eps,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for PoissonNLLLoss {
fn default() -> Self {
Self::new(Reduction::Mean, true, 1e-8)
}
}
#[derive(Debug)]
struct PoissonNLLBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
log_input: bool,
eps: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for PoissonNLLBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
use ferrotorch_core::autograd::no_grad::no_grad;
use ferrotorch_core::grad_fns::arithmetic::{add, div, mul, sub};
use ferrotorch_core::grad_fns::transcendental::exp;
let device = self.input.device();
let grad_input = no_grad(|| {
let local = if self.log_input {
let exp_input = exp(&self.input)?;
sub(&exp_input, &self.target)?
} else {
let eps = ferrotorch_core::creation::scalar(T::from(self.eps).unwrap())?
.to(device)?;
let one = ferrotorch_core::creation::scalar(<T as One>::one())?.to(device)?;
let denom = add(&self.input, &eps)?;
let ratio = div(&self.target, &denom)?;
sub(&one, &ratio)?
};
let result = mul(&local, grad_output)?;
match self.reduction {
Reduction::Mean => {
let n = ferrotorch_core::creation::scalar(
T::from(self.input.shape().iter().product::<usize>()).unwrap(),
)?
.to(device)?;
div(&result, &n)
}
_ => Ok(result),
}
})?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"PoissonNLLBackward"
}
}
#[derive(Debug, Clone)]
pub struct MultiMarginLoss {
pub reduction: Reduction,
pub p: usize,
pub margin: f64,
}
impl MultiMarginLoss {
pub fn new(reduction: Reduction, p: usize, margin: f64) -> Self {
Self {
reduction,
p,
margin,
}
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
let shape = input.shape();
if shape.len() != 2 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"MultiMarginLoss: expected 2D input [B, C], got shape {:?}",
shape
),
});
}
let batch = shape[0];
let classes = shape[1];
if target.shape() != [batch] {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"MultiMarginLoss: target shape {:?} does not match batch size {}",
target.shape(),
batch,
),
});
}
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let inv_c = T::from(1.0).unwrap() / T::from(classes).unwrap();
let mut losses = vec![zero; batch];
for b in 0..batch {
let base = b * classes;
let y = target_data[b].to_usize().unwrap_or(0);
let x_y = input_data[base + y];
let mut sample_loss = zero;
for j in 0..classes {
if j == y {
continue;
}
let val = margin_t - x_y + input_data[base + j];
if val > zero {
sample_loss += if self.p == 2 { val * val } else { val };
}
}
losses[b] = sample_loss * inv_c;
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(MultiMarginBackward {
input: input.clone(),
target: target.clone(),
p: self.p,
margin: self.margin,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for MultiMarginLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 1, 1.0)
}
}
#[derive(Debug)]
struct MultiMarginBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
p: usize,
margin: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for MultiMarginBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.input.shape();
let batch = shape[0];
let classes = shape[1];
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "MultiMargin backward" });
}
let input_data = self.input.data()?;
let target_data = self.target.data()?;
let grad_data = grad_output.data()?;
let zero = <T as Zero>::zero();
let one = <T as One>::one();
let two = T::from(2.0).unwrap();
let margin_t = T::from(self.margin).unwrap();
let inv_c = one / T::from(classes).unwrap();
let mut result = vec![zero; batch * classes];
for b in 0..batch {
let base = b * classes;
let y = target_data[b].to_usize().unwrap_or(0);
let x_y = input_data[base + y];
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
let mut grad_y = zero;
for j in 0..classes {
if j == y {
continue;
}
let val = margin_t - x_y + input_data[base + j];
if val > zero {
let g_j = if self.p == 2 { two * val } else { one };
result[base + j] = g_j * inv_c * scale;
grad_y = grad_y - g_j * inv_c * scale;
}
}
result[base + y] += grad_y;
}
let grad_input = Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"MultiMarginBackward"
}
}
#[derive(Debug, Clone)]
pub struct MultiLabelSoftMarginLoss {
pub reduction: Reduction,
}
impl MultiLabelSoftMarginLoss {
pub fn new(reduction: Reduction) -> Self {
Self { reduction }
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
let shape = input.shape();
if shape.len() != 2 {
return Err(FerrotorchError::InvalidArgument {
message: format!(
"MultiLabelSoftMarginLoss: expected 2D input [B, C], got shape {:?}",
shape
),
});
}
if input.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"MultiLabelSoftMarginLoss: input shape {:?} != target shape {:?}",
input.shape(),
target.shape()
),
});
}
let batch = shape[0];
let classes = shape[1];
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let zero = <T as Zero>::zero();
let one = <T as One>::one();
let inv_c = one / T::from(classes).unwrap();
let mut losses = vec![zero; batch];
for (b, loss) in losses.iter_mut().enumerate() {
let base = b * classes;
let mut sample_loss = zero;
for c in 0..classes {
let x = input_data[base + c];
let y = target_data[base + c];
let relu_x = if x > zero { x } else { zero };
let abs_x = if x > zero { x } else { -x };
sample_loss += relu_x - x * y + (one + (-abs_x).exp()).ln();
}
*loss = sample_loss * inv_c;
}
let unreduced = Tensor::from_storage(TensorStorage::cpu(losses), vec![batch], false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(MultiLabelSoftMarginBackward {
input: input.clone(),
target: target.clone(),
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for MultiLabelSoftMarginLoss {
fn default() -> Self {
Self::new(Reduction::Mean)
}
}
#[derive(Debug)]
struct MultiLabelSoftMarginBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for MultiLabelSoftMarginBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
let shape = self.input.shape();
let batch = shape[0];
let classes = shape[1];
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "MultiLabelSoftMargin backward" });
}
let input_data = self.input.data()?;
let target_data = self.target.data()?;
let grad_data = grad_output.data()?;
let one = <T as One>::one();
let inv_c = one / T::from(classes).unwrap();
let mut result = vec![<T as Zero>::zero(); batch * classes];
for b in 0..batch {
let base = b * classes;
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(batch).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[b],
};
for c in 0..classes {
let x = input_data[base + c];
let y = target_data[base + c];
let sig = one / (one + (-x).exp());
result[base + c] = (sig - y) * inv_c * scale;
}
}
let grad_input = Tensor::from_storage(TensorStorage::cpu(result), shape.to_vec(), false)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"MultiLabelSoftMarginBackward"
}
}
#[derive(Debug, Clone)]
pub struct HingeEmbeddingLoss {
pub reduction: Reduction,
pub margin: f64,
}
impl HingeEmbeddingLoss {
pub fn new(reduction: Reduction, margin: f64) -> Self {
Self { reduction, margin }
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
y: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
if input.shape() != y.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"HingeEmbeddingLoss: input shape {:?} != y shape {:?}",
input.shape(),
y.shape()
),
});
}
let input_data = input.data_vec()?;
let y_data = y.data_vec()?;
let zero = <T as Zero>::zero();
let margin_t = T::from(self.margin).unwrap();
let loss_data: Vec<T> = input_data
.iter()
.zip(y_data.iter())
.map(|(&x, &yi)| {
if yi > zero {
x
} else {
let val = margin_t - x;
if val > zero { val } else { zero }
}
})
.collect();
let unreduced =
Tensor::from_storage(TensorStorage::cpu(loss_data), input.shape().to_vec(), false)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && input.requires_grad() {
let grad_fn = Arc::new(HingeEmbeddingBackward {
input: input.clone(),
y: y.clone(),
margin: self.margin,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for HingeEmbeddingLoss {
fn default() -> Self {
Self::new(Reduction::Mean, 1.0)
}
}
#[derive(Debug)]
struct HingeEmbeddingBackward<T: Float> {
input: Tensor<T>,
y: Tensor<T>,
margin: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for HingeEmbeddingBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda { op: "HingeEmbedding backward" });
}
let input_data = self.input.data()?;
let y_data = self.y.data()?;
let grad_data = grad_output.data()?;
let zero = <T as Zero>::zero();
let one = <T as One>::one();
let margin_t = T::from(self.margin).unwrap();
let n = T::from(input_data.len()).unwrap();
let result: Vec<T> = match self.reduction {
Reduction::Mean => {
let go = grad_data[0];
input_data
.iter()
.zip(y_data.iter())
.map(|(&x, &yi)| {
if yi > zero {
one * go / n
} else {
let val = margin_t - x;
if val > zero { -one * go / n } else { zero }
}
})
.collect()
}
Reduction::Sum => {
let go = grad_data[0];
input_data
.iter()
.zip(y_data.iter())
.map(|(&x, &yi)| {
if yi > zero {
one * go
} else {
let val = margin_t - x;
if val > zero { -one * go } else { zero }
}
})
.collect()
}
Reduction::None => input_data
.iter()
.zip(y_data.iter())
.zip(grad_data.iter())
.map(|((&x, &yi), &g)| {
if yi > zero {
one * g
} else {
let val = margin_t - x;
if val > zero { -one * g } else { zero }
}
})
.collect(),
};
let grad_input = Tensor::from_storage(
TensorStorage::cpu(result),
self.input.shape().to_vec(),
false,
)?;
Ok(vec![Some(grad_input)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input]
}
fn name(&self) -> &'static str {
"HingeEmbeddingBackward"
}
}
#[derive(Debug, Clone)]
pub struct GaussianNLLLoss {
pub reduction: Reduction,
pub full: bool,
pub eps: f64,
}
impl GaussianNLLLoss {
pub fn new(reduction: Reduction, full: bool, eps: f64) -> Self {
Self {
reduction,
full,
eps,
}
}
pub fn forward<T: Float>(
&self,
input: &Tensor<T>,
target: &Tensor<T>,
var: &Tensor<T>,
) -> FerrotorchResult<Tensor<T>> {
autocast_guard("gaussian_nll_loss");
if input.shape() != target.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"GaussianNLLLoss: input shape {:?} != target shape {:?}",
input.shape(),
target.shape()
),
});
}
if input.shape() != var.shape() {
return Err(FerrotorchError::ShapeMismatch {
message: format!(
"GaussianNLLLoss: input shape {:?} != var shape {:?}",
input.shape(),
var.shape()
),
});
}
let input_data = input.data_vec()?;
let target_data = target.data_vec()?;
let var_data = var.data_vec()?;
let half = T::from(0.5).unwrap();
let eps_t = T::from(self.eps).unwrap();
let log_2pi = T::from((2.0 * std::f64::consts::PI).ln()).unwrap();
let loss_data: Vec<T> = input_data
.iter()
.zip(target_data.iter())
.zip(var_data.iter())
.map(|((&inp, &tgt), &v)| {
let v_clamped = if v < eps_t { eps_t } else { v };
let diff = inp - tgt;
let mut l = half * (v_clamped.ln() + diff * diff / v_clamped);
if self.full {
l = l + half * log_2pi;
}
l
})
.collect();
let unreduced = Tensor::from_storage(
TensorStorage::cpu(loss_data),
input.shape().to_vec(),
false,
)?;
let reduced = apply_reduction(&unreduced, self.reduction)?;
if is_grad_enabled() && (input.requires_grad() || var.requires_grad()) {
let grad_fn = Arc::new(GaussianNLLBackward {
input: input.clone(),
target: target.clone(),
var: var.clone(),
eps: self.eps,
reduction: self.reduction,
});
Tensor::from_operation(
TensorStorage::cpu(reduced.data_vec()?),
reduced.shape().to_vec(),
grad_fn,
)
} else {
Ok(reduced)
}
}
}
impl Default for GaussianNLLLoss {
fn default() -> Self {
Self::new(Reduction::Mean, false, 1e-6)
}
}
#[derive(Debug)]
struct GaussianNLLBackward<T: Float> {
input: Tensor<T>,
target: Tensor<T>,
var: Tensor<T>,
eps: f64,
reduction: Reduction,
}
impl<T: Float> GradFn<T> for GaussianNLLBackward<T> {
fn backward(&self, grad_output: &Tensor<T>) -> FerrotorchResult<Vec<Option<Tensor<T>>>> {
if grad_output.is_cuda() {
return Err(FerrotorchError::NotImplementedOnCuda {
op: "GaussianNLL backward",
});
}
let input_data = self.input.data()?;
let target_data = self.target.data()?;
let var_data = self.var.data()?;
let grad_data = grad_output.data()?;
let n = input_data.len();
let half = T::from(0.5).unwrap();
let eps_t = T::from(self.eps).unwrap();
let zero = <T as Zero>::zero();
let mut grad_input = vec![zero; n];
let mut grad_var = vec![zero; n];
for i in 0..n {
let scale = match self.reduction {
Reduction::Mean => grad_data[0] / T::from(n).unwrap(),
Reduction::Sum => grad_data[0],
Reduction::None => grad_data[i],
};
let v = if var_data[i] < eps_t {
eps_t
} else {
var_data[i]
};
let diff = input_data[i] - target_data[i];
grad_input[i] = diff / v * scale;
grad_var[i] = half * (<T as One>::one() / v - diff * diff / (v * v)) * scale;
}
let shape = self.input.shape().to_vec();
let grad_input_tensor =
Tensor::from_storage(TensorStorage::cpu(grad_input), shape.clone(), false)?;
let grad_var_tensor =
Tensor::from_storage(TensorStorage::cpu(grad_var), shape, false)?;
Ok(vec![Some(grad_input_tensor), Some(grad_var_tensor)])
}
fn inputs(&self) -> Vec<&Tensor<T>> {
vec![&self.input, &self.var]
}
fn name(&self) -> &'static str {
"GaussianNLLBackward"
}
}
#[cfg(test)]
mod tests {
use super::*;
use ferrotorch_core::autograd::graph::backward;
use ferrotorch_core::storage::TensorStorage;
fn leaf_vec(vals: &[f64]) -> Tensor<f64> {
Tensor::from_storage(TensorStorage::cpu(vals.to_vec()), vec![vals.len()], true).unwrap()
}
fn target_vec(vals: &[f64]) -> Tensor<f64> {
Tensor::from_storage(TensorStorage::cpu(vals.to_vec()), vec![vals.len()], false).unwrap()
}
fn leaf_2d(vals: &[f64], shape: &[usize]) -> Tensor<f64> {
Tensor::from_storage(TensorStorage::cpu(vals.to_vec()), shape.to_vec(), true).unwrap()
}
#[test]
fn test_mse_forward_mean() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = MSELoss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
assert!(out.is_scalar());
assert!(
(out.item().unwrap() - 0.25).abs() < 1e-7,
"MSE mean: expected 0.25, got {}",
out.item().unwrap()
);
}
#[test]
fn test_mse_forward_sum() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = MSELoss::new(Reduction::Sum);
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 0.75).abs() < 1e-7,
"MSE sum: expected 0.75, got {}",
out.item().unwrap()
);
}
#[test]
fn test_mse_forward_none() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = MSELoss::new(Reduction::None);
let out = loss.forward(&pred, &target).unwrap();
assert_eq!(out.shape(), &[3]);
let d = out.data().unwrap();
for i in 0..3 {
assert!(
(d[i] - 0.25).abs() < 1e-7,
"MSE none[{}]: expected 0.25, got {}",
i,
d[i]
);
}
}
#[test]
fn test_mse_backward_mean() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = MSELoss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = -1.0 / 3.0;
for i in 0..3 {
assert!(
(g[i] - expected).abs() < 1e-7,
"MSE grad[{}]: expected {}, got {}",
i,
expected,
g[i]
);
}
}
#[test]
fn test_mse_backward_sum() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = MSELoss::new(Reduction::Sum);
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
for i in 0..3 {
assert!(
(g[i] - (-1.0)).abs() < 1e-7,
"MSE sum grad[{}]: expected -1.0, got {}",
i,
g[i]
);
}
}
#[test]
fn test_mse_zero_loss() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let loss = MSELoss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(out.item().unwrap().abs() < 1e-10);
}
#[test]
fn test_cross_entropy_forward_mean() {
let logits = leaf_2d(&[1.0, 2.0, 3.0, 1.0, 2.0, 3.0], &[2, 3]);
let targets = target_vec(&[2.0, 0.0]);
let loss = CrossEntropyLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
let sum_exp = (-2.0_f64).exp() + (-1.0_f64).exp() + 1.0;
let log_sum = sum_exp.ln();
let lsm = [
1.0 - 3.0 - log_sum,
2.0 - 3.0 - log_sum,
3.0 - 3.0 - log_sum,
];
let expected = (-lsm[2] + (-lsm[0])) / 2.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"CE mean: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_cross_entropy_forward_sum() {
let logits = leaf_2d(&[1.0, 2.0, 3.0, 1.0, 2.0, 3.0], &[2, 3]);
let targets = target_vec(&[2.0, 0.0]);
let loss = CrossEntropyLoss::new(Reduction::Sum, 0.0);
let out = loss.forward(&logits, &targets).unwrap();
let sum_exp = (-2.0_f64).exp() + (-1.0_f64).exp() + 1.0;
let log_sum = sum_exp.ln();
let lsm = [
1.0 - 3.0 - log_sum,
2.0 - 3.0 - log_sum,
3.0 - 3.0 - log_sum,
];
let expected = -lsm[2] + (-lsm[0]);
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"CE sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_cross_entropy_forward_none() {
let logits = leaf_2d(&[1.0, 2.0, 3.0, 1.0, 2.0, 3.0], &[2, 3]);
let targets = target_vec(&[2.0, 0.0]);
let loss = CrossEntropyLoss::new(Reduction::None, 0.0);
let out = loss.forward(&logits, &targets).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
let sum_exp = (-2.0_f64).exp() + (-1.0_f64).exp() + 1.0;
let log_sum = sum_exp.ln();
let lsm = [
1.0 - 3.0 - log_sum,
2.0 - 3.0 - log_sum,
3.0 - 3.0 - log_sum,
];
assert!((d[0] - (-lsm[2])).abs() < 1e-6);
assert!((d[1] - (-lsm[0])).abs() < 1e-6);
}
#[test]
fn test_cross_entropy_backward_mean() {
let logits = leaf_2d(&[1.0, 2.0, 3.0], &[1, 3]);
let targets = target_vec(&[1.0]);
let loss = CrossEntropyLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
backward(&out).unwrap();
let grad = logits.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let sum_exp = 1.0_f64.exp() + 2.0_f64.exp() + 3.0_f64.exp();
let sm = [
1.0_f64.exp() / sum_exp,
2.0_f64.exp() / sum_exp,
3.0_f64.exp() / sum_exp,
];
let expected = [sm[0] - 0.0, sm[1] - 1.0, sm[2] - 0.0];
for i in 0..3 {
assert!(
(g[i] - expected[i]).abs() < 1e-6,
"CE grad[{}]: expected {}, got {}",
i,
expected[i],
g[i]
);
}
}
#[test]
fn test_cross_entropy_label_smoothing() {
let logits = leaf_2d(&[1.0, 2.0, 3.0], &[1, 3]);
let targets = target_vec(&[2.0]);
let ls = 0.1;
let loss = CrossEntropyLoss::new(Reduction::Mean, ls);
let out = loss.forward(&logits, &targets).unwrap();
let max_val = 3.0_f64;
let sum_exp = (-2.0_f64).exp() + (-1.0_f64).exp() + 1.0;
let log_sum = sum_exp.ln();
let lsm = [
1.0 - max_val - log_sum,
2.0 - max_val - log_sum,
3.0 - max_val - log_sum,
];
let nll = -lsm[2];
let smooth = -(lsm[0] + lsm[1] + lsm[2]) / 3.0;
let expected = (1.0 - ls) * nll + ls * smooth;
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"CE label smoothing: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_cross_entropy_large_logits_stability() {
let logits = leaf_2d(&[1000.0, 1001.0, 999.0], &[1, 3]);
let targets = target_vec(&[1.0]);
let loss = CrossEntropyLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
let val = out.item().unwrap();
assert!(
val.is_finite(),
"CE with large logits produced non-finite: {}",
val
);
let z = (-1.0_f64).exp() + 1.0 + (-2.0_f64).exp();
let expected = -(1001.0 - 1001.0 - z.ln()); assert!(
(val - expected).abs() < 1e-5,
"CE large logits: expected {}, got {}",
expected,
val
);
}
#[test]
fn test_cross_entropy_negative_logits_stability() {
let logits = leaf_2d(&[-1000.0, -999.0, -1001.0], &[1, 3]);
let targets = target_vec(&[1.0]);
let loss = CrossEntropyLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
let val = out.item().unwrap();
assert!(
val.is_finite(),
"CE with large negative logits produced non-finite: {}",
val
);
}
#[test]
fn test_bce_forward_mean() {
let logits = leaf_vec(&[0.0, 0.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::new(Reduction::Mean);
let out = loss.forward(&logits, &targets).unwrap();
let expected = 2.0_f64.ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"BCE mean: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_bce_forward_sum() {
let logits = leaf_vec(&[0.0, 0.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::new(Reduction::Sum);
let out = loss.forward(&logits, &targets).unwrap();
let expected = 2.0 * 2.0_f64.ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"BCE sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_bce_forward_none() {
let logits = leaf_vec(&[0.0, 0.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::new(Reduction::None);
let out = loss.forward(&logits, &targets).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
let ln2 = 2.0_f64.ln();
assert!((d[0] - ln2).abs() < 1e-7);
assert!((d[1] - ln2).abs() < 1e-7);
}
#[test]
fn test_bce_backward_mean() {
let logits = leaf_vec(&[0.0, 0.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::new(Reduction::Mean);
let out = loss.forward(&logits, &targets).unwrap();
backward(&out).unwrap();
let grad = logits.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-0.25)).abs() < 1e-7,
"BCE grad[0]: expected -0.25, got {}",
g[0]
);
assert!(
(g[1] - 0.25).abs() < 1e-7,
"BCE grad[1]: expected 0.25, got {}",
g[1]
);
}
#[test]
fn test_bce_numerical_stability_large_positive() {
let logits = leaf_vec(&[100.0]);
let targets = target_vec(&[1.0]);
let loss = BCEWithLogitsLoss::new(Reduction::Mean);
let out = loss.forward(&logits, &targets).unwrap();
let val = out.item().unwrap();
assert!(
val.is_finite(),
"BCE large positive logit: non-finite {}",
val
);
assert!(
val < 1e-10,
"BCE large positive logit: expected ~0, got {}",
val
);
}
#[test]
fn test_bce_numerical_stability_large_negative() {
let logits = leaf_vec(&[-100.0]);
let targets = target_vec(&[0.0]);
let loss = BCEWithLogitsLoss::new(Reduction::Mean);
let out = loss.forward(&logits, &targets).unwrap();
let val = out.item().unwrap();
assert!(
val.is_finite(),
"BCE large negative logit: non-finite {}",
val
);
assert!(
val < 1e-10,
"BCE large negative logit: expected ~0, got {}",
val
);
}
#[test]
fn test_huber_forward_quadratic_region() {
let pred = leaf_vec(&[1.3]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default(); let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 0.045).abs() < 1e-7,
"Huber quadratic: expected 0.045, got {}",
out.item().unwrap()
);
}
#[test]
fn test_huber_forward_linear_region() {
let pred = leaf_vec(&[3.0]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 1.5).abs() < 1e-7,
"Huber linear: expected 1.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_huber_forward_sum() {
let pred = leaf_vec(&[1.3, 3.0]);
let target = target_vec(&[1.0, 1.0]);
let loss = HuberLoss::new(Reduction::Sum, 1.0);
let out = loss.forward(&pred, &target).unwrap();
let expected = 0.045 + 1.5;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"Huber sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_huber_forward_none() {
let pred = leaf_vec(&[1.3, 3.0]);
let target = target_vec(&[1.0, 1.0]);
let loss = HuberLoss::new(Reduction::None, 1.0);
let out = loss.forward(&pred, &target).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
assert!((d[0] - 0.045).abs() < 1e-7);
assert!((d[1] - 1.5).abs() < 1e-7);
}
#[test]
fn test_huber_backward_quadratic() {
let pred = leaf_vec(&[1.3]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - 0.3).abs() < 1e-7,
"Huber quadratic grad: expected 0.3, got {}",
g[0]
);
}
#[test]
fn test_huber_backward_linear() {
let pred = leaf_vec(&[3.0]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - 1.0).abs() < 1e-7,
"Huber linear grad: expected 1.0, got {}",
g[0]
);
}
#[test]
fn test_huber_backward_negative_error() {
let pred = leaf_vec(&[-1.0]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-1.0)).abs() < 1e-7,
"Huber negative error grad: expected -1.0, got {}",
g[0]
);
}
#[test]
fn test_huber_custom_delta() {
let pred = leaf_vec(&[1.3]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::new(Reduction::Mean, 0.5);
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 0.045).abs() < 1e-7,
"Huber custom delta quadratic: expected 0.045, got {}",
out.item().unwrap()
);
let pred2 = leaf_vec(&[2.0]);
let target2 = target_vec(&[1.0]);
let out2 = loss.forward(&pred2, &target2).unwrap();
assert!(
(out2.item().unwrap() - 0.375).abs() < 1e-7,
"Huber custom delta linear: expected 0.375, got {}",
out2.item().unwrap()
);
}
#[test]
fn test_huber_zero_loss() {
let pred = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(out.item().unwrap().abs() < 1e-10);
}
#[test]
fn test_mse_no_grad() {
ferrotorch_core::no_grad(|| {
let pred = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.5, 2.5]);
let loss = MSELoss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(
out.grad_fn().is_none(),
"MSELoss inside no_grad should not attach grad_fn"
);
});
}
#[test]
fn test_ce_no_grad() {
ferrotorch_core::no_grad(|| {
let logits = leaf_2d(&[1.0, 2.0, 3.0], &[1, 3]);
let targets = target_vec(&[0.0]);
let loss = CrossEntropyLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
assert!(
out.grad_fn().is_none(),
"CrossEntropyLoss inside no_grad should not attach grad_fn"
);
});
}
#[test]
fn test_bce_no_grad() {
ferrotorch_core::no_grad(|| {
let logits = leaf_vec(&[0.0, 1.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::default();
let out = loss.forward(&logits, &targets).unwrap();
assert!(
out.grad_fn().is_none(),
"BCEWithLogitsLoss inside no_grad should not attach grad_fn"
);
});
}
#[test]
fn test_huber_no_grad() {
ferrotorch_core::no_grad(|| {
let pred = leaf_vec(&[1.0]);
let target = target_vec(&[2.0]);
let loss = HuberLoss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(
out.grad_fn().is_none(),
"HuberLoss inside no_grad should not attach grad_fn"
);
});
}
#[test]
fn test_mse_shape_mismatch() {
let pred = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let loss = MSELoss::default();
assert!(loss.forward(&pred, &target).is_err());
}
#[test]
fn test_bce_shape_mismatch() {
let logits = leaf_vec(&[0.0]);
let targets = target_vec(&[1.0, 0.0]);
let loss = BCEWithLogitsLoss::default();
assert!(loss.forward(&logits, &targets).is_err());
}
#[test]
fn test_huber_shape_mismatch() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.0]);
let loss = HuberLoss::default();
assert!(loss.forward(&pred, &target).is_err());
}
#[test]
fn test_ce_logits_wrong_dims() {
let logits = leaf_vec(&[1.0, 2.0, 3.0]);
let targets = target_vec(&[1.0]);
let loss = CrossEntropyLoss::default();
assert!(loss.forward(&logits, &targets).is_err());
}
#[test]
fn test_ce_target_batch_mismatch() {
let logits = leaf_2d(&[1.0, 2.0, 3.0], &[1, 3]);
let targets = target_vec(&[0.0, 1.0]); let loss = CrossEntropyLoss::default();
assert!(loss.forward(&logits, &targets).is_err());
}
#[test]
fn test_kl_div_forward_mean() {
let input = leaf_vec(&[0.5_f64.ln(), 0.5_f64.ln()]);
let target = target_vec(&[0.25, 0.75]);
let loss = KLDivLoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
let expected =
0.25 * (0.25_f64.ln() - 0.5_f64.ln()) + 0.75 * (0.75_f64.ln() - 0.5_f64.ln());
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"KL sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_kl_div_zero_target_contributes_zero() {
let input = leaf_vec(&[0.5_f64.ln(), 0.5_f64.ln()]);
let target = target_vec(&[0.0, 1.0]);
let loss = KLDivLoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
let expected = 2.0_f64.ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"KL zero target: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_kl_div_identical_distributions() {
let target = target_vec(&[0.3, 0.7]);
let input = leaf_vec(&[0.3_f64.ln(), 0.7_f64.ln()]);
let loss = KLDivLoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
assert!(
out.item().unwrap().abs() < 1e-10,
"KL same dist: expected ~0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_kl_div_backward() {
let input = leaf_vec(&[0.5_f64.ln(), 0.5_f64.ln()]);
let target = target_vec(&[0.25, 0.75]);
let loss = KLDivLoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-0.25)).abs() < 1e-7,
"KL grad[0]: expected -0.25, got {}",
g[0]
);
assert!(
(g[1] - (-0.75)).abs() < 1e-7,
"KL grad[1]: expected -0.75, got {}",
g[1]
);
}
#[test]
fn test_kl_div_shape_mismatch() {
let input = leaf_vec(&[0.0, 0.0]);
let target = target_vec(&[0.5]);
let loss = KLDivLoss::default();
assert!(loss.forward(&input, &target).is_err());
}
#[test]
fn test_cosine_embedding_positive_pair() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = leaf_2d(&[0.0, 1.0], &[1, 2]);
let y = target_vec(&[1.0]);
let loss = CosineEmbeddingLoss::default();
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
assert!(
(out.item().unwrap() - 1.0).abs() < 1e-7,
"cosine positive orthogonal: expected 1.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_cosine_embedding_positive_identical() {
let x1 = leaf_2d(&[1.0, 1.0], &[1, 2]);
let x2 = leaf_2d(&[1.0, 1.0], &[1, 2]);
let y = target_vec(&[1.0]);
let loss = CosineEmbeddingLoss::default();
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
assert!(
out.item().unwrap().abs() < 1e-7,
"cosine positive identical: expected 0.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_cosine_embedding_negative_pair() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let y = target_vec(&[-1.0]);
let loss = CosineEmbeddingLoss::new(Reduction::Mean, 0.5);
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
assert!(
(out.item().unwrap() - 0.5).abs() < 1e-7,
"cosine negative same: expected 0.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_cosine_embedding_negative_orthogonal() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = leaf_2d(&[0.0, 1.0], &[1, 2]);
let y = target_vec(&[-1.0]);
let loss = CosineEmbeddingLoss::new(Reduction::Mean, 0.0);
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
assert!(
out.item().unwrap().abs() < 1e-7,
"cosine negative orthogonal: expected 0.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_cosine_embedding_shape_mismatch() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = leaf_2d(&[1.0, 0.0, 0.0], &[1, 3]);
let y = target_vec(&[1.0]);
let loss = CosineEmbeddingLoss::default();
assert!(loss.forward_pair(&x1, &x2, &y).is_err());
}
#[test]
fn test_smooth_l1_forward_quadratic() {
let pred = leaf_vec(&[1.3]);
let target = target_vec(&[1.0]);
let loss = SmoothL1Loss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 0.045).abs() < 1e-7,
"SmoothL1 quadratic: expected 0.045, got {}",
out.item().unwrap()
);
}
#[test]
fn test_smooth_l1_forward_linear() {
let pred = leaf_vec(&[3.0]);
let target = target_vec(&[1.0]);
let loss = SmoothL1Loss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 1.5).abs() < 1e-7,
"SmoothL1 linear: expected 1.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_smooth_l1_matches_huber() {
let pred = leaf_vec(&[0.5, 2.0, -1.0]);
let target = target_vec(&[1.0, 0.0, 0.5]);
let smooth = SmoothL1Loss::new(Reduction::Sum);
let huber = HuberLoss::new(Reduction::Sum, 1.0);
let s_out = smooth.forward(&pred, &target).unwrap();
let h_out = huber.forward(&pred, &target).unwrap();
assert!(
(s_out.item().unwrap() - h_out.item().unwrap()).abs() < 1e-10,
"SmoothL1 and Huber(1.0) diverge: {} vs {}",
s_out.item().unwrap(),
h_out.item().unwrap()
);
}
#[test]
fn test_smooth_l1_zero_loss() {
let pred = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0]);
let loss = SmoothL1Loss::default();
let out = loss.forward(&pred, &target).unwrap();
assert!(out.item().unwrap().abs() < 1e-10);
}
#[test]
fn test_mse_loss_fires_autocast_guard_when_enabled() {
use ferrotorch_core::autograd::autocast::{AutocastDtype, autocast, set_autocast_debug};
use ferrotorch_core::autograd::autocast_ops::{AutocastCategory, drain_autocast_events};
set_autocast_debug(true);
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
drain_autocast_events();
let _ = MSELoss::new(Reduction::Mean)
.forward(&pred, &target)
.unwrap();
assert!(drain_autocast_events().is_empty());
autocast(AutocastDtype::F16, || {
drain_autocast_events();
let _ = MSELoss::new(Reduction::Mean)
.forward(&pred, &target)
.unwrap();
let events = drain_autocast_events();
assert_eq!(events.len(), 1);
assert_eq!(events[0].op, "mse_loss");
assert_eq!(events[0].category, AutocastCategory::FullPrecision);
});
}
#[test]
fn test_cross_entropy_fires_autocast_guard_when_enabled() {
use ferrotorch_core::autograd::autocast::{AutocastDtype, autocast, set_autocast_debug};
use ferrotorch_core::autograd::autocast_ops::{AutocastCategory, drain_autocast_events};
set_autocast_debug(true);
let logits = leaf_2d(&[1.0, 2.0, 3.0, 1.0, 2.0, 3.0], &[2, 3]);
let targets = target_vec(&[2.0, 0.0]);
drain_autocast_events();
let _ = CrossEntropyLoss::new(Reduction::Mean, 0.0)
.forward(&logits, &targets)
.unwrap();
assert!(drain_autocast_events().is_empty());
autocast(AutocastDtype::BF16, || {
drain_autocast_events();
let _ = CrossEntropyLoss::new(Reduction::Mean, 0.0)
.forward(&logits, &targets)
.unwrap();
let events = drain_autocast_events();
assert_eq!(events.len(), 1);
assert_eq!(events[0].op, "cross_entropy");
assert_eq!(events[0].category, AutocastCategory::FullPrecision);
});
}
#[test]
fn test_bce_with_logits_fires_autocast_guard_when_enabled() {
use ferrotorch_core::autograd::autocast::{AutocastDtype, autocast, set_autocast_debug};
use ferrotorch_core::autograd::autocast_ops::{AutocastCategory, drain_autocast_events};
set_autocast_debug(true);
let logits = leaf_vec(&[0.5, -0.5, 1.0]);
let targets = target_vec(&[1.0, 0.0, 1.0]);
drain_autocast_events();
let _ = BCEWithLogitsLoss::new(Reduction::Mean)
.forward(&logits, &targets)
.unwrap();
assert!(drain_autocast_events().is_empty());
autocast(AutocastDtype::F16, || {
drain_autocast_events();
let _ = BCEWithLogitsLoss::new(Reduction::Mean)
.forward(&logits, &targets)
.unwrap();
let events = drain_autocast_events();
assert_eq!(events.len(), 1);
assert_eq!(events[0].op, "bce_with_logits");
assert_eq!(events[0].category, AutocastCategory::FullPrecision);
});
}
#[test]
fn test_l1_forward_mean() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = L1Loss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
assert!(out.is_scalar());
assert!(
(out.item().unwrap() - 0.5).abs() < 1e-7,
"L1 mean: expected 0.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_l1_forward_sum() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = L1Loss::new(Reduction::Sum);
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 1.5).abs() < 1e-7,
"L1 sum: expected 1.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_l1_forward_none() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = L1Loss::new(Reduction::None);
let out = loss.forward(&pred, &target).unwrap();
assert_eq!(out.shape(), &[3]);
let d = out.data().unwrap();
for i in 0..3 {
assert!(
(d[i] - 0.5).abs() < 1e-7,
"L1 none[{}]: expected 0.5, got {}",
i,
d[i]
);
}
}
#[test]
fn test_l1_backward_mean() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.5, 2.5, 3.5]);
let loss = L1Loss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = -1.0 / 3.0;
for i in 0..3 {
assert!(
(g[i] - expected).abs() < 1e-7,
"L1 backward mean[{}]: expected {}, got {}",
i,
expected,
g[i]
);
}
}
#[test]
fn test_l1_backward_sum() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[0.5, 1.5, 2.5]);
let loss = L1Loss::new(Reduction::Sum);
let out = loss.forward(&pred, &target).unwrap();
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
for i in 0..3 {
assert!(
(g[i] - 1.0).abs() < 1e-7,
"L1 backward sum[{}]: expected 1.0, got {}",
i,
g[i]
);
}
}
#[test]
fn test_l1_shape_mismatch() {
let pred = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let loss = L1Loss::default();
assert!(loss.forward(&pred, &target).is_err());
}
#[test]
fn test_l1_zero_diff() {
let pred = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let loss = L1Loss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
assert!((out.item().unwrap()).abs() < 1e-10);
backward(&out).unwrap();
let grad = pred.grad().unwrap().unwrap();
let g = grad.data().unwrap();
for i in 0..3 {
assert!(
g[i].abs() < 1e-10,
"L1 zero diff grad[{}] should be 0, got {}",
i,
g[i]
);
}
}
#[test]
fn test_l1_mixed_signs() {
let pred = leaf_vec(&[3.0, 1.0]);
let target = target_vec(&[1.0, 3.0]);
let loss = L1Loss::new(Reduction::Mean);
let out = loss.forward(&pred, &target).unwrap();
assert!(
(out.item().unwrap() - 2.0).abs() < 1e-7,
"L1 mixed: expected 2.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_nll_forward_mean() {
let log_probs = leaf_2d(&[-1.5, -0.5, -2.0, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0]);
let loss = NLLLoss::default();
let out = loss.forward(&log_probs, &targets).unwrap();
assert!(out.is_scalar());
assert!(
(out.item().unwrap() - 0.65).abs() < 1e-7,
"NLL mean: expected 0.65, got {}",
out.item().unwrap()
);
}
#[test]
fn test_nll_forward_sum() {
let log_probs = leaf_2d(&[-1.5, -0.5, -2.0, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0]);
let loss = NLLLoss::new(Reduction::Sum, None);
let out = loss.forward(&log_probs, &targets).unwrap();
assert!(
(out.item().unwrap() - 1.3).abs() < 1e-7,
"NLL sum: expected 1.3, got {}",
out.item().unwrap()
);
}
#[test]
fn test_nll_forward_none() {
let log_probs = leaf_2d(&[-1.5, -0.5, -2.0, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0]);
let loss = NLLLoss::new(Reduction::None, None);
let out = loss.forward(&log_probs, &targets).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
assert!(
(d[0] - 0.5).abs() < 1e-7,
"NLL none[0]: expected 0.5, got {}",
d[0]
);
assert!(
(d[1] - 0.8).abs() < 1e-7,
"NLL none[1]: expected 0.8, got {}",
d[1]
);
}
#[test]
fn test_nll_backward_mean() {
let log_probs = leaf_2d(&[-1.5, -0.5, -2.0, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0]);
let loss = NLLLoss::default();
let out = loss.forward(&log_probs, &targets).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = [0.0, -0.5, 0.0, -0.5, 0.0, 0.0];
for i in 0..6 {
assert!(
(g[i] - expected[i]).abs() < 1e-7,
"NLL backward mean[{}]: expected {}, got {}",
i,
expected[i],
g[i]
);
}
}
#[test]
fn test_nll_ignore_index() {
let log_probs = leaf_2d(&[-1.5, -0.5, -2.0, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0]);
let loss = NLLLoss::new(Reduction::Mean, Some(0));
let out = loss.forward(&log_probs, &targets).unwrap();
assert!(
(out.item().unwrap() - 0.5).abs() < 1e-7,
"NLL ignore_index mean: expected 0.5, got {}",
out.item().unwrap()
);
}
#[test]
fn test_nll_ignore_index_all_ignored() {
let log_probs = leaf_2d(&[-1.5, -0.5, -0.8, -1.2], &[2, 2]);
let targets = target_vec(&[0.0, 0.0]);
let loss = NLLLoss::new(Reduction::Mean, Some(0));
let out = loss.forward(&log_probs, &targets).unwrap();
assert!(
(out.item().unwrap()).abs() < 1e-10,
"NLL all ignored: expected 0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_nll_wrong_log_probs_shape() {
let log_probs = leaf_vec(&[-0.5, -1.0, -1.5]);
let targets = target_vec(&[1.0]);
let loss = NLLLoss::default();
assert!(loss.forward(&log_probs, &targets).is_err());
}
#[test]
fn test_nll_target_shape_mismatch() {
let log_probs = leaf_2d(&[-0.5, -1.0, -1.5, -0.8, -1.2, -1.0], &[2, 3]);
let targets = target_vec(&[1.0, 0.0, 2.0]);
let loss = NLLLoss::default();
assert!(loss.forward(&log_probs, &targets).is_err());
}
#[test]
fn test_nll_target_out_of_range() {
let log_probs = leaf_2d(&[-0.5, -1.0], &[1, 2]);
let targets = target_vec(&[5.0]); let loss = NLLLoss::default();
assert!(loss.forward(&log_probs, &targets).is_err());
}
#[test]
fn test_nll_empty_batch() {
let log_probs = leaf_2d(&[], &[0, 3]);
let targets = target_vec(&[]);
let loss = NLLLoss::default();
let out = loss.forward(&log_probs, &targets).unwrap();
assert!((out.item().unwrap()).abs() < 1e-10);
}
#[test]
fn test_nll_backward_sum() {
let log_probs = leaf_2d(&[-1.0, -2.0, -3.0, -4.0], &[2, 2]);
let targets = target_vec(&[0.0, 1.0]);
let loss = NLLLoss::new(Reduction::Sum, None);
let out = loss.forward(&log_probs, &targets).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = [-1.0, 0.0, 0.0, -1.0];
for i in 0..4 {
assert!(
(g[i] - expected[i]).abs() < 1e-7,
"NLL backward sum[{}]: expected {}, got {}",
i,
expected[i],
g[i]
);
}
}
#[test]
fn test_nll_backward_with_ignore() {
let log_probs = leaf_2d(&[-1.0, -2.0, -3.0, -4.0], &[2, 2]);
let targets = target_vec(&[0.0, 1.0]);
let loss = NLLLoss::new(Reduction::Mean, Some(0));
let out = loss.forward(&log_probs, &targets).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = [0.0, 0.0, 0.0, -1.0];
for i in 0..4 {
assert!(
(g[i] - expected[i]).abs() < 1e-7,
"NLL backward ignore[{}]: expected {}, got {}",
i,
expected[i],
g[i]
);
}
}
#[test]
fn test_bce_loss_forward_mean() {
let input = leaf_vec(&[0.8, 0.4, 0.6]);
let target = target_vec(&[1.0, 0.0, 1.0]);
let loss = BCELoss::new(Reduction::Mean);
let out = loss.forward(&input, &target).unwrap();
let expected = (-0.8_f64.ln() + -0.6_f64.ln() + -0.6_f64.ln()) / 3.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"BCELoss mean: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_bce_loss_forward_sum() {
let input = leaf_vec(&[0.8, 0.4]);
let target = target_vec(&[1.0, 0.0]);
let loss = BCELoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
let expected = -0.8_f64.ln() + -(0.6_f64).ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"BCELoss sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_bce_loss_forward_none() {
let input = leaf_vec(&[0.8, 0.4]);
let target = target_vec(&[1.0, 0.0]);
let loss = BCELoss::new(Reduction::None);
let out = loss.forward(&input, &target).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
assert!(
(d[0] - (-0.8_f64.ln())).abs() < 1e-7,
"BCELoss none[0]: expected {}, got {}",
-0.8_f64.ln(),
d[0]
);
assert!(
(d[1] - (-0.6_f64.ln())).abs() < 1e-7,
"BCELoss none[1]: expected {}, got {}",
-0.6_f64.ln(),
d[1]
);
}
#[test]
fn test_bce_loss_backward_mean() {
let input = leaf_vec(&[0.8, 0.4]);
let target = target_vec(&[1.0, 0.0]);
let loss = BCELoss::new(Reduction::Mean);
let out = loss.forward(&input, &target).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-0.625)).abs() < 1e-5,
"BCELoss backward[0]: expected -0.625, got {}",
g[0]
);
let exp1 = 1.0 / 0.6 / 2.0;
assert!(
(g[1] - exp1).abs() < 1e-5,
"BCELoss backward[1]: expected {}, got {}",
exp1,
g[1]
);
}
#[test]
fn test_bce_loss_shape_mismatch() {
let input = leaf_vec(&[0.5, 0.5]);
let target = target_vec(&[1.0]);
let loss = BCELoss::default();
assert!(loss.forward(&input, &target).is_err());
}
#[test]
fn test_triplet_margin_forward_mean() {
let anchor = leaf_2d(&[0.0, 0.0], &[1, 2]);
let positive =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0]), vec![1, 2], false).unwrap();
let negative =
Tensor::from_storage(TensorStorage::cpu(vec![3.0, 0.0]), vec![1, 2], false).unwrap();
let loss = TripletMarginLoss::default();
let out = loss.forward(&anchor, &positive, &negative).unwrap();
assert!(
out.item().unwrap().abs() < 1e-7,
"Triplet loss should be 0 when negative is far, got {}",
out.item().unwrap()
);
}
#[test]
fn test_triplet_margin_forward_active() {
let anchor = leaf_2d(&[0.0, 0.0], &[1, 2]);
let positive =
Tensor::from_storage(TensorStorage::cpu(vec![2.0, 0.0]), vec![1, 2], false).unwrap();
let negative =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0]), vec![1, 2], false).unwrap();
let loss = TripletMarginLoss::default();
let out = loss.forward(&anchor, &positive, &negative).unwrap();
assert!(
(out.item().unwrap() - 2.0).abs() < 1e-7,
"Triplet loss: expected 2.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_triplet_margin_batch() {
let anchor = leaf_2d(&[0.0, 0.0, 0.0, 0.0], &[2, 2]);
let positive = Tensor::from_storage(
TensorStorage::cpu(vec![2.0, 0.0, 1.0, 0.0]),
vec![2, 2],
false,
)
.unwrap();
let negative = Tensor::from_storage(
TensorStorage::cpu(vec![1.0, 0.0, 5.0, 0.0]),
vec![2, 2],
false,
)
.unwrap();
let loss = TripletMarginLoss::new(Reduction::Mean, 1.0, 2.0);
let out = loss.forward(&anchor, &positive, &negative).unwrap();
assert!(
(out.item().unwrap() - 1.0).abs() < 1e-7,
"Triplet batch mean: expected 1.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_triplet_margin_shape_mismatch() {
let anchor = leaf_2d(&[0.0, 0.0], &[1, 2]);
let positive =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0, 0.0]), vec![1, 3], false)
.unwrap();
let negative =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0]), vec![1, 2], false).unwrap();
let loss = TripletMarginLoss::default();
assert!(loss.forward(&anchor, &positive, &negative).is_err());
}
#[test]
fn test_margin_ranking_forward_mean() {
let x1 = leaf_vec(&[2.0, 0.5]);
let x2 = target_vec(&[1.0, 1.0]);
let y = target_vec(&[1.0, -1.0]);
let loss = MarginRankingLoss::new(Reduction::Mean, 0.0);
let out = loss.forward(&x1, &x2, &y).unwrap();
assert!(
out.item().unwrap().abs() < 1e-7,
"MarginRanking: expected 0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_margin_ranking_forward_with_margin() {
let x1 = leaf_vec(&[1.0, 0.5]);
let x2 = target_vec(&[0.5, 1.0]);
let y = target_vec(&[1.0, 1.0]);
let loss = MarginRankingLoss::new(Reduction::Mean, 1.0);
let out = loss.forward(&x1, &x2, &y).unwrap();
assert!(
(out.item().unwrap() - 1.0).abs() < 1e-7,
"MarginRanking: expected 1.0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_margin_ranking_backward() {
let x1 = leaf_vec(&[0.5]);
let x2 = target_vec(&[1.0]);
let y = target_vec(&[1.0]);
let loss = MarginRankingLoss::new(Reduction::Mean, 1.0);
let out = loss.forward(&x1, &x2, &y).unwrap();
backward(&out).unwrap();
let grad = x1.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-1.0)).abs() < 1e-7,
"MarginRanking backward: expected -1, got {}",
g[0]
);
}
#[test]
fn test_margin_ranking_shape_mismatch() {
let x1 = leaf_vec(&[1.0, 2.0]);
let x2 = target_vec(&[1.0]);
let y = target_vec(&[1.0, -1.0]);
let loss = MarginRankingLoss::default();
assert!(loss.forward(&x1, &x2, &y).is_err());
}
#[test]
fn test_ctc_simple() {
let mut lp = vec![-10.0_f64; 3 * 1 * 3]; lp[0 * 1 * 3 + 0 * 3 + 0] = -0.1; lp[0 * 1 * 3 + 0 * 3 + 1] = -10.0;
lp[0 * 1 * 3 + 0 * 3 + 2] = -10.0;
lp[1 * 1 * 3 + 0 * 3 + 0] = -10.0;
lp[1 * 1 * 3 + 0 * 3 + 1] = -0.1;
lp[1 * 1 * 3 + 0 * 3 + 2] = -10.0;
lp[2 * 1 * 3 + 0 * 3 + 0] = -10.0;
lp[2 * 1 * 3 + 0 * 3 + 1] = -10.0;
lp[2 * 1 * 3 + 0 * 3 + 2] = -0.1;
let log_probs = Tensor::from_storage(TensorStorage::cpu(lp), vec![3, 1, 3], false).unwrap();
let targets = target_vec(&[1.0, 2.0]);
let loss = CTCLoss::default();
let out = loss.forward(&log_probs, &targets, &[3], &[2]).unwrap();
assert!(
out.item().unwrap() < 1.0,
"CTC: expected low loss for aligned input, got {}",
out.item().unwrap()
);
assert!(
out.item().unwrap() >= 0.0,
"CTC: loss should be non-negative, got {}",
out.item().unwrap()
);
}
#[test]
fn test_ctc_empty_target() {
let lp = vec![-0.5_f64, -10.0, -10.0, -0.3, -10.0, -10.0]; let log_probs = Tensor::from_storage(TensorStorage::cpu(lp), vec![2, 1, 3], false).unwrap();
let targets = target_vec(&[]);
let loss = CTCLoss::new(Reduction::Mean, 0, false);
let out = loss.forward(&log_probs, &targets, &[2], &[0]).unwrap();
assert!(
(out.item().unwrap() - 0.8).abs() < 1e-7,
"CTC empty target: expected 0.8, got {}",
out.item().unwrap()
);
}
#[test]
fn test_ctc_wrong_shape() {
let log_probs = leaf_vec(&[0.0, 0.0, 0.0]); let targets = target_vec(&[1.0]);
let loss = CTCLoss::default();
assert!(loss.forward(&log_probs, &targets, &[3], &[1]).is_err());
}
#[test]
fn test_poisson_nll_forward_log_input() {
let input = leaf_vec(&[0.0, 1.0, 2.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let loss = PoissonNLLLoss::default(); let out = loss.forward(&input, &target).unwrap();
let e0 = 0.0_f64.exp() - 1.0 * 0.0; let e1 = 1.0_f64.exp() - 2.0 * 1.0; let e2 = 2.0_f64.exp() - 3.0 * 2.0; let expected = (e0 + e1 + e2) / 3.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"Poisson NLL: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_poisson_nll_forward_no_log_input() {
let input = leaf_vec(&[1.0, 2.0, 3.0]);
let target = target_vec(&[1.0, 2.0, 3.0]);
let eps = 1e-8;
let loss = PoissonNLLLoss::new(Reduction::Mean, false, eps);
let out = loss.forward(&input, &target).unwrap();
let e0 = 1.0 - 1.0 * (1.0 + eps).ln();
let e1 = 2.0 - 2.0 * (2.0 + eps).ln();
let e2 = 3.0 - 3.0 * (3.0 + eps).ln();
let expected = (e0 + e1 + e2) / 3.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"Poisson NLL no_log: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_poisson_nll_backward() {
let input = leaf_vec(&[1.0]);
let target = target_vec(&[2.0]);
let loss = PoissonNLLLoss::default();
let out = loss.forward(&input, &target).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = 1.0_f64.exp() - 2.0;
assert!(
(g[0] - expected).abs() < 1e-6,
"Poisson backward: expected {}, got {}",
expected,
g[0]
);
}
#[test]
fn test_poisson_nll_shape_mismatch() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0]);
let loss = PoissonNLLLoss::default();
assert!(loss.forward(&input, &target).is_err());
}
#[test]
fn test_multi_margin_forward_mean() {
let input = leaf_2d(&[1.0, 3.0, 2.0], &[1, 3]);
let target = target_vec(&[1.0]);
let loss = MultiMarginLoss::default();
let out = loss.forward(&input, &target).unwrap();
assert!(
out.item().unwrap().abs() < 1e-7,
"MultiMargin: expected 0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_multi_margin_forward_active() {
let input = leaf_2d(&[2.0, 1.0, 3.0], &[1, 3]);
let target = target_vec(&[1.0]);
let loss = MultiMarginLoss::default();
let out = loss.forward(&input, &target).unwrap();
let expected = 5.0 / 3.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"MultiMargin active: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_multi_margin_p2() {
let input = leaf_2d(&[2.0, 1.0, 3.0], &[1, 3]);
let target = target_vec(&[1.0]);
let loss = MultiMarginLoss::new(Reduction::Mean, 2, 1.0);
let out = loss.forward(&input, &target).unwrap();
let expected = 13.0 / 3.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"MultiMargin p=2: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_multi_margin_backward() {
let input = leaf_2d(&[2.0, 1.0, 3.0], &[1, 3]);
let target = target_vec(&[1.0]);
let loss = MultiMarginLoss::new(Reduction::Sum, 1, 1.0);
let out = loss.forward(&input, &target).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - 1.0 / 3.0).abs() < 1e-7,
"MultiMargin grad[0]: expected 1/3, got {}",
g[0]
);
assert!(
(g[1] - (-2.0 / 3.0)).abs() < 1e-7,
"MultiMargin grad[1]: expected -2/3, got {}",
g[1]
);
assert!(
(g[2] - 1.0 / 3.0).abs() < 1e-7,
"MultiMargin grad[2]: expected 1/3, got {}",
g[2]
);
}
#[test]
fn test_multi_margin_wrong_shape() {
let input = leaf_vec(&[1.0, 2.0, 3.0]); let target = target_vec(&[1.0]);
let loss = MultiMarginLoss::default();
assert!(loss.forward(&input, &target).is_err());
}
#[test]
fn test_multi_label_soft_margin_forward() {
let input = leaf_2d(&[2.0, -1.0], &[1, 2]);
let target =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0]), vec![1, 2], false).unwrap();
let loss = MultiLabelSoftMarginLoss::default();
let out = loss.forward(&input, &target).unwrap();
let bce0 = (1.0 + (-2.0_f64).exp()).ln();
let bce1 = (1.0 + (-1.0_f64).exp()).ln();
let expected = (bce0 + bce1) / 2.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-6,
"MultiLabelSoftMargin: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_multi_label_soft_margin_backward() {
let input = leaf_2d(&[0.0, 0.0], &[1, 2]);
let target =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0]), vec![1, 2], false).unwrap();
let loss = MultiLabelSoftMarginLoss::new(Reduction::Sum);
let out = loss.forward(&input, &target).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-0.25)).abs() < 1e-6,
"MLSM backward[0]: expected -0.25, got {}",
g[0]
);
assert!(
(g[1] - 0.25).abs() < 1e-6,
"MLSM backward[1]: expected 0.25, got {}",
g[1]
);
}
#[test]
fn test_multi_label_soft_margin_shape_mismatch() {
let input = leaf_2d(&[1.0, 2.0], &[1, 2]);
let target =
Tensor::from_storage(TensorStorage::cpu(vec![1.0, 0.0, 0.0]), vec![1, 3], false)
.unwrap();
let loss = MultiLabelSoftMarginLoss::default();
assert!(loss.forward(&input, &target).is_err());
}
#[test]
fn test_hinge_embedding_forward_mean() {
let input = leaf_vec(&[0.5, 2.0]);
let y = target_vec(&[1.0, -1.0]);
let loss = HingeEmbeddingLoss::default();
let out = loss.forward(&input, &y).unwrap();
assert!(
(out.item().unwrap() - 0.25).abs() < 1e-7,
"HingeEmbedding mean: expected 0.25, got {}",
out.item().unwrap()
);
}
#[test]
fn test_hinge_embedding_negative_active() {
let input = leaf_vec(&[0.3]);
let y = target_vec(&[-1.0]);
let loss = HingeEmbeddingLoss::new(Reduction::Mean, 1.0);
let out = loss.forward(&input, &y).unwrap();
assert!(
(out.item().unwrap() - 0.7).abs() < 1e-7,
"HingeEmbedding active: expected 0.7, got {}",
out.item().unwrap()
);
}
#[test]
fn test_hinge_embedding_backward() {
let input = leaf_vec(&[0.5, 0.3]);
let y = target_vec(&[1.0, -1.0]);
let loss = HingeEmbeddingLoss::new(Reduction::Mean, 1.0);
let out = loss.forward(&input, &y).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - 0.5).abs() < 1e-7,
"HingeEmb backward[0]: expected 0.5, got {}",
g[0]
);
assert!(
(g[1] - (-0.5)).abs() < 1e-7,
"HingeEmb backward[1]: expected -0.5, got {}",
g[1]
);
}
#[test]
fn test_hinge_embedding_shape_mismatch() {
let input = leaf_vec(&[0.5, 0.3]);
let y = target_vec(&[1.0]);
let loss = HingeEmbeddingLoss::default();
assert!(loss.forward(&input, &y).is_err());
}
#[test]
fn test_hinge_embedding_all_reductions() {
let input = leaf_vec(&[0.5, 0.3]);
let y = target_vec(&[1.0, -1.0]);
let sum_loss = HingeEmbeddingLoss::new(Reduction::Sum, 1.0)
.forward(&input, &y)
.unwrap();
assert!(
(sum_loss.item().unwrap() - 1.2).abs() < 1e-7,
"HingeEmb sum: expected 1.2, got {}",
sum_loss.item().unwrap()
);
let none_loss = HingeEmbeddingLoss::new(Reduction::None, 1.0)
.forward(&input, &y)
.unwrap();
assert_eq!(none_loss.shape(), &[2]);
let d = none_loss.data().unwrap();
assert!((d[0] - 0.5).abs() < 1e-7);
assert!((d[1] - 0.7).abs() < 1e-7);
}
#[test]
fn test_gaussian_nll_forward_mean() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.5, 2.5]);
let var = target_vec(&[1.0, 2.0]);
let loss = GaussianNLLLoss::default();
let out = loss.forward(&input, &target, &var).unwrap();
let e0 = 0.5 * (0.0 + 0.25);
let e1 = 0.5 * (2.0_f64.ln() + 0.125);
let expected = (e0 + e1) / 2.0;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"GaussianNLL mean: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_gaussian_nll_forward_sum() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.5, 2.5]);
let var = target_vec(&[1.0, 2.0]);
let loss = GaussianNLLLoss::new(Reduction::Sum, false, 1e-6);
let out = loss.forward(&input, &target, &var).unwrap();
let e0 = 0.5 * (0.0 + 0.25);
let e1 = 0.5 * (2.0_f64.ln() + 0.125);
let expected = e0 + e1;
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"GaussianNLL sum: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_gaussian_nll_forward_none() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.5, 2.5]);
let var = target_vec(&[1.0, 2.0]);
let loss = GaussianNLLLoss::new(Reduction::None, false, 1e-6);
let out = loss.forward(&input, &target, &var).unwrap();
assert_eq!(out.shape(), &[2]);
let d = out.data().unwrap();
let e0 = 0.5 * (0.0 + 0.25);
let e1 = 0.5 * (2.0_f64.ln() + 0.125);
assert!(
(d[0] - e0).abs() < 1e-7,
"GaussianNLL none[0]: expected {}, got {}",
e0,
d[0]
);
assert!(
(d[1] - e1).abs() < 1e-7,
"GaussianNLL none[1]: expected {}, got {}",
e1,
d[1]
);
}
#[test]
fn test_gaussian_nll_full_mode() {
let input = leaf_vec(&[0.0]);
let target = target_vec(&[0.0]);
let var = target_vec(&[1.0]);
let loss = GaussianNLLLoss::new(Reduction::Mean, true, 1e-6);
let out = loss.forward(&input, &target, &var).unwrap();
let expected = 0.5 * (2.0 * std::f64::consts::PI).ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-7,
"GaussianNLL full: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_gaussian_nll_backward_input() {
let input = leaf_vec(&[2.0]);
let target = target_vec(&[1.0]);
let var = target_vec(&[4.0]);
let loss = GaussianNLLLoss::new(Reduction::Mean, false, 1e-6);
let out = loss.forward(&input, &target, &var).unwrap();
backward(&out).unwrap();
let grad = input.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = (2.0 - 1.0) / 4.0;
assert!(
(g[0] - expected).abs() < 1e-7,
"GaussianNLL backward input: expected {}, got {}",
expected,
g[0]
);
}
#[test]
fn test_gaussian_nll_backward_var() {
let input = leaf_vec(&[2.0]);
let target = target_vec(&[1.0]);
let var_tensor = Tensor::from_storage(
TensorStorage::cpu(vec![4.0]),
vec![1],
true,
)
.unwrap();
let loss = GaussianNLLLoss::new(Reduction::Mean, false, 1e-6);
let out = loss.forward(&input, &target, &var_tensor).unwrap();
backward(&out).unwrap();
let grad = var_tensor.grad().unwrap().unwrap();
let g = grad.data().unwrap();
let expected = 0.5 * (1.0 / 4.0 - 1.0 / 16.0);
assert!(
(g[0] - expected).abs() < 1e-7,
"GaussianNLL backward var: expected {}, got {}",
expected,
g[0]
);
}
#[test]
fn test_gaussian_nll_eps_clamp() {
let input = leaf_vec(&[1.0]);
let target = target_vec(&[1.0]);
let var = target_vec(&[0.0]); let eps = 1e-6;
let loss = GaussianNLLLoss::new(Reduction::Mean, false, eps);
let out = loss.forward(&input, &target, &var).unwrap();
let expected = 0.5 * eps.ln();
assert!(
(out.item().unwrap() - expected).abs() < 1e-5,
"GaussianNLL eps clamp: expected {}, got {}",
expected,
out.item().unwrap()
);
}
#[test]
fn test_gaussian_nll_shape_mismatch() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0]);
let var = target_vec(&[1.0, 1.0]);
let loss = GaussianNLLLoss::default();
assert!(loss.forward(&input, &target, &var).is_err());
}
#[test]
fn test_gaussian_nll_var_shape_mismatch() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0]);
let var = target_vec(&[1.0]);
let loss = GaussianNLLLoss::default();
assert!(loss.forward(&input, &target, &var).is_err());
}
#[test]
fn test_gaussian_nll_zero_loss() {
let input = leaf_vec(&[1.0, 2.0]);
let target = target_vec(&[1.0, 2.0]);
let var = target_vec(&[1.0, 1.0]);
let loss = GaussianNLLLoss::default();
let out = loss.forward(&input, &target, &var).unwrap();
assert!(
out.item().unwrap().abs() < 1e-10,
"GaussianNLL zero loss: expected ~0, got {}",
out.item().unwrap()
);
}
#[test]
fn test_cosine_embedding_backward_positive() {
let x1 = leaf_2d(&[3.0, 4.0], &[1, 2]);
let x2 = Tensor::from_storage(
TensorStorage::cpu(vec![4.0, 3.0]),
vec![1, 2],
true,
)
.unwrap();
let y = target_vec(&[1.0]);
let loss = CosineEmbeddingLoss::new(Reduction::Sum, 0.0);
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
let cos_val = 24.0 / 25.0;
assert!(
(out.item().unwrap() - (1.0 - cos_val)).abs() < 1e-7,
"CosEmb positive: expected {}, got {}",
1.0 - cos_val,
out.item().unwrap()
);
backward(&out).unwrap();
let grad_x1 = x1.grad().unwrap().unwrap();
let g1 = grad_x1.data().unwrap();
let expected_g1_0 = -(4.0 / 25.0 - cos_val * 3.0 / 25.0);
let expected_g1_1 = -(3.0 / 25.0 - cos_val * 4.0 / 25.0);
assert!(
(g1[0] - expected_g1_0).abs() < 1e-7,
"CosEmb backward x1[0]: expected {}, got {}",
expected_g1_0,
g1[0]
);
assert!(
(g1[1] - expected_g1_1).abs() < 1e-7,
"CosEmb backward x1[1]: expected {}, got {}",
expected_g1_1,
g1[1]
);
let grad_x2 = x2.grad().unwrap().unwrap();
let g2 = grad_x2.data().unwrap();
let expected_g2_0 = -(3.0 / 25.0 - cos_val * 4.0 / 25.0);
let expected_g2_1 = -(4.0 / 25.0 - cos_val * 3.0 / 25.0);
assert!(
(g2[0] - expected_g2_0).abs() < 1e-7,
"CosEmb backward x2[0]: expected {}, got {}",
expected_g2_0,
g2[0]
);
assert!(
(g2[1] - expected_g2_1).abs() < 1e-7,
"CosEmb backward x2[1]: expected {}, got {}",
expected_g2_1,
g2[1]
);
}
#[test]
fn test_cosine_embedding_backward_negative_active() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = Tensor::from_storage(
TensorStorage::cpu(vec![1.0, 0.0]),
vec![1, 2],
true,
)
.unwrap();
let y = target_vec(&[-1.0]);
let loss = CosineEmbeddingLoss::new(Reduction::Sum, 0.5);
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
assert!(
(out.item().unwrap() - 0.5).abs() < 1e-7,
"CosEmb negative active: expected 0.5, got {}",
out.item().unwrap()
);
backward(&out).unwrap();
let grad_x1 = x1.grad().unwrap().unwrap();
let g1 = grad_x1.data().unwrap();
assert!(
g1[0].abs() < 1e-7,
"CosEmb neg backward x1[0]: expected 0, got {}",
g1[0]
);
assert!(
g1[1].abs() < 1e-7,
"CosEmb neg backward x1[1]: expected 0, got {}",
g1[1]
);
}
#[test]
fn test_cosine_embedding_backward_negative_inactive() {
let x1 = leaf_2d(&[1.0, 0.0], &[1, 2]);
let x2 = Tensor::from_storage(
TensorStorage::cpu(vec![0.0, 1.0]),
vec![1, 2],
true,
)
.unwrap();
let y = target_vec(&[-1.0]);
let loss = CosineEmbeddingLoss::new(Reduction::Sum, 0.0);
let out = loss.forward_pair(&x1, &x2, &y).unwrap();
backward(&out).unwrap();
let grad_x1 = x1.grad().unwrap().unwrap();
let g1 = grad_x1.data().unwrap();
assert!(
g1[0].abs() < 1e-7 && g1[1].abs() < 1e-7,
"CosEmb inactive neg: expected zero grad, got {:?}",
&g1[..]
);
}
#[test]
fn test_ctc_backward_gradients_sum_to_zero() {
let mut lp = vec![-10.0_f64; 3 * 1 * 3];
lp[0] = -0.1;
lp[1] = -5.0;
lp[2] = -5.0;
lp[3] = -5.0;
lp[4] = -0.1;
lp[5] = -5.0;
lp[6] = -5.0;
lp[7] = -5.0;
lp[8] = -0.1;
let log_probs = Tensor::from_storage(
TensorStorage::cpu(lp),
vec![3, 1, 3],
true,
)
.unwrap();
let targets = target_vec(&[1.0, 2.0]);
let loss = CTCLoss::new(Reduction::Sum, 0, false);
let out = loss.forward(&log_probs, &targets, &[3], &[2]).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let g = grad.data().unwrap();
for i in 0..9 {
assert!(
g[i].is_finite(),
"CTC grad[{}] is not finite: {}",
i,
g[i]
);
}
}
#[test]
fn test_ctc_backward_empty_target() {
let lp = vec![-0.5_f64, -10.0, -10.0, -0.3, -10.0, -10.0];
let log_probs = Tensor::from_storage(
TensorStorage::cpu(lp),
vec![2, 1, 3],
true,
)
.unwrap();
let targets = target_vec(&[]);
let loss = CTCLoss::new(Reduction::Sum, 0, false);
let out = loss.forward(&log_probs, &targets, &[2], &[0]).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let g = grad.data().unwrap();
assert!(
(g[0] - (-1.0)).abs() < 1e-7,
"CTC empty target grad[t=0,blank]: expected -1, got {}",
g[0]
);
assert!(
(g[3] - (-1.0)).abs() < 1e-7,
"CTC empty target grad[t=1,blank]: expected -1, got {}",
g[3]
);
assert!(
g[1].abs() < 1e-7,
"CTC empty target grad[t=0,c=1]: expected 0, got {}",
g[1]
);
}
#[test]
fn test_ctc_backward_no_grad() {
ferrotorch_core::no_grad(|| {
let lp = vec![-0.5_f64; 3 * 1 * 2];
let log_probs = Tensor::from_storage(
TensorStorage::cpu(lp),
vec![3, 1, 2],
true,
)
.unwrap();
let targets = target_vec(&[1.0]);
let loss = CTCLoss::default();
let out = loss
.forward(&log_probs, &targets, &[3], &[1])
.unwrap();
assert!(
out.grad_fn().is_none(),
"CTCLoss inside no_grad should not attach grad_fn"
);
});
}
#[test]
fn test_ctc_backward_numerical_gradient() {
let base_lp = vec![
-0.5_f64, -1.0, -2.0, -1.0, -0.5, -2.0, -2.0, -1.0, -0.5, ];
let eps = 1e-5;
let log_probs = Tensor::from_storage(
TensorStorage::cpu(base_lp.clone()),
vec![3, 1, 3],
true,
)
.unwrap();
let targets = target_vec(&[1.0, 2.0]);
let loss = CTCLoss::new(Reduction::Sum, 0, false);
let out = loss.forward(&log_probs, &targets, &[3], &[2]).unwrap();
backward(&out).unwrap();
let grad = log_probs.grad().unwrap().unwrap();
let analytical = grad.data_vec().unwrap();
for idx in 0..9 {
let mut lp_plus = base_lp.clone();
lp_plus[idx] += eps;
let lp_p = Tensor::from_storage(
TensorStorage::cpu(lp_plus),
vec![3, 1, 3],
false,
)
.unwrap();
let t_p = target_vec(&[1.0, 2.0]);
let out_p = CTCLoss::new(Reduction::Sum, 0, false)
.forward(&lp_p, &t_p, &[3], &[2])
.unwrap();
let mut lp_minus = base_lp.clone();
lp_minus[idx] -= eps;
let lp_m = Tensor::from_storage(
TensorStorage::cpu(lp_minus),
vec![3, 1, 3],
false,
)
.unwrap();
let t_m = target_vec(&[1.0, 2.0]);
let out_m = CTCLoss::new(Reduction::Sum, 0, false)
.forward(&lp_m, &t_m, &[3], &[2])
.unwrap();
let numerical = (out_p.item().unwrap() - out_m.item().unwrap()) / (2.0 * eps);
assert!(
(analytical[idx] - numerical).abs() < 1e-4,
"CTC grad[{}]: analytical={}, numerical={}",
idx,
analytical[idx],
numerical,
);
}
}
}