//! Smooth L1 loss (`burn_nn/loss/smooth_l1.rs`).
1use super::Reduction;
2use burn::config::Config;
3use burn::module::Module;
4use burn::tensor::{Tensor, backend::Backend};
5use burn_core as burn;
6
/// Configuration for the [SmoothL1Loss](SmoothL1Loss) module.
///
/// Smooth L1 loss combines L1 and L2 loss, using L2 loss for small errors (below beta)
/// and L1 loss for large errors (at or above beta). This makes it less sensitive to
/// outliers than MSE while maintaining smooth gradients near zero.
///
/// # Example
///
/// ```ignore
/// use burn_nn::loss::{SmoothL1LossConfig, Reduction};
///
/// // Create Smooth L1 loss with default beta=1.0
/// let smooth_l1 = SmoothL1LossConfig::new().init();
///
/// // Create with custom beta
/// let smooth_l1_custom = SmoothL1LossConfig::new().with_beta(0.5).init();
/// ```
#[derive(Config, Debug)]
pub struct SmoothL1LossConfig {
    /// Specifies the threshold at which to change between L1 and L2 loss.
    /// The value must be positive (`init` panics otherwise). Default: 1.0
    #[config(default = 1.0)]
    pub beta: f32,
}
31
32impl SmoothL1LossConfig {
33 /// Initializes a [Smooth L1 Loss](SmoothL1Loss) module.
34 ///
35 /// # Panics
36 ///
37 /// Panics if `beta <= 0`.
38 pub fn init(&self) -> SmoothL1Loss {
39 self.assertions();
40 SmoothL1Loss { beta: self.beta }
41 }
42
43 fn assertions(&self) {
44 assert!(self.beta > 0.0, "The parameter beta must be positive.")
45 }
46}
47
/// Computes the Smooth L1 Loss between predictions and targets.
///
/// This loss function uses L2 loss for small errors (below beta) and L1 loss for
/// large errors (at or above beta), providing robustness to outliers while maintaining
/// smooth gradients near |x - y| = 0.
///
/// # Mathematical Definition
///
/// For predictions `x` and targets `y`, the element-wise loss is:
///
/// - L_i = 0.5 * (x_i - y_i)² / beta , if |x_i - y_i| < beta
/// - L_i = |x_i - y_i| - 0.5 * beta , otherwise (i.e. |x_i - y_i| >= beta)
///
/// # Notes
///
/// Smooth L1 loss is closely related to HuberLoss since it is equivalent to HuberLoss
/// scaled by `1/beta`:
/// `SmoothL1(x, y, beta) = Huber(x, y, beta) / beta`
///
/// This leads to the following differences:
///
/// - As beta approaches 0, Smooth L1 loss converges to L1Loss, while HuberLoss converges to 0.
///   When beta = 0, Smooth L1 loss is equivalent to L1 loss. Thus, the `beta`
///   parameter in Burn must be positive. L1Loss should be used for beta = 0.
/// - As beta approaches positive infinity, Smooth L1 loss converges to a constant 0 loss, while
///   HuberLoss converges to L2Loss.
///
/// # Example
///
/// ```rust,ignore
/// use burn_nn::loss::{SmoothL1LossConfig, Reduction};
/// use burn::tensor::Tensor;
///
/// // Create Smooth L1 loss with the default beta=1.0
/// let smooth_l1 = SmoothL1LossConfig::new().init();
///
/// let predictions: Tensor<Backend, 2> = /* model output */;
/// let targets: Tensor<Backend, 2> = /* ground truth */;
///
/// // Compute element-wise loss without reduction
/// let element_wise = smooth_l1.forward(predictions.clone(), targets.clone());
///
/// // Compute loss with mean reduction
/// let loss = smooth_l1.forward_with_reduction(predictions.clone(), targets.clone(), Reduction::Mean);
///
/// // Per-image loss: reduce over C, H, W → [batch, 1, 1, 1]
/// let loss_per_image = smooth_l1.forward_reduce_dims(predictions, targets, &[1, 2, 3]);
/// ```
#[derive(Module, Clone, Debug)]
pub struct SmoothL1Loss {
    /// Specifies the threshold at which to change between L1 and L2 loss.
    /// The value must be positive (enforced by [SmoothL1LossConfig::init]). Default: 1.0
    pub beta: f32,
}
102
103impl SmoothL1Loss {
104 /// Computes the element-wise smooth L1 loss without reduction.
105 ///
106 /// # Arguments
107 ///
108 /// - `predictions` - The model's predicted values.
109 /// - `targets` - The ground truth target values.
110 ///
111 /// # Returns
112 ///
113 /// A tensor of the same shape as the inputs, containing the smooth L1 loss
114 /// for each element.
115 ///
116 /// # Shapes
117 ///
118 /// - predictions: `[...dims]` - Any shape
119 /// - targets: `[...dims]` - Must match predictions shape
120 /// - output: `[...dims]` - Same shape as inputs
121 pub fn forward<const D: usize, B: Backend>(
122 &self,
123 predictions: Tensor<B, D>,
124 targets: Tensor<B, D>,
125 ) -> Tensor<B, D> {
126 let error = predictions.sub(targets);
127 let abs_error = error.clone().abs();
128
129 // The L1 case: |error| - 0.5 * beta (when |error| >= beta)
130 let l1_loss = abs_error.clone().sub_scalar(0.5 * self.beta);
131
132 // The L2 case: 0.5 * (error)^2 / beta (when |error| < beta)
133 let l2_loss = error.square().mul_scalar(0.5).div_scalar(self.beta);
134
135 let l2_mask = abs_error.lower_elem(self.beta);
136 l1_loss.mask_where(l2_mask, l2_loss)
137 }
138
139 /// Computes the smooth L1 loss with reduction.
140 ///
141 /// # Arguments
142 ///
143 /// - `predictions` - The model's predicted values.
144 /// - `targets` - The ground truth target values.
145 /// - `reduction` - Specifies how to reduce the element-wise losses:
146 /// - `Reduction::Mean` or `Reduction::Auto`: Returns the mean of all element-wise losses.
147 /// - `Reduction::Sum`: Returns the sum of all element-wise losses.
148 ///
149 /// # Returns
150 ///
151 /// A scalar tensor containing the reduced loss value.
152 ///
153 /// # Shapes
154 ///
155 /// - predictions: `[...dims]` - Any shape
156 /// - targets: `[...dims]` - Must match predictions shape
157 /// - output: `[1]` - Scalar loss value
158 pub fn forward_with_reduction<const D: usize, B: Backend>(
159 &self,
160 predictions: Tensor<B, D>,
161 targets: Tensor<B, D>,
162 reduction: Reduction,
163 ) -> Tensor<B, 1> {
164 let unreduced_loss = self.forward(predictions, targets);
165
166 match reduction {
167 Reduction::Mean | Reduction::Auto => unreduced_loss.mean(),
168 Reduction::Sum => unreduced_loss.sum(),
169 other => panic!("{other:?} reduction is not supported"),
170 }
171 }
172
173 /// Computes the smooth L1 loss with reduction over specified dimensions.
174 ///
175 /// Calculates element-wise smooth L1 loss, then takes the mean
176 /// over the specified dimensions. Useful for per-sample or per-channel losses.
177 ///
178 /// Dimensions can be provided in any order. They are sorted internally and
179 /// reduced from highest to lowest to ensure indices remain valid.
180 ///
181 /// # Arguments
182 ///
183 /// - `predictions` - The model's predicted values.
184 /// - `targets` - The ground truth target values.
185 /// - `dims` - Dimensions to reduce over.
186 ///
187 /// # Returns
188 ///
189 /// A tensor with the specified dimensions reduced to size 1.
190 ///
191 /// # Example
192 ///
193 /// ```ignore
194 /// // Consider image tensor with shape [batch, C, H, W]
195 /// let smooth_l1 = SmoothL1LossConfig::new().init();
196 ///
197 /// // Per-image loss: reduce over C, H, W → [batch, 1, 1, 1]
198 /// let loss_per_image = smooth_l1.forward_reduce_dims(predictions, targets, &[1, 2, 3]);
199 /// ```
200 pub fn forward_reduce_dims<const D: usize, B: Backend>(
201 &self,
202 predictions: Tensor<B, D>,
203 targets: Tensor<B, D>,
204 dims: &[usize],
205 ) -> Tensor<B, D> {
206 let error = self.forward(predictions, targets);
207
208 // Sort the dimensions to ascending order
209 let mut sorted_dims = dims.to_vec();
210 sorted_dims.sort();
211
212 // Reduce over specified dimensions
213 error.mean_dims(sorted_dims.as_slice())
214 }
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TestBackend;
    use burn::tensor::TensorData;
    use burn::tensor::{Tolerance, ops::FloatElem};

    // Float element type of the test backend, used for tolerance-based comparisons.
    type FT = FloatElem<TestBackend>;

    // =========================================================================
    // Configuration Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_config_default_beta() {
        let loss = SmoothL1LossConfig::new().init();
        assert_eq!(loss.beta, 1.0);
    }

    #[test]
    fn test_smooth_l1_config_custom_beta() {
        let loss = SmoothL1LossConfig::new().with_beta(2.5).init();
        assert_eq!(loss.beta, 2.5);
    }

    #[test]
    #[should_panic(expected = "The parameter beta must be positive")]
    fn test_smooth_l1_config_beta_zero_panics() {
        SmoothL1LossConfig::new().with_beta(0.0).init();
    }

    #[test]
    #[should_panic(expected = "The parameter beta must be positive")]
    fn test_smooth_l1_config_beta_negative_panics() {
        SmoothL1LossConfig::new().with_beta(-1.0).init();
    }

    // =========================================================================
    // Forward Pass (Element-wise) Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_forward_l2_region() {
        // Beta = 1.0, errors = 0.0 and 0.5 (both < beta, use L2 formula)
        // L2 formula: 0.5 * error^2 / beta
        // error = 0.0 -> loss = 0.5 * 0.0 / 1.0 = 0.0
        // error = 0.5 -> loss = 0.5 * 0.25 / 1.0 = 0.125
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.5]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 0.125]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_l1_region() {
        // Beta = 1.0, errors = 0.0 and 2.0 (2.0 >= beta, use L1 formula)
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // error = 0.0 -> loss = 0.0
        // error = 2.0 -> loss = 2.0 - 0.5 = 1.5
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 1.5]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_zero_error() {
        // Identical predictions and targets must produce exactly zero loss.
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[1.0_f32, 2.0, 3.0]]), &device);
        let targets = predictions.clone();

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 0.0, 0.0]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_negative_errors() {
        // Ensure absolute value is used correctly
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // Beta = 1.0, error = -3.0 (L1: 3.0 - 0.5 = 2.5)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([-3.0_f32]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([1], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([2.5_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_mixed_regions() {
        // Test with errors in both L1 and L2 regions
        // Beta = 1.0
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // error = 0.5 -> L2: 0.5 * 0.25 / 1 = 0.125
        // error = 1.5 -> L1: 1.5 - 0.5 = 1.0
        // error = 3.0 -> L1: 3.0 - 0.5 = 2.5
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([0.5_f32, 1.5, 3.0]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([3], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([0.125_f32, 1.0, 2.5]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_custom_beta_values() {
        // Test with beta = 0.5
        // error = 0.25 (< beta): L2 = 0.5 * 0.0625 / 0.5 = 0.0625
        // error = 1.0 (>= beta): L1 = 1.0 - 0.25 = 0.75
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().with_beta(0.5).init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([0.25_f32, 1.0]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([2], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([0.0625_f32, 0.75]);
        output.into_data().assert_eq(&expected, false);
    }

    // =========================================================================
    // forward_with_reduction Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_reduction_mean() {
        // Errors: 0.5 (L2: 0.125), 2.0 (L1: 1.5)
        // Mean: (0.125 + 1.5) / 2 = 0.8125
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.5_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward_with_reduction(predictions, targets, Reduction::Mean);
        let expected = TensorData::from([0.8125_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_reduction_sum() {
        // Errors: 0.5 (L2: 0.125), 2.0 (L1: 1.5)
        // Sum: 1.625
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.5_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward_with_reduction(predictions, targets, Reduction::Sum);
        let expected = TensorData::from([1.625_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_reduction_auto_equals_mean() {
        // `Reduction::Auto` must behave exactly like `Reduction::Mean`.
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 1>::from_data(TensorData::from([2.0_f32]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([1], &device);

        let mean_out =
            loss.forward_with_reduction(predictions.clone(), targets.clone(), Reduction::Mean);
        let auto_out = loss.forward_with_reduction(predictions, targets, Reduction::Auto);

        mean_out.into_data().assert_eq(&auto_out.into_data(), false);
    }

    // =========================================================================
    // Dimension Reduction Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_forward_reduce_dims_single_dim() {
        // Beta = 2.0
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // Row 0: errors [0.0, 1.0, 4.0]
        //   error = 0.0 -> L2: 0.0
        //   error = 1.0 -> L2: 0.5 * 1.0 / 2.0 = 0.25
        //   error = 4.0 -> L1: 4.0 - 1.0 = 3.0
        //   Mean = 3.25 / 3 = 1.083333...
        // Row 1: errors [0.0, 0.0, 0.0] -> Mean = 0.0
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().with_beta(2.0).init();

        let predictions = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.0_f32, 1.0, 4.0], [5.0_f32, 5.0, 5.0]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.0_f32, 0.0, 0.0], [5.0_f32, 5.0, 5.0]]),
            &device,
        );

        let output = loss.forward_reduce_dims(predictions, targets, &[1]);
        let expected = TensorData::from([[3.25_f32 / 3.0], [0.0]]); // 3.25/3 = 1.0833...
        output
            .into_data()
            .assert_approx_eq::<FT>(&expected, Tolerance::default());
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_image_batch() {
        // Simulate per-image Smooth L1 loss for [batch, C, H, W] tensor
        // (common in object detection like Fast R-CNN)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init(); // beta = 1.0

        // Shape: [2, 1, 2, 2] (batch=2, C=1, H=2, W=2)
        let predictions = Tensor::<TestBackend, 4>::from_data(
            TensorData::from([
                [[[0.5_f32, 2.0], [0.0, 3.0]]], // Image 1
                [[[1.0_f32, 0.0], [0.5, 1.5]]], // Image 2
            ]),
            &device,
        );
        let targets = Tensor::<TestBackend, 4>::zeros([2, 1, 2, 2], &device);

        // Reduce over C, H, W (dims 1, 2, 3) to get per-image loss
        let output = loss.forward_reduce_dims(predictions, targets, &[1, 2, 3]);

        // Image 1: losses [[0.125, 1.5], [0.0, 2.5]] -> mean: 4.125 / 4 = 1.03125
        // Image 2: losses [[0.5, 0.0], [0.125, 1.0]] -> mean: 1.625 / 4 = 0.40625
        // (both means are exactly representable in f32, so exact equality is safe)
        let expected = TensorData::from([[[[1.03125_f32]]], [[[0.40625_f32]]]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_unsorted() {
        // Test that unsorted dimensions are handled correctly (sorted internally)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 3>::from_data(
            TensorData::from([[[1.0_f32, 2.0], [3.0, 4.0]], [[5.0_f32, 6.0], [7.0, 8.0]]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 3>::zeros([2, 2, 2], &device);

        // Pass dims in reverse order
        let output = loss.forward_reduce_dims(predictions.clone(), targets.clone(), &[2, 1]);
        let expected_output = loss.forward_reduce_dims(predictions, targets, &[1, 2]);

        output
            .into_data()
            .assert_eq(&expected_output.into_data(), false);
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_empty_dims() {
        // Reducing over no dimensions should return the unreduced loss
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.5_f32, 2.0], [0.0, 3.0]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 2>::zeros([2, 2], &device);

        let loss_reduce_dims = loss.forward_reduce_dims(predictions.clone(), targets.clone(), &[]);
        let loss_no_reduction = loss.forward(predictions, targets);

        loss_reduce_dims
            .into_data()
            .assert_eq(&loss_no_reduction.into_data(), false);
    }
}
520}