// burn_nn/loss/smooth_l1.rs — Smooth L1 loss module.

use super::Reduction;
use burn::config::Config;
use burn::module::Module;
use burn::tensor::{Tensor, backend::Backend};
use burn_core as burn;
6
/// Configuration for the [SmoothL1Loss](SmoothL1Loss) module.
///
/// Smooth L1 loss combines L1 and L2 loss, using L2 loss for small errors (below beta)
/// and L1 loss for large errors (above beta). This makes it less sensitive to outliers
/// than MSE while maintaining smooth gradients near zero.
///
/// Calling [`init`](SmoothL1LossConfig::init) validates the configuration and
/// panics if `beta` is not strictly positive.
///
/// # Example
///
/// ```ignore
/// use burn_nn::loss::{SmoothL1LossConfig, Reduction};
///
/// // Create Smooth L1 loss with default beta=1.0
/// let smooth_l1 = SmoothL1LossConfig::new().init();
///
/// // Create with custom beta
/// let smooth_l1_custom = SmoothL1LossConfig::new().with_beta(0.5).init();
/// ```
#[derive(Config, Debug)]
pub struct SmoothL1LossConfig {
    /// Specifies the threshold at which to change between L1 and L2 loss.
    /// The value must be positive. Default: 1.0
    #[config(default = 1.0)]
    pub beta: f32,
}
31
32impl SmoothL1LossConfig {
33    /// Initializes a [Smooth L1 Loss](SmoothL1Loss) module.
34    ///
35    /// # Panics
36    ///
37    /// Panics if `beta <= 0`.
38    pub fn init(&self) -> SmoothL1Loss {
39        self.assertions();
40        SmoothL1Loss { beta: self.beta }
41    }
42
43    fn assertions(&self) {
44        assert!(self.beta > 0.0, "The parameter beta must be positive.")
45    }
46}
47
/// Computes the Smooth L1 Loss between predictions and targets.
///
/// This loss function uses L2 loss for small errors (below beta) and L1 loss for
/// large errors (above beta), providing robustness to outliers while maintaining
/// smooth gradients near |x - y| = 0.
///
/// # Mathematical Definition
///
/// For predictions `x` and targets `y`, the element-wise loss is:
///
/// - L_i = 0.5 * (x_i - y_i)² / beta   , if |x_i - y_i| < beta
/// - L_i = |x_i - y_i| - 0.5 * beta    , otherwise
///
/// # Notes
///
/// Smooth L1 loss is closely related to HuberLoss since it is equivalent to HuberLoss
/// scaled by `1/beta`:
/// `SmoothL1(x, y, beta) = Huber(x, y, beta) / beta`
///
/// This leads to the following differences:
///
/// - As beta approaches 0, Smooth L1 loss converges to L1Loss, while HuberLoss converges to 0.
///   When beta = 0, Smooth L1 loss is equivalent to L1 loss. Thus, the `beta`
///   parameter in Burn must be strictly positive; use L1Loss directly for the beta = 0 case.
/// - As beta approaches positive infinity, Smooth L1 loss converges to a constant 0 loss, while
///   HuberLoss converges to L2Loss.
///
/// # Example
///
/// ```rust,ignore
/// use burn_nn::loss::{SmoothL1LossConfig, Reduction};
/// use burn::tensor::Tensor;
///
/// // Create Smooth L1 loss with the default beta=1.0
/// let smooth_l1 = SmoothL1LossConfig::new().init();
///
/// let predictions: Tensor<Backend, 2> = /* model output */;
/// let targets: Tensor<Backend, 2> = /* ground truth */;
///
/// // Compute element-wise loss without reduction
/// let element_wise = smooth_l1.forward(predictions.clone(), targets.clone());
///
/// // Compute loss with mean reduction
/// let loss = smooth_l1.forward_with_reduction(predictions.clone(), targets.clone(), Reduction::Mean);
///
/// // Per-image loss: reduce over C, H, W → [batch, 1, 1, 1]
/// let loss_per_image = smooth_l1.forward_reduce_dims(predictions, targets, &[1, 2, 3]);
/// ```
#[derive(Module, Clone, Debug)]
pub struct SmoothL1Loss {
    /// Specifies the threshold at which to change between L1 and L2 loss.
    /// The value must be positive. Default: 1.0
    pub beta: f32,
}
102
103impl SmoothL1Loss {
104    /// Computes the element-wise smooth L1 loss without reduction.
105    ///
106    /// # Arguments
107    ///
108    /// - `predictions` - The model's predicted values.
109    /// - `targets` - The ground truth target values.
110    ///
111    /// # Returns
112    ///
113    /// A tensor of the same shape as the inputs, containing the smooth L1 loss
114    /// for each element.
115    ///
116    /// # Shapes
117    ///
118    /// - predictions: `[...dims]` - Any shape
119    /// - targets: `[...dims]` - Must match predictions shape
120    /// - output: `[...dims]` - Same shape as inputs
121    pub fn forward<const D: usize, B: Backend>(
122        &self,
123        predictions: Tensor<B, D>,
124        targets: Tensor<B, D>,
125    ) -> Tensor<B, D> {
126        let error = predictions.sub(targets);
127        let abs_error = error.clone().abs();
128
129        // The L1 case: |error| - 0.5 * beta (when |error| >= beta)
130        let l1_loss = abs_error.clone().sub_scalar(0.5 * self.beta);
131
132        // The L2 case: 0.5 * (error)^2 / beta (when |error| < beta)
133        let l2_loss = error.square().mul_scalar(0.5).div_scalar(self.beta);
134
135        let l2_mask = abs_error.lower_elem(self.beta);
136        l1_loss.mask_where(l2_mask, l2_loss)
137    }
138
139    /// Computes the smooth L1 loss with reduction.
140    ///
141    /// # Arguments
142    ///
143    /// - `predictions` - The model's predicted values.
144    /// - `targets` - The ground truth target values.
145    /// - `reduction` - Specifies how to reduce the element-wise losses:
146    ///   - `Reduction::Mean` or `Reduction::Auto`: Returns the mean of all element-wise losses.
147    ///   - `Reduction::Sum`: Returns the sum of all element-wise losses.
148    ///
149    /// # Returns
150    ///
151    /// A scalar tensor containing the reduced loss value.
152    ///
153    /// # Shapes
154    ///
155    /// - predictions: `[...dims]` - Any shape
156    /// - targets: `[...dims]` - Must match predictions shape
157    /// - output: `[1]` - Scalar loss value
158    pub fn forward_with_reduction<const D: usize, B: Backend>(
159        &self,
160        predictions: Tensor<B, D>,
161        targets: Tensor<B, D>,
162        reduction: Reduction,
163    ) -> Tensor<B, 1> {
164        let unreduced_loss = self.forward(predictions, targets);
165
166        match reduction {
167            Reduction::Mean | Reduction::Auto => unreduced_loss.mean(),
168            Reduction::Sum => unreduced_loss.sum(),
169            other => panic!("{other:?} reduction is not supported"),
170        }
171    }
172
173    /// Computes the smooth L1 loss with reduction over specified dimensions.
174    ///
175    /// Calculates element-wise smooth L1 loss, then takes the mean
176    /// over the specified dimensions. Useful for per-sample or per-channel losses.
177    ///
178    /// Dimensions can be provided in any order. They are sorted internally and
179    /// reduced from highest to lowest to ensure indices remain valid.
180    ///
181    /// # Arguments
182    ///
183    /// - `predictions` - The model's predicted values.
184    /// - `targets` - The ground truth target values.
185    /// - `dims` - Dimensions to reduce over.
186    ///
187    /// # Returns
188    ///
189    /// A tensor with the specified dimensions reduced to size 1.
190    ///
191    /// # Example
192    ///
193    /// ```ignore
194    /// // Consider image tensor with shape [batch, C, H, W]
195    /// let smooth_l1 = SmoothL1LossConfig::new().init();
196    ///
197    /// // Per-image loss: reduce over C, H, W → [batch, 1, 1, 1]
198    /// let loss_per_image = smooth_l1.forward_reduce_dims(predictions, targets, &[1, 2, 3]);
199    /// ```
200    pub fn forward_reduce_dims<const D: usize, B: Backend>(
201        &self,
202        predictions: Tensor<B, D>,
203        targets: Tensor<B, D>,
204        dims: &[usize],
205    ) -> Tensor<B, D> {
206        let error = self.forward(predictions, targets);
207
208        // Sort the dimensions to ascending order
209        let mut sorted_dims = dims.to_vec();
210        sorted_dims.sort();
211
212        // Reduce over specified dimensions
213        error.mean_dims(sorted_dims.as_slice())
214    }
215}
216
#[cfg(test)]
mod tests {
    use super::*;
    use crate::TestBackend;
    use burn::tensor::TensorData;
    use burn::tensor::{Tolerance, ops::FloatElem};

    // Backend float element type, used for tolerance-based comparisons.
    type FT = FloatElem<TestBackend>;

    // =========================================================================
    // Configuration Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_config_default_beta() {
        let loss = SmoothL1LossConfig::new().init();
        assert_eq!(loss.beta, 1.0);
    }

    #[test]
    fn test_smooth_l1_config_custom_beta() {
        let loss = SmoothL1LossConfig::new().with_beta(2.5).init();
        assert_eq!(loss.beta, 2.5);
    }

    #[test]
    #[should_panic(expected = "The parameter beta must be positive")]
    fn test_smooth_l1_config_beta_zero_panics() {
        SmoothL1LossConfig::new().with_beta(0.0).init();
    }

    #[test]
    #[should_panic(expected = "The parameter beta must be positive")]
    fn test_smooth_l1_config_beta_negative_panics() {
        SmoothL1LossConfig::new().with_beta(-1.0).init();
    }

    // =========================================================================
    // Forward Pass (Element-wise) Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_forward_l2_region() {
        // Beta = 1.0, errors = 0.0 and 0.5 (both < beta, use L2 formula)
        // L2 formula: 0.5 * error^2 / beta
        // error = 0.0  ->  loss = 0.5 * 0.0 / 1.0 = 0.0
        // error = 0.5  ->  loss = 0.5 * 0.25 / 1.0 = 0.125
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.5]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 0.125]]);
        // Exact comparison is safe: all expected values are exactly representable.
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_l1_region() {
        // Beta = 1.0, errors = 0.0 and 2.0 (2.0 >= beta, use L1 formula)
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // error = 0.0  ->  loss = 0.0
        // error = 2.0  ->  loss = 2.0 - 0.5 = 1.5
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 1.5]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_zero_error() {
        // Identical predictions and targets must yield a loss of exactly zero.
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[1.0_f32, 2.0, 3.0]]), &device);
        let targets = predictions.clone();

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([[0.0_f32, 0.0, 0.0]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_negative_errors() {
        // Ensure absolute value is used correctly
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // Beta = 1.0, error = -3.0 (L1: 3.0 - 0.5 = 2.5)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([-3.0_f32]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([1], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([2.5_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_mixed_regions() {
        // Test with errors in both L1 and L2 regions
        // Beta = 1.0
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // error = 0.5 -> L2: 0.5 * 0.25 / 1 = 0.125
        // error = 1.5 -> L1: 1.5 - 0.5 = 1.0
        // error = 3.0 -> L1: 3.0 - 0.5 = 2.5
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([0.5_f32, 1.5, 3.0]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([3], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([0.125_f32, 1.0, 2.5]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_custom_beta_values() {
        // Test with beta = 0.5
        // error = 0.25 (< beta): L2 = 0.5 * 0.0625 / 0.5 = 0.0625
        // error = 1.0 (>= beta): L1 = 1.0 - 0.25 = 0.75
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().with_beta(0.5).init();

        let predictions =
            Tensor::<TestBackend, 1>::from_data(TensorData::from([0.25_f32, 1.0]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([2], &device);

        let output = loss.forward(predictions, targets);
        let expected = TensorData::from([0.0625_f32, 0.75]);
        output.into_data().assert_eq(&expected, false);
    }

    // =========================================================================
    // forward_with_reduction Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_reduction_mean() {
        // Errors: 0.5 (L2: 0.125), 2.0 (L1: 1.5)
        // Mean: (0.125 + 1.5) / 2 = 0.8125
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.5_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward_with_reduction(predictions, targets, Reduction::Mean);
        let expected = TensorData::from([0.8125_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_reduction_sum() {
        // Errors: 0.5 (L2: 0.125), 2.0 (L1: 1.5)
        // Sum: 1.625
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.5_f32, 2.0]]), &device);
        let targets =
            Tensor::<TestBackend, 2>::from_data(TensorData::from([[0.0_f32, 0.0]]), &device);

        let output = loss.forward_with_reduction(predictions, targets, Reduction::Sum);
        let expected = TensorData::from([1.625_f32]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_reduction_auto_equals_mean() {
        // `Reduction::Auto` must behave exactly like `Reduction::Mean`.
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 1>::from_data(TensorData::from([2.0_f32]), &device);
        let targets = Tensor::<TestBackend, 1>::zeros([1], &device);

        let mean_out =
            loss.forward_with_reduction(predictions.clone(), targets.clone(), Reduction::Mean);
        let auto_out = loss.forward_with_reduction(predictions, targets, Reduction::Auto);

        mean_out.into_data().assert_eq(&auto_out.into_data(), false);
    }

    // =========================================================================
    // Dimension Reduction Tests
    // =========================================================================

    #[test]
    fn test_smooth_l1_forward_reduce_dims_single_dim() {
        // Beta = 2.0
        // L1 formula: |error| - 0.5 * beta
        // L2 formula: 0.5 * (error)^2 / beta
        // Row 0: errors [0.0, 1.0, 4.0]
        //   error = 0.0 -> L2: 0.0
        //   error = 1.0 -> L2: 0.5 * 1.0 / 2.0 = 0.25
        //   error = 4.0 -> L1: 4.0 - 1.0 = 3.0
        //   Mean = 3.25 / 3 = 1.083333...
        // Row 1: errors [0.0, 0.0, 0.0] -> Mean = 0.0
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().with_beta(2.0).init();

        let predictions = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.0_f32, 1.0, 4.0], [5.0_f32, 5.0, 5.0]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.0_f32, 0.0, 0.0], [5.0_f32, 5.0, 5.0]]),
            &device,
        );

        let output = loss.forward_reduce_dims(predictions, targets, &[1]);
        let expected = TensorData::from([[3.25_f32 / 3.0], [0.0]]); // 3.25/3 = 1.0833...
        output
            .into_data()
            .assert_approx_eq::<FT>(&expected, Tolerance::default());
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_image_batch() {
        // Simulate per-image Smooth L1 loss for [batch, C, H, W] tensor
        // (common in object detection like Fast R-CNN)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init(); // beta = 1.0

        // Shape: [2, 1, 2, 2] (batch=2, C=1, H=2, W=2)
        let predictions = Tensor::<TestBackend, 4>::from_data(
            TensorData::from([
                [[[0.5_f32, 2.0], [0.0, 3.0]]], // Image 1
                [[[1.0_f32, 0.0], [0.5, 1.5]]], // Image 2
            ]),
            &device,
        );
        let targets = Tensor::<TestBackend, 4>::zeros([2, 1, 2, 2], &device);

        // Reduce over C, H, W (dims 1, 2, 3) to get per-image loss
        let output = loss.forward_reduce_dims(predictions, targets, &[1, 2, 3]);

        // Image 1: losses [[0.125, 1.5], [0.0, 2.5]] -> mean: 4.125 / 4 = 1.03125
        // Image 2: losses [[0.5, 0.0], [0.125, 1.0]] -> mean: 1.625 / 4 = 0.40625
        let expected = TensorData::from([[[[1.03125_f32]]], [[[0.40625_f32]]]]);
        output.into_data().assert_eq(&expected, false);
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_unsorted() {
        // Test that unsorted dimensions are handled correctly (sorted internally)
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 3>::from_data(
            TensorData::from([[[1.0_f32, 2.0], [3.0, 4.0]], [[5.0_f32, 6.0], [7.0, 8.0]]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 3>::zeros([2, 2, 2], &device);

        // Pass dims in reverse order
        let output = loss.forward_reduce_dims(predictions.clone(), targets.clone(), &[2, 1]);
        let expected_output = loss.forward_reduce_dims(predictions, targets, &[1, 2]);

        output
            .into_data()
            .assert_eq(&expected_output.into_data(), false);
    }

    #[test]
    fn test_smooth_l1_forward_reduce_dims_empty_dims() {
        // Reducing over no dimensions should return the unreduced loss
        let device = Default::default();
        let loss = SmoothL1LossConfig::new().init();

        let predictions = Tensor::<TestBackend, 2>::from_data(
            TensorData::from([[0.5_f32, 2.0], [0.0, 3.0]]),
            &device,
        );
        let targets = Tensor::<TestBackend, 2>::zeros([2, 2], &device);

        let loss_reduce_dims = loss.forward_reduce_dims(predictions.clone(), targets.clone(), &[]);
        let loss_no_reduction = loss.forward(predictions, targets);

        loss_reduce_dims
            .into_data()
            .assert_eq(&loss_no_reduction.into_data(), false);
    }
}