trustformers-optim 0.1.1

Optimizers for TrustformeRS
//! Learning Rate Range Test (LR Finder) for automatic LR selection.
//!
//! The LR Finder drives a short sweep with exponentially increasing LR: the
//! caller runs one training step per LR and reports the loss, and the finder
//! records each (lr, loss) pair and identifies the optimal LR as the point
//! just before the loss diverges.
//!
//! Reference: Smith (2015) "Cyclical Learning Rates for Training Neural Networks"
//!
//! # Algorithm
//! 1. Set initial LR to `lr_min`, target final LR to `lr_max`
//! 2. For each step: compute the loss, record `(lr, loss)`, then multiply the LR by `(lr_max / lr_min)^(1 / (num_steps - 1))`
//! 3. Stop early if the smoothed loss exceeds `best_loss * diverge_threshold`
//! 4. Plot smoothed loss vs log(lr) to find optimal LR
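//!
//! # Example
//!
//! A minimal driver-loop sketch; the import path and the `train_step` closure
//! are illustrative stand-ins for a real training step:
//!
//! ```ignore
//! use trustformers_optim::{LrFinder, LrFinderAction, LrFinderConfig};
//!
//! let mut finder = LrFinder::new(LrFinderConfig::default())?;
//! // Hypothetical stand-in: one training step returning the batch loss at `lr`.
//! let mut train_step = |lr: f64| -> f64 { 1.0 / (1.0 + lr) };
//! while !finder.is_done() {
//!     let loss = train_step(finder.current_lr());
//!     if let LrFinderAction::Stop(_) = finder.record_loss(loss) {
//!         break;
//!     }
//! }
//! let result = finder.result();
//! println!("suggested lr: {:.3e}", result.suggested_lr);
//! ```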

use trustformers_core::errors::{Result, TrustformersError};

/// Configuration for the LR Finder
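///
/// Usually built by overriding a few fields on the default; the values below
/// are illustrative:
///
/// ```ignore
/// let cfg = LrFinderConfig { num_steps: 200, ..LrFinderConfig::default() };
/// ```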
#[derive(Debug, Clone)]
pub struct LrFinderConfig {
    /// Starting (minimum) learning rate
    pub lr_min: f64,
    /// Ending (maximum) learning rate
    pub lr_max: f64,
    /// Number of steps to run
    pub num_steps: usize,
    /// Stop if loss exceeds best_loss * diverge_threshold
    pub diverge_threshold: f64,
    /// Exponential moving average smoothing beta
    pub smooth_beta: f64,
}

impl Default for LrFinderConfig {
    fn default() -> Self {
        Self {
            lr_min: 1e-7,
            lr_max: 10.0,
            num_steps: 100,
            diverge_threshold: 5.0,
            smooth_beta: 0.98,
        }
    }
}

/// A single data point recorded during LR finding
#[derive(Debug, Clone)]
pub struct LrFinderPoint {
    /// Step index (0-based)
    pub step: usize,
    /// Learning rate at this step
    pub lr: f64,
    /// Raw loss at this step
    pub loss: f64,
    /// EMA-smoothed loss at this step
    pub smoothed_loss: f64,
}

/// Reason the LR finder stopped early
#[derive(Debug, Clone, PartialEq)]
pub enum LrStopReason {
    /// Loss exceeded best_loss * diverge_threshold
    Diverged,
    /// Reached the maximum configured number of steps
    MaxStepsReached,
}

/// Action returned by `LrFinder::record_loss`
#[derive(Debug, Clone, PartialEq)]
pub enum LrFinderAction {
    /// Continue running more steps
    Continue,
    /// Stop for the given reason
    Stop(LrStopReason),
}

/// Result of the LR finder run
#[derive(Debug, Clone)]
pub struct LrFinderResult {
    /// Full history of (step, lr, loss, smoothed_loss)
    pub history: Vec<LrFinderPoint>,
    /// Suggested LR (one decade below min-loss LR)
    pub suggested_lr: f64,
    /// LR at the point of minimum smoothed loss
    pub min_loss_lr: f64,
    /// Step at which the run diverged, if applicable
    pub diverged_at_step: Option<usize>,
}

impl LrFinderResult {
    /// The LR at the point of steepest loss descent (largest negative one-step
    /// slope in the smoothed loss). Falls back to `suggested_lr` when fewer
    /// than two points were recorded.
    pub fn steepest_descent_lr(&self) -> f64 {
        if self.history.len() < 2 {
            return self.suggested_lr;
        }
        let mut steepest_slope = 0.0_f64;
        let mut steepest_lr = self.history[0].lr;
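        // Compare raw one-step differences: the LR grid is geometric, so
        // consecutive smoothed-loss deltas are comparable on a log(LR) axis.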
        for window in self.history.windows(2) {
            let slope = window[1].smoothed_loss - window[0].smoothed_loss;
            if slope < steepest_slope {
                steepest_slope = slope;
                steepest_lr = window[0].lr;
            }
        }
        steepest_lr
    }

    /// The suggested LR (one decade below min-loss LR)
    pub fn suggested_lr(&self) -> f64 {
        self.suggested_lr
    }

    /// Export history as CSV string
    pub fn to_csv(&self) -> String {
        let mut out = String::from("step,lr,loss,smoothed_loss\n");
        for pt in &self.history {
            out.push_str(&format!(
                "{},{:.10e},{:.10e},{:.10e}\n",
                pt.step, pt.lr, pt.loss, pt.smoothed_loss
            ));
        }
        out
    }

    /// Export history as a JSON string
    pub fn to_json(&self) -> Result<String> {
        // Hand-craft JSON to avoid needing serde here (history can be large)
        let mut out = String::from("{\n  \"history\": [\n");
        for (i, pt) in self.history.iter().enumerate() {
            let comma = if i + 1 < self.history.len() { "," } else { "" };
            out.push_str(&format!(
                "    {{\"step\":{},\"lr\":{:.10e},\"loss\":{:.10e},\"smoothed_loss\":{:.10e}}}{}",
                pt.step, pt.lr, pt.loss, pt.smoothed_loss, comma
            ));
            out.push('\n');
        }
        out.push_str("  ],\n");
        out.push_str(&format!(
            "  \"suggested_lr\":{:.10e},\n  \"min_loss_lr\":{:.10e},\n",
            self.suggested_lr, self.min_loss_lr
        ));
        match self.diverged_at_step {
            Some(s) => out.push_str(&format!("  \"diverged_at_step\":{}\n", s)),
            None => out.push_str("  \"diverged_at_step\":null\n"),
        }
        out.push('}');
        Ok(out)
    }

    /// Render an ASCII plot of log(lr) vs smoothed_loss
    ///
    /// `width` is the number of columns, `height` is the number of rows.
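    ///
    /// A usage sketch, assuming a completed `result`:
    ///
    /// ```ignore
    /// println!("{}", result.to_ascii_plot(60, 15));
    /// ```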
    pub fn to_ascii_plot(&self, width: usize, height: usize) -> String {
        if self.history.is_empty() || width < 4 || height < 4 {
            return String::from("[empty plot]");
        }
        let log_lrs: Vec<f64> = self.history.iter().map(|p| p.lr.ln()).collect();
        let losses: Vec<f64> = self.history.iter().map(|p| p.smoothed_loss).collect();

        let x_min = log_lrs.iter().cloned().fold(f64::INFINITY, f64::min);
        let x_max = log_lrs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
        let y_min = losses.iter().cloned().fold(f64::INFINITY, f64::min);
        let y_max = losses.iter().cloned().fold(f64::NEG_INFINITY, f64::max);

        let x_range = if (x_max - x_min).abs() < f64::EPSILON { 1.0 } else { x_max - x_min };
        let y_range = if (y_max - y_min).abs() < f64::EPSILON { 1.0 } else { y_max - y_min };

        let mut grid = vec![vec![' '; width]; height];

        for (x_val, y_val) in log_lrs.iter().zip(losses.iter()) {
            let col = (((x_val - x_min) / x_range) * (width as f64 - 1.0)) as usize;
            let row_f = ((y_val - y_min) / y_range) * (height as f64 - 1.0);
            // Invert y-axis so higher loss is at the top
            let row = (height - 1).saturating_sub(row_f as usize);
            let col = col.min(width - 1);
            let row = row.min(height - 1);
            grid[row][col] = '*';
        }

        let mut out = String::new();
        out.push_str(&format!(
            "  Smoothed Loss vs log(LR)  [min_lr={:.2e}, max_lr={:.2e}]\n",
            self.history.first().map(|p| p.lr).unwrap_or(0.0),
            self.history.last().map(|p| p.lr).unwrap_or(0.0),
        ));
        // Each row is rendered as "|  <width chars>  |", so the inner width is width + 4
        let border: String = "-".repeat(width + 4);
        out.push_str(&format!("+{}+\n", border));
        for row in &grid {
            let line: String = row.iter().collect();
            out.push_str(&format!("|  {}  |\n", line));
        }
        out.push_str(&format!("+{}+\n", border));
        out
    }
}

/// LR Finder state machine
///
/// Usage pattern:
/// ```text
/// let mut finder = LrFinder::new(config)?;
/// while !finder.is_done() {
///     let lr = finder.current_lr();
///     // run one training step with this LR, obtain loss
///     let action = finder.record_loss(loss);
///     if let LrFinderAction::Stop(_) = action { break; }
/// }
/// let result = finder.result();
/// ```
pub struct LrFinder {
    config: LrFinderConfig,
    history: Vec<LrFinderPoint>,
    current_step: usize,
    current_lr: f64,
    lr_multiplier: f64,
    best_loss: f64,
    smoothed_loss: f64,
    avg_loss: f64,
    done: bool,
    diverged_at_step: Option<usize>,
}

impl LrFinder {
    /// Create a new LR finder from the given config.
    ///
    /// Returns an error if the config is invalid (e.g., `lr_min >= lr_max`, `num_steps == 0`).
    pub fn new(config: LrFinderConfig) -> Result<Self> {
        if config.lr_min <= 0.0 {
            return Err(TrustformersError::config_error(
                "lr_min must be positive",
                "LrFinder::new",
            ));
        }
        if config.lr_max <= config.lr_min {
            return Err(TrustformersError::config_error(
                "lr_max must be greater than lr_min",
                "LrFinder::new",
            ));
        }
        if config.num_steps == 0 {
            return Err(TrustformersError::config_error(
                "num_steps must be > 0",
                "LrFinder::new",
            ));
        }
        if config.diverge_threshold <= 1.0 {
            return Err(TrustformersError::config_error(
                "diverge_threshold must be > 1.0",
                "LrFinder::new",
            ));
        }
        if !(0.0..1.0).contains(&config.smooth_beta) {
            return Err(TrustformersError::config_error(
                "smooth_beta must be in [0, 1)",
                "LrFinder::new",
            ));
        }

        // lr_multiplier = (lr_max / lr_min) ^ (1 / (num_steps - 1))
        // Handle num_steps == 1 as a degenerate case (single measurement at lr_min)
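        // e.g. with the defaults (lr_min = 1e-7, lr_max = 10, num_steps = 100):
        // multiplier = (1e8)^(1/99) ≈ 1.20, i.e. the LR grows ~20% per step.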
        let lr_multiplier = if config.num_steps > 1 {
            (config.lr_max / config.lr_min).powf(1.0 / (config.num_steps as f64 - 1.0))
        } else {
            1.0
        };

        Ok(Self {
            current_lr: config.lr_min,
            lr_multiplier,
            config,
            history: Vec::new(),
            current_step: 0,
            best_loss: f64::INFINITY,
            smoothed_loss: 0.0,
            avg_loss: 0.0,
            done: false,
            diverged_at_step: None,
        })
    }

    /// Current learning rate to use for this step.
    pub fn current_lr(&self) -> f64 {
        self.current_lr
    }

    /// Current step number (0-indexed; incremented after each `record_loss` call).
    pub fn step(&self) -> usize {
        self.current_step
    }

    /// Whether the finder has finished (diverged or max steps reached).
    pub fn is_done(&self) -> bool {
        self.done
    }

    /// Record the loss obtained at the current LR.
    ///
    /// Returns `LrFinderAction::Continue` if more steps should be taken, or
    /// `LrFinderAction::Stop(reason)` if the run should terminate.
    pub fn record_loss(&mut self, loss: f64) -> LrFinderAction {
        if self.done {
            // Already finished: report the reason the run originally stopped.
            let reason = match self.diverged_at_step {
                Some(_) => LrStopReason::Diverged,
                None => LrStopReason::MaxStepsReached,
            };
            return LrFinderAction::Stop(reason);
        }

        let step = self.current_step;
        let lr = self.current_lr;

        // Compute EMA-smoothed loss (bias-corrected)
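        //   avg_t      = beta * avg_{t-1} + (1 - beta) * loss_t
        //   smoothed_t = avg_t / (1 - beta^(t+1))
        // The Adam-style bias correction keeps early steps from being dragged
        // toward the zero-initialised average.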
        self.avg_loss =
            self.config.smooth_beta * self.avg_loss + (1.0 - self.config.smooth_beta) * loss;
        let bias_correction = 1.0 - self.config.smooth_beta.powi(step as i32 + 1);
        let smoothed = self.avg_loss / bias_correction;
        self.smoothed_loss = smoothed;

        // Track best smoothed loss
        if smoothed < self.best_loss {
            self.best_loss = smoothed;
        }

        self.history.push(LrFinderPoint {
            step,
            lr,
            loss,
            smoothed_loss: smoothed,
        });

        self.current_step += 1;
        self.current_lr *= self.lr_multiplier;

        // Check divergence
        if smoothed > self.best_loss * self.config.diverge_threshold {
            self.done = true;
            self.diverged_at_step = Some(step);
            return LrFinderAction::Stop(LrStopReason::Diverged);
        }

        // Check max steps
        if self.current_step >= self.config.num_steps {
            self.done = true;
            return LrFinderAction::Stop(LrStopReason::MaxStepsReached);
        }

        LrFinderAction::Continue
    }

    /// Compute and return the final `LrFinderResult`.
    pub fn result(self) -> LrFinderResult {
        let (suggested_lr, min_loss_lr) = compute_suggested_lr(&self.history);
        LrFinderResult {
            history: self.history,
            suggested_lr,
            min_loss_lr,
            diverged_at_step: self.diverged_at_step,
        }
    }
}

/// Compute the suggested LR and the min-loss LR from a completed history.
fn compute_suggested_lr(history: &[LrFinderPoint]) -> (f64, f64) {
    if history.is_empty() {
        return (1e-3, 1e-3);
    }

    // Find min-loss LR
    let min_pt = history.iter().min_by(|a, b| {
        a.smoothed_loss
            .partial_cmp(&b.smoothed_loss)
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let min_loss_lr = min_pt.map(|p| p.lr).unwrap_or(history[0].lr);

    // Suggested LR = one decade below min-loss LR (as in fastai)
    let suggested_lr = min_loss_lr / 10.0;

    (suggested_lr, min_loss_lr)
}

/// Find the suggested optimal LR from a list of `(lr, loss)` data points.
///
/// Strategy: locate the point where the smoothed loss rate-of-decrease is maximum
/// (steepest descent), then return the LR at that point.
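///
/// A usage sketch (the import path is assumed):
///
/// ```ignore
/// use trustformers_optim::find_optimal_lr;
///
/// let history = vec![(1e-5, 4.0), (1e-4, 3.0), (1e-3, 1.5), (1e-2, 2.0)];
/// let lr = find_optimal_lr(&history);
/// assert!(lr > 0.0);
/// ```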
pub fn find_optimal_lr(history: &[(f64, f64)]) -> f64 {
    if history.is_empty() {
        return 1e-3;
    }
    if history.len() == 1 {
        return history[0].0;
    }

    // Compute EMA smoothing
    let beta = 0.98_f64;
    let mut avg = 0.0_f64;
    let mut smoothed: Vec<f64> = Vec::with_capacity(history.len());
    for (i, &(_, loss)) in history.iter().enumerate() {
        avg = beta * avg + (1.0 - beta) * loss;
        let bias_correction = 1.0 - beta.powi(i as i32 + 1);
        smoothed.push(avg / bias_correction);
    }

    // Find steepest descent (maximum negative slope over consecutive smoothed losses)
    let mut steepest_slope = 0.0_f64;
    let mut steepest_idx = 0_usize;
    for i in 1..smoothed.len() {
        let slope = smoothed[i] - smoothed[i - 1];
        if slope < steepest_slope {
            steepest_slope = slope;
            steepest_idx = i - 1;
        }
    }

    history[steepest_idx].0
}

// ─────────────────────────────────────────── tests ──────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lr_finder_config_default() {
        let cfg = LrFinderConfig::default();
        assert!(cfg.lr_min > 0.0);
        assert!(cfg.lr_max > cfg.lr_min);
        assert!(cfg.num_steps > 0);
        assert!(cfg.diverge_threshold > 1.0);
        assert!((0.0..1.0).contains(&cfg.smooth_beta));
    }

    #[test]
    fn test_lr_finder_multiplier_computation() {
        let cfg = LrFinderConfig {
            lr_min: 1e-7,
            lr_max: 1e-1,
            num_steps: 7,
            ..Default::default()
        };
        let finder = LrFinder::new(cfg.clone()).expect("should construct");
        // After num_steps-1 multiplications, lr_min * mult^(n-1) == lr_max
        let final_lr = cfg.lr_min * finder.lr_multiplier.powi((cfg.num_steps as i32) - 1);
        let rel_err = ((final_lr - cfg.lr_max) / cfg.lr_max).abs();
        assert!(
            rel_err < 1e-9,
            "Expected final lr ≈ lr_max, got rel_err={}",
            rel_err
        );
    }

    #[test]
    fn test_lr_finder_records_history() {
        let cfg = LrFinderConfig {
            num_steps: 5,
            diverge_threshold: 100.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        for _ in 0..5 {
            finder.record_loss(1.0);
        }
        assert_eq!(finder.history.len(), 5);
    }

    #[test]
    fn test_lr_finder_detects_divergence() {
        let cfg = LrFinderConfig {
            num_steps: 20,
            diverge_threshold: 3.0,
            smooth_beta: 0.0, // no smoothing so raw == smoothed
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        // First loss seeds best_loss
        let action = finder.record_loss(1.0);
        assert_eq!(action, LrFinderAction::Continue);
        // Now supply a loss that clearly triggers divergence
        let action = finder.record_loss(10.0);
        assert_eq!(action, LrFinderAction::Stop(LrStopReason::Diverged));
        assert!(finder.is_done());
    }

    #[test]
    fn test_lr_finder_max_steps() {
        let cfg = LrFinderConfig {
            num_steps: 3,
            diverge_threshold: 1000.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        finder.record_loss(1.0);
        finder.record_loss(1.0);
        let action = finder.record_loss(1.0);
        assert_eq!(action, LrFinderAction::Stop(LrStopReason::MaxStepsReached));
        assert!(finder.is_done());
    }

    #[test]
    fn test_lr_finder_result_steepest_descent() {
        // Build a hand-crafted result with an unambiguous steepest descent.
        // Losses: 4.0, 3.5, 2.0, 1.8, 3.0
        //         Δ = -0.5, -1.5, -0.2, +1.2
        // Steepest drop is between index 1 (lr=1e-4) and index 2 (lr=1e-3): slope -1.5
        let mut history = Vec::new();
        let lrs = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1];
        let losses = [4.0, 3.5, 2.0, 1.8, 3.0];
        for (i, (&lr, &loss)) in lrs.iter().zip(losses.iter()).enumerate() {
            history.push(LrFinderPoint {
                step: i,
                lr,
                loss,
                smoothed_loss: loss,
            });
        }
        let result = LrFinderResult {
            history,
            suggested_lr: 1e-4,
            min_loss_lr: 1e-3,
            diverged_at_step: None,
        };
        let sd_lr = result.steepest_descent_lr();
        // Steepest descent starts at lr=1e-4 (slope=-1.5 going to step 2)
        assert_eq!(sd_lr, 1e-4);
    }

    #[test]
    fn test_lr_finder_to_csv() {
        let cfg = LrFinderConfig {
            num_steps: 3,
            diverge_threshold: 1000.0,
            smooth_beta: 0.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        finder.record_loss(1.0);
        finder.record_loss(0.8);
        finder.record_loss(0.6);
        let result = finder.result();
        let csv = result.to_csv();
        assert!(csv.starts_with("step,lr,loss,smoothed_loss\n"));
        let lines: Vec<&str> = csv.lines().collect();
        assert_eq!(lines.len(), 4); // header + 3 data rows
    }

    #[test]
    fn test_lr_finder_to_json() {
        let cfg = LrFinderConfig {
            num_steps: 2,
            diverge_threshold: 1000.0,
            smooth_beta: 0.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        finder.record_loss(1.0);
        finder.record_loss(0.5);
        let result = finder.result();
        let json = result.to_json().expect("to_json");
        assert!(json.contains("\"history\""));
        assert!(json.contains("\"suggested_lr\""));
        assert!(json.contains("\"min_loss_lr\""));
    }

    #[test]
    fn test_lr_finder_to_ascii_plot() {
        let cfg = LrFinderConfig {
            num_steps: 10,
            diverge_threshold: 1000.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        for i in 0..10 {
            finder.record_loss(10.0 - i as f64);
        }
        let result = finder.result();
        let plot = result.to_ascii_plot(40, 10);
        assert!(plot.contains('*'));
    }

    #[test]
    fn test_find_optimal_lr_known_case() {
        // Manually built (lr, loss) pairs with a clear minimum descent region
        let history: Vec<(f64, f64)> = vec![
            (1e-6, 4.0),
            (1e-5, 3.5),
            (1e-4, 2.0),
            (1e-3, 1.0), // steepest descent toward here
            (1e-2, 1.2),
            (1e-1, 2.5),
        ];
        let optimal = find_optimal_lr(&history);
        // The steepest descent should be before 1e-3
        assert!(
            optimal < 1e-2,
            "Expected optimal lr < 1e-2, got {}",
            optimal
        );
    }

    #[test]
    fn test_lr_finder_smooth_loss() {
        let cfg = LrFinderConfig {
            num_steps: 10,
            diverge_threshold: 1000.0,
            smooth_beta: 0.9,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        finder.record_loss(1.0);
        let first_smoothed = finder.history[0].smoothed_loss;
        // With beta=0.9 and step=0, smoothed should be close to the raw loss
        // bias_correction = 1 - 0.9^1 = 0.1; avg = 0.1 * 1.0 = 0.1; smoothed = 0.1/0.1 = 1.0
        assert!((first_smoothed - 1.0).abs() < 1e-10);
    }

    #[test]
    fn test_lr_finder_invalid_config_lr_min_zero() {
        let cfg = LrFinderConfig {
            lr_min: 0.0,
            ..Default::default()
        };
        assert!(LrFinder::new(cfg).is_err());
    }

    #[test]
    fn test_lr_finder_invalid_config_lr_max_lte_min() {
        let cfg = LrFinderConfig {
            lr_min: 0.01,
            lr_max: 0.001,
            ..Default::default()
        };
        assert!(LrFinder::new(cfg).is_err());
    }

    #[test]
    fn test_lr_finder_single_step() {
        let cfg = LrFinderConfig {
            num_steps: 1,
            diverge_threshold: 2.0,
            smooth_beta: 0.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        let action = finder.record_loss(0.5);
        assert_eq!(action, LrFinderAction::Stop(LrStopReason::MaxStepsReached));
        let result = finder.result();
        assert_eq!(result.history.len(), 1);
    }

    #[test]
    fn test_lr_finder_result_min_loss_lr() {
        let cfg = LrFinderConfig {
            num_steps: 5,
            diverge_threshold: 1000.0,
            smooth_beta: 0.0,
            ..Default::default()
        };
        let mut finder = LrFinder::new(cfg).expect("construct");
        // Feed losses: high, lower, lowest, higher, highest
        finder.record_loss(3.0);
        finder.record_loss(2.0);
        finder.record_loss(1.0);
        finder.record_loss(2.5);
        finder.record_loss(3.5);
        let result = finder.result();
        // min_loss_lr should be at step 2
        assert_eq!(result.min_loss_lr, result.history[2].lr);
        // suggested_lr is min_loss_lr / 10
        let expected_suggested = result.min_loss_lr / 10.0;
        assert!((result.suggested_lr - expected_suggested).abs() < 1e-12);
    }
}