Skip to main content

dreamwell_intelligence/
metrics.rs

1// Training metrics — collection, reporting, and CSV export.
2//
3// Clean Compute: all metrics are scalar. No heap allocation per-epoch.
4
5use crate::train::EpochMetrics;
6
7/// Accumulated training metrics.
8pub struct TrainingReport {
9    pub epochs: Vec<EpochMetrics>,
10    pub total_tokens: usize,
11}
12
13impl TrainingReport {
14    pub fn new() -> Self {
15        Self {
16            epochs: Vec::new(),
17            total_tokens: 0,
18        }
19    }
20
21    pub fn record(&mut self, m: EpochMetrics) {
22        self.epochs.push(m);
23    }
24
25    /// Average throughput: tokens processed per second.
26    pub fn throughput(&self) -> f32 {
27        if self.epochs.is_empty() {
28            return 0.0;
29        }
30        let total_ms: f32 = self.epochs.iter().map(|e| e.elapsed_ms).sum();
31        if total_ms < 1e-6 {
32            return 0.0;
33        }
34        (self.total_tokens as f32 * self.epochs.len() as f32) / (total_ms / 1000.0)
35    }
36
37    /// Print summary table.
38    pub fn print_summary(&self) {
39        if self.epochs.is_empty() {
40            return;
41        }
42        let first = &self.epochs[0];
43        let last = self.epochs.last().unwrap();
44        println!(
45            "Training: {} epochs, {} params",
46            self.epochs.len(),
47            first.params_trained
48        );
49        println!(
50            "Loss:     {:.4} → {:.4} (Δ={:+.4})",
51            first.loss,
52            last.loss,
53            last.loss - first.loss
54        );
55        println!("F:        {:.4} → {:.4}", first.free_energy, last.free_energy);
56        println!("|∇|:      {:.6} → {:.6}", first.grad_norm, last.grad_norm);
57        println!("LR:       {:.5} → {:.5}", first.learning_rate, last.learning_rate);
58        println!("Time:     {:.0}ms/epoch avg", self.avg_epoch_ms());
59        println!("Throughput: {:.0} tokens/s", self.throughput());
60    }
61
62    /// Export to CSV.
63    pub fn export_csv(&self, path: &std::path::Path) -> std::io::Result<()> {
64        use std::io::Write;
65        let mut f = std::fs::File::create(path)?;
66        writeln!(f, "epoch,loss,free_energy,grad_norm,learning_rate,elapsed_ms,params")?;
67        for e in &self.epochs {
68            writeln!(
69                f,
70                "{},{:.6},{:.6},{:.8},{:.6},{:.1},{}",
71                e.epoch, e.loss, e.free_energy, e.grad_norm, e.learning_rate, e.elapsed_ms, e.params_trained
72            )?;
73        }
74        Ok(())
75    }
76
77    fn avg_epoch_ms(&self) -> f32 {
78        if self.epochs.is_empty() {
79            return 0.0;
80        }
81        self.epochs.iter().map(|e| e.elapsed_ms).sum::<f32>() / self.epochs.len() as f32
82    }
83}
84
85impl Default for TrainingReport {
86    fn default() -> Self {
87        Self::new()
88    }
89}
90
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty report must be queryable and printable without panicking.
    #[test]
    fn report_empty_is_safe() {
        let r = TrainingReport::new();
        assert_eq!(r.throughput(), 0.0);
        r.print_summary(); // should not panic
    }

    /// A single recorded epoch round-trips through `export_csv` as a header
    /// line plus exactly one data row.
    #[test]
    fn csv_export() {
        let mut r = TrainingReport::new();
        r.record(EpochMetrics {
            epoch: 0,
            loss: 3.5,
            free_energy: -0.5,
            grad_norm: 0.01,
            elapsed_ms: 100.0,
            learning_rate: 0.03,
            params_trained: 544,
        });

        // Include the process id so concurrent test runs on the same machine
        // do not overwrite each other's file.
        let path = std::env::temp_dir()
            .join(format!("qgpt_test_metrics_{}.csv", std::process::id()));
        let exported = r.export_csv(&path);
        let content = std::fs::read_to_string(&path);
        // Remove the file before asserting so a failing assertion leaves
        // nothing behind in the temp directory.
        let _ = std::fs::remove_file(&path);

        exported.expect("CSV export failed");
        let content = content.expect("CSV file should be readable");
        let mut lines = content.lines();
        assert_eq!(
            lines.next(),
            Some("epoch,loss,free_energy,grad_norm,learning_rate,elapsed_ms,params"),
            "CSV should have header"
        );
        let row = lines.next().expect("CSV should have data");
        assert!(row.starts_with("0,3.500000,"), "CSV should have data");
        assert!(lines.next().is_none(), "CSV should have exactly one data row");
    }
}