// nuviz_cli/data/metrics.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7/// A single metrics record from a JSONL file.
8#[derive(Debug, Clone, Deserialize)]
9pub struct MetricRecord {
10    pub step: u64,
11    pub timestamp: f64,
12    pub metrics: HashMap<String, f64>,
13    #[serde(default)]
14    #[allow(dead_code)]
15    pub gpu: Option<HashMap<String, f64>>,
16}
17
18/// Read all metric records from a JSONL file and its rotated segments.
19///
20/// When rotation is in effect, files are named `metrics.jsonl`, `metrics.1.jsonl`,
21/// `metrics.2.jsonl`, etc. Higher-numbered files contain older data.
22/// Records are returned in chronological order (oldest first).
23pub fn read_metrics(path: &Path) -> Vec<MetricRecord> {
24    let mut all_records = Vec::new();
25
26    // Find rotated segments (oldest first)
27    let segments = find_rotated_segments(path);
28    for seg_path in segments.iter().rev() {
29        all_records.extend(read_single_jsonl(seg_path));
30    }
31
32    // Read the main file (newest data)
33    all_records.extend(read_single_jsonl(path));
34
35    all_records
36}
37
38/// Read records from a single JSONL file, skipping malformed lines.
39fn read_single_jsonl(path: &Path) -> Vec<MetricRecord> {
40    let file = match File::open(path) {
41        Ok(f) => f,
42        Err(_) => return Vec::new(),
43    };
44
45    let reader = BufReader::new(file);
46    let mut records = Vec::new();
47
48    for line in reader.lines() {
49        let line = match line {
50            Ok(l) => l,
51            Err(_) => continue,
52        };
53
54        let trimmed = line.trim();
55        if trimmed.is_empty() {
56            continue;
57        }
58
59        match serde_json::from_str::<MetricRecord>(trimmed) {
60            Ok(record) => records.push(record),
61            Err(e) => {
62                eprintln!("[nuviz] Warning: skipping malformed JSONL line: {e}");
63            }
64        }
65    }
66
67    records
68}
69
/// Find rotated JSONL segments (e.g., metrics.1.jsonl, metrics.2.jsonl).
///
/// A segment is any entry in the directory of `main_path` whose name is
/// `<stem>.<N>.<ext>`, where `<stem>` and `<ext>` come from `main_path`
/// (falling back to `metrics` and `jsonl` when either is missing or not
/// valid UTF-8) and `<N>` parses as a `u32`.
///
/// Returns paths sorted by number ascending (1, 2, 3, ...). The result is
/// empty when `main_path` has no parent or the directory cannot be listed.
/// Returned paths are built from the parent of `main_path`, so a relative
/// `main_path` yields relative segment paths.
fn find_rotated_segments(main_path: &Path) -> Vec<std::path::PathBuf> {
    let parent = match main_path.parent() {
        Some(p) => p,
        None => return Vec::new(),
    };

    // A bare file name such as `metrics.jsonl` has the empty path as its
    // parent, and listing "" fails. List the current directory instead, but
    // keep joining results onto `parent` so they stay relative like the input.
    let scan_dir = if parent.as_os_str().is_empty() {
        Path::new(".")
    } else {
        parent
    };

    let stem = main_path
        .file_stem()
        .and_then(|s| s.to_str())
        .unwrap_or("metrics");
    let ext = main_path
        .extension()
        .and_then(|s| s.to_str())
        .unwrap_or("jsonl");

    // Pattern pieces for `stem.N.ext`; built once rather than per entry.
    let prefix = format!("{stem}.");
    let suffix = format!(".{ext}");

    let entries = match std::fs::read_dir(scan_dir) {
        Ok(e) => e,
        Err(_) => return Vec::new(),
    };

    let mut segments: Vec<(u32, std::path::PathBuf)> = entries
        .flatten()
        .filter_map(|entry| {
            let file_name = entry.file_name();
            let name = file_name.to_string_lossy();
            let num = name
                .strip_prefix(prefix.as_str())?
                .strip_suffix(suffix.as_str())?
                .parse::<u32>()
                .ok()?;
            Some((num, parent.join(&file_name)))
        })
        .collect();

    segments.sort_by_key(|(n, _)| *n);
    segments.into_iter().map(|(_, p)| p).collect()
}
114
115/// Read only the last record from a JSONL file (efficient for large files).
116#[allow(dead_code)]
117pub fn read_last_record(path: &Path) -> Option<MetricRecord> {
118    let file = File::open(path).ok()?;
119    let reader = BufReader::new(file);
120    let mut last: Option<MetricRecord> = None;
121
122    for line in reader.lines().map_while(Result::ok) {
123        let trimmed = line.trim();
124        if !trimmed.is_empty() {
125            if let Ok(record) = serde_json::from_str::<MetricRecord>(trimmed) {
126                last = Some(record);
127            }
128        }
129    }
130
131    last
132}
133
134/// Extract the best value for each metric across all records.
135/// "loss", "lpips", "error", "mse", "mae" are minimized; others are maximized.
136pub fn best_metrics(records: &[MetricRecord]) -> HashMap<String, f64> {
137    let mut best: HashMap<String, f64> = HashMap::new();
138
139    for record in records {
140        for (name, &value) in &record.metrics {
141            if value.is_nan() || value.is_infinite() {
142                continue;
143            }
144
145            let is_better = match best.get(name) {
146                None => true,
147                Some(&current) => {
148                    if is_minimize_metric(name) {
149                        value < current
150                    } else {
151                        value > current
152                    }
153                }
154            };
155
156            if is_better {
157                best.insert(name.clone(), value);
158            }
159        }
160    }
161
162    best
163}
164
/// How the x axis is chosen when series from several experiments are
/// compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AlignMode {
    /// Use each record's step number as x (default).
    Step,
    /// Use each record's timestamp minus the first record's timestamp as x.
    WallTime,
}
173
174/// Extract an aligned series for a given metric.
175///
176/// Returns (x_values, y_values) where x depends on the alignment mode.
177pub fn align_series(
178    records: &[MetricRecord],
179    metric: &str,
180    mode: AlignMode,
181) -> (Vec<f64>, Vec<f64>) {
182    let start_time = records.first().map(|r| r.timestamp).unwrap_or(0.0);
183
184    let mut xs = Vec::new();
185    let mut ys = Vec::new();
186
187    for record in records {
188        if let Some(&value) = record.metrics.get(metric) {
189            if !value.is_finite() {
190                continue;
191            }
192            let x = match mode {
193                AlignMode::Step => record.step as f64,
194                AlignMode::WallTime => record.timestamp - start_time,
195            };
196            xs.push(x);
197            ys.push(value);
198        }
199    }
200
201    (xs, ys)
202}
203
/// Report whether lower values of the metric `name` count as better.
///
/// The check is a case-insensitive substring match against a fixed keyword
/// list, so names such as `train/Loss` or `rmse` qualify.
pub fn is_minimize_metric(name: &str) -> bool {
    const MINIMIZE_KEYWORDS: [&str; 5] = ["loss", "lpips", "error", "mse", "mae"];

    let normalized = name.to_lowercase();
    MINIMIZE_KEYWORDS
        .iter()
        .any(|&keyword| normalized.contains(keyword))
}
212
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Write each string as one line of a fresh temporary file.
    fn write_jsonl(lines: &[&str]) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        for line in lines {
            writeln!(f, "{line}").unwrap();
        }
        f
    }

    /// Build an in-memory record without GPU data.
    fn record(step: u64, timestamp: f64, metrics: &[(&str, f64)]) -> MetricRecord {
        MetricRecord {
            step,
            timestamp,
            metrics: metrics
                .iter()
                .map(|&(name, value)| (name.to_string(), value))
                .collect(),
            gpu: None,
        }
    }

    #[test]
    fn test_read_valid_jsonl() {
        let f = write_jsonl(&[
            r#"{"step":0,"timestamp":1.0,"metrics":{"loss":1.0,"psnr":20.0}}"#,
            r#"{"step":1,"timestamp":2.0,"metrics":{"loss":0.5,"psnr":25.0}}"#,
        ]);
        let records = read_metrics(f.path());
        assert_eq!(records.len(), 2);
        assert_eq!(records[0].step, 0);
        assert_eq!(records[1].step, 1);
        assert!((records[0].metrics["loss"] - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_skip_malformed_lines() {
        let f = write_jsonl(&[
            r#"{"step":0,"timestamp":1.0,"metrics":{"loss":1.0}}"#,
            "not json at all",
            r#"{"step":2,"timestamp":3.0,"metrics":{"loss":0.3}}"#,
        ]);
        let records = read_metrics(f.path());
        assert_eq!(records.len(), 2);
        assert_eq!(records[0].step, 0);
        assert_eq!(records[1].step, 2);
    }

    #[test]
    fn test_skip_empty_lines() {
        let f = write_jsonl(&[
            r#"{"step":0,"timestamp":1.0,"metrics":{"loss":1.0}}"#,
            "",
            r#"{"step":1,"timestamp":2.0,"metrics":{"loss":0.5}}"#,
        ]);
        let records = read_metrics(f.path());
        assert_eq!(records.len(), 2);
    }

    #[test]
    fn test_read_missing_file() {
        let records = read_metrics(Path::new("/nonexistent/path/metrics.jsonl"));
        assert!(records.is_empty());
    }

    #[test]
    fn test_read_rotated_segments_in_chronological_order() {
        let dir = tempfile::tempdir().unwrap();
        // Higher segment numbers hold older data; the main file is newest.
        std::fs::write(
            dir.path().join("metrics.2.jsonl"),
            "{\"step\":0,\"timestamp\":1.0,\"metrics\":{\"loss\":1.0}}\n",
        )
        .unwrap();
        std::fs::write(
            dir.path().join("metrics.1.jsonl"),
            "{\"step\":1,\"timestamp\":2.0,\"metrics\":{\"loss\":0.8}}\n",
        )
        .unwrap();
        std::fs::write(
            dir.path().join("metrics.jsonl"),
            "{\"step\":2,\"timestamp\":3.0,\"metrics\":{\"loss\":0.6}}\n",
        )
        .unwrap();

        let records = read_metrics(&dir.path().join("metrics.jsonl"));
        let steps: Vec<u64> = records.iter().map(|r| r.step).collect();
        assert_eq!(steps, vec![0, 1, 2]);
    }

    #[test]
    fn test_gpu_field_optional() {
        let f = write_jsonl(&[
            r#"{"step":0,"timestamp":1.0,"metrics":{"loss":1.0}}"#,
            r#"{"step":1,"timestamp":2.0,"metrics":{"loss":0.5},"gpu":{"util":87,"mem_used":10240}}"#,
        ]);
        let records = read_metrics(f.path());
        assert_eq!(records.len(), 2);
        assert!(records[0].gpu.is_none());
        assert!(records[1].gpu.is_some());
    }

    #[test]
    fn test_read_last_record() {
        let f = write_jsonl(&[
            r#"{"step":0,"timestamp":1.0,"metrics":{"loss":1.0}}"#,
            r#"{"step":99,"timestamp":100.0,"metrics":{"loss":0.01}}"#,
        ]);
        let last = read_last_record(f.path()).unwrap();
        assert_eq!(last.step, 99);
    }

    #[test]
    fn test_best_metrics_minimize_loss() {
        let records = vec![
            record(0, 1.0, &[("loss", 1.0), ("psnr", 20.0)]),
            record(1, 2.0, &[("loss", 0.5), ("psnr", 25.0)]),
            record(2, 3.0, &[("loss", 0.8), ("psnr", 23.0)]),
        ];
        let best = best_metrics(&records);
        assert!((best["loss"] - 0.5).abs() < f64::EPSILON);
        assert!((best["psnr"] - 25.0).abs() < f64::EPSILON);
    }

    #[test]
    fn test_best_metrics_skips_nan() {
        let records = vec![
            record(0, 1.0, &[("loss", 0.5)]),
            record(1, 2.0, &[("loss", f64::NAN)]),
        ];
        let best = best_metrics(&records);
        assert!((best["loss"] - 0.5).abs() < f64::EPSILON);
    }

    #[test]
    fn test_best_metrics_skips_infinite() {
        let records = vec![
            record(0, 1.0, &[("loss", 0.5), ("psnr", f64::INFINITY)]),
            record(1, 2.0, &[("loss", f64::NEG_INFINITY)]),
        ];
        let best = best_metrics(&records);
        assert!((best["loss"] - 0.5).abs() < f64::EPSILON);
        // A metric with no finite value never enters the result.
        assert!(!best.contains_key("psnr"));
    }

    #[test]
    fn test_align_series_by_step() {
        let records = vec![
            record(0, 10.0, &[("loss", 1.0)]),
            record(1, 12.5, &[("loss", f64::NAN)]),
            record(2, 15.0, &[("loss", 0.5)]),
            record(3, 17.0, &[("psnr", 30.0)]),
        ];
        let (xs, ys) = align_series(&records, "loss", AlignMode::Step);
        assert_eq!(xs, vec![0.0, 2.0]);
        assert_eq!(ys, vec![1.0, 0.5]);
    }

    #[test]
    fn test_align_series_by_wall_time() {
        let records = vec![
            record(0, 10.0, &[("loss", 1.0)]),
            record(1, 12.5, &[("loss", f64::NAN)]),
            record(2, 15.0, &[("loss", 0.5)]),
        ];
        let (xs, ys) = align_series(&records, "loss", AlignMode::WallTime);
        assert_eq!(xs, vec![0.0, 5.0]);
        assert_eq!(ys, vec![1.0, 0.5]);
    }

    #[test]
    fn test_align_series_empty_input() {
        let (xs, ys) = align_series(&[], "loss", AlignMode::WallTime);
        assert!(xs.is_empty());
        assert!(ys.is_empty());
    }

    #[test]
    fn test_is_minimize_metric() {
        assert!(is_minimize_metric("train/Loss"));
        assert!(is_minimize_metric("LPIPS"));
        assert!(is_minimize_metric("rmse"));
        assert!(is_minimize_metric("val_mae"));
        assert!(!is_minimize_metric("psnr"));
        assert!(!is_minimize_metric("ssim"));
    }
}