Skip to main content

nuviz_cli/data/
experiment.rs

1use std::collections::HashMap;
2use std::path::{Path, PathBuf};
3
4use crate::data::meta;
5use crate::data::metrics;
6
/// A discovered experiment with its metadata and location.
///
/// Values are assembled by `load_experiment` from the experiment
/// directory's `meta.json` (via `meta::read_meta`) and/or `metrics.jsonl`.
#[derive(Debug, Clone)]
pub struct Experiment {
    /// Name recorded in the metadata, or the directory name when the
    /// metadata is missing or carries no name.
    pub name: String,
    /// Project the experiment belongs to: the enclosing project directory's
    /// name when discovered one level down, otherwise the metadata's
    /// `project` value (if any).
    pub project: Option<String>,
    /// Path of the experiment directory.
    pub dir: PathBuf,
    /// Status from the metadata; `"unknown"` when the metadata has no
    /// status, `"running"` when no metadata was returned at all.
    pub status: String,
    /// `total_steps` from the metadata, or the `step` of the last record in
    /// `metrics.jsonl` when no metadata was returned.
    pub total_steps: Option<u64>,
    /// Best metrics keyed by metric name: taken from the metadata when it
    /// has any, otherwise computed from `metrics.jsonl`.
    pub best_metrics: HashMap<String, f64>,
    /// Start time from the metadata; `discover_experiments` sorts on this
    /// string in descending order.
    pub start_time: Option<String>,
    /// End time from the metadata.
    // NOTE(review): not read anywhere in this file — presumably kept for
    // future consumers; confirm before removing the `allow`.
    #[allow(dead_code)]
    pub end_time: Option<String>,
    /// Seed from the metadata.
    // NOTE(review): not read anywhere in this file — confirm the `allow`
    // is still needed.
    #[allow(dead_code)]
    pub seed: Option<u64>,
    /// Configuration hash from the metadata; `group_by_config` uses it as
    /// the grouping key when present.
    pub config_hash: Option<String>,
    /// Configuration JSON from the metadata, if any.
    pub config: Option<serde_json::Value>,
    /// Tags from the metadata; empty when no metadata was returned.
    // NOTE(review): not read anywhere in this file — confirm the `allow`
    // is still needed.
    #[allow(dead_code)]
    pub tags: Vec<String>,
}
26
27/// Resolve the base directory for experiments.
28pub fn resolve_base_dir(dir_override: Option<&str>) -> PathBuf {
29    if let Some(d) = dir_override {
30        return PathBuf::from(d);
31    }
32
33    if let Ok(env_dir) = std::env::var("NUVIZ_DIR") {
34        return PathBuf::from(env_dir);
35    }
36
37    dirs::home_dir()
38        .unwrap_or_else(|| PathBuf::from("."))
39        .join(".nuviz")
40        .join("experiments")
41}
42
43/// Discover all experiments under the base directory.
44///
45/// An experiment directory is any directory that contains `metrics.jsonl`
46/// or `meta.json`.
47pub fn discover_experiments(base_dir: &Path) -> Vec<Experiment> {
48    let mut experiments = Vec::new();
49
50    if !base_dir.exists() {
51        return experiments;
52    }
53
54    // Walk up to 2 levels deep: base_dir/experiment or base_dir/project/experiment
55    discover_in_dir(base_dir, None, &mut experiments);
56
57    // Sort by start_time descending (most recent first)
58    experiments.sort_by(|a, b| b.start_time.cmp(&a.start_time));
59
60    experiments
61}
62
63fn discover_in_dir(dir: &Path, project: Option<&str>, experiments: &mut Vec<Experiment>) {
64    let entries = match std::fs::read_dir(dir) {
65        Ok(e) => e,
66        Err(_) => return,
67    };
68
69    for entry in entries.flatten() {
70        let path = entry.path();
71        if !path.is_dir() {
72            continue;
73        }
74
75        let has_metrics = path.join("metrics.jsonl").exists();
76        let has_meta = path.join("meta.json").exists();
77
78        if has_metrics || has_meta {
79            // This is an experiment directory
80            let exp = load_experiment(&path, project);
81            experiments.push(exp);
82        } else if project.is_none() {
83            // Could be a project directory — recurse one level
84            let project_name = path
85                .file_name()
86                .and_then(|n| n.to_str())
87                .unwrap_or("unknown");
88            discover_in_dir(&path, Some(project_name), experiments);
89        }
90    }
91}
92
93fn load_experiment(dir: &Path, project: Option<&str>) -> Experiment {
94    let dir_name = dir
95        .file_name()
96        .and_then(|n| n.to_str())
97        .unwrap_or("unknown")
98        .to_string();
99
100    let meta_data = meta::read_meta(dir);
101
102    // If meta.json has best_metrics, use those. Otherwise compute from JSONL.
103    let (total_steps, best, status) = if let Some(ref m) = meta_data {
104        (
105            m.total_steps,
106            if m.best_metrics.is_empty() {
107                compute_metrics_from_jsonl(dir)
108            } else {
109                m.best_metrics.clone()
110            },
111            m.status.clone().unwrap_or_else(|| "unknown".into()),
112        )
113    } else {
114        let (steps, best) = compute_steps_and_metrics(dir);
115        (steps, best, "running".into())
116    };
117
118    Experiment {
119        name: meta_data
120            .as_ref()
121            .and_then(|m| m.name.clone())
122            .unwrap_or(dir_name),
123        project: project
124            .map(String::from)
125            .or_else(|| meta_data.as_ref().and_then(|m| m.project.clone())),
126        dir: dir.to_path_buf(),
127        status,
128        total_steps,
129        best_metrics: best,
130        start_time: meta_data.as_ref().and_then(|m| m.start_time.clone()),
131        end_time: meta_data.as_ref().and_then(|m| m.end_time.clone()),
132        seed: meta_data.as_ref().and_then(|m| m.seed),
133        config_hash: meta_data.as_ref().and_then(|m| m.config_hash.clone()),
134        config: meta_data.as_ref().and_then(|m| m.config.clone()),
135        tags: meta_data
136            .as_ref()
137            .map(|m| m.tags.clone())
138            .unwrap_or_default(),
139    }
140}
141
142fn compute_metrics_from_jsonl(dir: &Path) -> HashMap<String, f64> {
143    let records = metrics::read_metrics(&dir.join("metrics.jsonl"));
144    metrics::best_metrics(&records)
145}
146
147fn compute_steps_and_metrics(dir: &Path) -> (Option<u64>, HashMap<String, f64>) {
148    let records = metrics::read_metrics(&dir.join("metrics.jsonl"));
149    let steps = records.last().map(|r| r.step);
150    let best = metrics::best_metrics(&records);
151    (steps, best)
152}
153
154/// Group experiments by config_hash for multi-seed aggregation.
155///
156/// Falls back to grouping by name prefix (stripping `_seed\d+` or `_s\d+` suffix).
157/// Returns a map of group_key -> list of experiments.
158pub fn group_by_config(experiments: &[Experiment]) -> HashMap<String, Vec<&Experiment>> {
159    let mut groups: HashMap<String, Vec<&Experiment>> = HashMap::new();
160
161    for exp in experiments {
162        let key = if let Some(ref hash) = exp.config_hash {
163            hash.clone()
164        } else {
165            strip_seed_suffix(&exp.name)
166        };
167        groups.entry(key).or_default().push(exp);
168    }
169
170    groups
171}
172
/// Strip a trailing seed marker from an experiment name for grouping.
///
/// Removes a final `_seed<digits>` or `_s<digits>` (ASCII digits, at least
/// one), e.g. `run_seed42` -> `run`, `run_s3` -> `run`. Names without such
/// a suffix are returned unchanged.
fn strip_seed_suffix(name: &str) -> String {
    // Peel off the trailing digit run; what is left must end in a marker.
    let stem = name.trim_end_matches(|c: char| c.is_ascii_digit());

    // No trailing digits at all means there is no seed number to strip.
    if stem.len() == name.len() {
        return name.to_string();
    }

    // `_seed` is tried before `_s`, although a stem can only end in one of them.
    ["_seed", "_s"]
        .iter()
        .find_map(|marker| stem.strip_suffix(*marker))
        .unwrap_or(name)
        .to_string()
}
191
192/// Filter experiments by project name.
193pub fn filter_by_project(experiments: &[Experiment], project: &str) -> Vec<Experiment> {
194    experiments
195        .iter()
196        .filter(|e| e.project.as_deref() == Some(project))
197        .cloned()
198        .collect()
199}
200
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Create an experiment directory at `base/<project>/<name>` (or
    /// `base/<name>` without a project) containing a `metrics.jsonl` with
    /// `steps` records and a matching `meta.json`.
    fn create_experiment(base: &Path, project: Option<&str>, name: &str, steps: u32) {
        let dir = match project {
            Some(p) => base.join(p).join(name),
            None => base.join(name),
        };
        fs::create_dir_all(&dir).unwrap();

        // Write metrics.jsonl: one record per step with a decreasing loss.
        let mut jsonl = String::new();
        for i in 0..steps {
            jsonl.push_str(&format!(
                r#"{{"step":{},"timestamp":{},"metrics":{{"loss":{}}}}}"#,
                i,
                i as f64,
                1.0 - (i as f64 / steps as f64)
            ));
            jsonl.push('\n');
        }
        fs::write(dir.join("metrics.jsonl"), jsonl).unwrap();

        // Write meta.json; the project is a JSON string or `null`.
        let project_json =
            project.map_or_else(|| "null".to_string(), |p| format!(r#""{p}""#));
        let meta = format!(
            r#"{{"name":"{}","project":{},"status":"done","total_steps":{},"best_metrics":{{"loss":0.01}}}}"#,
            name, project_json, steps,
        );
        fs::write(dir.join("meta.json"), meta).unwrap();
    }

    #[test]
    fn test_discover_flat_experiments() {
        let dir = tempfile::tempdir().unwrap();
        create_experiment(dir.path(), None, "exp-001", 10);
        create_experiment(dir.path(), None, "exp-002", 20);

        let exps = discover_experiments(dir.path());
        assert_eq!(exps.len(), 2);
    }

    #[test]
    fn test_discover_project_nested() {
        let dir = tempfile::tempdir().unwrap();
        create_experiment(dir.path(), Some("project_a"), "exp-001", 10);
        create_experiment(dir.path(), Some("project_a"), "exp-002", 20);
        create_experiment(dir.path(), Some("project_b"), "exp-003", 5);

        let exps = discover_experiments(dir.path());
        assert_eq!(exps.len(), 3);
    }

    #[test]
    fn test_discover_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let exps = discover_experiments(dir.path());
        assert!(exps.is_empty());
    }

    #[test]
    fn test_discover_nonexistent_dir() {
        let exps = discover_experiments(Path::new("/nonexistent/path"));
        assert!(exps.is_empty());
    }

    #[test]
    fn test_filter_by_project() {
        let dir = tempfile::tempdir().unwrap();
        create_experiment(dir.path(), Some("alpha"), "exp-1", 10);
        create_experiment(dir.path(), Some("beta"), "exp-2", 10);

        let all = discover_experiments(dir.path());
        let filtered = filter_by_project(&all, "alpha");
        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].name, "exp-1");
    }

    #[test]
    fn test_group_by_config_falls_back_to_name() {
        // The generated meta.json has no config hash, so grouping must use
        // the experiment name with its seed suffix removed.
        let dir = tempfile::tempdir().unwrap();
        create_experiment(dir.path(), None, "run_seed1", 3);
        create_experiment(dir.path(), None, "run_seed2", 3);
        create_experiment(dir.path(), None, "other", 3);

        let all = discover_experiments(dir.path());
        let groups = group_by_config(&all);
        assert_eq!(groups.len(), 2);
        assert_eq!(groups["run"].len(), 2);
        assert_eq!(groups["other"].len(), 1);
    }

    #[test]
    fn test_strip_seed_suffix() {
        assert_eq!(strip_seed_suffix("exp_seed42"), "exp");
        assert_eq!(strip_seed_suffix("exp_s3"), "exp");
        assert_eq!(strip_seed_suffix("run_s12_seed7"), "run_s12");
        // No digits after the marker, or no marker before the digits.
        assert_eq!(strip_seed_suffix("exp_seed"), "exp_seed");
        assert_eq!(strip_seed_suffix("exp_s"), "exp_s");
        assert_eq!(strip_seed_suffix("exp42"), "exp42");
        assert_eq!(strip_seed_suffix("exp_seed4x"), "exp_seed4x");
    }

    #[test]
    fn test_resolve_base_dir_default() {
        let dir = resolve_base_dir(None);

        // The expectation depends on the environment the tests run in:
        // asserting on `.nuviz` unconditionally fails whenever NUVIZ_DIR
        // happens to be set.
        match std::env::var("NUVIZ_DIR") {
            Ok(env_dir) => assert_eq!(dir, PathBuf::from(env_dir)),
            Err(std::env::VarError::NotPresent) => {
                assert!(dir.ends_with(Path::new(".nuviz").join("experiments")));
            }
            // A non-UTF-8 value cannot be compared reliably here; nothing to assert.
            Err(std::env::VarError::NotUnicode(_)) => {}
        }
    }

    #[test]
    fn test_resolve_base_dir_override() {
        let dir = resolve_base_dir(Some("/custom/path"));
        assert_eq!(dir, PathBuf::from("/custom/path"));
    }
}