entrenar/ecosystem/realizar/
provenance.rs1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct ExperimentProvenance {
9 pub experiment_id: String,
11 pub run_id: String,
13 pub config_hash: String,
15 pub dataset_id: Option<String>,
17 pub base_model_id: Option<String>,
19 pub metrics: HashMap<String, f64>,
21 pub timestamp: chrono::DateTime<chrono::Utc>,
23 pub git_commit: Option<String>,
25 pub custom: HashMap<String, String>,
27}
28
29impl ExperimentProvenance {
30 pub fn new(experiment_id: impl Into<String>, run_id: impl Into<String>) -> Self {
32 Self {
33 experiment_id: experiment_id.into(),
34 run_id: run_id.into(),
35 config_hash: String::new(),
36 dataset_id: None,
37 base_model_id: None,
38 metrics: HashMap::new(),
39 timestamp: chrono::Utc::now(),
40 git_commit: None,
41 custom: HashMap::new(),
42 }
43 }
44
45 pub fn with_config_hash(mut self, hash: impl Into<String>) -> Self {
47 self.config_hash = hash.into();
48 self
49 }
50
51 pub fn with_dataset(mut self, dataset_id: impl Into<String>) -> Self {
53 self.dataset_id = Some(dataset_id.into());
54 self
55 }
56
57 pub fn with_base_model(mut self, model_id: impl Into<String>) -> Self {
59 self.base_model_id = Some(model_id.into());
60 self
61 }
62
63 pub fn with_metric(mut self, name: impl Into<String>, value: f64) -> Self {
65 self.metrics.insert(name.into(), value);
66 self
67 }
68
69 pub fn with_metrics(mut self, metrics: impl IntoIterator<Item = (String, f64)>) -> Self {
71 self.metrics.extend(metrics);
72 self
73 }
74
75 pub fn with_git_commit(mut self, commit: impl Into<String>) -> Self {
77 self.git_commit = Some(commit.into());
78 self
79 }
80
81 pub fn with_custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
83 self.custom.insert(key.into(), value.into());
84 self
85 }
86
87 pub fn to_metadata_pairs(&self) -> Vec<(String, String)> {
89 let mut pairs = vec![
90 ("entrenar.experiment_id".to_string(), self.experiment_id.clone()),
91 ("entrenar.run_id".to_string(), self.run_id.clone()),
92 ("entrenar.timestamp".to_string(), self.timestamp.to_rfc3339()),
93 ];
94
95 if !self.config_hash.is_empty() {
96 pairs.push(("entrenar.config_hash".to_string(), self.config_hash.clone()));
97 }
98
99 if let Some(ref dataset) = self.dataset_id {
100 pairs.push(("entrenar.dataset_id".to_string(), dataset.clone()));
101 }
102
103 if let Some(ref base) = self.base_model_id {
104 pairs.push(("entrenar.base_model_id".to_string(), base.clone()));
105 }
106
107 if let Some(ref commit) = self.git_commit {
108 pairs.push(("entrenar.git_commit".to_string(), commit.clone()));
109 }
110
111 for (key, value) in &self.metrics {
112 pairs.push((format!("entrenar.metric.{key}"), value.to_string()));
113 }
114
115 for (key, value) in &self.custom {
116 pairs.push((format!("entrenar.custom.{key}"), value.clone()));
117 }
118
119 pairs
120 }
121}