Skip to main content

entrenar/ecosystem/realizar/
provenance.rs

1//! Experiment provenance tracking.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6/// Experiment provenance for tracking model lineage.
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct ExperimentProvenance {
9    /// Experiment identifier
10    pub experiment_id: String,
11    /// Run identifier within experiment
12    pub run_id: String,
13    /// Training configuration hash
14    pub config_hash: String,
15    /// Dataset identifier
16    pub dataset_id: Option<String>,
17    /// Base model identifier (for fine-tuned models)
18    pub base_model_id: Option<String>,
19    /// Training metrics at export time
20    pub metrics: HashMap<String, f64>,
21    /// Timestamp of export
22    pub timestamp: chrono::DateTime<chrono::Utc>,
23    /// Git commit hash (if available)
24    pub git_commit: Option<String>,
25    /// Additional custom metadata
26    pub custom: HashMap<String, String>,
27}
28
29impl ExperimentProvenance {
30    /// Create new provenance with required fields.
31    pub fn new(experiment_id: impl Into<String>, run_id: impl Into<String>) -> Self {
32        Self {
33            experiment_id: experiment_id.into(),
34            run_id: run_id.into(),
35            config_hash: String::new(),
36            dataset_id: None,
37            base_model_id: None,
38            metrics: HashMap::new(),
39            timestamp: chrono::Utc::now(),
40            git_commit: None,
41            custom: HashMap::new(),
42        }
43    }
44
45    /// Set configuration hash.
46    pub fn with_config_hash(mut self, hash: impl Into<String>) -> Self {
47        self.config_hash = hash.into();
48        self
49    }
50
51    /// Set dataset identifier.
52    pub fn with_dataset(mut self, dataset_id: impl Into<String>) -> Self {
53        self.dataset_id = Some(dataset_id.into());
54        self
55    }
56
57    /// Set base model identifier.
58    pub fn with_base_model(mut self, model_id: impl Into<String>) -> Self {
59        self.base_model_id = Some(model_id.into());
60        self
61    }
62
63    /// Add a metric.
64    pub fn with_metric(mut self, name: impl Into<String>, value: f64) -> Self {
65        self.metrics.insert(name.into(), value);
66        self
67    }
68
69    /// Add multiple metrics.
70    pub fn with_metrics(mut self, metrics: impl IntoIterator<Item = (String, f64)>) -> Self {
71        self.metrics.extend(metrics);
72        self
73    }
74
75    /// Set git commit hash.
76    pub fn with_git_commit(mut self, commit: impl Into<String>) -> Self {
77        self.git_commit = Some(commit.into());
78        self
79    }
80
81    /// Add custom metadata.
82    pub fn with_custom(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
83        self.custom.insert(key.into(), value.into());
84        self
85    }
86
87    /// Convert to GGUF metadata key-value pairs.
88    pub fn to_metadata_pairs(&self) -> Vec<(String, String)> {
89        let mut pairs = vec![
90            ("entrenar.experiment_id".to_string(), self.experiment_id.clone()),
91            ("entrenar.run_id".to_string(), self.run_id.clone()),
92            ("entrenar.timestamp".to_string(), self.timestamp.to_rfc3339()),
93        ];
94
95        if !self.config_hash.is_empty() {
96            pairs.push(("entrenar.config_hash".to_string(), self.config_hash.clone()));
97        }
98
99        if let Some(ref dataset) = self.dataset_id {
100            pairs.push(("entrenar.dataset_id".to_string(), dataset.clone()));
101        }
102
103        if let Some(ref base) = self.base_model_id {
104            pairs.push(("entrenar.base_model_id".to_string(), base.clone()));
105        }
106
107        if let Some(ref commit) = self.git_commit {
108            pairs.push(("entrenar.git_commit".to_string(), commit.clone()));
109        }
110
111        for (key, value) in &self.metrics {
112            pairs.push((format!("entrenar.metric.{key}"), value.to_string()));
113        }
114
115        for (key, value) in &self.custom {
116            pairs.push((format!("entrenar.custom.{key}"), value.clone()));
117        }
118
119        pairs
120    }
121}