Skip to main content

datasynth_runtime/
run_manifest.rs

//! Run manifest and metadata tracking for reproducibility.
//!
//! This module provides structures for capturing complete generation run metadata,
//! enabling reproducibility and traceability of generated data.
5
6use chrono::{DateTime, Utc};
7use datasynth_config::schema::GeneratorConfig;
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::HashMap;
11use std::fs::File;
12use std::io::Write;
13use std::path::Path;
14use uuid::Uuid;
15
16use super::EnhancedGenerationStatistics;
17
18/// Complete manifest of a generation run for reproducibility.
19#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct RunManifest {
21    /// Unique identifier for this run.
22    pub run_id: String,
23    /// Timestamp when generation started.
24    pub started_at: DateTime<Utc>,
25    /// Timestamp when generation completed.
26    pub completed_at: Option<DateTime<Utc>>,
27    /// SHA-256 hash of the configuration (for quick comparison).
28    pub config_hash: String,
29    /// Complete configuration snapshot.
30    pub config_snapshot: GeneratorConfig,
31    /// Seed used for random number generation.
32    pub seed: u64,
33    /// Scenario tags for categorization.
34    #[serde(default)]
35    pub scenario_tags: Vec<String>,
36    /// Generation statistics.
37    #[serde(default)]
38    pub statistics: Option<EnhancedGenerationStatistics>,
39    /// Duration in seconds.
40    pub duration_seconds: Option<f64>,
41    /// Version of the generator.
42    pub generator_version: String,
43    /// Additional metadata.
44    #[serde(default)]
45    pub metadata: HashMap<String, String>,
46    /// Output directory path.
47    pub output_directory: Option<String>,
48    /// List of output files generated.
49    #[serde(default)]
50    pub output_files: Vec<OutputFileInfo>,
51    /// Any warnings or notes from the generation.
52    #[serde(default)]
53    pub warnings: Vec<String>,
54}
55
56/// Information about an output file.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct OutputFileInfo {
59    /// Relative path from output directory.
60    pub path: String,
61    /// File format (csv, json, parquet).
62    pub format: String,
63    /// Record count.
64    pub record_count: Option<usize>,
65    /// File size in bytes.
66    pub size_bytes: Option<u64>,
67}
68
69impl RunManifest {
70    /// Creates a new run manifest.
71    pub fn new(config: &GeneratorConfig, seed: u64) -> Self {
72        let run_id = Uuid::new_v4().to_string();
73        let config_hash = Self::hash_config(config);
74
75        Self {
76            run_id,
77            started_at: Utc::now(),
78            completed_at: None,
79            config_hash,
80            config_snapshot: config.clone(),
81            seed,
82            scenario_tags: Vec::new(),
83            statistics: None,
84            duration_seconds: None,
85            generator_version: env!("CARGO_PKG_VERSION").to_string(),
86            metadata: HashMap::new(),
87            output_directory: None,
88            output_files: Vec::new(),
89            warnings: Vec::new(),
90        }
91    }
92
93    /// Computes SHA-256 hash of the configuration.
94    fn hash_config(config: &GeneratorConfig) -> String {
95        let json = serde_json::to_string(config).unwrap_or_default();
96        let mut hasher = Sha256::new();
97        hasher.update(json.as_bytes());
98        let result = hasher.finalize();
99        hex::encode(result)
100    }
101
102    /// Marks the run as complete.
103    pub fn complete(&mut self, statistics: EnhancedGenerationStatistics) {
104        self.completed_at = Some(Utc::now());
105        self.duration_seconds =
106            Some((self.completed_at.unwrap() - self.started_at).num_milliseconds() as f64 / 1000.0);
107        self.statistics = Some(statistics);
108    }
109
110    /// Adds a scenario tag.
111    pub fn add_tag(&mut self, tag: &str) {
112        if !self.scenario_tags.contains(&tag.to_string()) {
113            self.scenario_tags.push(tag.to_string());
114        }
115    }
116
117    /// Adds multiple scenario tags.
118    pub fn add_tags(&mut self, tags: &[String]) {
119        for tag in tags {
120            self.add_tag(tag);
121        }
122    }
123
124    /// Sets the output directory.
125    pub fn set_output_directory(&mut self, path: &Path) {
126        self.output_directory = Some(path.display().to_string());
127    }
128
129    /// Adds an output file record.
130    pub fn add_output_file(&mut self, info: OutputFileInfo) {
131        self.output_files.push(info);
132    }
133
134    /// Adds a warning message.
135    pub fn add_warning(&mut self, warning: &str) {
136        self.warnings.push(warning.to_string());
137    }
138
139    /// Adds metadata.
140    pub fn add_metadata(&mut self, key: &str, value: &str) {
141        self.metadata.insert(key.to_string(), value.to_string());
142    }
143
144    /// Writes the manifest to a JSON file.
145    pub fn write_to_file(&self, path: &Path) -> std::io::Result<()> {
146        let json = serde_json::to_string_pretty(self)?;
147        let mut file = File::create(path)?;
148        file.write_all(json.as_bytes())?;
149        Ok(())
150    }
151
152    /// Returns the run ID.
153    pub fn run_id(&self) -> &str {
154        &self.run_id
155    }
156}
157
// Note: ScenarioConfig is now defined in datasynth-config/src/schema.rs
// and exported via datasynth_config::schema::ScenarioConfig
160
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_config::schema::*;

    /// Builds a small single-company configuration; every section not spelled out
    /// here uses its `Default` value.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: datasynth_core::models::IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                worker_threads: 1,
                memory_limit_mb: 512,
            },
            companies: vec![CompanyConfig {
                code: "TEST".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig::default(),
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
        }
    }

    /// Shorthand for a fresh manifest over the test configuration with seed 42.
    fn new_manifest() -> RunManifest {
        RunManifest::new(&create_test_config(), 42)
    }

    #[test]
    fn test_run_manifest_creation() {
        let manifest = new_manifest();

        assert!(!manifest.run_id.is_empty());
        assert_eq!(manifest.seed, 42);
        assert!(!manifest.config_hash.is_empty());
        assert!(manifest.completed_at.is_none());
    }

    #[test]
    fn test_run_manifest_completion() {
        let mut manifest = new_manifest();

        // Let a measurable amount of time pass before completing the run.
        std::thread::sleep(std::time::Duration::from_millis(10));

        manifest.complete(EnhancedGenerationStatistics {
            total_entries: 100,
            total_line_items: 500,
            ..Default::default()
        });

        assert!(manifest.completed_at.is_some());

        let elapsed = manifest.duration_seconds.unwrap();
        assert!(elapsed >= 0.01);

        let recorded = manifest.statistics.as_ref().unwrap();
        assert_eq!(recorded.total_entries, 100);
    }

    #[test]
    fn test_config_hash_consistency() {
        let config = create_test_config();

        // Hashing the same configuration twice must give the same digest.
        let first = RunManifest::hash_config(&config);
        let second = RunManifest::hash_config(&config);

        assert_eq!(first, second);
    }

    #[test]
    fn test_scenario_tags() {
        let mut manifest = new_manifest();

        // The third tag repeats the first and must be ignored.
        for tag in &["fraud_detection", "retail", "fraud_detection"] {
            manifest.add_tag(tag);
        }

        assert_eq!(manifest.scenario_tags.len(), 2);
        assert!(manifest
            .scenario_tags
            .contains(&"fraud_detection".to_string()));
        assert!(manifest.scenario_tags.contains(&"retail".to_string()));
    }

    #[test]
    fn test_output_file_tracking() {
        let mut manifest = new_manifest();

        let entry = OutputFileInfo {
            path: "journal_entries.csv".to_string(),
            format: "csv".to_string(),
            record_count: Some(1000),
            size_bytes: Some(102400),
        };
        manifest.add_output_file(entry);

        assert_eq!(manifest.output_files.len(), 1);
        assert_eq!(manifest.output_files[0].record_count, Some(1000));
    }
}