1use chrono::{DateTime, Utc};
7use datasynth_config::schema::GeneratorConfig;
8use serde::{Deserialize, Serialize};
9use sha2::{Digest, Sha256};
10use std::collections::HashMap;
11use std::fs::File;
12use std::io::Write;
13use std::path::Path;
14use uuid::Uuid;
15
16use super::EnhancedGenerationStatistics;
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
20pub struct RunManifest {
21 pub run_id: String,
23 pub started_at: DateTime<Utc>,
25 pub completed_at: Option<DateTime<Utc>>,
27 pub config_hash: String,
29 pub config_snapshot: GeneratorConfig,
31 pub seed: u64,
33 #[serde(default)]
35 pub scenario_tags: Vec<String>,
36 #[serde(default)]
38 pub statistics: Option<EnhancedGenerationStatistics>,
39 pub duration_seconds: Option<f64>,
41 pub generator_version: String,
43 #[serde(default)]
45 pub metadata: HashMap<String, String>,
46 pub output_directory: Option<String>,
48 #[serde(default)]
50 pub output_files: Vec<OutputFileInfo>,
51 #[serde(default)]
53 pub warnings: Vec<String>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct OutputFileInfo {
59 pub path: String,
61 pub format: String,
63 pub record_count: Option<usize>,
65 pub size_bytes: Option<u64>,
67}
68
69impl RunManifest {
70 pub fn new(config: &GeneratorConfig, seed: u64) -> Self {
72 let run_id = Uuid::new_v4().to_string();
73 let config_hash = Self::hash_config(config);
74
75 Self {
76 run_id,
77 started_at: Utc::now(),
78 completed_at: None,
79 config_hash,
80 config_snapshot: config.clone(),
81 seed,
82 scenario_tags: Vec::new(),
83 statistics: None,
84 duration_seconds: None,
85 generator_version: env!("CARGO_PKG_VERSION").to_string(),
86 metadata: HashMap::new(),
87 output_directory: None,
88 output_files: Vec::new(),
89 warnings: Vec::new(),
90 }
91 }
92
93 fn hash_config(config: &GeneratorConfig) -> String {
95 let json = serde_json::to_string(config).unwrap_or_default();
96 let mut hasher = Sha256::new();
97 hasher.update(json.as_bytes());
98 let result = hasher.finalize();
99 hex::encode(result)
100 }
101
102 pub fn complete(&mut self, statistics: EnhancedGenerationStatistics) {
104 self.completed_at = Some(Utc::now());
105 self.duration_seconds =
106 Some((self.completed_at.unwrap() - self.started_at).num_milliseconds() as f64 / 1000.0);
107 self.statistics = Some(statistics);
108 }
109
110 pub fn add_tag(&mut self, tag: &str) {
112 if !self.scenario_tags.contains(&tag.to_string()) {
113 self.scenario_tags.push(tag.to_string());
114 }
115 }
116
117 pub fn add_tags(&mut self, tags: &[String]) {
119 for tag in tags {
120 self.add_tag(tag);
121 }
122 }
123
124 pub fn set_output_directory(&mut self, path: &Path) {
126 self.output_directory = Some(path.display().to_string());
127 }
128
129 pub fn add_output_file(&mut self, info: OutputFileInfo) {
131 self.output_files.push(info);
132 }
133
134 pub fn add_warning(&mut self, warning: &str) {
136 self.warnings.push(warning.to_string());
137 }
138
139 pub fn add_metadata(&mut self, key: &str, value: &str) {
141 self.metadata.insert(key.to_string(), value.to_string());
142 }
143
144 pub fn write_to_file(&self, path: &Path) -> std::io::Result<()> {
146 let json = serde_json::to_string_pretty(self)?;
147 let mut file = File::create(path)?;
148 file.write_all(json.as_bytes())?;
149 Ok(())
150 }
151
152 pub fn run_id(&self) -> &str {
154 &self.run_id
155 }
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_config::schema::*;

    /// Builds a small single-company configuration; sections not spelled out here
    /// use their `Default` values.
    fn create_test_config() -> GeneratorConfig {
        GeneratorConfig {
            global: GlobalConfig {
                industry: datasynth_core::models::IndustrySector::Manufacturing,
                start_date: "2024-01-01".to_string(),
                period_months: 1,
                seed: Some(42),
                parallel: false,
                group_currency: "USD".to_string(),
                worker_threads: 1,
                memory_limit_mb: 512,
            },
            companies: vec![CompanyConfig {
                code: "TEST".to_string(),
                name: "Test Company".to_string(),
                currency: "USD".to_string(),
                country: "US".to_string(),
                annual_transaction_volume: TransactionVolume::TenK,
                volume_weight: 1.0,
                fiscal_year_variant: "K4".to_string(),
            }],
            chart_of_accounts: ChartOfAccountsConfig::default(),
            transactions: TransactionConfig::default(),
            output: OutputConfig::default(),
            fraud: FraudConfig::default(),
            internal_controls: InternalControlsConfig::default(),
            business_processes: BusinessProcessConfig::default(),
            user_personas: UserPersonaConfig::default(),
            templates: TemplateConfig::default(),
            approval: ApprovalConfig::default(),
            departments: DepartmentConfig::default(),
            master_data: MasterDataConfig::default(),
            document_flows: DocumentFlowConfig::default(),
            intercompany: IntercompanyConfig::default(),
            balance: BalanceConfig::default(),
            ocpm: OcpmConfig::default(),
            audit: AuditGenerationConfig::default(),
            banking: datasynth_banking::BankingConfig::default(),
            data_quality: DataQualitySchemaConfig::default(),
            scenario: ScenarioConfig::default(),
            temporal: TemporalDriftConfig::default(),
            graph_export: GraphExportConfig::default(),
            streaming: StreamingSchemaConfig::default(),
            rate_limit: RateLimitSchemaConfig::default(),
            temporal_attributes: TemporalAttributeSchemaConfig::default(),
            relationships: RelationshipSchemaConfig::default(),
            accounting_standards: AccountingStandardsConfig::default(),
            audit_standards: AuditStandardsConfig::default(),
            distributions: Default::default(),
            temporal_patterns: Default::default(),
            vendor_network: VendorNetworkSchemaConfig::default(),
            customer_segmentation: CustomerSegmentationSchemaConfig::default(),
            relationship_strength: RelationshipStrengthSchemaConfig::default(),
            cross_process_links: CrossProcessLinksSchemaConfig::default(),
            organizational_events: OrganizationalEventsSchemaConfig::default(),
            behavioral_drift: BehavioralDriftSchemaConfig::default(),
            market_drift: MarketDriftSchemaConfig::default(),
            drift_labeling: DriftLabelingSchemaConfig::default(),
            anomaly_injection: Default::default(),
            industry_specific: Default::default(),
        }
    }

    #[test]
    fn test_run_manifest_creation() {
        let config = create_test_config();
        let manifest = RunManifest::new(&config, 42);

        assert!(!manifest.run_id.is_empty());
        assert_eq!(manifest.seed, 42);
        assert!(!manifest.config_hash.is_empty());
        assert!(manifest.completed_at.is_none());
    }

    #[test]
    fn test_run_manifest_completion() {
        let config = create_test_config();
        let mut manifest = RunManifest::new(&config, 42);

        // Let some wall-clock time pass so the recorded duration is non-zero.
        std::thread::sleep(std::time::Duration::from_millis(10));

        let stats = EnhancedGenerationStatistics {
            total_entries: 100,
            total_line_items: 500,
            ..Default::default()
        };
        manifest.complete(stats);

        assert!(manifest.completed_at.is_some());
        assert!(manifest.duration_seconds.unwrap() >= 0.01);
        assert_eq!(manifest.statistics.as_ref().unwrap().total_entries, 100);
    }

    #[test]
    fn test_config_hash_consistency() {
        let config = create_test_config();
        let hash1 = RunManifest::hash_config(&config);
        let hash2 = RunManifest::hash_config(&config);

        assert_eq!(hash1, hash2);
    }

    #[test]
    fn test_scenario_tags() {
        let config = create_test_config();
        let mut manifest = RunManifest::new(&config, 42);

        manifest.add_tag("fraud_detection");
        manifest.add_tag("retail");
        // Adding an existing tag again must not create a second entry.
        manifest.add_tag("fraud_detection");

        assert_eq!(manifest.scenario_tags.len(), 2);
        assert!(manifest
            .scenario_tags
            .contains(&"fraud_detection".to_string()));
        assert!(manifest.scenario_tags.contains(&"retail".to_string()));
    }

    #[test]
    fn test_output_file_tracking() {
        let config = create_test_config();
        let mut manifest = RunManifest::new(&config, 42);

        manifest.add_output_file(OutputFileInfo {
            path: "journal_entries.csv".to_string(),
            format: "csv".to_string(),
            record_count: Some(1000),
            size_bytes: Some(102400),
        });

        assert_eq!(manifest.output_files.len(), 1);
        assert_eq!(manifest.output_files[0].record_count, Some(1000));
    }
}