datasynth_core/compliance/
article10.rs1use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct DataGovernanceReport {
15 pub report_version: String,
17 pub generated_at: DateTime<Utc>,
19 pub generator: String,
21 pub data_sources: Vec<DataSourceEntry>,
23 pub processing_steps: Vec<ProcessingStep>,
25 pub quality_measures: Vec<QualityMeasure>,
27 pub bias_assessment: BiasAssessment,
29 pub config_hash: String,
31 pub seed: u64,
33 #[serde(default)]
35 pub metadata: HashMap<String, String>,
36}
37
38impl DataGovernanceReport {
39 pub fn new(config_hash: String, seed: u64) -> Self {
41 Self {
42 report_version: "1.0".to_string(),
43 generated_at: Utc::now(),
44 generator: format!("DataSynth v{}", env!("CARGO_PKG_VERSION")),
45 data_sources: vec![DataSourceEntry {
46 name: "Synthetic generation (no real data used)".to_string(),
47 description: "All data is algorithmically generated using statistical distributions, domain models, and configurable parameters. No real personal or corporate data is used as input.".to_string(),
48 source_type: "synthetic".to_string(),
49 contains_personal_data: false,
50 }],
51 processing_steps: Vec::new(),
52 quality_measures: Vec::new(),
53 bias_assessment: BiasAssessment::default(),
54 config_hash,
55 seed,
56 metadata: HashMap::new(),
57 }
58 }
59
60 pub fn add_standard_processing_steps(&mut self) {
62 self.processing_steps = vec![
63 ProcessingStep {
64 name: "Chart of Accounts Generation".to_string(),
65 description: "Generate GL account structure based on industry and complexity"
66 .to_string(),
67 order: 1,
68 },
69 ProcessingStep {
70 name: "Master Data Generation".to_string(),
71 description: "Generate vendors, customers, materials, fixed assets, employees"
72 .to_string(),
73 order: 2,
74 },
75 ProcessingStep {
76 name: "Document Flow Generation".to_string(),
77 description: "Generate P2P and O2C document chains with three-way matching"
78 .to_string(),
79 order: 3,
80 },
81 ProcessingStep {
82 name: "Journal Entry Generation".to_string(),
83 description: "Generate balanced journal entries following Benford's Law"
84 .to_string(),
85 order: 4,
86 },
87 ProcessingStep {
88 name: "Anomaly Injection".to_string(),
89 description:
90 "Inject configurable fraud and error patterns with ground truth labels"
91 .to_string(),
92 order: 5,
93 },
94 ProcessingStep {
95 name: "Quality Validation".to_string(),
96 description:
97 "Validate balance coherence, referential integrity, and statistical properties"
98 .to_string(),
99 order: 6,
100 },
101 ];
102 }
103
104 pub fn add_standard_quality_measures(&mut self) {
106 self.quality_measures = vec![
107 QualityMeasure {
108 name: "Benford's Law Compliance".to_string(),
109 description: "First-digit distribution follows Benford's Law (MAD < 0.015)"
110 .to_string(),
111 result: "Applied".to_string(),
112 },
113 QualityMeasure {
114 name: "Balance Coherence".to_string(),
115 description: "All journal entries are balanced (debits = credits)".to_string(),
116 result: "Enforced at construction".to_string(),
117 },
118 QualityMeasure {
119 name: "Deterministic Reproducibility".to_string(),
120 description: "Same config + seed produces identical output".to_string(),
121 result: "ChaCha8 RNG with configurable seed".to_string(),
122 },
123 QualityMeasure {
124 name: "Referential Integrity".to_string(),
125 description: "All foreign key references are valid".to_string(),
126 result: "Applied".to_string(),
127 },
128 ];
129 }
130}
131
132#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct DataSourceEntry {
135 pub name: String,
137 pub description: String,
139 pub source_type: String,
141 pub contains_personal_data: bool,
143}
144
145#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct ProcessingStep {
148 pub name: String,
150 pub description: String,
152 pub order: u32,
154}
155
156#[derive(Debug, Clone, Serialize, Deserialize)]
158pub struct QualityMeasure {
159 pub name: String,
161 pub description: String,
163 pub result: String,
165}
166
167#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct BiasAssessment {
170 pub assessment: String,
172 pub known_limitations: Vec<String>,
174 pub mitigation_measures: Vec<String>,
176}
177
178impl Default for BiasAssessment {
179 fn default() -> Self {
180 Self {
181 assessment: "Synthetic data generation uses configurable statistical distributions. \
182 Bias characteristics are determined by configuration parameters (industry profiles, \
183 distribution parameters, anomaly rates) rather than real-world data."
184 .to_string(),
185 known_limitations: vec![
186 "Generated data reflects configured distribution parameters, not real-world distributions".to_string(),
187 "Industry profiles are approximations based on published research".to_string(),
188 "Temporal patterns use simplified models of business cycles".to_string(),
189 ],
190 mitigation_measures: vec![
191 "All configuration parameters are documented and reproducible".to_string(),
192 "Evaluation framework validates statistical properties of output".to_string(),
193 "AutoTuner can adjust parameters based on evaluation feedback".to_string(),
194 "Users should validate generated data against their specific use case requirements".to_string(),
195 ],
196 }
197 }
198}
199
#[cfg(test)]
// NOTE(review): nothing in this module calls `unwrap()`; the allow is kept
// presumably to match a crate-wide convention for test modules — confirm.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// A fresh report stores the caller's inputs and starts with exactly the
    /// synthetic data source and otherwise empty collections.
    #[test]
    fn test_report_creation() {
        let report = DataGovernanceReport::new("hash123".to_string(), 42);
        assert_eq!(report.report_version, "1.0");
        assert_eq!(report.seed, 42);
        assert_eq!(report.config_hash, "hash123");

        let source = report
            .data_sources
            .first()
            .expect("new() should register the synthetic data source");
        assert_eq!(source.source_type, "synthetic");
        assert!(!source.contains_personal_data);

        assert!(report.processing_steps.is_empty());
        assert!(report.quality_measures.is_empty());
        assert!(report.metadata.is_empty());
    }

    /// Standard steps are numbered consecutively from 1, and re-applying them
    /// replaces the list instead of duplicating it.
    #[test]
    fn test_standard_processing_steps() {
        let mut report = DataGovernanceReport::new("hash".to_string(), 42);
        report.add_standard_processing_steps();
        assert!(report.processing_steps.len() >= 5);

        // Check every step, not just the first one.
        for (step, expected_order) in report.processing_steps.iter().zip(1u32..) {
            assert_eq!(step.order, expected_order);
            assert!(!step.name.is_empty());
            assert!(!step.description.is_empty());
        }

        let first_len = report.processing_steps.len();
        report.add_standard_processing_steps();
        assert_eq!(report.processing_steps.len(), first_len);
    }

    /// Standard quality measures are fully populated and are not duplicated
    /// when applied twice.
    #[test]
    fn test_standard_quality_measures() {
        let mut report = DataGovernanceReport::new("hash".to_string(), 42);
        report.add_standard_quality_measures();
        assert!(report.quality_measures.len() >= 3);
        assert!(report
            .quality_measures
            .iter()
            .all(|m| !m.name.is_empty() && !m.description.is_empty() && !m.result.is_empty()));

        let first_len = report.quality_measures.len();
        report.add_standard_quality_measures();
        assert_eq!(report.quality_measures.len(), first_len);
    }

    /// The default bias assessment has text, limitations and mitigations.
    #[test]
    fn test_bias_assessment_default() {
        let assessment = BiasAssessment::default();
        assert!(!assessment.assessment.is_empty());
        assert!(!assessment.known_limitations.is_empty());
        assert!(!assessment.mitigation_measures.is_empty());
    }

    /// The report survives a JSON round trip with its contents intact.
    #[test]
    fn test_report_serialization() {
        let mut report = DataGovernanceReport::new("hash".to_string(), 42);
        report.add_standard_processing_steps();
        report.add_standard_quality_measures();

        let json = serde_json::to_string_pretty(&report).expect("should serialize");
        assert!(json.contains("DataSynth"));
        // The previous `"Article 10" || "data_sources"` check had a branch that
        // could never match; assert on the emitted key directly.
        assert!(json.contains("\"data_sources\""));

        let deser: DataGovernanceReport =
            serde_json::from_str(&json).expect("should deserialize");
        assert_eq!(deser.seed, 42);
        assert_eq!(deser.config_hash, report.config_hash);
        assert_eq!(deser.report_version, report.report_version);
        assert_eq!(deser.generator, report.generator);
        assert_eq!(deser.data_sources.len(), report.data_sources.len());
        assert_eq!(deser.processing_steps.len(), report.processing_steps.len());
        assert_eq!(deser.quality_measures.len(), report.quality_measures.len());
    }

    /// `metadata` is `#[serde(default)]`, so input without that key must
    /// still deserialize, yielding an empty map.
    #[test]
    fn test_metadata_defaults_when_missing() {
        let report = DataGovernanceReport::new("hash".to_string(), 7);
        let mut value = serde_json::to_value(&report).expect("should serialize");
        value
            .as_object_mut()
            .expect("report should serialize to a JSON object")
            .remove("metadata");

        let deser: DataGovernanceReport =
            serde_json::from_value(value).expect("should deserialize without metadata");
        assert!(deser.metadata.is_empty());
        assert_eq!(deser.seed, 7);
    }
}