datasynth-generators 2.2.0

50+ data generators covering GL, P2P, O2C, S2C, HR, manufacturing, audit, tax, treasury, and ESG
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! Service organization and SOC report generator per ISA 402.
//!
//! With the default configuration, generates 1–3 service organizations per
//! entity and one SOC 1 Type II report per service-organization/entity pair,
//! each with 3–8 control objectives and 0–2 exceptions. User entity controls
//! are generated for every report and mapped back to its control objectives.

use chrono::{Duration, NaiveDate};
use datasynth_core::models::audit::service_organization::{
    ControlEffectiveness, ControlObjective, ServiceOrganization, ServiceType, SocException,
    SocOpinionType, SocReport, SocReportType, UserEntityControl,
};
use datasynth_core::utils::seeded_rng;
use rand::Rng;
use rand_chacha::ChaCha8Rng;
use tracing::info;

/// Configuration for service organization generation.
///
/// Every `(min, max)` pair is sampled as an inclusive range, so both bounds
/// are attainable values.
#[derive(Debug, Clone)]
pub struct ServiceOrgGeneratorConfig {
    /// Number of service organizations per entity (min, max), both inclusive.
    pub service_orgs_per_entity: (usize, usize),
    /// Number of control objectives per SOC report (min, max), both inclusive.
    pub objectives_per_report: (usize, usize),
    /// Number of exceptions per report (min, max), both inclusive.
    pub exceptions_per_report: (usize, usize),
    /// Probability of a qualified opinion (vs unmodified).
    ///
    /// Only consulted for reports that have at least one exception; reports
    /// without exceptions always receive an unmodified opinion.
    pub qualified_opinion_probability: f64,
    /// Number of user entity controls per SOC report (min, max), both inclusive.
    ///
    /// No controls are generated for a report that has no control objectives.
    pub user_controls_per_report: (usize, usize),
}

impl Default for ServiceOrgGeneratorConfig {
    /// Baseline configuration: 1–3 service organizations per entity, 3–8
    /// control objectives, 0–2 exceptions and 1–4 user entity controls per
    /// report, with a 10% chance of a qualified opinion.
    fn default() -> Self {
        let service_orgs_per_entity = (1, 3);
        let objectives_per_report = (3, 8);
        let exceptions_per_report = (0, 2);
        let user_controls_per_report = (1, 4);
        let qualified_opinion_probability = 0.10;

        Self {
            service_orgs_per_entity,
            objectives_per_report,
            exceptions_per_report,
            qualified_opinion_probability,
            user_controls_per_report,
        }
    }
}

/// Result of generating service organization data for a set of entities.
///
/// The `Default` value is an empty snapshot, which is also what the generator
/// returns when it is given no entities.
#[derive(Debug, Clone, Default)]
pub struct ServiceOrgSnapshot {
    /// Service organizations identified.
    ///
    /// An organization with the same service type and name is shared across
    /// entities rather than duplicated.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports obtained, one per service-organization/entity pairing.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls documented, each tied to one SOC report.
    pub user_entity_controls: Vec<UserEntityControl>,
}

/// Generator for ISA 402 service organization controls.
///
/// Holds its own seeded RNG, so generation methods take `&mut self` and
/// advance that RNG as they draw values.
pub struct ServiceOrgGenerator {
    /// Random source created by `seeded_rng` from the caller's seed.
    rng: ChaCha8Rng,
    /// Count ranges and probabilities that drive generation.
    config: ServiceOrgGeneratorConfig,
}

impl ServiceOrgGenerator {
    /// Create a new generator with the given seed.
    pub fn new(seed: u64) -> Self {
        Self {
            rng: seeded_rng(seed, 0x402),
            config: ServiceOrgGeneratorConfig::default(),
        }
    }

    /// Create a new generator with custom configuration.
    pub fn with_config(seed: u64, config: ServiceOrgGeneratorConfig) -> Self {
        Self {
            rng: seeded_rng(seed, 0x402),
            config,
        }
    }

    /// Generate service organizations and SOC reports for a list of entities.
    pub fn generate(
        &mut self,
        entity_codes: &[String],
        period_end_date: NaiveDate,
    ) -> ServiceOrgSnapshot {
        if entity_codes.is_empty() {
            return ServiceOrgSnapshot::default();
        }

        info!(
            "Generating service org data for {} entities",
            entity_codes.len()
        );
        let mut snapshot = ServiceOrgSnapshot::default();

        // Pool of service type templates to draw from
        let service_type_pool = [
            ServiceType::PayrollProcessor,
            ServiceType::CloudHosting,
            ServiceType::PaymentProcessor,
            ServiceType::ItManagedServices,
            ServiceType::DataCentre,
        ];

        for entity_code in entity_codes {
            let org_count = self.rng.random_range(
                self.config.service_orgs_per_entity.0..=self.config.service_orgs_per_entity.1,
            );

            for i in 0..org_count {
                let service_type = service_type_pool[i % service_type_pool.len()];
                let org_name = self.org_name(service_type, i);

                // Check if a matching service org already exists (reuse across entities)
                let org_id = if let Some(existing) = snapshot
                    .service_organizations
                    .iter_mut()
                    .find(|o| o.service_type == service_type && o.name == org_name)
                {
                    existing.entities_served.push(entity_code.clone());
                    existing.id.clone()
                } else {
                    let org =
                        ServiceOrganization::new(org_name, service_type, vec![entity_code.clone()]);
                    let id = org.id.clone();
                    snapshot.service_organizations.push(org);
                    id
                };

                // Generate a SOC 1 Type II report for this org/entity pair
                let report = self.generate_soc_report(&org_id, period_end_date);
                let report_id = report.id.clone();
                let objective_ids: Vec<String> = report
                    .control_objectives
                    .iter()
                    .map(|o| o.id.clone())
                    .collect();
                snapshot.soc_reports.push(report);

                // Generate user entity controls for the report
                let user_controls =
                    self.generate_user_controls(&report_id, &objective_ids, entity_code);
                snapshot.user_entity_controls.extend(user_controls);
            }
        }

        info!(
            "Generated {} service orgs and {} SOC reports",
            snapshot.service_organizations.len(),
            snapshot.soc_reports.len()
        );
        snapshot
    }

    fn generate_soc_report(
        &mut self,
        service_org_id: &str,
        period_end_date: NaiveDate,
    ) -> SocReport {
        let objectives_count = self.rng.random_range(
            self.config.objectives_per_report.0..=self.config.objectives_per_report.1,
        );
        let exceptions_count = self.rng.random_range(
            self.config.exceptions_per_report.0..=self.config.exceptions_per_report.1,
        );

        let has_exceptions = exceptions_count > 0;
        let opinion_type = if has_exceptions
            && self.rng.random::<f64>() < self.config.qualified_opinion_probability
        {
            SocOpinionType::Qualified
        } else {
            SocOpinionType::Unmodified
        };

        // SOC report covers the 12 months ending at period-end
        let report_period_start = period_end_date - Duration::days(365);
        let report_period_end = period_end_date;

        let mut report = SocReport::new(
            service_org_id,
            SocReportType::Soc1Type2,
            report_period_start,
            report_period_end,
            opinion_type,
        );

        // Generate control objectives
        for j in 0..objectives_count {
            let controls_tested = self.rng.random_range(3u32..=12);
            // Objectives with exceptions may have ineffective controls
            let controls_effective = !(has_exceptions && j < exceptions_count);
            let description = self.objective_description(j);
            let objective = ControlObjective::new(description, controls_tested, controls_effective);
            report.control_objectives.push(objective);
        }

        // Generate exceptions for objectives that have failures
        let ineffective_objectives: Vec<String> = report
            .control_objectives
            .iter()
            .filter(|o| !o.controls_effective)
            .map(|o| o.id.clone())
            .collect();

        for obj_id in &ineffective_objectives {
            let exception = SocException {
                control_objective_id: obj_id.clone(),
                description: "A sample of transactions tested revealed that the control did not \
                               operate as designed during the period."
                    .to_string(),
                management_response: "Management has implemented enhanced monitoring procedures \
                                      to address the identified control deficiency."
                    .to_string(),
                user_entity_impact: "User entities should consider compensating controls to \
                                     address the risk arising from this exception."
                    .to_string(),
            };
            report.exceptions_noted.push(exception);
        }

        report
    }

    fn generate_user_controls(
        &mut self,
        soc_report_id: &str,
        objective_ids: &[String],
        _entity_code: &str,
    ) -> Vec<UserEntityControl> {
        if objective_ids.is_empty() {
            return Vec::new();
        }

        let count = self.rng.random_range(
            self.config.user_controls_per_report.0..=self.config.user_controls_per_report.1,
        );

        let mut controls = Vec::with_capacity(count);
        for i in 0..count {
            let mapped_objective = &objective_ids[i % objective_ids.len()];
            let implemented = self.rng.random::<f64>() < 0.90;
            let effectiveness = if implemented {
                if self.rng.random::<f64>() < 0.80 {
                    ControlEffectiveness::Effective
                } else {
                    ControlEffectiveness::EffectiveWithExceptions
                }
            } else {
                ControlEffectiveness::NotTested
            };

            let description = self.user_control_description(i);
            let control = UserEntityControl::new(
                soc_report_id,
                description,
                mapped_objective,
                implemented,
                effectiveness,
            );
            controls.push(control);
        }

        controls
    }

    fn org_name(&self, service_type: ServiceType, index: usize) -> String {
        let names_by_type: &[&str] = match service_type {
            ServiceType::PayrollProcessor => &[
                "Ceridian HCM Inc.",
                "ADP Employer Services",
                "Paychex Inc.",
                "Workday Payroll Ltd.",
            ],
            ServiceType::CloudHosting => &[
                "Amazon Web Services Inc.",
                "Microsoft Azure Cloud",
                "Google Cloud Platform",
                "IBM Cloud Services",
            ],
            ServiceType::PaymentProcessor => &[
                "Stripe Inc.",
                "PayPal Holdings Inc.",
                "Worldpay Group Ltd.",
                "Adyen N.V.",
            ],
            ServiceType::ItManagedServices => &[
                "DXC Technology Co.",
                "Unisys Corporation",
                "Cognizant IT Solutions",
                "Infosys BPM Ltd.",
            ],
            ServiceType::DataCentre => &[
                "Equinix Inc.",
                "Digital Realty Trust",
                "CyrusOne LLC",
                "Iron Mountain Data Centres",
            ],
        };
        names_by_type[index % names_by_type.len()].to_string()
    }

    fn objective_description(&self, index: usize) -> String {
        let objectives = [
            "Logical access controls over applications and data are designed and operating effectively.",
            "Change management procedures ensure that programme changes are authorised, tested, and approved.",
            "Computer operations controls ensure that processing is complete, accurate, and timely.",
            "Data backup and recovery controls ensure data integrity and availability.",
            "Network and security controls protect systems from unauthorised access.",
            "Incident management controls ensure that security incidents are identified and resolved.",
            "Vendor management controls ensure that third-party risks are assessed and monitored.",
            "Physical security controls restrict access to data processing facilities.",
        ];
        objectives[index % objectives.len()].to_string()
    }

    fn user_control_description(&self, index: usize) -> String {
        let descriptions = [
            "Review of user access rights at least annually and removal of access for terminated employees.",
            "Reconciliation of payroll data transmitted to the service organization and results received.",
            "Monitoring of service organization performance metrics and escalation of issues.",
            "Review and approval of changes to master data transmitted to the service organization.",
            "Periodic review of SOC reports and assessment of exceptions on user entity operations.",
        ];
        descriptions[index % descriptions.len()].to_string()
    }
}

// `unwrap` is acceptable in tests: a failure here is a test failure.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Fixed period-end date shared by all tests.
    fn period_end() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 12, 31).unwrap()
    }

    /// Builds `n` entity codes of the form `C001`, `C002`, ...
    fn entity_codes(n: usize) -> Vec<String> {
        (1..=n).map(|i| format!("C{i:03}")).collect()
    }

    /// Runs a default-configured generator (seed 42) over `n` entities.
    // The local is named `generator` rather than `gen`, which is a reserved
    // keyword from the Rust 2024 edition onwards.
    fn snapshot_for(n: usize) -> ServiceOrgSnapshot {
        let mut generator = ServiceOrgGenerator::new(42);
        generator.generate(&entity_codes(n), period_end())
    }

    #[test]
    fn test_service_orgs_within_bounds() {
        let snapshot = snapshot_for(1);
        let org_count = snapshot.service_organizations.len();
        assert!(
            (1..=3).contains(&org_count),
            "expected 1-3 service orgs, got {}",
            org_count
        );
    }

    #[test]
    fn test_soc_reports_have_objectives_in_range() {
        let snapshot = snapshot_for(2);
        for report in &snapshot.soc_reports {
            let objective_count = report.control_objectives.len();
            assert!(
                (3..=8).contains(&objective_count),
                "expected 3-8 control objectives, got {}",
                objective_count
            );
        }
    }

    #[test]
    fn test_exceptions_within_bounds() {
        let snapshot = snapshot_for(3);
        for report in &snapshot.soc_reports {
            let exception_count = report.exceptions_noted.len();
            assert!(
                exception_count <= 2,
                "expected 0-2 exceptions, got {}",
                exception_count
            );
        }
    }

    #[test]
    fn test_user_entity_controls_reference_valid_reports() {
        let snapshot = snapshot_for(2);

        let report_ids: HashSet<String> = snapshot
            .soc_reports
            .iter()
            .map(|report| report.id.clone())
            .collect();

        for ctrl in &snapshot.user_entity_controls {
            assert!(
                report_ids.contains(&ctrl.soc_report_id),
                "UserEntityControl references unknown soc_report_id '{}'",
                ctrl.soc_report_id
            );
        }
    }

    #[test]
    fn test_empty_entities_returns_empty_snapshot() {
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&[], period_end());
        assert!(snapshot.service_organizations.is_empty());
        assert!(snapshot.soc_reports.is_empty());
        assert!(snapshot.user_entity_controls.is_empty());
    }
}