datasynth_generators/audit/
service_org_generator.rs1use chrono::{Duration, NaiveDate};
8use datasynth_core::models::audit::service_organization::{
9 ControlEffectiveness, ControlObjective, ServiceOrganization, ServiceType, SocException,
10 SocOpinionType, SocReport, SocReportType, UserEntityControl,
11};
12use datasynth_core::utils::seeded_rng;
13use rand::RngExt;
14use rand_chacha::ChaCha8Rng;
15use tracing::info;
16
/// Tuning parameters for [`ServiceOrgGenerator`].
///
/// Every `(usize, usize)` field is an inclusive `(min, max)` pair; the
/// generator draws one value from that range each time the setting is used.
#[derive(Debug, Clone)]
pub struct ServiceOrgGeneratorConfig {
    /// Inclusive `(min, max)` number of service organizations drawn for each entity.
    pub service_orgs_per_entity: (usize, usize),
    /// Inclusive `(min, max)` number of control objectives drawn for each SOC report.
    pub objectives_per_report: (usize, usize),
    /// Inclusive `(min, max)` number of exceptions drawn for each SOC report.
    ///
    /// Each exception marks one control objective as ineffective, so a report
    /// never carries more exceptions than it has objectives.
    pub exceptions_per_report: (usize, usize),
    /// Probability that a report with at least one exception receives a
    /// qualified opinion; reports without exceptions are always unmodified.
    pub qualified_opinion_probability: f64,
    /// Inclusive `(min, max)` number of user entity controls drawn for each SOC report.
    pub user_controls_per_report: (usize, usize),
}
31
32impl Default for ServiceOrgGeneratorConfig {
33 fn default() -> Self {
34 Self {
35 service_orgs_per_entity: (1, 3),
36 objectives_per_report: (3, 8),
37 exceptions_per_report: (0, 2),
38 qualified_opinion_probability: 0.10,
39 user_controls_per_report: (1, 4),
40 }
41 }
42}
43
/// Everything produced by one call to [`ServiceOrgGenerator::generate`].
#[derive(Debug, Clone, Default)]
pub struct ServiceOrgSnapshot {
    /// Service organizations, one entry per distinct (service type, name)
    /// pair; an organization used by several entities appears once.
    pub service_organizations: Vec<ServiceOrganization>,
    /// SOC reports, one per service organization assigned to an entity.
    pub soc_reports: Vec<SocReport>,
    /// User entity controls generated for the reports in `soc_reports`.
    pub user_entity_controls: Vec<UserEntityControl>,
}
54
/// Generator for service organizations, their SOC reports, and the related
/// user entity controls.
pub struct ServiceOrgGenerator {
    /// Random source; the constructors build it with `seeded_rng` from the
    /// caller's seed.
    rng: ChaCha8Rng,
    /// Count ranges and probabilities applied during generation.
    config: ServiceOrgGeneratorConfig,
}
60
61impl ServiceOrgGenerator {
62 pub fn new(seed: u64) -> Self {
64 Self {
65 rng: seeded_rng(seed, 0x402),
66 config: ServiceOrgGeneratorConfig::default(),
67 }
68 }
69
70 pub fn with_config(seed: u64, config: ServiceOrgGeneratorConfig) -> Self {
72 Self {
73 rng: seeded_rng(seed, 0x402),
74 config,
75 }
76 }
77
78 pub fn generate(
80 &mut self,
81 entity_codes: &[String],
82 period_end_date: NaiveDate,
83 ) -> ServiceOrgSnapshot {
84 if entity_codes.is_empty() {
85 return ServiceOrgSnapshot::default();
86 }
87
88 info!(
89 "Generating service org data for {} entities",
90 entity_codes.len()
91 );
92 let mut snapshot = ServiceOrgSnapshot::default();
93
94 let service_type_pool = [
96 ServiceType::PayrollProcessor,
97 ServiceType::CloudHosting,
98 ServiceType::PaymentProcessor,
99 ServiceType::ItManagedServices,
100 ServiceType::DataCentre,
101 ];
102
103 for entity_code in entity_codes {
104 let org_count = self.rng.random_range(
105 self.config.service_orgs_per_entity.0..=self.config.service_orgs_per_entity.1,
106 );
107
108 for i in 0..org_count {
109 let service_type = service_type_pool[i % service_type_pool.len()];
110 let org_name = self.org_name(service_type, i);
111
112 let org_id = if let Some(existing) = snapshot
114 .service_organizations
115 .iter_mut()
116 .find(|o| o.service_type == service_type && o.name == org_name)
117 {
118 existing.entities_served.push(entity_code.clone());
119 existing.id.clone()
120 } else {
121 let org =
122 ServiceOrganization::new(org_name, service_type, vec![entity_code.clone()]);
123 let id = org.id.clone();
124 snapshot.service_organizations.push(org);
125 id
126 };
127
128 let report = self.generate_soc_report(&org_id, period_end_date);
130 let report_id = report.id.clone();
131 let objective_ids: Vec<String> = report
132 .control_objectives
133 .iter()
134 .map(|o| o.id.clone())
135 .collect();
136 snapshot.soc_reports.push(report);
137
138 let user_controls =
140 self.generate_user_controls(&report_id, &objective_ids, entity_code);
141 snapshot.user_entity_controls.extend(user_controls);
142 }
143 }
144
145 info!(
146 "Generated {} service orgs and {} SOC reports",
147 snapshot.service_organizations.len(),
148 snapshot.soc_reports.len()
149 );
150 snapshot
151 }
152
153 fn generate_soc_report(
154 &mut self,
155 service_org_id: &str,
156 period_end_date: NaiveDate,
157 ) -> SocReport {
158 let objectives_count = self.rng.random_range(
159 self.config.objectives_per_report.0..=self.config.objectives_per_report.1,
160 );
161 let exceptions_count = self.rng.random_range(
162 self.config.exceptions_per_report.0..=self.config.exceptions_per_report.1,
163 );
164
165 let has_exceptions = exceptions_count > 0;
166 let opinion_type = if has_exceptions
167 && self.rng.random::<f64>() < self.config.qualified_opinion_probability
168 {
169 SocOpinionType::Qualified
170 } else {
171 SocOpinionType::Unmodified
172 };
173
174 let report_period_start = period_end_date - Duration::days(365);
176 let report_period_end = period_end_date;
177
178 let mut report = SocReport::new(
179 service_org_id,
180 SocReportType::Soc1Type2,
181 report_period_start,
182 report_period_end,
183 opinion_type,
184 );
185
186 for j in 0..objectives_count {
188 let controls_tested = self.rng.random_range(3u32..=12);
189 let controls_effective = !(has_exceptions && j < exceptions_count);
191 let description = self.objective_description(j);
192 let objective = ControlObjective::new(description, controls_tested, controls_effective);
193 report.control_objectives.push(objective);
194 }
195
196 let ineffective_objectives: Vec<String> = report
198 .control_objectives
199 .iter()
200 .filter(|o| !o.controls_effective)
201 .map(|o| o.id.clone())
202 .collect();
203
204 for obj_id in &ineffective_objectives {
205 let exception = SocException {
206 control_objective_id: obj_id.clone(),
207 description: "A sample of transactions tested revealed that the control did not \
208 operate as designed during the period."
209 .to_string(),
210 management_response: "Management has implemented enhanced monitoring procedures \
211 to address the identified control deficiency."
212 .to_string(),
213 user_entity_impact: "User entities should consider compensating controls to \
214 address the risk arising from this exception."
215 .to_string(),
216 };
217 report.exceptions_noted.push(exception);
218 }
219
220 report
221 }
222
223 fn generate_user_controls(
224 &mut self,
225 soc_report_id: &str,
226 objective_ids: &[String],
227 _entity_code: &str,
228 ) -> Vec<UserEntityControl> {
229 if objective_ids.is_empty() {
230 return Vec::new();
231 }
232
233 let count = self.rng.random_range(
234 self.config.user_controls_per_report.0..=self.config.user_controls_per_report.1,
235 );
236
237 let mut controls = Vec::with_capacity(count);
238 for i in 0..count {
239 let mapped_objective = &objective_ids[i % objective_ids.len()];
240 let implemented = self.rng.random::<f64>() < 0.90;
241 let effectiveness = if implemented {
242 if self.rng.random::<f64>() < 0.80 {
243 ControlEffectiveness::Effective
244 } else {
245 ControlEffectiveness::EffectiveWithExceptions
246 }
247 } else {
248 ControlEffectiveness::NotTested
249 };
250
251 let description = self.user_control_description(i);
252 let control = UserEntityControl::new(
253 soc_report_id,
254 description,
255 mapped_objective,
256 implemented,
257 effectiveness,
258 );
259 controls.push(control);
260 }
261
262 controls
263 }
264
265 fn org_name(&self, service_type: ServiceType, index: usize) -> String {
266 let names_by_type: &[&str] = match service_type {
267 ServiceType::PayrollProcessor => &[
268 "Ceridian HCM Inc.",
269 "ADP Employer Services",
270 "Paychex Inc.",
271 "Workday Payroll Ltd.",
272 ],
273 ServiceType::CloudHosting => &[
274 "Amazon Web Services Inc.",
275 "Microsoft Azure Cloud",
276 "Google Cloud Platform",
277 "IBM Cloud Services",
278 ],
279 ServiceType::PaymentProcessor => &[
280 "Stripe Inc.",
281 "PayPal Holdings Inc.",
282 "Worldpay Group Ltd.",
283 "Adyen N.V.",
284 ],
285 ServiceType::ItManagedServices => &[
286 "DXC Technology Co.",
287 "Unisys Corporation",
288 "Cognizant IT Solutions",
289 "Infosys BPM Ltd.",
290 ],
291 ServiceType::DataCentre => &[
292 "Equinix Inc.",
293 "Digital Realty Trust",
294 "CyrusOne LLC",
295 "Iron Mountain Data Centres",
296 ],
297 };
298 names_by_type[index % names_by_type.len()].to_string()
299 }
300
301 fn objective_description(&self, index: usize) -> String {
302 let objectives = [
303 "Logical access controls over applications and data are designed and operating effectively.",
304 "Change management procedures ensure that programme changes are authorised, tested, and approved.",
305 "Computer operations controls ensure that processing is complete, accurate, and timely.",
306 "Data backup and recovery controls ensure data integrity and availability.",
307 "Network and security controls protect systems from unauthorised access.",
308 "Incident management controls ensure that security incidents are identified and resolved.",
309 "Vendor management controls ensure that third-party risks are assessed and monitored.",
310 "Physical security controls restrict access to data processing facilities.",
311 ];
312 objectives[index % objectives.len()].to_string()
313 }
314
315 fn user_control_description(&self, index: usize) -> String {
316 let descriptions = [
317 "Review of user access rights at least annually and removal of access for terminated employees.",
318 "Reconciliation of payroll data transmitted to the service organization and results received.",
319 "Monitoring of service organization performance metrics and escalation of issues.",
320 "Review and approval of changes to master data transmitted to the service organization.",
321 "Periodic review of SOC reports and assessment of exceptions on user entity operations.",
322 ];
323 descriptions[index % descriptions.len()].to_string()
324 }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Period end date shared by every test.
    fn period_end() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 12, 31).unwrap()
    }

    /// Builds `n` entity codes of the form `C001`, `C002`, ...
    fn entity_codes(n: usize) -> Vec<String> {
        (1..=n).map(|i| format!("C{i:03}")).collect()
    }

    // The generator binding is named `generator` rather than `gen`: `gen` is a
    // reserved keyword from the Rust 2024 edition onwards.

    /// A single entity yields between one and three service organizations
    /// under the default configuration.
    #[test]
    fn test_service_orgs_within_bounds() {
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&entity_codes(1), period_end());
        assert!(
            (1..=3).contains(&snapshot.service_organizations.len()),
            "expected 1-3 service orgs, got {}",
            snapshot.service_organizations.len()
        );
    }

    /// Every report carries 3-8 control objectives under the default
    /// configuration.
    #[test]
    fn test_soc_reports_have_objectives_in_range() {
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&entity_codes(2), period_end());
        for report in &snapshot.soc_reports {
            assert!(
                (3..=8).contains(&report.control_objectives.len()),
                "expected 3-8 control objectives, got {}",
                report.control_objectives.len()
            );
        }
    }

    /// No report carries more than two exceptions under the default
    /// configuration.
    #[test]
    fn test_exceptions_within_bounds() {
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&entity_codes(3), period_end());
        for report in &snapshot.soc_reports {
            assert!(
                report.exceptions_noted.len() <= 2,
                "expected 0-2 exceptions, got {}",
                report.exceptions_noted.len()
            );
        }
    }

    /// Every user entity control points at a report present in the snapshot.
    #[test]
    fn test_user_entity_controls_reference_valid_reports() {
        use std::collections::HashSet;
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&entity_codes(2), period_end());

        let report_ids: HashSet<String> =
            snapshot.soc_reports.iter().map(|r| r.id.clone()).collect();

        for ctrl in &snapshot.user_entity_controls {
            assert!(
                report_ids.contains(&ctrl.soc_report_id),
                "UserEntityControl references unknown soc_report_id '{}'",
                ctrl.soc_report_id
            );
        }
    }

    /// An empty entity list produces a completely empty snapshot.
    #[test]
    fn test_empty_entities_returns_empty_snapshot() {
        let mut generator = ServiceOrgGenerator::new(42);
        let snapshot = generator.generate(&[], period_end());
        assert!(snapshot.service_organizations.is_empty());
        assert!(snapshot.soc_reports.is_empty());
        assert!(snapshot.user_entity_controls.is_empty());
    }
}