1use datasynth_core::models::audit::risk_assessment_cra::{
28 AuditAssertion, CombinedRiskAssessment, CraLevel,
29};
30use datasynth_core::models::audit::sampling_plan::{
31 KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
32};
33use datasynth_core::utils::seeded_rng;
34use rand::Rng;
35use rand_chacha::ChaCha8Rng;
36use rust_decimal::Decimal;
37use rust_decimal_macros::dec;
38use tracing::info;
39
40fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
50 use AuditAssertion::*;
51 if cra == CraLevel::Minimal {
52 return SamplingMethodology::HaphazardSelection;
53 }
54 match assertion {
55 Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
57 SamplingMethodology::MonetaryUnitSampling
58 }
59 PresentationAndDisclosure => SamplingMethodology::RandomSelection,
61 Occurrence | Completeness | Accuracy | Cutoff | Classification => {
63 SamplingMethodology::SystematicSelection
64 }
65 }
66}
67
68fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
70 match cra {
71 CraLevel::Minimal => 0,
72 CraLevel::Low => rng.random_range(10usize..=15),
73 CraLevel::Moderate => rng.random_range(20usize..=30),
74 CraLevel::High => rng.random_range(40usize..=60),
75 }
76}
77
78fn misstatement_rate(cra: CraLevel) -> f64 {
80 match cra {
81 CraLevel::Minimal => 0.02,
82 CraLevel::Low => 0.04,
83 CraLevel::Moderate => 0.08,
84 CraLevel::High => 0.15,
85 }
86}
87
/// Tunable parameters for [`SamplingPlanGenerator`].
#[derive(Debug, Clone)]
pub struct SamplingPlanGeneratorConfig {
    /// Fraction of the population item count selected as key items.
    ///
    /// The generator doubles this for `CraLevel::High` and clamps the
    /// resulting count to `1..=20`.
    pub key_item_fraction: f64,
    /// Inclusive lower bound of the randomly drawn population size.
    pub min_population_size: usize,
    /// Inclusive upper bound of the randomly drawn population size.
    pub max_population_size: usize,
    /// Base monetary value; 5% of it is used as the tolerable error when the
    /// caller does not supply one.
    pub base_population_value: Decimal,
}
105
106impl Default for SamplingPlanGeneratorConfig {
107 fn default() -> Self {
108 Self {
109 key_item_fraction: 0.05, min_population_size: 100,
111 max_population_size: 2_000,
112 base_population_value: dec!(5_000_000),
113 }
114 }
115}
116
/// Generates synthetic audit sampling plans and their sampled items from
/// combined risk assessments (CRAs).
pub struct SamplingPlanGenerator {
    /// Random source for every draw the generator makes; created from the
    /// caller's seed via `seeded_rng`.
    rng: ChaCha8Rng,
    /// Population-size bounds, key-item fraction and base value used during
    /// generation.
    config: SamplingPlanGeneratorConfig,
}
126
127impl SamplingPlanGenerator {
128 pub fn new(seed: u64) -> Self {
130 Self {
131 rng: seeded_rng(seed, 0x530), config: SamplingPlanGeneratorConfig::default(),
133 }
134 }
135
136 pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
138 Self {
139 rng: seeded_rng(seed, 0x530),
140 config,
141 }
142 }
143
144 pub fn generate_for_cras(
153 &mut self,
154 cras: &[CombinedRiskAssessment],
155 tolerable_error: Option<Decimal>,
156 ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
157 info!("Generating sampling plans for {} CRAs", cras.len());
158 let mut plans: Vec<SamplingPlan> = Vec::new();
159 let mut all_items: Vec<SampledItem> = Vec::new();
160
161 for cra in cras {
162 if cra.combined_risk < CraLevel::Moderate {
164 continue;
165 }
166
167 let te =
168 tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
169
170 let (plan, items) = self.generate_plan(cra, te);
171 all_items.extend(items);
172 plans.push(plan);
173 }
174
175 info!(
176 "Generated {} sampling plans with {} sampled items",
177 plans.len(),
178 all_items.len()
179 );
180 (plans, all_items)
181 }
182
183 fn generate_plan(
185 &mut self,
186 cra: &CombinedRiskAssessment,
187 tolerable_error: Decimal,
188 ) -> (SamplingPlan, Vec<SampledItem>) {
189 let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
190 let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
191
192 let pop_size = self
194 .rng
195 .random_range(self.config.min_population_size..=self.config.max_population_size);
196 let pop_value = self.synthetic_population_value(pop_size);
197
198 let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
200 let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
201 let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
202
203 let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
205 remaining_value / Decimal::from(rep_sample_size as i64)
206 } else {
207 Decimal::ZERO
208 };
209
210 let plan_id = format!(
211 "SP-{}-{}-{}",
212 cra.entity_code,
213 cra.account_area.replace(' ', "_").to_uppercase(),
214 format!("{:?}", cra.assertion).to_uppercase(),
215 );
216
217 let plan = SamplingPlan {
218 id: plan_id.clone(),
219 entity_code: cra.entity_code.clone(),
220 account_area: cra.account_area.clone(),
221 assertion: format!("{}", cra.assertion),
222 methodology,
223 population_size: pop_size,
224 population_value: pop_value,
225 key_items: key_items.clone(),
226 key_items_value,
227 remaining_population_value: remaining_value,
228 sample_size: rep_sample_size,
229 sampling_interval,
230 cra_level: cra.combined_risk.to_string(),
231 tolerable_error,
232 };
233
234 let mut sampled_items: Vec<SampledItem> = Vec::new();
236 let misstatement_p = misstatement_rate(cra.combined_risk);
237
238 for ki in &key_items {
240 let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
241 let misstatement_amount = if misstatement_found {
242 let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
243 .unwrap_or(dec!(0.05));
244 Some((ki.amount * pct).round_dp(2))
245 } else {
246 None
247 };
248
249 sampled_items.push(SampledItem {
250 item_id: ki.item_id.clone(),
251 sampling_plan_id: plan_id.clone(),
252 amount: ki.amount,
253 selection_type: SelectionType::KeyItem,
254 tested: true,
255 misstatement_found,
256 misstatement_amount,
257 });
258 }
259
260 if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
262 let avg_remaining_item_value =
263 remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
264
265 for i in 0..rep_sample_size {
266 let item_id = format!("{plan_id}-REP-{i:04}");
267 let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
269 .unwrap_or(Decimal::ONE);
270 let amount = (avg_remaining_item_value * jitter_pct)
271 .round_dp(2)
272 .max(dec!(1));
273
274 let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
275 let misstatement_amount = if misstatement_found {
276 let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
277 .unwrap_or(dec!(0.05));
278 Some((amount * pct).round_dp(2))
279 } else {
280 None
281 };
282
283 sampled_items.push(SampledItem {
284 item_id,
285 sampling_plan_id: plan_id.clone(),
286 amount,
287 selection_type: SelectionType::Representative,
288 tested: true,
289 misstatement_found,
290 misstatement_amount,
291 });
292 }
293 }
294
295 (plan, sampled_items)
296 }
297
298 fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
300 let avg_item = self.rng.random_range(500_i64..=50_000);
302 let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
303 ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
305 }
306
307 fn generate_key_items(
313 &mut self,
314 pop_size: usize,
315 pop_value: Decimal,
316 tolerable_error: Decimal,
317 cra: &CombinedRiskAssessment,
318 ) -> Vec<KeyItem> {
319 let fraction = match cra.combined_risk {
320 CraLevel::High => self.config.key_item_fraction * 2.0,
321 _ => self.config.key_item_fraction,
322 };
323 let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
324
325 let avg_key_value = pop_value
327 * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
328 / Decimal::from(n_key_items as i64);
329 let key_item_min = tolerable_error * dec!(1.01); let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); let mut items = Vec::with_capacity(n_key_items);
333 for i in 0..n_key_items {
334 let amount_f = self.rng.random_range(
335 key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
336 ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
337 );
338 let amount = Decimal::try_from(amount_f)
339 .unwrap_or(key_item_min)
340 .round_dp(2)
341 .max(key_item_min);
342
343 let reason = self.pick_key_item_reason(cra, i);
344
345 items.push(KeyItem {
346 item_id: format!(
347 "{}-{}-KEY-{i:03}",
348 cra.entity_code,
349 cra.account_area.replace(' ', "_").to_uppercase()
350 ),
351 amount,
352 reason,
353 });
354 }
355
356 let key_total: Decimal = items.iter().map(|k| k.amount).sum();
360 if key_total > pop_value {
361 let scale = (pop_value * dec!(0.8)) / key_total;
362 for item in &mut items {
363 item.amount = (item.amount * scale).round_dp(2);
364 }
365 }
366
367 items
368 }
369
370 fn pick_key_item_reason(
372 &mut self,
373 cra: &CombinedRiskAssessment,
374 index: usize,
375 ) -> KeyItemReason {
376 if index == 0 {
378 return KeyItemReason::AboveTolerableError;
379 }
380 if cra.significant_risk {
382 let roll: f64 = self.rng.random();
383 if roll < 0.40 {
384 return KeyItemReason::ManagementOverride;
385 }
386 if roll < 0.70 {
387 return KeyItemReason::HighRisk;
388 }
389 }
390 let roll: f64 = self.rng.random();
391 if roll < 0.60 {
392 KeyItemReason::AboveTolerableError
393 } else if roll < 0.80 {
394 KeyItemReason::UnusualNature
395 } else {
396 KeyItemReason::HighRisk
397 }
398 }
399}
400
#[cfg(test)]
// Kept from the original module so test helpers may use `unwrap` without
// tripping the crate-level lint.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
    use rust_decimal_macros::dec;

    /// Builds a CRA for entity `C001` with no significant risk and no risk factors.
    fn make_cra(
        account_area: &str,
        assertion: AuditAssertion,
        ir: RiskRating,
        cr: RiskRating,
    ) -> CombinedRiskAssessment {
        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
    }

    #[test]
    fn moderate_cra_generates_plan() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        assert_eq!(cra.combined_risk, CraLevel::Moderate);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(
            plans.len(),
            1,
            "Should generate exactly one plan for Moderate CRA"
        );
        let plan = &plans[0];
        assert!(!items.is_empty(), "Should generate sampled items");
        assert!(
            (20..=30).contains(&plan.sample_size),
            "Moderate CRA sample size 20–30"
        );
    }

    /// Low inherent and control risk combine to `CraLevel::Minimal`, which is
    /// below the `Moderate` threshold and must therefore be skipped.
    #[test]
    fn low_cra_skipped() {
        let cra = make_cra(
            "Cash",
            AuditAssertion::Existence,
            RiskRating::Low,
            RiskRating::Low,
        );
        assert_eq!(cra.combined_risk, CraLevel::Minimal);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert!(
            plans.is_empty(),
            "Minimal CRA should produce no sampling plan"
        );
        assert!(
            items.is_empty(),
            "Minimal CRA should produce no sampled items"
        );
    }

    #[test]
    fn high_cra_large_sample() {
        let cra = make_cra(
            "Revenue",
            AuditAssertion::Occurrence,
            RiskRating::High,
            RiskRating::High,
        );
        assert_eq!(cra.combined_risk, CraLevel::High);

        let mut gen = SamplingPlanGenerator::new(99);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        // Check both ends of the range the message promises.
        assert!(
            (40..=60).contains(&plan.sample_size),
            "High CRA sample size should be 40–60"
        );
    }

    #[test]
    fn key_items_all_above_tolerable_error() {
        let cra = make_cra(
            "Provisions",
            AuditAssertion::ValuationAndAllocation,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(7);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        for ki in &plan.key_items {
            assert!(
                ki.amount >= te,
                "Key item amount {} must be >= tolerable error {}",
                ki.amount,
                te
            );
        }
    }

    #[test]
    fn sampling_interval_formula() {
        let cra = make_cra(
            "Inventory",
            AuditAssertion::Existence,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(13);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        // The interval is only defined when a representative sample exists.
        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
            let expected_interval =
                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
            let diff = (plan.sampling_interval - expected_interval).abs();
            assert!(
                diff < dec!(0.01),
                "Interval {} ≠ remaining/sample_size {}",
                plan.sampling_interval,
                expected_interval
            );
        }
    }

    #[test]
    fn balance_assertion_uses_mus() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
    }

    #[test]
    fn transaction_assertion_uses_systematic() {
        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
    }

    #[test]
    fn all_sampled_items_have_plan_id() {
        let cras = vec![
            make_cra(
                "Revenue",
                AuditAssertion::Occurrence,
                RiskRating::High,
                RiskRating::Medium,
            ),
            make_cra(
                "Inventory",
                AuditAssertion::Existence,
                RiskRating::High,
                RiskRating::Low,
            ),
        ];

        let mut gen = SamplingPlanGenerator::new(55);
        let te = dec!(32_500);
        let (plans, items) = gen.generate_for_cras(&cras, Some(te));

        assert!(!plans.is_empty());
        assert!(!items.is_empty());
        // The property this test is named for: every item points at a plan
        // that was actually generated.
        assert!(
            items
                .iter()
                .all(|i| plans.iter().any(|p| p.id == i.sampling_plan_id)),
            "Every sampled item must reference a generated sampling plan"
        );
        assert!(
            items.iter().all(|i| i.tested),
            "All items should be marked tested"
        );
    }
}