1use datasynth_core::models::audit::risk_assessment_cra::{
28 AuditAssertion, CombinedRiskAssessment, CraLevel,
29};
30use datasynth_core::models::audit::sampling_plan::{
31 KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
32};
33use datasynth_core::utils::seeded_rng;
34use rand::Rng;
35use rand_chacha::ChaCha8Rng;
36use rust_decimal::Decimal;
37use rust_decimal_macros::dec;
38
39fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
49 use AuditAssertion::*;
50 if cra == CraLevel::Minimal {
51 return SamplingMethodology::HaphazardSelection;
52 }
53 match assertion {
54 Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
56 SamplingMethodology::MonetaryUnitSampling
57 }
58 PresentationAndDisclosure => SamplingMethodology::RandomSelection,
60 Occurrence | Completeness | Accuracy | Cutoff | Classification => {
62 SamplingMethodology::SystematicSelection
63 }
64 }
65}
66
67fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
69 match cra {
70 CraLevel::Minimal => 0,
71 CraLevel::Low => rng.random_range(10usize..=15),
72 CraLevel::Moderate => rng.random_range(20usize..=30),
73 CraLevel::High => rng.random_range(40usize..=60),
74 }
75}
76
77fn misstatement_rate(cra: CraLevel) -> f64 {
79 match cra {
80 CraLevel::Minimal => 0.02,
81 CraLevel::Low => 0.04,
82 CraLevel::Moderate => 0.08,
83 CraLevel::High => 0.15,
84 }
85}
86
87#[derive(Debug, Clone)]
93pub struct SamplingPlanGeneratorConfig {
94 pub key_item_fraction: f64,
97 pub min_population_size: usize,
99 pub max_population_size: usize,
101 pub base_population_value: Decimal,
103}
104
105impl Default for SamplingPlanGeneratorConfig {
106 fn default() -> Self {
107 Self {
108 key_item_fraction: 0.05, min_population_size: 100,
110 max_population_size: 2_000,
111 base_population_value: dec!(5_000_000),
112 }
113 }
114}
115
116pub struct SamplingPlanGenerator {
122 rng: ChaCha8Rng,
123 config: SamplingPlanGeneratorConfig,
124}
125
126impl SamplingPlanGenerator {
127 pub fn new(seed: u64) -> Self {
129 Self {
130 rng: seeded_rng(seed, 0x530), config: SamplingPlanGeneratorConfig::default(),
132 }
133 }
134
135 pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
137 Self {
138 rng: seeded_rng(seed, 0x530),
139 config,
140 }
141 }
142
143 pub fn generate_for_cras(
152 &mut self,
153 cras: &[CombinedRiskAssessment],
154 tolerable_error: Option<Decimal>,
155 ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
156 let mut plans: Vec<SamplingPlan> = Vec::new();
157 let mut all_items: Vec<SampledItem> = Vec::new();
158
159 for cra in cras {
160 if cra.combined_risk < CraLevel::Moderate {
162 continue;
163 }
164
165 let te =
166 tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
167
168 let (plan, items) = self.generate_plan(cra, te);
169 all_items.extend(items);
170 plans.push(plan);
171 }
172
173 (plans, all_items)
174 }
175
176 fn generate_plan(
178 &mut self,
179 cra: &CombinedRiskAssessment,
180 tolerable_error: Decimal,
181 ) -> (SamplingPlan, Vec<SampledItem>) {
182 let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
183 let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
184
185 let pop_size = self
187 .rng
188 .random_range(self.config.min_population_size..=self.config.max_population_size);
189 let pop_value = self.synthetic_population_value(pop_size);
190
191 let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
193 let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
194 let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
195
196 let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
198 remaining_value / Decimal::from(rep_sample_size as i64)
199 } else {
200 Decimal::ZERO
201 };
202
203 let plan_id = format!(
204 "SP-{}-{}-{}",
205 cra.entity_code,
206 cra.account_area.replace(' ', "_").to_uppercase(),
207 format!("{:?}", cra.assertion).to_uppercase(),
208 );
209
210 let plan = SamplingPlan {
211 id: plan_id.clone(),
212 entity_code: cra.entity_code.clone(),
213 account_area: cra.account_area.clone(),
214 assertion: format!("{}", cra.assertion),
215 methodology,
216 population_size: pop_size,
217 population_value: pop_value,
218 key_items: key_items.clone(),
219 key_items_value,
220 remaining_population_value: remaining_value,
221 sample_size: rep_sample_size,
222 sampling_interval,
223 cra_level: cra.combined_risk.to_string(),
224 tolerable_error,
225 };
226
227 let mut sampled_items: Vec<SampledItem> = Vec::new();
229 let misstatement_p = misstatement_rate(cra.combined_risk);
230
231 for ki in &key_items {
233 let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
234 let misstatement_amount = if misstatement_found {
235 let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
236 .unwrap_or(dec!(0.05));
237 Some((ki.amount * pct).round_dp(2))
238 } else {
239 None
240 };
241
242 sampled_items.push(SampledItem {
243 item_id: ki.item_id.clone(),
244 amount: ki.amount,
245 selection_type: SelectionType::KeyItem,
246 tested: true,
247 misstatement_found,
248 misstatement_amount,
249 });
250 }
251
252 if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
254 let avg_remaining_item_value =
255 remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
256
257 for i in 0..rep_sample_size {
258 let item_id = format!("{plan_id}-REP-{i:04}");
259 let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
261 .unwrap_or(Decimal::ONE);
262 let amount = (avg_remaining_item_value * jitter_pct)
263 .round_dp(2)
264 .max(dec!(1));
265
266 let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
267 let misstatement_amount = if misstatement_found {
268 let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
269 .unwrap_or(dec!(0.05));
270 Some((amount * pct).round_dp(2))
271 } else {
272 None
273 };
274
275 sampled_items.push(SampledItem {
276 item_id,
277 amount,
278 selection_type: SelectionType::Representative,
279 tested: true,
280 misstatement_found,
281 misstatement_amount,
282 });
283 }
284 }
285
286 (plan, sampled_items)
287 }
288
289 fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
291 let avg_item = self.rng.random_range(500_i64..=50_000);
293 let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
294 ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
296 }
297
298 fn generate_key_items(
304 &mut self,
305 pop_size: usize,
306 pop_value: Decimal,
307 tolerable_error: Decimal,
308 cra: &CombinedRiskAssessment,
309 ) -> Vec<KeyItem> {
310 let fraction = match cra.combined_risk {
311 CraLevel::High => self.config.key_item_fraction * 2.0,
312 _ => self.config.key_item_fraction,
313 };
314 let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
315
316 let avg_key_value = pop_value
318 * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
319 / Decimal::from(n_key_items as i64);
320 let key_item_min = tolerable_error * dec!(1.01); let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); let mut items = Vec::with_capacity(n_key_items);
324 for i in 0..n_key_items {
325 let amount_f = self.rng.random_range(
326 key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
327 ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
328 );
329 let amount = Decimal::try_from(amount_f)
330 .unwrap_or(key_item_min)
331 .round_dp(2)
332 .max(key_item_min);
333
334 let reason = self.pick_key_item_reason(cra, i);
335
336 items.push(KeyItem {
337 item_id: format!(
338 "{}-{}-KEY-{i:03}",
339 cra.entity_code,
340 cra.account_area.replace(' ', "_").to_uppercase()
341 ),
342 amount,
343 reason,
344 });
345 }
346
347 let key_total: Decimal = items.iter().map(|k| k.amount).sum();
351 if key_total > pop_value {
352 let scale = (pop_value * dec!(0.8)) / key_total;
353 for item in &mut items {
354 item.amount = (item.amount * scale).round_dp(2);
355 }
356 }
357
358 items
359 }
360
361 fn pick_key_item_reason(
363 &mut self,
364 cra: &CombinedRiskAssessment,
365 index: usize,
366 ) -> KeyItemReason {
367 if index == 0 {
369 return KeyItemReason::AboveTolerableError;
370 }
371 if cra.significant_risk {
373 let roll: f64 = self.rng.random();
374 if roll < 0.40 {
375 return KeyItemReason::ManagementOverride;
376 }
377 if roll < 0.70 {
378 return KeyItemReason::HighRisk;
379 }
380 }
381 let roll: f64 = self.rng.random();
382 if roll < 0.60 {
383 KeyItemReason::AboveTolerableError
384 } else if roll < 0.80 {
385 KeyItemReason::UnusualNature
386 } else {
387 KeyItemReason::HighRisk
388 }
389 }
390}
391
/// Unit tests for the sampling-plan generator.
#[cfg(test)]
#[allow(clippy::unwrap_used)] // tests are allowed to unwrap
mod tests {
    use super::*;
    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
    use rust_decimal_macros::dec;

    /// Builds a CRA for entity `C001` without significant risk or risk factors.
    fn make_cra(
        account_area: &str,
        assertion: AuditAssertion,
        ir: RiskRating,
        cr: RiskRating,
    ) -> CombinedRiskAssessment {
        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
    }

    #[test]
    fn moderate_cra_generates_plan() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        assert_eq!(cra.combined_risk, CraLevel::Moderate);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(
            plans.len(),
            1,
            "Should generate exactly one plan for Moderate CRA"
        );
        let plan = &plans[0];
        assert!(!items.is_empty(), "Should generate sampled items");
        assert!(
            (20..=30).contains(&plan.sample_size),
            "Moderate CRA sample size 20–30"
        );
    }

    #[test]
    fn low_cra_skipped() {
        let cra = make_cra(
            "Cash",
            AuditAssertion::Existence,
            RiskRating::Low,
            RiskRating::Low,
        );
        assert_eq!(cra.combined_risk, CraLevel::Minimal);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, _items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert!(
            plans.is_empty(),
            "Minimal CRA should produce no sampling plan"
        );
    }

    #[test]
    fn high_cra_large_sample() {
        let cra = make_cra(
            "Revenue",
            AuditAssertion::Occurrence,
            RiskRating::High,
            RiskRating::High,
        );
        assert_eq!(cra.combined_risk, CraLevel::High);

        let mut gen = SamplingPlanGenerator::new(99);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        // Check both bounds, as the message promises.
        assert!(
            (40..=60).contains(&plan.sample_size),
            "High CRA sample size should be 40–60"
        );
    }

    #[test]
    fn key_items_all_above_tolerable_error() {
        let cra = make_cra(
            "Provisions",
            AuditAssertion::ValuationAndAllocation,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(7);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        for ki in &plan.key_items {
            assert!(
                ki.amount >= te,
                "Key item amount {} must be >= tolerable error {}",
                ki.amount,
                te
            );
        }
    }

    #[test]
    fn sampling_interval_formula() {
        let cra = make_cra(
            "Inventory",
            AuditAssertion::Existence,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(13);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        // The interval is only defined when a representative sample exists.
        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
            let expected_interval =
                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
            let diff = (plan.sampling_interval - expected_interval).abs();
            assert!(
                diff < dec!(0.01),
                "Interval {} ≠ remaining/sample_size {}",
                plan.sampling_interval,
                expected_interval
            );
        }
    }

    #[test]
    fn balance_assertion_uses_mus() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
    }

    #[test]
    fn transaction_assertion_uses_systematic() {
        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
    }

    #[test]
    fn all_sampled_items_have_plan_id() {
        let cras = vec![
            make_cra(
                "Revenue",
                AuditAssertion::Occurrence,
                RiskRating::High,
                RiskRating::Medium,
            ),
            make_cra(
                "Inventory",
                AuditAssertion::Existence,
                RiskRating::High,
                RiskRating::Low,
            ),
        ];

        let mut gen = SamplingPlanGenerator::new(55);
        let te = dec!(32_500);
        let (plans, items) = gen.generate_for_cras(&cras, Some(te));

        assert!(!plans.is_empty());
        assert!(!items.is_empty());
        assert!(
            items.iter().all(|i| i.tested),
            "All items should be marked tested"
        );
        // Representative item ids are built from the owning plan's id, so
        // each one must be traceable back to a generated plan.
        assert!(
            items
                .iter()
                .filter(|i| matches!(i.selection_type, SelectionType::Representative))
                .all(|i| plans.iter().any(|p| i.item_id.starts_with(p.id.as_str()))),
            "Representative items should carry their plan id as a prefix"
        );
    }
}