Skip to main content

datasynth_generators/audit/
sampling_plan_generator.rs

1//! Audit sampling plan generator per ISA 530.
2//!
3//! For each Combined Risk Assessment (CRA) at Moderate or High level, this
4//! generator produces a complete `SamplingPlan` and the corresponding
5//! `SampledItem` records that document the actual sample drawn.
6//!
7//! # Sample-size logic (ISA 530 guidance)
8//!
9//! | CRA level | Representative items | Methodology |
10//! |-----------|---------------------|-------------|
11//! | Minimal   | 0 (analytical only) | — |
12//! | Low       | 10–15               | MUS (balance) / Systematic (transaction) |
13//! | Moderate  | 20–30               | MUS (balance) / Systematic (transaction) |
14//! | High      | 40–60               | MUS (balance) / Systematic (transaction) |
15//!
16//! Misstatement rates are correlated with CRA level:
17//! - Low: 2–5% of sampled items
18//! - Moderate: 5–10%
19//! - High: 10–20%
20//!
21//! # Key-item identification
22//!
//! Key items are always synthesised by this generator: no journal-entry (JE)
//! amounts are read here. Their count is a fraction of the synthetic population
//! size, and their amounts are drawn above the tolerable error.
26
27use datasynth_core::models::audit::risk_assessment_cra::{
28    AuditAssertion, CombinedRiskAssessment, CraLevel,
29};
30use datasynth_core::models::audit::sampling_plan::{
31    KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
32};
33use datasynth_core::utils::seeded_rng;
34use rand::Rng;
35use rand_chacha::ChaCha8Rng;
36use rust_decimal::Decimal;
37use rust_decimal_macros::dec;
38use tracing::info;
39
40// ---------------------------------------------------------------------------
41// Helpers
42// ---------------------------------------------------------------------------
43
44/// Choose the appropriate methodology for an assertion type.
45///
46/// Balance-testing assertions (Existence, Valuation) → MUS.
47/// Transaction-testing assertions (Occurrence, Completeness, Accuracy, Cutoff) → Systematic.
48/// Low-risk (no sampling plan generated) → HaphazardSelection.
49fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
50    use AuditAssertion::*;
51    if cra == CraLevel::Minimal {
52        return SamplingMethodology::HaphazardSelection;
53    }
54    match assertion {
55        // Balance assertions → MUS
56        Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
57            SamplingMethodology::MonetaryUnitSampling
58        }
59        // Presentation → Random
60        PresentationAndDisclosure => SamplingMethodology::RandomSelection,
61        // Transaction assertions → Systematic
62        Occurrence | Completeness | Accuracy | Cutoff | Classification => {
63            SamplingMethodology::SystematicSelection
64        }
65    }
66}
67
68/// Derive representative sample size from CRA level (with random jitter).
69fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
70    match cra {
71        CraLevel::Minimal => 0,
72        CraLevel::Low => rng.random_range(10usize..=15),
73        CraLevel::Moderate => rng.random_range(20usize..=30),
74        CraLevel::High => rng.random_range(40usize..=60),
75    }
76}
77
78/// Misstatement rate for a given CRA level (probability a sampled item has error).
79fn misstatement_rate(cra: CraLevel) -> f64 {
80    match cra {
81        CraLevel::Minimal => 0.02,
82        CraLevel::Low => 0.04,
83        CraLevel::Moderate => 0.08,
84        CraLevel::High => 0.15,
85    }
86}
87
88// ---------------------------------------------------------------------------
89// Configuration
90// ---------------------------------------------------------------------------
91
92/// Configuration for the sampling plan generator.
93#[derive(Debug, Clone)]
94pub struct SamplingPlanGeneratorConfig {
95    /// Fraction of the population that consists of key items (0.0–1.0).
96    /// Applied when no external JE data is supplied.
97    pub key_item_fraction: f64,
98    /// Minimum population size assumed when no JE data is available.
99    pub min_population_size: usize,
100    /// Maximum population size assumed when no JE data is available.
101    pub max_population_size: usize,
102    /// Base population value (monetary) when no JE data is available.
103    pub base_population_value: Decimal,
104}
105
106impl Default for SamplingPlanGeneratorConfig {
107    fn default() -> Self {
108        Self {
109            key_item_fraction: 0.05, // 5% of items selected as key items
110            min_population_size: 100,
111            max_population_size: 2_000,
112            base_population_value: dec!(5_000_000),
113        }
114    }
115}
116
117// ---------------------------------------------------------------------------
118// Generator
119// ---------------------------------------------------------------------------
120
121/// Generator for ISA 530 sampling plans and sampled items.
122pub struct SamplingPlanGenerator {
123    rng: ChaCha8Rng,
124    config: SamplingPlanGeneratorConfig,
125}
126
127impl SamplingPlanGenerator {
128    /// Create a new generator with default configuration.
129    pub fn new(seed: u64) -> Self {
130        Self {
131            rng: seeded_rng(seed, 0x530), // discriminator for ISA 530
132            config: SamplingPlanGeneratorConfig::default(),
133        }
134    }
135
136    /// Create a new generator with custom configuration.
137    pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
138        Self {
139            rng: seeded_rng(seed, 0x530),
140            config,
141        }
142    }
143
144    /// Generate sampling plans and sampled items for all CRAs at Moderate or higher.
145    ///
146    /// # Arguments
147    /// * `cras` — All combined risk assessments for one or more entities.
148    /// * `tolerable_error` — Performance materiality / tolerable error for the entity.
149    ///   When `None`, a synthetic TE of 5% of the base population value is used.
150    ///
151    /// Returns `(plans, sampled_items)` — the plans and the flat list of all sampled items.
152    pub fn generate_for_cras(
153        &mut self,
154        cras: &[CombinedRiskAssessment],
155        tolerable_error: Option<Decimal>,
156    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
157        info!("Generating sampling plans for {} CRAs", cras.len());
158        let mut plans: Vec<SamplingPlan> = Vec::new();
159        let mut all_items: Vec<SampledItem> = Vec::new();
160
161        for cra in cras {
162            // Only generate plans for Moderate and High CRA levels
163            if cra.combined_risk < CraLevel::Moderate {
164                continue;
165            }
166
167            let te =
168                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
169
170            let (plan, items) = self.generate_plan(cra, te);
171            all_items.extend(items);
172            plans.push(plan);
173        }
174
175        info!(
176            "Generated {} sampling plans with {} sampled items",
177            plans.len(),
178            all_items.len()
179        );
180        (plans, all_items)
181    }
182
183    /// Generate a single sampling plan for one CRA.
184    fn generate_plan(
185        &mut self,
186        cra: &CombinedRiskAssessment,
187        tolerable_error: Decimal,
188    ) -> (SamplingPlan, Vec<SampledItem>) {
189        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
190        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
191
192        // Synthesise population size and value
193        let pop_size = self
194            .rng
195            .random_range(self.config.min_population_size..=self.config.max_population_size);
196        let pop_value = self.synthetic_population_value(pop_size);
197
198        // Generate key items
199        let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
200        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
201        let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
202
203        // Compute sampling interval
204        let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
205            remaining_value / Decimal::from(rep_sample_size as i64)
206        } else {
207            Decimal::ZERO
208        };
209
210        let plan_id = format!(
211            "SP-{}-{}-{}",
212            cra.entity_code,
213            cra.account_area.replace(' ', "_").to_uppercase(),
214            format!("{:?}", cra.assertion).to_uppercase(),
215        );
216
217        let plan = SamplingPlan {
218            id: plan_id.clone(),
219            entity_code: cra.entity_code.clone(),
220            account_area: cra.account_area.clone(),
221            assertion: format!("{}", cra.assertion),
222            methodology,
223            population_size: pop_size,
224            population_value: pop_value,
225            key_items: key_items.clone(),
226            key_items_value,
227            remaining_population_value: remaining_value,
228            sample_size: rep_sample_size,
229            sampling_interval,
230            cra_level: cra.combined_risk.to_string(),
231            tolerable_error,
232        };
233
234        // Build SampledItems: key items (always tested) + representative items
235        let mut sampled_items: Vec<SampledItem> = Vec::new();
236        let misstatement_p = misstatement_rate(cra.combined_risk);
237
238        // Key items — always tested
239        for ki in &key_items {
240            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
241            let misstatement_amount = if misstatement_found {
242                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
243                    .unwrap_or(dec!(0.05));
244                Some((ki.amount * pct).round_dp(2))
245            } else {
246                None
247            };
248
249            sampled_items.push(SampledItem {
250                item_id: ki.item_id.clone(),
251                sampling_plan_id: plan_id.clone(),
252                amount: ki.amount,
253                selection_type: SelectionType::KeyItem,
254                tested: true,
255                misstatement_found,
256                misstatement_amount,
257            });
258        }
259
260        // Representative items
261        if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
262            let avg_remaining_item_value =
263                remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
264
265            for i in 0..rep_sample_size {
266                let item_id = format!("{plan_id}-REP-{i:04}");
267                // Jitter the amount around the average remaining item value
268                let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
269                    .unwrap_or(Decimal::ONE);
270                let amount = (avg_remaining_item_value * jitter_pct)
271                    .round_dp(2)
272                    .max(dec!(1));
273
274                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
275                let misstatement_amount = if misstatement_found {
276                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
277                        .unwrap_or(dec!(0.05));
278                    Some((amount * pct).round_dp(2))
279                } else {
280                    None
281                };
282
283                sampled_items.push(SampledItem {
284                    item_id,
285                    sampling_plan_id: plan_id.clone(),
286                    amount,
287                    selection_type: SelectionType::Representative,
288                    tested: true,
289                    misstatement_found,
290                    misstatement_amount,
291                });
292            }
293        }
294
295        (plan, sampled_items)
296    }
297
298    /// Synthesise a realistic population value from the population size.
299    fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
300        // Average item value varies from $500 (routine small transactions) to $50,000 (large balances)
301        let avg_item = self.rng.random_range(500_i64..=50_000);
302        let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
303        // Round to nearest 1000
304        ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
305    }
306
307    /// Generate key items for the population.
308    ///
309    /// Key items are synthesised as items with amounts above the tolerable error.
310    /// The number of key items is driven by the key_item_fraction config and
311    /// whether the CRA is High (more key items for high-risk areas).
312    fn generate_key_items(
313        &mut self,
314        pop_size: usize,
315        pop_value: Decimal,
316        tolerable_error: Decimal,
317        cra: &CombinedRiskAssessment,
318    ) -> Vec<KeyItem> {
319        let fraction = match cra.combined_risk {
320            CraLevel::High => self.config.key_item_fraction * 2.0,
321            _ => self.config.key_item_fraction,
322        };
323        let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
324
325        // Distribute the key item value: each key item is > TE
326        let avg_key_value = pop_value
327            * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
328            / Decimal::from(n_key_items as i64);
329        let key_item_min = tolerable_error * dec!(1.01); // just above TE
330        let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); // ensure max > min
331
332        let mut items = Vec::with_capacity(n_key_items);
333        for i in 0..n_key_items {
334            let amount_f = self.rng.random_range(
335                key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
336                    ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
337            );
338            let amount = Decimal::try_from(amount_f)
339                .unwrap_or(key_item_min)
340                .round_dp(2)
341                .max(key_item_min);
342
343            let reason = self.pick_key_item_reason(cra, i);
344
345            items.push(KeyItem {
346                item_id: format!(
347                    "{}-{}-KEY-{i:03}",
348                    cra.entity_code,
349                    cra.account_area.replace(' ', "_").to_uppercase()
350                ),
351                amount,
352                reason,
353            });
354        }
355
356        // Guard: key items must not exceed the population value (they are a subset of it).
357        // If they do, scale all amounts down proportionally so their total is 80% of the
358        // population value, leaving room for representative items.
359        let key_total: Decimal = items.iter().map(|k| k.amount).sum();
360        if key_total > pop_value {
361            let scale = (pop_value * dec!(0.8)) / key_total;
362            for item in &mut items {
363                item.amount = (item.amount * scale).round_dp(2);
364            }
365        }
366
367        items
368    }
369
370    /// Choose a key item reason based on the CRA characteristics.
371    fn pick_key_item_reason(
372        &mut self,
373        cra: &CombinedRiskAssessment,
374        index: usize,
375    ) -> KeyItemReason {
376        // First item is always AboveTolerableError (primary reason)
377        if index == 0 {
378            return KeyItemReason::AboveTolerableError;
379        }
380        // Significant risks generate management override / high risk flags
381        if cra.significant_risk {
382            let roll: f64 = self.rng.random();
383            if roll < 0.40 {
384                return KeyItemReason::ManagementOverride;
385            }
386            if roll < 0.70 {
387                return KeyItemReason::HighRisk;
388            }
389        }
390        let roll: f64 = self.rng.random();
391        if roll < 0.60 {
392            KeyItemReason::AboveTolerableError
393        } else if roll < 0.80 {
394            KeyItemReason::UnusualNature
395        } else {
396            KeyItemReason::HighRisk
397        }
398    }
399}
400
401// ---------------------------------------------------------------------------
402// Tests
403// ---------------------------------------------------------------------------
404
#[cfg(test)]
// Tests index into freshly generated vectors and unwrap freely; a panic is a test failure.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
    use rust_decimal_macros::dec;

    /// Build a CRA for entity `C001` that is not flagged as a significant risk.
    fn make_cra(
        account_area: &str,
        assertion: AuditAssertion,
        ir: RiskRating,
        cr: RiskRating,
    ) -> CombinedRiskAssessment {
        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
    }

    #[test]
    fn moderate_cra_generates_plan() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        assert_eq!(cra.combined_risk, CraLevel::Moderate);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(
            plans.len(),
            1,
            "Should generate exactly one plan for Moderate CRA"
        );
        let plan = &plans[0];
        assert!(!items.is_empty(), "Should generate sampled items");
        assert!(
            (20..=30).contains(&plan.sample_size),
            "Moderate CRA sample size 20–30"
        );
    }

    /// Low inherent and control risk combine to a Minimal CRA, which is below
    /// the Moderate threshold and must therefore be skipped entirely.
    #[test]
    fn low_cra_skipped() {
        let cra = make_cra(
            "Cash",
            AuditAssertion::Existence,
            RiskRating::Low,
            RiskRating::Low,
        );
        assert_eq!(cra.combined_risk, CraLevel::Minimal);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert!(
            plans.is_empty(),
            "Minimal CRA should produce no sampling plan"
        );
        assert!(
            items.is_empty(),
            "Minimal CRA should produce no sampled items"
        );
    }

    #[test]
    fn high_cra_large_sample() {
        let cra = make_cra(
            "Revenue",
            AuditAssertion::Occurrence,
            RiskRating::High,
            RiskRating::High,
        );
        assert_eq!(cra.combined_risk, CraLevel::High);

        let mut gen = SamplingPlanGenerator::new(99);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        // Check both ends of the documented range, not just the lower bound.
        assert!(
            (40..=60).contains(&plan.sample_size),
            "High CRA sample size should be 40–60"
        );
    }

    #[test]
    fn key_items_all_above_tolerable_error() {
        let cra = make_cra(
            "Provisions",
            AuditAssertion::ValuationAndAllocation,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(7);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        for ki in &plan.key_items {
            assert!(
                ki.amount >= te,
                "Key item amount {} must be >= tolerable error {}",
                ki.amount,
                te
            );
        }
    }

    #[test]
    fn sampling_interval_formula() {
        let cra = make_cra(
            "Inventory",
            AuditAssertion::Existence,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(13);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
            let expected_interval =
                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
            // Allow 1 cent rounding tolerance
            let diff = (plan.sampling_interval - expected_interval).abs();
            assert!(
                diff < dec!(0.01),
                "Interval {} ≠ remaining/sample_size {}",
                plan.sampling_interval,
                expected_interval
            );
        }
    }

    #[test]
    fn balance_assertion_uses_mus() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
    }

    #[test]
    fn transaction_assertion_uses_systematic() {
        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
    }

    #[test]
    fn all_sampled_items_have_plan_id() {
        let cras = vec![
            make_cra(
                "Revenue",
                AuditAssertion::Occurrence,
                RiskRating::High,
                RiskRating::Medium,
            ),
            make_cra(
                "Inventory",
                AuditAssertion::Existence,
                RiskRating::High,
                RiskRating::Low,
            ),
        ];

        let mut gen = SamplingPlanGenerator::new(55);
        let te = dec!(32_500);
        let (plans, items) = gen.generate_for_cras(&cras, Some(te));

        assert!(!plans.is_empty());
        assert!(!items.is_empty());
        // Every sampled item must point back at one of the generated plans.
        for item in &items {
            assert!(
                plans.iter().any(|p| p.id == item.sampling_plan_id),
                "Sampled item {} references unknown plan {}",
                item.item_id,
                item.sampling_plan_id
            );
        }
        // Verify every item is marked tested
        assert!(
            items.iter().all(|i| i.tested),
            "All items should be marked tested"
        );
    }
}