Skip to main content

datasynth_generators/audit/
sampling_plan_generator.rs

1//! Audit sampling plan generator per ISA 530.
2//!
3//! For each Combined Risk Assessment (CRA) at Moderate or High level, this
4//! generator produces a complete `SamplingPlan` and the corresponding
5//! `SampledItem` records that document the actual sample drawn.
6//!
7//! # Sample-size logic (ISA 530 guidance)
8//!
9//! | CRA level | Representative items | Methodology |
10//! |-----------|---------------------|-------------|
11//! | Minimal   | 0 (analytical only) | — |
12//! | Low       | 10–15               | MUS (balance) / Systematic (transaction) |
13//! | Moderate  | 20–30               | MUS (balance) / Systematic (transaction) |
14//! | High      | 40–60               | MUS (balance) / Systematic (transaction) |
15//!
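//! Misstatement rates are correlated with CRA level. Each sampled item is
//! flagged as misstated with a fixed probability taken from these bands:
//! - Low: 2–5% of sampled items (4% used)
//! - Moderate: 5–10% (8% used)
//! - High: 10–20% (15% used)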
20//!
21//! # Key-item identification
22//!
//! Key items are items whose amounts exceed the tolerable error, as would be
//! identified from JE amounts > tolerable error. No JE data is passed into
//! this module, so key items are always synthetic: their count is derived
//! from a fraction of the population size.
26
27use datasynth_core::models::audit::risk_assessment_cra::{
28    AuditAssertion, CombinedRiskAssessment, CraLevel,
29};
30use datasynth_core::models::audit::sampling_plan::{
31    KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
32};
33use datasynth_core::utils::seeded_rng;
34use rand::Rng;
35use rand_chacha::ChaCha8Rng;
36use rust_decimal::Decimal;
37use rust_decimal_macros::dec;
38
39// ---------------------------------------------------------------------------
40// Helpers
41// ---------------------------------------------------------------------------
42
43/// Choose the appropriate methodology for an assertion type.
44///
45/// Balance-testing assertions (Existence, Valuation) → MUS.
46/// Transaction-testing assertions (Occurrence, Completeness, Accuracy, Cutoff) → Systematic.
47/// Low-risk (no sampling plan generated) → HaphazardSelection.
48fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
49    use AuditAssertion::*;
50    if cra == CraLevel::Minimal {
51        return SamplingMethodology::HaphazardSelection;
52    }
53    match assertion {
54        // Balance assertions → MUS
55        Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
56            SamplingMethodology::MonetaryUnitSampling
57        }
58        // Presentation → Random
59        PresentationAndDisclosure => SamplingMethodology::RandomSelection,
60        // Transaction assertions → Systematic
61        Occurrence | Completeness | Accuracy | Cutoff | Classification => {
62            SamplingMethodology::SystematicSelection
63        }
64    }
65}
66
67/// Derive representative sample size from CRA level (with random jitter).
68fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
69    match cra {
70        CraLevel::Minimal => 0,
71        CraLevel::Low => rng.random_range(10usize..=15),
72        CraLevel::Moderate => rng.random_range(20usize..=30),
73        CraLevel::High => rng.random_range(40usize..=60),
74    }
75}
76
77/// Misstatement rate for a given CRA level (probability a sampled item has error).
78fn misstatement_rate(cra: CraLevel) -> f64 {
79    match cra {
80        CraLevel::Minimal => 0.02,
81        CraLevel::Low => 0.04,
82        CraLevel::Moderate => 0.08,
83        CraLevel::High => 0.15,
84    }
85}
86
87// ---------------------------------------------------------------------------
88// Configuration
89// ---------------------------------------------------------------------------
90
91/// Configuration for the sampling plan generator.
92#[derive(Debug, Clone)]
93pub struct SamplingPlanGeneratorConfig {
94    /// Fraction of the population that consists of key items (0.0–1.0).
95    /// Applied when no external JE data is supplied.
96    pub key_item_fraction: f64,
97    /// Minimum population size assumed when no JE data is available.
98    pub min_population_size: usize,
99    /// Maximum population size assumed when no JE data is available.
100    pub max_population_size: usize,
101    /// Base population value (monetary) when no JE data is available.
102    pub base_population_value: Decimal,
103}
104
105impl Default for SamplingPlanGeneratorConfig {
106    fn default() -> Self {
107        Self {
108            key_item_fraction: 0.05, // 5% of items selected as key items
109            min_population_size: 100,
110            max_population_size: 2_000,
111            base_population_value: dec!(5_000_000),
112        }
113    }
114}
115
116// ---------------------------------------------------------------------------
117// Generator
118// ---------------------------------------------------------------------------
119
120/// Generator for ISA 530 sampling plans and sampled items.
121pub struct SamplingPlanGenerator {
122    rng: ChaCha8Rng,
123    config: SamplingPlanGeneratorConfig,
124}
125
126impl SamplingPlanGenerator {
127    /// Create a new generator with default configuration.
128    pub fn new(seed: u64) -> Self {
129        Self {
130            rng: seeded_rng(seed, 0x530), // discriminator for ISA 530
131            config: SamplingPlanGeneratorConfig::default(),
132        }
133    }
134
135    /// Create a new generator with custom configuration.
136    pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
137        Self {
138            rng: seeded_rng(seed, 0x530),
139            config,
140        }
141    }
142
143    /// Generate sampling plans and sampled items for all CRAs at Moderate or higher.
144    ///
145    /// # Arguments
146    /// * `cras` — All combined risk assessments for one or more entities.
147    /// * `tolerable_error` — Performance materiality / tolerable error for the entity.
148    ///   When `None`, a synthetic TE of 5% of the base population value is used.
149    ///
150    /// Returns `(plans, sampled_items)` — the plans and the flat list of all sampled items.
151    pub fn generate_for_cras(
152        &mut self,
153        cras: &[CombinedRiskAssessment],
154        tolerable_error: Option<Decimal>,
155    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
156        let mut plans: Vec<SamplingPlan> = Vec::new();
157        let mut all_items: Vec<SampledItem> = Vec::new();
158
159        for cra in cras {
160            // Only generate plans for Moderate and High CRA levels
161            if cra.combined_risk < CraLevel::Moderate {
162                continue;
163            }
164
165            let te =
166                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
167
168            let (plan, items) = self.generate_plan(cra, te);
169            all_items.extend(items);
170            plans.push(plan);
171        }
172
173        (plans, all_items)
174    }
175
176    /// Generate a single sampling plan for one CRA.
177    fn generate_plan(
178        &mut self,
179        cra: &CombinedRiskAssessment,
180        tolerable_error: Decimal,
181    ) -> (SamplingPlan, Vec<SampledItem>) {
182        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
183        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
184
185        // Synthesise population size and value
186        let pop_size = self
187            .rng
188            .random_range(self.config.min_population_size..=self.config.max_population_size);
189        let pop_value = self.synthetic_population_value(pop_size);
190
191        // Generate key items
192        let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
193        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
194        let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
195
196        // Compute sampling interval
197        let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
198            remaining_value / Decimal::from(rep_sample_size as i64)
199        } else {
200            Decimal::ZERO
201        };
202
203        let plan_id = format!(
204            "SP-{}-{}-{}",
205            cra.entity_code,
206            cra.account_area.replace(' ', "_").to_uppercase(),
207            format!("{:?}", cra.assertion).to_uppercase(),
208        );
209
210        let plan = SamplingPlan {
211            id: plan_id.clone(),
212            entity_code: cra.entity_code.clone(),
213            account_area: cra.account_area.clone(),
214            assertion: format!("{}", cra.assertion),
215            methodology,
216            population_size: pop_size,
217            population_value: pop_value,
218            key_items: key_items.clone(),
219            key_items_value,
220            remaining_population_value: remaining_value,
221            sample_size: rep_sample_size,
222            sampling_interval,
223            cra_level: cra.combined_risk.to_string(),
224            tolerable_error,
225        };
226
227        // Build SampledItems: key items (always tested) + representative items
228        let mut sampled_items: Vec<SampledItem> = Vec::new();
229        let misstatement_p = misstatement_rate(cra.combined_risk);
230
231        // Key items — always tested
232        for ki in &key_items {
233            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
234            let misstatement_amount = if misstatement_found {
235                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
236                    .unwrap_or(dec!(0.05));
237                Some((ki.amount * pct).round_dp(2))
238            } else {
239                None
240            };
241
242            sampled_items.push(SampledItem {
243                item_id: ki.item_id.clone(),
244                amount: ki.amount,
245                selection_type: SelectionType::KeyItem,
246                tested: true,
247                misstatement_found,
248                misstatement_amount,
249            });
250        }
251
252        // Representative items
253        if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
254            let avg_remaining_item_value =
255                remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
256
257            for i in 0..rep_sample_size {
258                let item_id = format!("{plan_id}-REP-{i:04}");
259                // Jitter the amount around the average remaining item value
260                let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
261                    .unwrap_or(Decimal::ONE);
262                let amount = (avg_remaining_item_value * jitter_pct)
263                    .round_dp(2)
264                    .max(dec!(1));
265
266                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
267                let misstatement_amount = if misstatement_found {
268                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
269                        .unwrap_or(dec!(0.05));
270                    Some((amount * pct).round_dp(2))
271                } else {
272                    None
273                };
274
275                sampled_items.push(SampledItem {
276                    item_id,
277                    amount,
278                    selection_type: SelectionType::Representative,
279                    tested: true,
280                    misstatement_found,
281                    misstatement_amount,
282                });
283            }
284        }
285
286        (plan, sampled_items)
287    }
288
289    /// Synthesise a realistic population value from the population size.
290    fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
291        // Average item value varies from $500 (routine small transactions) to $50,000 (large balances)
292        let avg_item = self.rng.random_range(500_i64..=50_000);
293        let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
294        // Round to nearest 1000
295        ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
296    }
297
298    /// Generate key items for the population.
299    ///
300    /// Key items are synthesised as items with amounts above the tolerable error.
301    /// The number of key items is driven by the key_item_fraction config and
302    /// whether the CRA is High (more key items for high-risk areas).
303    fn generate_key_items(
304        &mut self,
305        pop_size: usize,
306        pop_value: Decimal,
307        tolerable_error: Decimal,
308        cra: &CombinedRiskAssessment,
309    ) -> Vec<KeyItem> {
310        let fraction = match cra.combined_risk {
311            CraLevel::High => self.config.key_item_fraction * 2.0,
312            _ => self.config.key_item_fraction,
313        };
314        let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
315
316        // Distribute the key item value: each key item is > TE
317        let avg_key_value = pop_value
318            * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
319            / Decimal::from(n_key_items as i64);
320        let key_item_min = tolerable_error * dec!(1.01); // just above TE
321        let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); // ensure max > min
322
323        let mut items = Vec::with_capacity(n_key_items);
324        for i in 0..n_key_items {
325            let amount_f = self.rng.random_range(
326                key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
327                    ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
328            );
329            let amount = Decimal::try_from(amount_f)
330                .unwrap_or(key_item_min)
331                .round_dp(2)
332                .max(key_item_min);
333
334            let reason = self.pick_key_item_reason(cra, i);
335
336            items.push(KeyItem {
337                item_id: format!(
338                    "{}-{}-KEY-{i:03}",
339                    cra.entity_code,
340                    cra.account_area.replace(' ', "_").to_uppercase()
341                ),
342                amount,
343                reason,
344            });
345        }
346
347        // Guard: key items must not exceed the population value (they are a subset of it).
348        // If they do, scale all amounts down proportionally so their total is 80% of the
349        // population value, leaving room for representative items.
350        let key_total: Decimal = items.iter().map(|k| k.amount).sum();
351        if key_total > pop_value {
352            let scale = (pop_value * dec!(0.8)) / key_total;
353            for item in &mut items {
354                item.amount = (item.amount * scale).round_dp(2);
355            }
356        }
357
358        items
359    }
360
361    /// Choose a key item reason based on the CRA characteristics.
362    fn pick_key_item_reason(
363        &mut self,
364        cra: &CombinedRiskAssessment,
365        index: usize,
366    ) -> KeyItemReason {
367        // First item is always AboveTolerableError (primary reason)
368        if index == 0 {
369            return KeyItemReason::AboveTolerableError;
370        }
371        // Significant risks generate management override / high risk flags
372        if cra.significant_risk {
373            let roll: f64 = self.rng.random();
374            if roll < 0.40 {
375                return KeyItemReason::ManagementOverride;
376            }
377            if roll < 0.70 {
378                return KeyItemReason::HighRisk;
379            }
380        }
381        let roll: f64 = self.rng.random();
382        if roll < 0.60 {
383            KeyItemReason::AboveTolerableError
384        } else if roll < 0.80 {
385            KeyItemReason::UnusualNature
386        } else {
387            KeyItemReason::HighRisk
388        }
389    }
390}
391
392// ---------------------------------------------------------------------------
393// Tests
394// ---------------------------------------------------------------------------
395
396#[cfg(test)]
397#[allow(clippy::unwrap_used)]
398mod tests {
399    use super::*;
400    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
401    use rust_decimal_macros::dec;
402
403    fn make_cra(
404        account_area: &str,
405        assertion: AuditAssertion,
406        ir: RiskRating,
407        cr: RiskRating,
408    ) -> CombinedRiskAssessment {
409        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
410    }
411
412    #[test]
413    fn moderate_cra_generates_plan() {
414        let cra = make_cra(
415            "Trade Receivables",
416            AuditAssertion::Existence,
417            RiskRating::Medium,
418            RiskRating::Medium,
419        );
420        assert_eq!(cra.combined_risk, CraLevel::Moderate);
421
422        let mut gen = SamplingPlanGenerator::new(42);
423        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));
424
425        assert_eq!(
426            plans.len(),
427            1,
428            "Should generate exactly one plan for Moderate CRA"
429        );
430        let plan = &plans[0];
431        assert!(!items.is_empty(), "Should generate sampled items");
432        assert!(
433            plan.sample_size >= 20 && plan.sample_size <= 30,
434            "Moderate CRA sample size 20–30"
435        );
436    }
437
438    #[test]
439    fn low_cra_skipped() {
440        let cra = make_cra(
441            "Cash",
442            AuditAssertion::Existence,
443            RiskRating::Low,
444            RiskRating::Low,
445        );
446        assert_eq!(cra.combined_risk, CraLevel::Minimal);
447
448        let mut gen = SamplingPlanGenerator::new(42);
449        let (plans, _items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));
450
451        assert!(
452            plans.is_empty(),
453            "Minimal CRA should produce no sampling plan"
454        );
455    }
456
457    #[test]
458    fn high_cra_large_sample() {
459        let cra = make_cra(
460            "Revenue",
461            AuditAssertion::Occurrence,
462            RiskRating::High,
463            RiskRating::High,
464        );
465        assert_eq!(cra.combined_risk, CraLevel::High);
466
467        let mut gen = SamplingPlanGenerator::new(99);
468        let (plans, _) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));
469
470        assert_eq!(plans.len(), 1);
471        let plan = &plans[0];
472        assert!(
473            plan.sample_size >= 40,
474            "High CRA sample size should be 40–60"
475        );
476    }
477
478    #[test]
479    fn key_items_all_above_tolerable_error() {
480        let cra = make_cra(
481            "Provisions",
482            AuditAssertion::ValuationAndAllocation,
483            RiskRating::High,
484            RiskRating::Medium,
485        );
486
487        let mut gen = SamplingPlanGenerator::new(7);
488        let te = dec!(32_500);
489        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));
490
491        assert!(!plans.is_empty());
492        let plan = &plans[0];
493        for ki in &plan.key_items {
494            assert!(
495                ki.amount >= te,
496                "Key item amount {} must be >= tolerable error {}",
497                ki.amount,
498                te
499            );
500        }
501    }
502
503    #[test]
504    fn sampling_interval_formula() {
505        let cra = make_cra(
506            "Inventory",
507            AuditAssertion::Existence,
508            RiskRating::High,
509            RiskRating::Medium,
510        );
511
512        let mut gen = SamplingPlanGenerator::new(13);
513        let te = dec!(32_500);
514        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));
515
516        assert!(!plans.is_empty());
517        let plan = &plans[0];
518        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
519            let expected_interval =
520                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
521            // Allow 1 cent rounding tolerance
522            let diff = (plan.sampling_interval - expected_interval).abs();
523            assert!(
524                diff < dec!(0.01),
525                "Interval {} ≠ remaining/sample_size {}",
526                plan.sampling_interval,
527                expected_interval
528            );
529        }
530    }
531
532    #[test]
533    fn balance_assertion_uses_mus() {
534        let cra = make_cra(
535            "Trade Receivables",
536            AuditAssertion::Existence,
537            RiskRating::Medium,
538            RiskRating::Medium,
539        );
540        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
541        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
542    }
543
544    #[test]
545    fn transaction_assertion_uses_systematic() {
546        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
547        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
548    }
549
550    #[test]
551    fn all_sampled_items_have_plan_id() {
552        let cras = vec![
553            make_cra(
554                "Revenue",
555                AuditAssertion::Occurrence,
556                RiskRating::High,
557                RiskRating::Medium,
558            ),
559            make_cra(
560                "Inventory",
561                AuditAssertion::Existence,
562                RiskRating::High,
563                RiskRating::Low,
564            ),
565        ];
566
567        let mut gen = SamplingPlanGenerator::new(55);
568        let te = dec!(32_500);
569        let (plans, items) = gen.generate_for_cras(&cras, Some(te));
570
571        assert!(!plans.is_empty());
572        assert!(!items.is_empty());
573        // Verify at least some items have tested=true
574        assert!(
575            items.iter().all(|i| i.tested),
576            "All items should be marked tested"
577        );
578    }
579}