datasynth_generators/audit/
sampling_plan_generator.rs

1//! Audit sampling plan generator per ISA 530.
2//!
3//! For each Combined Risk Assessment (CRA) at Moderate or High level, this
4//! generator produces a complete `SamplingPlan` and the corresponding
5//! `SampledItem` records that document the actual sample drawn.
6//!
7//! # Sample-size logic (ISA 530 guidance)
8//!
9//! | CRA level | Representative items | Methodology |
10//! |-----------|---------------------|-------------|
11//! | Minimal   | 0 (analytical only) | — |
12//! | Low       | 10–15               | MUS (balance) / Systematic (transaction) |
13//! | Moderate  | 20–30               | MUS (balance) / Systematic (transaction) |
14//! | High      | 40–60               | MUS (balance) / Systematic (transaction) |
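//!
//! Only Moderate and High CRAs receive a sampling plan from the public
//! generator entry points; Minimal and Low CRAs are skipped there, so the
//! first two rows describe the size bands only.
//!
//! Misstatement rates are correlated with CRA level. Each sampled item is
//! flagged independently with a fixed probability taken from these bands:
//! - Low: 2–5% of sampled items (4% is used)
//! - Moderate: 5–10% (8% is used)
//! - High: 10–20% (15% is used)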
20//!
21//! # Key-item identification
22//!
23//! Key items are populated from the supplied JE amounts > tolerable error.
24//! When no JE data is available, synthetic key items are generated based on
25//! a fraction of the population size.
26
27use std::collections::HashSet;
28
29use datasynth_core::models::audit::risk_assessment_cra::{
30    AuditAssertion, CombinedRiskAssessment, CraLevel,
31};
32use datasynth_core::models::audit::sampling_plan::{
33    KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
34};
35use datasynth_core::models::journal_entry::{JournalEntry, JournalEntryLine};
36use datasynth_core::utils::seeded_rng;
37use rand::RngExt;
38use rand_chacha::ChaCha8Rng;
39use rust_decimal::Decimal;
40use rust_decimal_macros::dec;
41use tracing::info;
42
43// ---------------------------------------------------------------------------
44// Helpers
45// ---------------------------------------------------------------------------
46
47/// Choose the appropriate methodology for an assertion type.
48///
49/// Balance-testing assertions (Existence, Valuation) → MUS.
50/// Transaction-testing assertions (Occurrence, Completeness, Accuracy, Cutoff) → Systematic.
51/// Low-risk (no sampling plan generated) → HaphazardSelection.
52fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
53    use AuditAssertion::*;
54    if cra == CraLevel::Minimal {
55        return SamplingMethodology::HaphazardSelection;
56    }
57    match assertion {
58        // Balance assertions → MUS
59        Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
60            SamplingMethodology::MonetaryUnitSampling
61        }
62        // Presentation → Random
63        PresentationAndDisclosure => SamplingMethodology::RandomSelection,
64        // Transaction assertions → Systematic
65        Occurrence | Completeness | Accuracy | Cutoff | Classification => {
66            SamplingMethodology::SystematicSelection
67        }
68    }
69}
70
71/// Derive representative sample size from CRA level (with random jitter).
72fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
73    match cra {
74        CraLevel::Minimal => 0,
75        CraLevel::Low => rng.random_range(10usize..=15),
76        CraLevel::Moderate => rng.random_range(20usize..=30),
77        CraLevel::High => rng.random_range(40usize..=60),
78    }
79}
80
81/// Misstatement rate for a given CRA level (probability a sampled item has error).
82fn misstatement_rate(cra: CraLevel) -> f64 {
83    match cra {
84        CraLevel::Minimal => 0.02,
85        CraLevel::Low => 0.04,
86        CraLevel::Moderate => 0.08,
87        CraLevel::High => 0.15,
88    }
89}
90
/// Map an audit account area name to GL account code prefixes.
///
/// Matching is case-insensitive and keyword based; the first matching rule
/// wins. An empty vector means "no specific mapping" and callers treat it as
/// "use all JE lines".
///
/// Keyword shadowing is handled explicitly:
/// * An area that names a specific expense category (interest, other
///   income/expense, depreciation/amortization, payroll, rent/lease) is not
///   swallowed by the generic `expense`/`cost` rule, so "Interest Expense"
///   resolves to `71` rather than `5`/`6`.
/// * `rent` must start a word, so "Current Liabilities" or "Parent Loans" are
///   not treated as rent.
/// * `revenue`/`sales` only map to revenue when the area does not also mention
///   `cost`, so "Cost of Sales" is classified as an expense.
fn account_area_to_prefixes(account_area: &str) -> Vec<&'static str> {
    let lower = account_area.to_lowercase();
    let has = |needle: &str| lower.contains(needle);

    // Word-prefix match instead of a raw substring test ("current" contains "rent").
    let mentions_rent = lower
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| word.starts_with("rent"));

    let is_interest = has("interest");
    let is_other_income_expense = has("other income") || has("other expense");
    let is_depreciation = has("depreciation") || has("amortization");
    let is_payroll = has("salary") || has("wages") || has("payroll");
    let is_rent_or_lease = mentions_rent || has("lease");
    let is_specific_expense = is_interest
        || is_other_income_expense
        || is_depreciation
        || is_payroll
        || is_rent_or_lease;
    let is_generic_cost = has("expense") || has("cost");

    if (has("revenue") || has("sales")) && !has("cost") {
        vec!["4"]
    } else if has("receivable") {
        vec!["11"]
    } else if has("payable") {
        vec!["20"]
    } else if has("inventory") || has("stock") {
        vec!["12", "13"]
    } else if has("cash") || has("bank") {
        vec!["10"]
    } else if has("fixed asset") || has("ppe") || has("property") {
        vec!["14", "15", "16"]
    } else if has("equity") || has("capital") {
        vec!["3"]
    } else if is_generic_cost && !is_specific_expense {
        vec!["5", "6"]
    } else if has("debt") || has("loan") || has("borrow") {
        vec!["23", "24"]
    } else if has("tax") {
        vec!["17", "25"]
    } else if has("provision") {
        vec!["26"]
    } else if has("intangible") || has("goodwill") {
        vec!["19"]
    } else if is_interest {
        vec!["71"]
    } else if is_other_income_expense {
        vec!["7"]
    } else if is_depreciation {
        vec!["60"]
    } else if is_payroll {
        vec!["61"]
    } else if is_rent_or_lease {
        vec!["63"]
    } else {
        Vec::new() // Empty = use all JE lines as fallback
    }
}
132
133/// Filter JE lines matching the account area's GL prefixes.
134/// Returns (JournalEntry ref, JournalEntryLine ref, absolute amount) tuples.
135fn filter_je_lines_for_area<'a>(
136    entries: &'a [JournalEntry],
137    account_area: &str,
138) -> Vec<(&'a JournalEntry, &'a JournalEntryLine, Decimal)> {
139    let prefixes = account_area_to_prefixes(account_area);
140    let mut results = Vec::new();
141
142    for je in entries {
143        for line in &je.lines {
144            let matches = if prefixes.is_empty() {
145                true
146            } else {
147                prefixes.iter().any(|p| line.account_code.starts_with(p))
148            };
149            if matches {
150                let amount = (line.debit_amount - line.credit_amount).abs();
151                if amount > Decimal::ZERO {
152                    results.push((je, line, amount));
153                }
154            }
155        }
156    }
157    results
158}
159
160// ---------------------------------------------------------------------------
161// Configuration
162// ---------------------------------------------------------------------------
163
/// Configuration for the sampling plan generator.
///
/// The population-size and key-item settings drive the synthetic path only
/// (plans generated without journal entry data).
#[derive(Debug, Clone)]
pub struct SamplingPlanGeneratorConfig {
    /// Fraction of the population that consists of key items (0.0–1.0).
    /// Applied when no external JE data is supplied. The fraction is doubled
    /// for High CRAs and the resulting key-item count is clamped to 1–20.
    pub key_item_fraction: f64,
    /// Minimum population size assumed when no JE data is available
    /// (inclusive lower bound of the random population size).
    pub min_population_size: usize,
    /// Maximum population size assumed when no JE data is available
    /// (inclusive upper bound of the random population size).
    pub max_population_size: usize,
    /// Base population value (monetary) when no JE data is available.
    /// In this file it is only used to derive the default tolerable error
    /// (5% of this value) when the caller does not supply one.
    pub base_population_value: Decimal,
}
177
178impl Default for SamplingPlanGeneratorConfig {
179    fn default() -> Self {
180        Self {
181            key_item_fraction: 0.05, // 5% of items selected as key items
182            min_population_size: 100,
183            max_population_size: 2_000,
184            base_population_value: dec!(5_000_000),
185        }
186    }
187}
188
189// ---------------------------------------------------------------------------
190// Generator
191// ---------------------------------------------------------------------------
192
/// Generator for ISA 530 sampling plans and sampled items.
///
/// All random draws go through the owned, seeded RNG, so methods that
/// generate data take `&mut self`.
pub struct SamplingPlanGenerator {
    /// Seeded RNG created by `seeded_rng(seed, 0x530)` in the constructors.
    rng: ChaCha8Rng,
    /// Settings for the synthetic (no-JE) generation path.
    config: SamplingPlanGeneratorConfig,
}
198
199impl SamplingPlanGenerator {
200    /// Create a new generator with default configuration.
201    pub fn new(seed: u64) -> Self {
202        Self {
203            rng: seeded_rng(seed, 0x530), // discriminator for ISA 530
204            config: SamplingPlanGeneratorConfig::default(),
205        }
206    }
207
208    /// Create a new generator with custom configuration.
209    pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
210        Self {
211            rng: seeded_rng(seed, 0x530),
212            config,
213        }
214    }
215
216    /// Generate sampling plans and sampled items for all CRAs at Moderate or higher.
217    ///
218    /// # Arguments
219    /// * `cras` — All combined risk assessments for one or more entities.
220    /// * `tolerable_error` — Performance materiality / tolerable error for the entity.
221    ///   When `None`, a synthetic TE of 5% of the base population value is used.
222    ///
223    /// Returns `(plans, sampled_items)` — the plans and the flat list of all sampled items.
224    pub fn generate_for_cras(
225        &mut self,
226        cras: &[CombinedRiskAssessment],
227        tolerable_error: Option<Decimal>,
228    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
229        info!("Generating sampling plans for {} CRAs", cras.len());
230        let mut plans: Vec<SamplingPlan> = Vec::new();
231        let mut all_items: Vec<SampledItem> = Vec::new();
232
233        for cra in cras {
234            // Only generate plans for Moderate and High CRA levels
235            if cra.combined_risk < CraLevel::Moderate {
236                continue;
237            }
238
239            let te =
240                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
241
242            let (plan, items) = self.generate_plan(cra, te);
243            all_items.extend(items);
244            plans.push(plan);
245        }
246
247        info!(
248            "Generated {} sampling plans with {} sampled items",
249            plans.len(),
250            all_items.len()
251        );
252        (plans, all_items)
253    }
254
255    /// Generate a single sampling plan for one CRA.
256    fn generate_plan(
257        &mut self,
258        cra: &CombinedRiskAssessment,
259        tolerable_error: Decimal,
260    ) -> (SamplingPlan, Vec<SampledItem>) {
261        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
262        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
263
264        // Synthesise population size and value
265        let pop_size = self
266            .rng
267            .random_range(self.config.min_population_size..=self.config.max_population_size);
268        let pop_value = self.synthetic_population_value(pop_size);
269
270        // Generate key items
271        let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
272        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
273        let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
274
275        // Compute sampling interval
276        let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
277            remaining_value / Decimal::from(rep_sample_size as i64)
278        } else {
279            Decimal::ZERO
280        };
281
282        let plan_id = format!(
283            "SP-{}-{}-{}",
284            cra.entity_code,
285            cra.account_area.replace(' ', "_").to_uppercase(),
286            format!("{:?}", cra.assertion).to_uppercase(),
287        );
288
289        let plan = SamplingPlan {
290            id: plan_id.clone(),
291            entity_code: cra.entity_code.clone(),
292            account_area: cra.account_area.clone(),
293            assertion: format!("{}", cra.assertion),
294            methodology,
295            population_size: pop_size,
296            population_value: pop_value,
297            key_items: key_items.clone(),
298            key_items_value,
299            remaining_population_value: remaining_value,
300            sample_size: rep_sample_size,
301            sampling_interval,
302            cra_level: cra.combined_risk.to_string(),
303            tolerable_error,
304        };
305
306        // Build SampledItems: key items (always tested) + representative items
307        let mut sampled_items: Vec<SampledItem> = Vec::new();
308        let misstatement_p = misstatement_rate(cra.combined_risk);
309
310        // Key items — always tested
311        for ki in &key_items {
312            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
313            let misstatement_amount = if misstatement_found {
314                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
315                    .unwrap_or(dec!(0.05));
316                Some((ki.amount * pct).round_dp(2))
317            } else {
318                None
319            };
320
321            sampled_items.push(SampledItem {
322                item_id: ki.item_id.clone(),
323                sampling_plan_id: plan_id.clone(),
324                amount: ki.amount,
325                selection_type: SelectionType::KeyItem,
326                key_item_reason: Some(ki.reason),
327                tested: true,
328                misstatement_found,
329                misstatement_amount,
330            });
331        }
332
333        // Representative items
334        if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
335            let avg_remaining_item_value =
336                remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
337
338            for i in 0..rep_sample_size {
339                let item_id = format!("{plan_id}-REP-{i:04}");
340                // Jitter the amount around the average remaining item value
341                let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
342                    .unwrap_or(Decimal::ONE);
343                let amount = (avg_remaining_item_value * jitter_pct)
344                    .round_dp(2)
345                    .max(dec!(1));
346
347                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
348                let misstatement_amount = if misstatement_found {
349                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
350                        .unwrap_or(dec!(0.05));
351                    Some((amount * pct).round_dp(2))
352                } else {
353                    None
354                };
355
356                sampled_items.push(SampledItem {
357                    item_id,
358                    sampling_plan_id: plan_id.clone(),
359                    amount,
360                    selection_type: SelectionType::Representative,
361                    key_item_reason: None,
362                    tested: true,
363                    misstatement_found,
364                    misstatement_amount,
365                });
366            }
367        }
368
369        (plan, sampled_items)
370    }
371
372    /// Synthesise a realistic population value from the population size.
373    fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
374        // Average item value varies from $500 (routine small transactions) to $50,000 (large balances)
375        let avg_item = self.rng.random_range(500_i64..=50_000);
376        let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
377        // Round to nearest 1000
378        ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
379    }
380
381    /// Generate key items for the population.
382    ///
383    /// Key items are synthesised as items with amounts above the tolerable error.
384    /// The number of key items is driven by the key_item_fraction config and
385    /// whether the CRA is High (more key items for high-risk areas).
386    fn generate_key_items(
387        &mut self,
388        pop_size: usize,
389        pop_value: Decimal,
390        tolerable_error: Decimal,
391        cra: &CombinedRiskAssessment,
392    ) -> Vec<KeyItem> {
393        let fraction = match cra.combined_risk {
394            CraLevel::High => self.config.key_item_fraction * 2.0,
395            _ => self.config.key_item_fraction,
396        };
397        let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
398
399        // Distribute the key item value: each key item is > TE
400        let avg_key_value = pop_value
401            * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
402            / Decimal::from(n_key_items as i64);
403        let key_item_min = tolerable_error * dec!(1.01); // just above TE
404        let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); // ensure max > min
405
406        let mut items = Vec::with_capacity(n_key_items);
407        for i in 0..n_key_items {
408            let amount_f = self.rng.random_range(
409                key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
410                    ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
411            );
412            let amount = Decimal::try_from(amount_f)
413                .unwrap_or(key_item_min)
414                .round_dp(2)
415                .max(key_item_min);
416
417            let reason = self.pick_key_item_reason(cra, i);
418
419            items.push(KeyItem {
420                item_id: format!(
421                    "{}-{}-KEY-{i:03}",
422                    cra.entity_code,
423                    cra.account_area.replace(' ', "_").to_uppercase()
424                ),
425                amount,
426                reason,
427            });
428        }
429
430        // Guard: key items must not exceed the population value (they are a subset of it).
431        // If they do, scale all amounts down proportionally so their total is 80% of the
432        // population value, leaving room for representative items.
433        let key_total: Decimal = items.iter().map(|k| k.amount).sum();
434        if key_total > pop_value {
435            let scale = (pop_value * dec!(0.8)) / key_total;
436            for item in &mut items {
437                item.amount = (item.amount * scale).round_dp(2);
438            }
439        }
440
441        items
442    }
443
444    /// Choose a key item reason based on the CRA characteristics.
445    fn pick_key_item_reason(
446        &mut self,
447        cra: &CombinedRiskAssessment,
448        index: usize,
449    ) -> KeyItemReason {
450        // First item is always AboveTolerableError (primary reason)
451        if index == 0 {
452            return KeyItemReason::AboveTolerableError;
453        }
454        // Significant risks generate management override / high risk flags
455        if cra.significant_risk {
456            let roll: f64 = self.rng.random();
457            if roll < 0.40 {
458                return KeyItemReason::ManagementOverride;
459            }
460            if roll < 0.70 {
461                return KeyItemReason::HighRisk;
462            }
463        }
464        let roll: f64 = self.rng.random();
465        if roll < 0.60 {
466            KeyItemReason::AboveTolerableError
467        } else if roll < 0.80 {
468            KeyItemReason::UnusualNature
469        } else {
470            KeyItemReason::HighRisk
471        }
472    }
473
474    // -----------------------------------------------------------------------
475    // JE-aware sampling (population-based)
476    // -----------------------------------------------------------------------
477
478    /// Generate sampling plans using real journal entry population data.
479    ///
480    /// Key items are actual JE lines with amount > tolerable_error.
481    /// Representative items are sampled from the remaining JE population.
482    /// Falls back to synthetic generation for CRAs with no matching JE lines.
483    pub fn generate_for_cras_with_population(
484        &mut self,
485        cras: &[CombinedRiskAssessment],
486        tolerable_error: Option<Decimal>,
487        journal_entries: &[JournalEntry],
488    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
489        info!(
490            "Generating JE-aware sampling plans for {} CRAs against {} journal entries",
491            cras.len(),
492            journal_entries.len()
493        );
494        let mut plans: Vec<SamplingPlan> = Vec::new();
495        let mut all_items: Vec<SampledItem> = Vec::new();
496
497        for cra in cras {
498            // Only generate plans for Moderate and High CRA levels
499            if cra.combined_risk < CraLevel::Moderate {
500                continue;
501            }
502
503            let te =
504                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
505
506            let matching_lines = filter_je_lines_for_area(journal_entries, &cra.account_area);
507
508            let (plan, items) = if matching_lines.is_empty() {
509                // Fallback to synthetic generation when no JE lines match
510                self.generate_plan(cra, te)
511            } else {
512                self.generate_plan_from_population(cra, te, &matching_lines)
513            };
514
515            all_items.extend(items);
516            plans.push(plan);
517        }
518
519        info!(
520            "Generated {} JE-aware sampling plans with {} sampled items",
521            plans.len(),
522            all_items.len()
523        );
524        (plans, all_items)
525    }
526
527    /// Generate a sampling plan from a real JE population for one CRA.
528    fn generate_plan_from_population(
529        &mut self,
530        cra: &CombinedRiskAssessment,
531        tolerable_error: Decimal,
532        matching_lines: &[(&JournalEntry, &JournalEntryLine, Decimal)],
533    ) -> (SamplingPlan, Vec<SampledItem>) {
534        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
535        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
536
537        // Compute real population metrics
538        let population_size = matching_lines.len();
539        let population_value: Decimal = matching_lines.iter().map(|(_, _, amt)| *amt).sum();
540
541        // Sort lines descending by amount for key item selection
542        let mut sorted_lines: Vec<_> = matching_lines.to_vec();
543        sorted_lines.sort_by_key(|b| std::cmp::Reverse(b.2));
544
545        // Select key items: lines where amount > tolerable_error, capped at 20
546        let mut key_items: Vec<KeyItem> = Vec::new();
547        let mut seen_ids: HashSet<String> = HashSet::new();
548
549        for (idx, (je, _line, amount)) in sorted_lines.iter().enumerate() {
550            if *amount <= tolerable_error {
551                break;
552            }
553            if key_items.len() >= 20 {
554                break;
555            }
556            let je_id = je.header.document_id.to_string();
557            // Skip duplicate JE IDs (same JE may have multiple matching lines)
558            if seen_ids.contains(&je_id) {
559                continue;
560            }
561            seen_ids.insert(je_id.clone());
562            let reason = self.pick_key_item_reason(cra, idx);
563            key_items.push(KeyItem {
564                item_id: je_id,
565                amount: *amount,
566                reason,
567            });
568        }
569
570        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
571        let remaining_value = (population_value - key_items_value).max(Decimal::ZERO);
572
573        // Select representative items from remaining lines using systematic selection
574        let remaining: Vec<_> = sorted_lines
575            .iter()
576            .filter(|(je, _, _)| !seen_ids.contains(&je.header.document_id.to_string()))
577            .collect();
578        let actual_rep_size = rep_sample_size.min(remaining.len());
579        let step = remaining.len().checked_div(actual_rep_size).unwrap_or(0);
580        let start = if step > 0 {
581            self.rng.random_range(0..step)
582        } else {
583            0
584        };
585
586        // Compute sampling interval
587        let sampling_interval = if actual_rep_size > 0 && remaining_value > Decimal::ZERO {
588            remaining_value / Decimal::from(actual_rep_size as i64)
589        } else {
590            Decimal::ZERO
591        };
592
593        let plan_id = format!(
594            "SP-{}-{}-{}",
595            cra.entity_code,
596            cra.account_area.replace(' ', "_").to_uppercase(),
597            format!("{:?}", cra.assertion).to_uppercase(),
598        );
599
600        let plan = SamplingPlan {
601            id: plan_id.clone(),
602            entity_code: cra.entity_code.clone(),
603            account_area: cra.account_area.clone(),
604            assertion: format!("{}", cra.assertion),
605            methodology,
606            population_size,
607            population_value,
608            key_items: key_items.clone(),
609            key_items_value,
610            remaining_population_value: remaining_value,
611            sample_size: actual_rep_size,
612            sampling_interval,
613            cra_level: cra.combined_risk.to_string(),
614            tolerable_error,
615        };
616
617        // Build SampledItems
618        let mut sampled_items: Vec<SampledItem> = Vec::new();
619        let misstatement_p = misstatement_rate(cra.combined_risk);
620
621        // Key items — always tested
622        for ki in &key_items {
623            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
624            let misstatement_amount = if misstatement_found {
625                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
626                    .unwrap_or(dec!(0.05));
627                Some((ki.amount * pct).round_dp(2))
628            } else {
629                None
630            };
631
632            sampled_items.push(SampledItem {
633                item_id: ki.item_id.clone(),
634                sampling_plan_id: plan_id.clone(),
635                amount: ki.amount,
636                selection_type: SelectionType::KeyItem,
637                key_item_reason: Some(ki.reason),
638                tested: true,
639                misstatement_found,
640                misstatement_amount,
641            });
642        }
643
644        // Representative items via systematic selection
645        if actual_rep_size > 0 && step > 0 {
646            let mut rep_seen: HashSet<String> = HashSet::new();
647            for i in 0..actual_rep_size {
648                let idx = (start + i * step) % remaining.len();
649                let (je, _line, amount) = remaining[idx];
650                let je_id = je.header.document_id.to_string();
651
652                // Avoid duplicate representative items
653                if rep_seen.contains(&je_id) {
654                    continue;
655                }
656                rep_seen.insert(je_id.clone());
657
658                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
659                let misstatement_amount = if misstatement_found {
660                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
661                        .unwrap_or(dec!(0.05));
662                    Some((amount * pct).round_dp(2))
663                } else {
664                    None
665                };
666
667                sampled_items.push(SampledItem {
668                    item_id: je_id,
669                    sampling_plan_id: plan_id.clone(),
670                    amount: *amount,
671                    selection_type: SelectionType::Representative,
672                    key_item_reason: None,
673                    tested: true,
674                    misstatement_found,
675                    misstatement_amount,
676                });
677            }
678        }
679
680        (plan, sampled_items)
681    }
682}
683
684// ---------------------------------------------------------------------------
685// Tests
686// ---------------------------------------------------------------------------
687
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
    use rust_decimal_macros::dec;

    /// Build a CRA for entity `C001` from inherent (`ir`) and control (`cr`)
    /// risk ratings, with no significant-risk flag.
    fn make_cra(
        account_area: &str,
        assertion: AuditAssertion,
        ir: RiskRating,
        cr: RiskRating,
    ) -> CombinedRiskAssessment {
        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
    }

    #[test]
    fn moderate_cra_generates_plan() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        assert_eq!(cra.combined_risk, CraLevel::Moderate);

        let mut generator = SamplingPlanGenerator::new(42);
        let (plans, items) = generator.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(
            plans.len(),
            1,
            "Should generate exactly one plan for Moderate CRA"
        );
        let plan = &plans[0];
        assert!(!items.is_empty(), "Should generate sampled items");
        assert!(
            (20..=30).contains(&plan.sample_size),
            "Moderate CRA sample size 20–30"
        );
    }

    /// CRAs below Moderate are skipped. The Low/Low rating pair used here
    /// resolves to `CraLevel::Minimal`.
    #[test]
    fn low_cra_skipped() {
        let cra = make_cra(
            "Cash",
            AuditAssertion::Existence,
            RiskRating::Low,
            RiskRating::Low,
        );
        assert_eq!(cra.combined_risk, CraLevel::Minimal);

        let mut generator = SamplingPlanGenerator::new(42);
        let (plans, items) = generator.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert!(
            plans.is_empty(),
            "Minimal CRA should produce no sampling plan"
        );
        assert!(
            items.is_empty(),
            "Minimal CRA should produce no sampled items"
        );
    }

    #[test]
    fn high_cra_large_sample() {
        let cra = make_cra(
            "Revenue",
            AuditAssertion::Occurrence,
            RiskRating::High,
            RiskRating::High,
        );
        assert_eq!(cra.combined_risk, CraLevel::High);

        let mut generator = SamplingPlanGenerator::new(99);
        let (plans, _) = generator.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        // Check both ends of the band, as the message promises.
        assert!(
            (40..=60).contains(&plan.sample_size),
            "High CRA sample size should be 40–60"
        );
    }

    #[test]
    fn key_items_all_above_tolerable_error() {
        let cra = make_cra(
            "Provisions",
            AuditAssertion::ValuationAndAllocation,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut generator = SamplingPlanGenerator::new(7);
        let te = dec!(32_500);
        let (plans, _) = generator.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        for ki in &plan.key_items {
            assert!(
                ki.amount >= te,
                "Key item amount {} must be >= tolerable error {}",
                ki.amount,
                te
            );
        }
    }

    #[test]
    fn sampling_interval_formula() {
        let cra = make_cra(
            "Inventory",
            AuditAssertion::Existence,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut generator = SamplingPlanGenerator::new(13);
        let te = dec!(32_500);
        let (plans, _) = generator.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
            let expected_interval =
                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
            // Allow 1 cent rounding tolerance
            let diff = (plan.sampling_interval - expected_interval).abs();
            assert!(
                diff < dec!(0.01),
                "Interval {} ≠ remaining/sample_size {}",
                plan.sampling_interval,
                expected_interval
            );
        }
    }

    #[test]
    fn balance_assertion_uses_mus() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
    }

    #[test]
    fn transaction_assertion_uses_systematic() {
        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
    }

    #[test]
    fn all_sampled_items_have_plan_id() {
        let cras = vec![
            make_cra(
                "Revenue",
                AuditAssertion::Occurrence,
                RiskRating::High,
                RiskRating::Medium,
            ),
            make_cra(
                "Inventory",
                AuditAssertion::Existence,
                RiskRating::High,
                RiskRating::Low,
            ),
        ];

        let mut generator = SamplingPlanGenerator::new(55);
        let te = dec!(32_500);
        let (plans, items) = generator.generate_for_cras(&cras, Some(te));

        assert!(!plans.is_empty());
        assert!(!items.is_empty());

        // Every sampled item must point at one of the generated plans.
        for item in &items {
            assert!(
                plans.iter().any(|plan| plan.id == item.sampling_plan_id),
                "Sampled item {} references unknown plan {}",
                item.item_id,
                item.sampling_plan_id
            );
        }

        // Every generated item is marked as tested.
        assert!(
            items.iter().all(|i| i.tested),
            "All items should be marked tested"
        );
    }
}
datasynth_generators/audit/sampling_plan_generator.rs

datasynth_generators/audit/
sampling_plan_generator.rs