datasynth_generators/audit/
sampling_plan_generator.rs

1//! Audit sampling plan generator per ISA 530.
2//!
3//! For each Combined Risk Assessment (CRA) at Moderate or High level, this
4//! generator produces a complete `SamplingPlan` and the corresponding
5//! `SampledItem` records that document the actual sample drawn.
6//!
7//! # Sample-size logic (ISA 530 guidance)
8//!
9//! | CRA level | Representative items | Methodology |
10//! |-----------|---------------------|-------------|
11//! | Minimal   | 0 (analytical only) | — |
12//! | Low       | 10–15               | MUS (balance) / Systematic (transaction) |
13//! | Moderate  | 20–30               | MUS (balance) / Systematic (transaction) |
14//! | High      | 40–60               | MUS (balance) / Systematic (transaction) |
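//!
//! Only Moderate and High CRAs currently result in a sampling plan; the Minimal
//! and Low rows document the sizing rule but no plan is emitted for those levels.
//!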
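//! Misstatement rates are correlated with CRA level. Each tested item is
//! flagged independently with a fixed probability chosen inside the band for
//! its level:
//! - Low: 2–5% of sampled items (4% used)
//! - Moderate: 5–10% (8% used)
//! - High: 10–20% (15% used)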
20//!
21//! # Key-item identification
22//!
23//! Key items are populated from the supplied JE amounts > tolerable error.
24//! When no JE data is available, synthetic key items are generated based on
25//! a fraction of the population size.
26
27use std::collections::HashSet;
28
29use datasynth_core::models::audit::risk_assessment_cra::{
30    AuditAssertion, CombinedRiskAssessment, CraLevel,
31};
32use datasynth_core::models::audit::sampling_plan::{
33    KeyItem, KeyItemReason, SampledItem, SamplingMethodology, SamplingPlan, SelectionType,
34};
35use datasynth_core::models::journal_entry::{JournalEntry, JournalEntryLine};
36use datasynth_core::utils::seeded_rng;
37use rand::RngExt;
38use rand_chacha::ChaCha8Rng;
39use rust_decimal::Decimal;
40use rust_decimal_macros::dec;
41use tracing::info;
42
43// ---------------------------------------------------------------------------
44// Helpers
45// ---------------------------------------------------------------------------
46
47/// Choose the appropriate methodology for an assertion type.
48///
49/// Balance-testing assertions (Existence, Valuation) → MUS.
50/// Transaction-testing assertions (Occurrence, Completeness, Accuracy, Cutoff) → Systematic.
51/// Low-risk (no sampling plan generated) → HaphazardSelection.
52fn methodology_for_assertion(assertion: AuditAssertion, cra: CraLevel) -> SamplingMethodology {
53    use AuditAssertion::*;
54    if cra == CraLevel::Minimal {
55        return SamplingMethodology::HaphazardSelection;
56    }
57    match assertion {
58        // Balance assertions → MUS
59        Existence | ValuationAndAllocation | RightsAndObligations | CompletenessBalance => {
60            SamplingMethodology::MonetaryUnitSampling
61        }
62        // Presentation → Random
63        PresentationAndDisclosure => SamplingMethodology::RandomSelection,
64        // Transaction assertions → Systematic
65        Occurrence | Completeness | Accuracy | Cutoff | Classification => {
66            SamplingMethodology::SystematicSelection
67        }
68    }
69}
70
71/// Derive representative sample size from CRA level (with random jitter).
72fn sample_size_for_cra(rng: &mut ChaCha8Rng, cra: CraLevel) -> usize {
73    match cra {
74        CraLevel::Minimal => 0,
75        CraLevel::Low => rng.random_range(10usize..=15),
76        CraLevel::Moderate => rng.random_range(20usize..=30),
77        CraLevel::High => rng.random_range(40usize..=60),
78    }
79}
80
81/// Misstatement rate for a given CRA level (probability a sampled item has error).
82fn misstatement_rate(cra: CraLevel) -> f64 {
83    match cra {
84        CraLevel::Minimal => 0.02,
85        CraLevel::Low => 0.04,
86        CraLevel::Moderate => 0.08,
87        CraLevel::High => 0.15,
88    }
89}
90
/// Map an audit account area name to GL account code prefixes.
///
/// Matching is case-insensitive and keyword based; the first matching rule
/// wins. An empty vector means "no specific mapping" and callers treat it as
/// "use every JE line".
///
/// Precedence notes:
/// * Areas containing "cost of" (e.g. "Cost of Sales") are never treated as
///   revenue even though they mention "sales"/"revenue".
/// * Within the generic expense/cost rule, a more specific P&L keyword
///   (interest, other income/expense, depreciation/amortization, payroll,
///   rent/lease) selects its own sub-ledger instead of the broad `5`/`6`
///   range; previously those keywords were unreachable for such names.
/// * "rent" must start a word ("rent", "rental"), so "Current ..." or
///   "Parent ..." areas no longer map to the rent accounts.
fn account_area_to_prefixes(account_area: &str) -> Vec<&'static str> {
    let lower = account_area.to_lowercase();
    let has = |needle: &str| lower.contains(needle);

    // Word-level check so that "current"/"parent" do not count as "rent".
    let has_rent_word = lower
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| word.starts_with("rent"));

    // Specific P&L sub-ledgers, evaluated in the same order the original
    // fallback chain used.
    let specific_pnl = || -> Option<Vec<&'static str>> {
        if has("interest") {
            Some(vec!["71"])
        } else if has("other income") || has("other expense") {
            Some(vec!["7"])
        } else if has("depreciation") || has("amortization") {
            Some(vec!["60"])
        } else if has("salary") || has("wages") || has("payroll") {
            Some(vec!["61"])
        } else if has_rent_word || has("lease") {
            Some(vec!["63"])
        } else {
            None
        }
    };

    if (has("revenue") || has("sales")) && !has("cost of") {
        vec!["4"]
    } else if has("receivable") {
        vec!["11"]
    } else if has("payable") {
        vec!["20"]
    } else if has("inventory") || has("stock") {
        vec!["12", "13"]
    } else if has("cash") || has("bank") {
        vec!["10"]
    } else if has("fixed asset") || has("ppe") || has("property") {
        vec!["14", "15", "16"]
    } else if has("equity") || has("capital") {
        vec!["3"]
    } else if has("expense") || has("cost") {
        // Prefer the precise sub-ledger; fall back to all expense accounts.
        specific_pnl().unwrap_or_else(|| vec!["5", "6"])
    } else if has("debt") || has("loan") || has("borrow") {
        vec!["23", "24"]
    } else if has("tax") {
        vec!["17", "25"]
    } else if has("provision") {
        vec!["26"]
    } else if has("intangible") || has("goodwill") {
        vec!["19"]
    } else {
        // Empty = use all JE lines as fallback.
        specific_pnl().unwrap_or_default()
    }
}
132
133/// Filter JE lines matching the account area's GL prefixes.
134/// Returns (JournalEntry ref, JournalEntryLine ref, absolute amount) tuples.
135fn filter_je_lines_for_area<'a>(
136    entries: &'a [JournalEntry],
137    account_area: &str,
138) -> Vec<(&'a JournalEntry, &'a JournalEntryLine, Decimal)> {
139    let prefixes = account_area_to_prefixes(account_area);
140    let mut results = Vec::new();
141
142    for je in entries {
143        for line in &je.lines {
144            let matches = if prefixes.is_empty() {
145                true
146            } else {
147                prefixes.iter().any(|p| line.account_code.starts_with(p))
148            };
149            if matches {
150                let amount = (line.debit_amount - line.credit_amount).abs();
151                if amount > Decimal::ZERO {
152                    results.push((je, line, amount));
153                }
154            }
155        }
156    }
157    results
158}
159
160// ---------------------------------------------------------------------------
161// Configuration
162// ---------------------------------------------------------------------------
163
/// Configuration for the sampling plan generator.
///
/// The population and key-item settings drive the synthetic plan path, which
/// is used when no JE data is supplied or when no JE line matches an area.
#[derive(Debug, Clone)]
pub struct SamplingPlanGeneratorConfig {
    /// Fraction of the population that consists of key items (0.0–1.0).
    /// Applied when no external JE data is supplied.
    ///
    /// The synthetic path doubles this fraction for High CRAs and clamps the
    /// resulting key-item count to 1..=20. Three times this fraction of the
    /// population value is used as the target total key-item value.
    pub key_item_fraction: f64,
    /// Minimum population size assumed when no JE data is available.
    /// Expected to be no greater than `max_population_size`, since the two
    /// form the inclusive range the population size is drawn from.
    pub min_population_size: usize,
    /// Maximum population size assumed when no JE data is available.
    pub max_population_size: usize,
    /// Base population value (monetary) when no JE data is available.
    ///
    /// In this file it is only read to derive the fallback tolerable error
    /// (5% of this value) when callers pass `None`; the synthetic population
    /// value itself is drawn independently of it.
    pub base_population_value: Decimal,
}
177
178impl Default for SamplingPlanGeneratorConfig {
179    fn default() -> Self {
180        Self {
181            key_item_fraction: 0.05, // 5% of items selected as key items
182            min_population_size: 100,
183            max_population_size: 2_000,
184            base_population_value: dec!(5_000_000),
185        }
186    }
187}
188
189// ---------------------------------------------------------------------------
190// Generator
191// ---------------------------------------------------------------------------
192
/// Generator for ISA 530 sampling plans and sampled items.
///
/// Holds its own seeded RNG, so the generating methods take `&mut self`.
pub struct SamplingPlanGenerator {
    /// Random source, created via `seeded_rng(seed, 0x530)` in the constructors.
    rng: ChaCha8Rng,
    /// Settings for the synthetic path and the fallback tolerable error.
    config: SamplingPlanGeneratorConfig,
}
198
199impl SamplingPlanGenerator {
200    /// Create a new generator with default configuration.
201    pub fn new(seed: u64) -> Self {
202        Self {
203            rng: seeded_rng(seed, 0x530), // discriminator for ISA 530
204            config: SamplingPlanGeneratorConfig::default(),
205        }
206    }
207
208    /// Create a new generator with custom configuration.
209    pub fn with_config(seed: u64, config: SamplingPlanGeneratorConfig) -> Self {
210        Self {
211            rng: seeded_rng(seed, 0x530),
212            config,
213        }
214    }
215
216    /// Generate sampling plans and sampled items for all CRAs at Moderate or higher.
217    ///
218    /// # Arguments
219    /// * `cras` — All combined risk assessments for one or more entities.
220    /// * `tolerable_error` — Performance materiality / tolerable error for the entity.
221    ///   When `None`, a synthetic TE of 5% of the base population value is used.
222    ///
223    /// Returns `(plans, sampled_items)` — the plans and the flat list of all sampled items.
224    pub fn generate_for_cras(
225        &mut self,
226        cras: &[CombinedRiskAssessment],
227        tolerable_error: Option<Decimal>,
228    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
229        info!("Generating sampling plans for {} CRAs", cras.len());
230        let mut plans: Vec<SamplingPlan> = Vec::new();
231        let mut all_items: Vec<SampledItem> = Vec::new();
232
233        for cra in cras {
234            // Only generate plans for Moderate and High CRA levels
235            if cra.combined_risk < CraLevel::Moderate {
236                continue;
237            }
238
239            let te =
240                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
241
242            let (plan, items) = self.generate_plan(cra, te);
243            all_items.extend(items);
244            plans.push(plan);
245        }
246
247        info!(
248            "Generated {} sampling plans with {} sampled items",
249            plans.len(),
250            all_items.len()
251        );
252        (plans, all_items)
253    }
254
255    /// Generate a single sampling plan for one CRA.
256    fn generate_plan(
257        &mut self,
258        cra: &CombinedRiskAssessment,
259        tolerable_error: Decimal,
260    ) -> (SamplingPlan, Vec<SampledItem>) {
261        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
262        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
263
264        // Synthesise population size and value
265        let pop_size = self
266            .rng
267            .random_range(self.config.min_population_size..=self.config.max_population_size);
268        let pop_value = self.synthetic_population_value(pop_size);
269
270        // Generate key items
271        let key_items = self.generate_key_items(pop_size, pop_value, tolerable_error, cra);
272        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
273        let remaining_value = (pop_value - key_items_value).max(Decimal::ZERO);
274
275        // Compute sampling interval
276        let sampling_interval = if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
277            remaining_value / Decimal::from(rep_sample_size as i64)
278        } else {
279            Decimal::ZERO
280        };
281
282        let plan_id = format!(
283            "SP-{}-{}-{}",
284            cra.entity_code,
285            cra.account_area.replace(' ', "_").to_uppercase(),
286            format!("{:?}", cra.assertion).to_uppercase(),
287        );
288
289        let plan = SamplingPlan {
290            id: plan_id.clone(),
291            entity_code: cra.entity_code.clone(),
292            account_area: cra.account_area.clone(),
293            assertion: format!("{}", cra.assertion),
294            methodology,
295            population_size: pop_size,
296            population_value: pop_value,
297            key_items: key_items.clone(),
298            key_items_value,
299            remaining_population_value: remaining_value,
300            sample_size: rep_sample_size,
301            sampling_interval,
302            cra_level: cra.combined_risk.to_string(),
303            tolerable_error,
304        };
305
306        // Build SampledItems: key items (always tested) + representative items
307        let mut sampled_items: Vec<SampledItem> = Vec::new();
308        let misstatement_p = misstatement_rate(cra.combined_risk);
309
310        // Key items — always tested
311        for ki in &key_items {
312            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
313            let misstatement_amount = if misstatement_found {
314                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
315                    .unwrap_or(dec!(0.05));
316                Some((ki.amount * pct).round_dp(2))
317            } else {
318                None
319            };
320
321            sampled_items.push(SampledItem {
322                item_id: ki.item_id.clone(),
323                sampling_plan_id: plan_id.clone(),
324                amount: ki.amount,
325                selection_type: SelectionType::KeyItem,
326                tested: true,
327                misstatement_found,
328                misstatement_amount,
329            });
330        }
331
332        // Representative items
333        if rep_sample_size > 0 && remaining_value > Decimal::ZERO {
334            let avg_remaining_item_value =
335                remaining_value / Decimal::from((pop_size - key_items.len()).max(1) as i64);
336
337            for i in 0..rep_sample_size {
338                let item_id = format!("{plan_id}-REP-{i:04}");
339                // Jitter the amount around the average remaining item value
340                let jitter_pct = Decimal::try_from(self.rng.random_range(0.5_f64..=2.0_f64))
341                    .unwrap_or(Decimal::ONE);
342                let amount = (avg_remaining_item_value * jitter_pct)
343                    .round_dp(2)
344                    .max(dec!(1));
345
346                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
347                let misstatement_amount = if misstatement_found {
348                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
349                        .unwrap_or(dec!(0.05));
350                    Some((amount * pct).round_dp(2))
351                } else {
352                    None
353                };
354
355                sampled_items.push(SampledItem {
356                    item_id,
357                    sampling_plan_id: plan_id.clone(),
358                    amount,
359                    selection_type: SelectionType::Representative,
360                    tested: true,
361                    misstatement_found,
362                    misstatement_amount,
363                });
364            }
365        }
366
367        (plan, sampled_items)
368    }
369
370    /// Synthesise a realistic population value from the population size.
371    fn synthetic_population_value(&mut self, pop_size: usize) -> Decimal {
372        // Average item value varies from $500 (routine small transactions) to $50,000 (large balances)
373        let avg_item = self.rng.random_range(500_i64..=50_000);
374        let raw = Decimal::from(pop_size as i64) * Decimal::from(avg_item);
375        // Round to nearest 1000
376        ((raw / dec!(1000)).round() * dec!(1000)).max(dec!(10_000))
377    }
378
379    /// Generate key items for the population.
380    ///
381    /// Key items are synthesised as items with amounts above the tolerable error.
382    /// The number of key items is driven by the key_item_fraction config and
383    /// whether the CRA is High (more key items for high-risk areas).
384    fn generate_key_items(
385        &mut self,
386        pop_size: usize,
387        pop_value: Decimal,
388        tolerable_error: Decimal,
389        cra: &CombinedRiskAssessment,
390    ) -> Vec<KeyItem> {
391        let fraction = match cra.combined_risk {
392            CraLevel::High => self.config.key_item_fraction * 2.0,
393            _ => self.config.key_item_fraction,
394        };
395        let n_key_items = ((pop_size as f64 * fraction) as usize).clamp(1, 20);
396
397        // Distribute the key item value: each key item is > TE
398        let avg_key_value = pop_value
399            * Decimal::try_from(self.config.key_item_fraction * 3.0).unwrap_or(dec!(0.15))
400            / Decimal::from(n_key_items as i64);
401        let key_item_min = tolerable_error * dec!(1.01); // just above TE
402        let key_item_max = (avg_key_value * dec!(2)).max(key_item_min * dec!(2)); // ensure max > min
403
404        let mut items = Vec::with_capacity(n_key_items);
405        for i in 0..n_key_items {
406            let amount_f = self.rng.random_range(
407                key_item_min.to_string().parse::<f64>().unwrap_or(10_000.0)
408                    ..=key_item_max.to_string().parse::<f64>().unwrap_or(500_000.0),
409            );
410            let amount = Decimal::try_from(amount_f)
411                .unwrap_or(key_item_min)
412                .round_dp(2)
413                .max(key_item_min);
414
415            let reason = self.pick_key_item_reason(cra, i);
416
417            items.push(KeyItem {
418                item_id: format!(
419                    "{}-{}-KEY-{i:03}",
420                    cra.entity_code,
421                    cra.account_area.replace(' ', "_").to_uppercase()
422                ),
423                amount,
424                reason,
425            });
426        }
427
428        // Guard: key items must not exceed the population value (they are a subset of it).
429        // If they do, scale all amounts down proportionally so their total is 80% of the
430        // population value, leaving room for representative items.
431        let key_total: Decimal = items.iter().map(|k| k.amount).sum();
432        if key_total > pop_value {
433            let scale = (pop_value * dec!(0.8)) / key_total;
434            for item in &mut items {
435                item.amount = (item.amount * scale).round_dp(2);
436            }
437        }
438
439        items
440    }
441
442    /// Choose a key item reason based on the CRA characteristics.
443    fn pick_key_item_reason(
444        &mut self,
445        cra: &CombinedRiskAssessment,
446        index: usize,
447    ) -> KeyItemReason {
448        // First item is always AboveTolerableError (primary reason)
449        if index == 0 {
450            return KeyItemReason::AboveTolerableError;
451        }
452        // Significant risks generate management override / high risk flags
453        if cra.significant_risk {
454            let roll: f64 = self.rng.random();
455            if roll < 0.40 {
456                return KeyItemReason::ManagementOverride;
457            }
458            if roll < 0.70 {
459                return KeyItemReason::HighRisk;
460            }
461        }
462        let roll: f64 = self.rng.random();
463        if roll < 0.60 {
464            KeyItemReason::AboveTolerableError
465        } else if roll < 0.80 {
466            KeyItemReason::UnusualNature
467        } else {
468            KeyItemReason::HighRisk
469        }
470    }
471
472    // -----------------------------------------------------------------------
473    // JE-aware sampling (population-based)
474    // -----------------------------------------------------------------------
475
476    /// Generate sampling plans using real journal entry population data.
477    ///
478    /// Key items are actual JE lines with amount > tolerable_error.
479    /// Representative items are sampled from the remaining JE population.
480    /// Falls back to synthetic generation for CRAs with no matching JE lines.
481    pub fn generate_for_cras_with_population(
482        &mut self,
483        cras: &[CombinedRiskAssessment],
484        tolerable_error: Option<Decimal>,
485        journal_entries: &[JournalEntry],
486    ) -> (Vec<SamplingPlan>, Vec<SampledItem>) {
487        info!(
488            "Generating JE-aware sampling plans for {} CRAs against {} journal entries",
489            cras.len(),
490            journal_entries.len()
491        );
492        let mut plans: Vec<SamplingPlan> = Vec::new();
493        let mut all_items: Vec<SampledItem> = Vec::new();
494
495        for cra in cras {
496            // Only generate plans for Moderate and High CRA levels
497            if cra.combined_risk < CraLevel::Moderate {
498                continue;
499            }
500
501            let te =
502                tolerable_error.unwrap_or_else(|| self.config.base_population_value * dec!(0.05));
503
504            let matching_lines = filter_je_lines_for_area(journal_entries, &cra.account_area);
505
506            let (plan, items) = if matching_lines.is_empty() {
507                // Fallback to synthetic generation when no JE lines match
508                self.generate_plan(cra, te)
509            } else {
510                self.generate_plan_from_population(cra, te, &matching_lines)
511            };
512
513            all_items.extend(items);
514            plans.push(plan);
515        }
516
517        info!(
518            "Generated {} JE-aware sampling plans with {} sampled items",
519            plans.len(),
520            all_items.len()
521        );
522        (plans, all_items)
523    }
524
525    /// Generate a sampling plan from a real JE population for one CRA.
526    fn generate_plan_from_population(
527        &mut self,
528        cra: &CombinedRiskAssessment,
529        tolerable_error: Decimal,
530        matching_lines: &[(&JournalEntry, &JournalEntryLine, Decimal)],
531    ) -> (SamplingPlan, Vec<SampledItem>) {
532        let methodology = methodology_for_assertion(cra.assertion, cra.combined_risk);
533        let rep_sample_size = sample_size_for_cra(&mut self.rng, cra.combined_risk);
534
535        // Compute real population metrics
536        let population_size = matching_lines.len();
537        let population_value: Decimal = matching_lines.iter().map(|(_, _, amt)| *amt).sum();
538
539        // Sort lines descending by amount for key item selection
540        let mut sorted_lines: Vec<_> = matching_lines.to_vec();
541        sorted_lines.sort_by(|a, b| b.2.cmp(&a.2));
542
543        // Select key items: lines where amount > tolerable_error, capped at 20
544        let mut key_items: Vec<KeyItem> = Vec::new();
545        let mut seen_ids: HashSet<String> = HashSet::new();
546
547        for (idx, (je, _line, amount)) in sorted_lines.iter().enumerate() {
548            if *amount <= tolerable_error {
549                break;
550            }
551            if key_items.len() >= 20 {
552                break;
553            }
554            let je_id = je.header.document_id.to_string();
555            // Skip duplicate JE IDs (same JE may have multiple matching lines)
556            if seen_ids.contains(&je_id) {
557                continue;
558            }
559            seen_ids.insert(je_id.clone());
560            let reason = self.pick_key_item_reason(cra, idx);
561            key_items.push(KeyItem {
562                item_id: je_id,
563                amount: *amount,
564                reason,
565            });
566        }
567
568        let key_items_value: Decimal = key_items.iter().map(|k| k.amount).sum();
569        let remaining_value = (population_value - key_items_value).max(Decimal::ZERO);
570
571        // Select representative items from remaining lines using systematic selection
572        let remaining: Vec<_> = sorted_lines
573            .iter()
574            .filter(|(je, _, _)| !seen_ids.contains(&je.header.document_id.to_string()))
575            .collect();
576        let actual_rep_size = rep_sample_size.min(remaining.len());
577        let step = if actual_rep_size > 0 {
578            remaining.len() / actual_rep_size
579        } else {
580            0
581        };
582        let start = if step > 0 {
583            self.rng.random_range(0..step)
584        } else {
585            0
586        };
587
588        // Compute sampling interval
589        let sampling_interval = if actual_rep_size > 0 && remaining_value > Decimal::ZERO {
590            remaining_value / Decimal::from(actual_rep_size as i64)
591        } else {
592            Decimal::ZERO
593        };
594
595        let plan_id = format!(
596            "SP-{}-{}-{}",
597            cra.entity_code,
598            cra.account_area.replace(' ', "_").to_uppercase(),
599            format!("{:?}", cra.assertion).to_uppercase(),
600        );
601
602        let plan = SamplingPlan {
603            id: plan_id.clone(),
604            entity_code: cra.entity_code.clone(),
605            account_area: cra.account_area.clone(),
606            assertion: format!("{}", cra.assertion),
607            methodology,
608            population_size,
609            population_value,
610            key_items: key_items.clone(),
611            key_items_value,
612            remaining_population_value: remaining_value,
613            sample_size: actual_rep_size,
614            sampling_interval,
615            cra_level: cra.combined_risk.to_string(),
616            tolerable_error,
617        };
618
619        // Build SampledItems
620        let mut sampled_items: Vec<SampledItem> = Vec::new();
621        let misstatement_p = misstatement_rate(cra.combined_risk);
622
623        // Key items — always tested
624        for ki in &key_items {
625            let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
626            let misstatement_amount = if misstatement_found {
627                let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.15_f64))
628                    .unwrap_or(dec!(0.05));
629                Some((ki.amount * pct).round_dp(2))
630            } else {
631                None
632            };
633
634            sampled_items.push(SampledItem {
635                item_id: ki.item_id.clone(),
636                sampling_plan_id: plan_id.clone(),
637                amount: ki.amount,
638                selection_type: SelectionType::KeyItem,
639                tested: true,
640                misstatement_found,
641                misstatement_amount,
642            });
643        }
644
645        // Representative items via systematic selection
646        if actual_rep_size > 0 && step > 0 {
647            let mut rep_seen: HashSet<String> = HashSet::new();
648            for i in 0..actual_rep_size {
649                let idx = (start + i * step) % remaining.len();
650                let (je, _line, amount) = remaining[idx];
651                let je_id = je.header.document_id.to_string();
652
653                // Avoid duplicate representative items
654                if rep_seen.contains(&je_id) {
655                    continue;
656                }
657                rep_seen.insert(je_id.clone());
658
659                let misstatement_found: bool = self.rng.random::<f64>() < misstatement_p;
660                let misstatement_amount = if misstatement_found {
661                    let pct = Decimal::try_from(self.rng.random_range(0.01_f64..=0.30_f64))
662                        .unwrap_or(dec!(0.05));
663                    Some((amount * pct).round_dp(2))
664                } else {
665                    None
666                };
667
668                sampled_items.push(SampledItem {
669                    item_id: je_id,
670                    sampling_plan_id: plan_id.clone(),
671                    amount: *amount,
672                    selection_type: SelectionType::Representative,
673                    tested: true,
674                    misstatement_found,
675                    misstatement_amount,
676                });
677            }
678        }
679
680        (plan, sampled_items)
681    }
682}
683
684// ---------------------------------------------------------------------------
685// Tests
686// ---------------------------------------------------------------------------
687
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    //! Unit tests for the synthetic (no-JE) sampling plan path and the
    //! methodology helper.

    use super::*;
    use datasynth_core::models::audit::risk_assessment_cra::RiskRating;
    use rust_decimal_macros::dec;

    /// Build a non-significant-risk CRA for entity `C001` with no risk factors.
    fn make_cra(
        account_area: &str,
        assertion: AuditAssertion,
        ir: RiskRating,
        cr: RiskRating,
    ) -> CombinedRiskAssessment {
        CombinedRiskAssessment::new("C001", account_area, assertion, ir, cr, false, vec![])
    }

    /// A Moderate CRA yields exactly one plan with a 20–30 item sample.
    #[test]
    fn moderate_cra_generates_plan() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        assert_eq!(cra.combined_risk, CraLevel::Moderate);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(
            plans.len(),
            1,
            "Should generate exactly one plan for Moderate CRA"
        );
        let plan = &plans[0];
        assert!(!items.is_empty(), "Should generate sampled items");
        assert!(
            (20..=30).contains(&plan.sample_size),
            "Moderate CRA sample size 20–30"
        );
    }

    /// Low inherent and control risk combine to a Minimal CRA, which is below
    /// the Moderate threshold and must therefore be skipped entirely.
    #[test]
    fn low_cra_skipped() {
        let cra = make_cra(
            "Cash",
            AuditAssertion::Existence,
            RiskRating::Low,
            RiskRating::Low,
        );
        assert_eq!(cra.combined_risk, CraLevel::Minimal);

        let mut gen = SamplingPlanGenerator::new(42);
        let (plans, items) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert!(
            plans.is_empty(),
            "Minimal CRA should produce no sampling plan"
        );
        assert!(
            items.is_empty(),
            "Minimal CRA should produce no sampled items"
        );
    }

    /// A High CRA yields a sample inside the full 40–60 band.
    #[test]
    fn high_cra_large_sample() {
        let cra = make_cra(
            "Revenue",
            AuditAssertion::Occurrence,
            RiskRating::High,
            RiskRating::High,
        );
        assert_eq!(cra.combined_risk, CraLevel::High);

        let mut gen = SamplingPlanGenerator::new(99);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(dec!(32_500)));

        assert_eq!(plans.len(), 1);
        let plan = &plans[0];
        assert!(
            (40..=60).contains(&plan.sample_size),
            "High CRA sample size should be 40–60, got {}",
            plan.sample_size
        );
    }

    /// Synthetic key items are drawn at or above the tolerable error.
    #[test]
    fn key_items_all_above_tolerable_error() {
        let cra = make_cra(
            "Provisions",
            AuditAssertion::ValuationAndAllocation,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(7);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        for ki in &plan.key_items {
            assert!(
                ki.amount >= te,
                "Key item amount {} must be >= tolerable error {}",
                ki.amount,
                te
            );
        }
    }

    /// The sampling interval equals remaining value / sample size.
    #[test]
    fn sampling_interval_formula() {
        let cra = make_cra(
            "Inventory",
            AuditAssertion::Existence,
            RiskRating::High,
            RiskRating::Medium,
        );

        let mut gen = SamplingPlanGenerator::new(13);
        let te = dec!(32_500);
        let (plans, _) = gen.generate_for_cras(&[cra], Some(te));

        assert!(!plans.is_empty());
        let plan = &plans[0];
        if plan.sample_size > 0 && plan.remaining_population_value > Decimal::ZERO {
            let expected_interval =
                plan.remaining_population_value / Decimal::from(plan.sample_size as i64);
            // Allow 1 cent rounding tolerance
            let diff = (plan.sampling_interval - expected_interval).abs();
            assert!(
                diff < dec!(0.01),
                "Interval {} ≠ remaining/sample_size {}",
                plan.sampling_interval,
                expected_interval
            );
        }
    }

    /// Balance assertions map to monetary unit sampling.
    #[test]
    fn balance_assertion_uses_mus() {
        let cra = make_cra(
            "Trade Receivables",
            AuditAssertion::Existence,
            RiskRating::Medium,
            RiskRating::Medium,
        );
        let methodology = methodology_for_assertion(cra.assertion, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::MonetaryUnitSampling);
    }

    /// Transaction assertions map to systematic selection.
    #[test]
    fn transaction_assertion_uses_systematic() {
        let methodology = methodology_for_assertion(AuditAssertion::Occurrence, CraLevel::Moderate);
        assert_eq!(methodology, SamplingMethodology::SystematicSelection);
    }

    /// Every sampled item is marked tested and points at a generated plan.
    #[test]
    fn all_sampled_items_have_plan_id() {
        let cras = vec![
            make_cra(
                "Revenue",
                AuditAssertion::Occurrence,
                RiskRating::High,
                RiskRating::Medium,
            ),
            make_cra(
                "Inventory",
                AuditAssertion::Existence,
                RiskRating::High,
                RiskRating::Low,
            ),
        ];

        let mut gen = SamplingPlanGenerator::new(55);
        let te = dec!(32_500);
        let (plans, items) = gen.generate_for_cras(&cras, Some(te));

        assert!(!plans.is_empty());
        assert!(!items.is_empty());
        // Verify every item is marked tested
        assert!(
            items.iter().all(|i| i.tested),
            "All items should be marked tested"
        );
        // Verify every item references the id of one of the returned plans
        assert!(
            items
                .iter()
                .all(|i| plans.iter().any(|p| p.id == i.sampling_plan_id)),
            "Every sampled item must reference a generated plan id"
        );
    }
}
datasynth_generators/audit/sampling_plan_generator.rs

datasynth_generators/audit/
sampling_plan_generator.rs