datasynth_generators/audit/
sample_generator.rs

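//! Audit sample generator per ISA 530.
//!
//! Generates `AuditSample` records with realistic item distributions for
//! workpapers that use statistical sampling (`SamplingMethod::StatisticalRandom`
//! or `SamplingMethod::MonetaryUnit`).  Workpapers using any other method — for
//! example `SamplingMethod::Judgmental` with `population_size == 0` — are treated
//! as non-sampling procedures and receive no sample (`generate_sample` returns
//! `None`), unless `generate_for_non_sampling` is enabled and the workpaper has a
//! non-zero population.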
7
8use datasynth_core::utils::seeded_rng;
9use rand::Rng;
10use rand_chacha::ChaCha8Rng;
11use rust_decimal::Decimal;
12use uuid::Uuid;
13
14use datasynth_core::models::audit::{
15    AuditSample, SampleItem, SampleItemResult, SamplingMethod, Workpaper,
16};
17
/// Configuration for the sample generator (ISA 530).
///
/// The three ratio fields describe the intended mix of item outcomes and are
/// meant to sum to `1.0`; no validation of the values happens in this type.
#[derive(Debug, Clone)]
pub struct SampleGeneratorConfig {
    /// Inclusive `(min, max)` bounds on the number of items in each sample.
    pub items_per_sample: (u32, u32),
    /// Fraction of items that are correct (no misstatement).
    ///
    /// Informational only: `SampleGenerator::generate_sample` does not read
    /// this field. Every item that is neither a misstatement nor an exception
    /// is classified as correct, so the effective share is
    /// `1 - misstatement_ratio - exception_ratio`.
    pub correct_ratio: f64,
    /// Fraction of items that have a misstatement.
    pub misstatement_ratio: f64,
    /// Fraction of items that have a deviation / exception.
    pub exception_ratio: f64,
    /// Whether workpapers *without* a statistical sampling method may also
    /// receive a sample.
    ///
    /// When `false`, only workpapers whose sampling method is
    /// `StatisticalRandom` or `MonetaryUnit` receive a sample. When `true`,
    /// any workpaper with `population_size > 0` receives one as well.
    pub generate_for_non_sampling: bool,
}
33
34impl Default for SampleGeneratorConfig {
35    fn default() -> Self {
36        Self {
37            items_per_sample: (15, 60),
38            correct_ratio: 0.90,
39            misstatement_ratio: 0.07,
40            exception_ratio: 0.03,
41            generate_for_non_sampling: false,
42        }
43    }
44}
45
46/// Generator for `AuditSample` records per ISA 530.
47pub struct SampleGenerator {
48    /// Seeded random number generator
49    rng: ChaCha8Rng,
50    /// Configuration
51    config: SampleGeneratorConfig,
52    /// Counter for human-readable document references
53    item_counter: u64,
54}
55
56impl SampleGenerator {
57    /// Create a new generator with the given seed and default configuration.
58    pub fn new(seed: u64) -> Self {
59        Self {
60            rng: seeded_rng(seed, 0),
61            config: SampleGeneratorConfig::default(),
62            item_counter: 0,
63        }
64    }
65
66    /// Create a new generator with custom configuration.
67    pub fn with_config(seed: u64, config: SampleGeneratorConfig) -> Self {
68        Self {
69            rng: seeded_rng(seed, 0),
70            config,
71            item_counter: 0,
72        }
73    }
74
75    /// Generate an `AuditSample` for a workpaper, or `None` if sampling is not applicable.
76    ///
77    /// A sample is generated when:
78    /// - The workpaper's `sampling_method` is one of the statistical methods
79    ///   (`StatisticalRandom`, `MonetaryUnit`), **or**
80    /// - `config.generate_for_non_sampling` is `true` and `workpaper.population_size > 0`.
81    ///
82    /// # Arguments
83    /// * `workpaper`     — The workpaper to create the sample for.
84    /// * `engagement_id` — The engagement UUID (must match `workpaper.engagement_id`).
85    pub fn generate_sample(
86        &mut self,
87        workpaper: &Workpaper,
88        engagement_id: Uuid,
89    ) -> Option<AuditSample> {
90        // Decide whether to generate a sample for this workpaper.
91        let is_statistical = matches!(
92            workpaper.sampling_method,
93            SamplingMethod::StatisticalRandom | SamplingMethod::MonetaryUnit
94        );
95        let has_population = workpaper.population_size > 0;
96
97        let should_generate =
98            is_statistical || (self.config.generate_for_non_sampling && has_population);
99        if !should_generate {
100            return None;
101        }
102
103        let sample_count = self
104            .rng
105            .random_range(self.config.items_per_sample.0..=self.config.items_per_sample.1);
106
107        // Population description derived from workpaper title.
108        let pop_description = format!("{} — sampled population", workpaper.title);
109
110        let mut sample = AuditSample::new(
111            workpaper.workpaper_id,
112            engagement_id,
113            pop_description,
114            workpaper.population_size.max(sample_count as u64),
115            workpaper.sampling_method,
116            sample_count,
117        );
118
119        // Set population value (rough estimate: average item ~$50k × population size).
120        let pop_value_units: i64 = (workpaper.population_size as i64)
121            .saturating_mul(50_000_i64)
122            .max(100_000);
123        sample.population_value = Some(Decimal::new(pop_value_units, 0));
124
125        // Tolerable misstatement ≈ 5% of population value.
126        sample.tolerable_misstatement = sample.population_value.map(|v| v / Decimal::from(20));
127
128        // Generate the individual items.
129        for _ in 0..sample_count {
130            self.item_counter += 1;
131            let doc_ref = format!("DOC-{:06}", self.item_counter);
132
133            // Book value: $1k – $500k
134            let book_units: i64 = self.rng.random_range(1_000_i64..=500_000_i64);
135            let book_value = Decimal::new(book_units, 0);
136
137            let roll: f64 = self.rng.random();
138            let misstatement_cutoff = self.config.misstatement_ratio;
139            let exception_cutoff = misstatement_cutoff + self.config.exception_ratio;
140
141            let mut item = SampleItem::new(&doc_ref, book_value);
142
143            if roll < misstatement_cutoff {
144                // Misstatement: audited value differs by 1–15% of book.
145                let pct: f64 = self.rng.random_range(0.01..0.15);
146                let diff_units = (book_units as f64 * pct).round() as i64;
147                let diff = Decimal::new(diff_units.max(1), 0);
148                // Randomly overstate or understate.
149                let audited = if self.rng.random::<bool>() {
150                    book_value + diff
151                } else {
152                    (book_value - diff).max(Decimal::ZERO)
153                };
154                let misstatement = book_value - audited;
155
156                item.audited_value = Some(audited);
157                item.misstatement = Some(misstatement);
158                item.result = SampleItemResult::Misstatement;
159            } else if roll < exception_cutoff {
160                // Exception / deviation: audited value differs by 5–20%.
161                let pct: f64 = self.rng.random_range(0.05..0.20);
162                let diff_units = (book_units as f64 * pct).round() as i64;
163                let diff = Decimal::new(diff_units.max(1), 0);
164                let audited = (book_value - diff).max(Decimal::ZERO);
165                let misstatement = book_value - audited;
166
167                item.audited_value = Some(audited);
168                item.misstatement = Some(misstatement);
169                item.result = SampleItemResult::Exception;
170            } else {
171                // Correct: audited value equals book value.
172                item.audited_value = Some(book_value);
173                item.result = SampleItemResult::Correct;
174            }
175
176            sample.add_item(item);
177        }
178
179        // Compute projection and reach a conclusion.
180        sample.conclude();
181
182        // Upgrade to InsufficientEvidence → use actual projection.
183        // (conclude() handles this already; no override needed.)
184
185        Some(sample)
186    }
187}
188
// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
// Tests unwrap freely: a panic is the desired failure mode here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::{
        ProcedureType, SampleConclusion, Workpaper, WorkpaperScope, WorkpaperSection,
    };

    /// Build a generator with the default configuration.
    ///
    /// Identifiers here are spelled `generator` rather than `gen`, because
    /// `gen` is a reserved keyword starting with the Rust 2024 edition.
    fn make_generator(seed: u64) -> SampleGenerator {
        SampleGenerator::new(seed)
    }

    /// Build a workpaper that will receive a sample (statistical method + population).
    fn sampling_workpaper(method: SamplingMethod) -> Workpaper {
        Workpaper::new(
            Uuid::new_v4(),
            "D-100",
            "Accounts Receivable Testing",
            WorkpaperSection::SubstantiveTesting,
        )
        .with_procedure("Test AR balances", ProcedureType::SubstantiveTest)
        .with_scope(WorkpaperScope::default(), 1_000, 50, method)
    }

    /// Build a judgmental workpaper with an empty population (no sample expected).
    fn non_sampling_workpaper() -> Workpaper {
        Workpaper::new(
            Uuid::new_v4(),
            "C-100",
            "Controls Walk-through",
            WorkpaperSection::ControlTesting,
        )
        .with_scope(WorkpaperScope::default(), 0, 0, SamplingMethod::Judgmental)
    }

    // -------------------------------------------------------------------------

    /// A statistical-method workpaper produces a sample with items in range.
    #[test]
    fn test_generates_sample() {
        let wp = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let eng_id = wp.engagement_id;
        let mut generator = make_generator(42);
        let sample = generator.generate_sample(&wp, eng_id).unwrap();

        let cfg = SampleGeneratorConfig::default();
        let min = cfg.items_per_sample.0 as usize;
        let max = cfg.items_per_sample.1 as usize;
        assert!(
            sample.items.len() >= min && sample.items.len() <= max,
            "expected {min}..={max} items, got {}",
            sample.items.len()
        );
        assert!(
            sample.conclusion.is_some(),
            "sample should have a conclusion"
        );
    }

    /// A non-sampling workpaper with population_size == 0 returns None.
    #[test]
    fn test_no_sample_for_non_sampling() {
        let wp = non_sampling_workpaper();
        let eng_id = wp.engagement_id;
        let mut generator = make_generator(99);
        let result = generator.generate_sample(&wp, eng_id);
        assert!(result.is_none(), "expected None for non-sampling workpaper");
    }

    /// With a large count, the item result distribution should roughly match the config.
    #[test]
    fn test_item_distribution() {
        let wp = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let eng_id = wp.engagement_id;
        let config = SampleGeneratorConfig {
            items_per_sample: (300, 300),
            correct_ratio: 0.90,
            misstatement_ratio: 0.07,
            exception_ratio: 0.03,
            generate_for_non_sampling: false,
        };
        let mut generator = SampleGenerator::with_config(77, config);
        let sample = generator.generate_sample(&wp, eng_id).unwrap();

        let total = sample.items.len() as f64;
        let correct_count = sample
            .items
            .iter()
            .filter(|i| i.result == SampleItemResult::Correct)
            .count() as f64;

        // Correct ratio should be no more than 15 points below the expected 90%.
        let ratio = correct_count / total;
        assert!(
            (0.75..=1.00).contains(&ratio),
            "correct ratio {ratio:.2} outside expected 75–100%"
        );
    }

    /// Same seed produces identical output.
    #[test]
    fn test_deterministic() {
        let wp = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let eng_id = wp.engagement_id;

        let sample_a = SampleGenerator::new(1234)
            .generate_sample(&wp, eng_id)
            .unwrap();
        let sample_b = SampleGenerator::new(1234)
            .generate_sample(&wp, eng_id)
            .unwrap();

        assert_eq!(sample_a.items.len(), sample_b.items.len());
        for (a, b) in sample_a.items.iter().zip(sample_b.items.iter()) {
            assert_eq!(a.document_ref, b.document_ref);
            assert_eq!(a.book_value, b.book_value);
            assert_eq!(a.audited_value, b.audited_value);
            assert_eq!(a.misstatement, b.misstatement);
            assert_eq!(a.result, b.result);
        }
        assert_eq!(sample_a.conclusion, sample_b.conclusion);
    }

    /// `generate_for_non_sampling = true` causes a Judgmental-method workpaper
    /// with population_size > 0 to receive a sample.
    #[test]
    fn test_generate_for_non_sampling_flag() {
        let mut wp = non_sampling_workpaper();
        wp.population_size = 500; // non-zero population
        let eng_id = wp.engagement_id;

        let config = SampleGeneratorConfig {
            generate_for_non_sampling: true,
            ..Default::default()
        };
        let mut generator = SampleGenerator::with_config(55, config);
        let result = generator.generate_sample(&wp, eng_id);
        assert!(
            result.is_some(),
            "expected Some when generate_for_non_sampling = true"
        );
    }

    /// Misstatement items should have a non-zero misstatement amount.
    #[test]
    fn test_misstatement_items_have_amounts() {
        let wp = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let eng_id = wp.engagement_id;
        let config = SampleGeneratorConfig {
            items_per_sample: (200, 200),
            misstatement_ratio: 0.50, // inflate so we always get some
            exception_ratio: 0.10,
            correct_ratio: 0.40,
            generate_for_non_sampling: false,
        };
        let mut generator = SampleGenerator::with_config(33, config);
        let sample = generator.generate_sample(&wp, eng_id).unwrap();

        let mist_items: Vec<_> = sample
            .items
            .iter()
            .filter(|i| i.result == SampleItemResult::Misstatement)
            .collect();

        assert!(!mist_items.is_empty(), "expected some misstatement items");
        for item in mist_items {
            assert!(
                item.misstatement.is_some(),
                "misstatement item should have a misstatement amount"
            );
            // misstatement amount should be non-zero
            assert_ne!(
                item.misstatement.unwrap(),
                Decimal::ZERO,
                "misstatement amount should not be zero"
            );
        }
    }

    /// The sample conclusion should be a valid `SampleConclusion` variant.
    #[test]
    fn test_conclusion_is_set() {
        let wp = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let eng_id = wp.engagement_id;
        let mut generator = make_generator(12);
        let sample = generator.generate_sample(&wp, eng_id).unwrap();

        let conclusion = sample.conclusion.unwrap();
        let valid = matches!(
            conclusion,
            SampleConclusion::ProjectedBelowTolerable
                | SampleConclusion::ProjectedExceedsTolerable
                | SampleConclusion::InsufficientEvidence
        );
        assert!(valid, "unexpected SampleConclusion variant");
    }
}
datasynth_generators/audit/sample_generator.rs

datasynth_generators/audit/
sample_generator.rs