datasynth_generators/audit/
sample_generator.rs1use datasynth_core::utils::seeded_rng;
9use rand::RngExt;
10use rand_chacha::ChaCha8Rng;
11use rust_decimal::Decimal;
12use uuid::Uuid;
13
14use datasynth_core::models::audit::{
15 AuditSample, SampleItem, SampleItemResult, SamplingMethod, Workpaper,
16};
17
/// Tunable parameters for [`SampleGenerator`].
#[derive(Debug, Clone)]
pub struct SampleGeneratorConfig {
    /// Inclusive `(min, max)` bounds for the number of items drawn per sample.
    pub items_per_sample: (u32, u32),

    /// Nominal share of items that test as correct.
    ///
    /// NOTE(review): the generator in this file never reads this field; an
    /// item is `Correct` whenever it is neither a misstatement nor an
    /// exception.
    pub correct_ratio: f64,

    /// Probability that a generated item is classified as a misstatement.
    pub misstatement_ratio: f64,

    /// Probability that a generated item is classified as an exception.
    pub exception_ratio: f64,

    /// When `true`, workpapers that do not use a statistical sampling method
    /// still get a sample, provided their population size is non-zero.
    pub generate_for_non_sampling: bool,
}
33
34impl Default for SampleGeneratorConfig {
35 fn default() -> Self {
36 Self {
37 items_per_sample: (15, 60),
38 correct_ratio: 0.90,
39 misstatement_ratio: 0.07,
40 exception_ratio: 0.03,
41 generate_for_non_sampling: false,
42 }
43 }
44}
45
46pub struct SampleGenerator {
48 rng: ChaCha8Rng,
50 config: SampleGeneratorConfig,
52 item_counter: u64,
54}
55
56impl SampleGenerator {
57 pub fn new(seed: u64) -> Self {
59 Self {
60 rng: seeded_rng(seed, 0),
61 config: SampleGeneratorConfig::default(),
62 item_counter: 0,
63 }
64 }
65
66 pub fn with_config(seed: u64, config: SampleGeneratorConfig) -> Self {
68 Self {
69 rng: seeded_rng(seed, 0),
70 config,
71 item_counter: 0,
72 }
73 }
74
75 pub fn generate_sample(
86 &mut self,
87 workpaper: &Workpaper,
88 engagement_id: Uuid,
89 ) -> Option<AuditSample> {
90 let is_statistical = matches!(
92 workpaper.sampling_method,
93 SamplingMethod::StatisticalRandom | SamplingMethod::MonetaryUnit
94 );
95 let has_population = workpaper.population_size > 0;
96
97 let should_generate =
98 is_statistical || (self.config.generate_for_non_sampling && has_population);
99 if !should_generate {
100 return None;
101 }
102
103 let sample_count = self
104 .rng
105 .random_range(self.config.items_per_sample.0..=self.config.items_per_sample.1);
106
107 let pop_description = format!("{} — sampled population", workpaper.title);
109
110 let mut sample = AuditSample::new(
111 workpaper.workpaper_id,
112 engagement_id,
113 pop_description,
114 workpaper.population_size.max(sample_count as u64),
115 workpaper.sampling_method,
116 sample_count,
117 );
118
119 let pop_value_units: i64 = (workpaper.population_size as i64)
121 .saturating_mul(50_000_i64)
122 .max(100_000);
123 sample.population_value = Some(Decimal::new(pop_value_units, 0));
124
125 sample.tolerable_misstatement = sample.population_value.map(|v| v / Decimal::from(20));
127
128 for _ in 0..sample_count {
130 self.item_counter += 1;
131 let doc_ref = format!("DOC-{:06}", self.item_counter);
132
133 let book_units: i64 = self.rng.random_range(1_000_i64..=500_000_i64);
135 let book_value = Decimal::new(book_units, 0);
136
137 let roll: f64 = self.rng.random();
138 let misstatement_cutoff = self.config.misstatement_ratio;
139 let exception_cutoff = misstatement_cutoff + self.config.exception_ratio;
140
141 let mut item = SampleItem::new(&doc_ref, book_value);
142
143 if roll < misstatement_cutoff {
144 let pct: f64 = self.rng.random_range(0.01..0.15);
146 let diff_units = (book_units as f64 * pct).round() as i64;
147 let diff = Decimal::new(diff_units.max(1), 0);
148 let audited = if self.rng.random::<bool>() {
150 book_value + diff
151 } else {
152 (book_value - diff).max(Decimal::ZERO)
153 };
154 let misstatement = book_value - audited;
155
156 item.audited_value = Some(audited);
157 item.misstatement = Some(misstatement);
158 item.result = SampleItemResult::Misstatement;
159 } else if roll < exception_cutoff {
160 let pct: f64 = self.rng.random_range(0.05..0.20);
162 let diff_units = (book_units as f64 * pct).round() as i64;
163 let diff = Decimal::new(diff_units.max(1), 0);
164 let audited = (book_value - diff).max(Decimal::ZERO);
165 let misstatement = book_value - audited;
166
167 item.audited_value = Some(audited);
168 item.misstatement = Some(misstatement);
169 item.result = SampleItemResult::Exception;
170 } else {
171 item.audited_value = Some(book_value);
173 item.result = SampleItemResult::Correct;
174 }
175
176 sample.add_item(item);
177 }
178
179 sample.conclude();
181
182 Some(sample)
186 }
187}
188
#[cfg(test)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::{
        ProcedureType, SampleConclusion, Workpaper, WorkpaperScope, WorkpaperSection,
    };

    /// Generator with the default configuration.
    fn make_gen(seed: u64) -> SampleGenerator {
        SampleGenerator::new(seed)
    }

    /// Substantive-testing workpaper with a population of 1,000 and the given
    /// sampling method.
    fn sampling_workpaper(method: SamplingMethod) -> Workpaper {
        let base = Workpaper::new(
            Uuid::new_v4(),
            "D-100",
            "Accounts Receivable Testing",
            WorkpaperSection::SubstantiveTesting,
        );
        base.with_procedure("Test AR balances", ProcedureType::SubstantiveTest)
            .with_scope(WorkpaperScope::default(), 1_000, 50, method)
    }

    /// Judgmental control-testing workpaper with an empty population.
    fn non_sampling_workpaper() -> Workpaper {
        let base = Workpaper::new(
            Uuid::new_v4(),
            "C-100",
            "Controls Walk-through",
            WorkpaperSection::ControlTesting,
        );
        base.with_scope(WorkpaperScope::default(), 0, 0, SamplingMethod::Judgmental)
    }

    /// A statistical workpaper yields a concluded sample whose size lies
    /// within the default bounds.
    #[test]
    fn test_generates_sample() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement_id = workpaper.engagement_id;
        let mut generator = make_gen(42);
        let sample = generator.generate_sample(&workpaper, engagement_id).unwrap();

        let (lo, hi) = SampleGeneratorConfig::default().items_per_sample;
        let min = lo as usize;
        let max = hi as usize;
        assert!(
            (min..=max).contains(&sample.items.len()),
            "expected {min}..={max} items, got {}",
            sample.items.len()
        );
        assert!(
            sample.conclusion.is_some(),
            "sample should have a conclusion"
        );
    }

    /// A judgmental workpaper produces nothing under the default config.
    #[test]
    fn test_no_sample_for_non_sampling() {
        let workpaper = non_sampling_workpaper();
        let engagement_id = workpaper.engagement_id;
        let mut generator = make_gen(99);
        let result = generator.generate_sample(&workpaper, engagement_id);
        assert!(result.is_none(), "expected None for non-sampling workpaper");
    }

    /// With 300 items the share of correct items stays within 75–100 %.
    #[test]
    fn test_item_distribution() {
        let workpaper = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let engagement_id = workpaper.engagement_id;
        let config = SampleGeneratorConfig {
            items_per_sample: (300, 300),
            correct_ratio: 0.90,
            misstatement_ratio: 0.07,
            exception_ratio: 0.03,
            generate_for_non_sampling: false,
        };
        let mut generator = SampleGenerator::with_config(77, config);
        let sample = generator.generate_sample(&workpaper, engagement_id).unwrap();

        let correct_count = sample
            .items
            .iter()
            .filter(|item| item.result == SampleItemResult::Correct)
            .count() as f64;
        let total = sample.items.len() as f64;

        let ratio = correct_count / total;
        assert!(
            (0.75..=1.00).contains(&ratio),
            "correct ratio {ratio:.2} outside expected 75–100%"
        );
    }

    /// Two generators with the same seed produce matching samples.
    #[test]
    fn test_deterministic() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement_id = workpaper.engagement_id;

        let first = SampleGenerator::new(1234)
            .generate_sample(&workpaper, engagement_id)
            .unwrap();
        let second = SampleGenerator::new(1234)
            .generate_sample(&workpaper, engagement_id)
            .unwrap();

        assert_eq!(first.items.len(), second.items.len());
        for (lhs, rhs) in first.items.iter().zip(&second.items) {
            assert_eq!(lhs.document_ref, rhs.document_ref);
            assert_eq!(lhs.book_value, rhs.book_value);
            assert_eq!(lhs.result, rhs.result);
        }
        assert_eq!(first.conclusion, second.conclusion);
    }

    /// Enabling `generate_for_non_sampling` yields a sample for a judgmental
    /// workpaper once it has a population.
    #[test]
    fn test_generate_for_non_sampling_flag() {
        let mut workpaper = non_sampling_workpaper();
        workpaper.population_size = 500;
        let engagement_id = workpaper.engagement_id;

        let config = SampleGeneratorConfig {
            generate_for_non_sampling: true,
            ..Default::default()
        };
        let mut generator = SampleGenerator::with_config(55, config);
        let result = generator.generate_sample(&workpaper, engagement_id);
        assert!(
            result.is_some(),
            "expected Some when generate_for_non_sampling = true"
        );
    }

    /// Every misstatement item carries a non-zero misstatement amount.
    #[test]
    fn test_misstatement_items_have_amounts() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement_id = workpaper.engagement_id;
        let config = SampleGeneratorConfig {
            items_per_sample: (200, 200),
            misstatement_ratio: 0.50,
            exception_ratio: 0.10,
            correct_ratio: 0.40,
            generate_for_non_sampling: false,
        };
        let mut generator = SampleGenerator::with_config(33, config);
        let sample = generator.generate_sample(&workpaper, engagement_id).unwrap();

        let mist_items: Vec<_> = sample
            .items
            .iter()
            .filter(|item| item.result == SampleItemResult::Misstatement)
            .collect();

        assert!(!mist_items.is_empty(), "expected some misstatement items");
        for item in mist_items {
            assert!(
                item.misstatement.is_some(),
                "misstatement item should have a misstatement amount"
            );
            assert_ne!(
                item.misstatement.unwrap(),
                Decimal::ZERO,
                "misstatement amount should not be zero"
            );
        }
    }

    /// The conclusion is one of the three known variants.
    #[test]
    fn test_conclusion_is_set() {
        let workpaper = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let engagement_id = workpaper.engagement_id;
        let mut generator = make_gen(12);
        let sample = generator.generate_sample(&workpaper, engagement_id).unwrap();

        let conclusion = sample.conclusion.unwrap();
        assert!(
            matches!(
                conclusion,
                SampleConclusion::ProjectedBelowTolerable
                    | SampleConclusion::ProjectedExceedsTolerable
                    | SampleConclusion::InsufficientEvidence
            ),
            "unexpected SampleConclusion variant"
        );
    }
}
385}