datasynth_generators/audit/
sample_generator.rs1use datasynth_core::utils::seeded_rng;
9use rand::Rng;
10use rand_chacha::ChaCha8Rng;
11use rust_decimal::Decimal;
12use uuid::Uuid;
13
14use datasynth_core::models::audit::{
15 AuditSample, SampleItem, SampleItemResult, SamplingMethod, Workpaper,
16};
17
/// Tunable parameters for [`SampleGenerator`].
///
/// `PartialEq` is derived so that configurations can be compared directly
/// (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct SampleGeneratorConfig {
    /// Inclusive `(min, max)` bounds for the number of items drawn per sample.
    /// The first element should not exceed the second.
    pub items_per_sample: (u32, u32),
    /// Intended share of items that test clean.
    ///
    /// NOTE(review): the generator code in this file never reads this field;
    /// an item is classified as correct whenever its roll falls outside the
    /// misstatement and exception bands, so the effective share is
    /// `1 - misstatement_ratio - exception_ratio`. Confirm whether other
    /// callers rely on it.
    pub correct_ratio: f64,
    /// Probability that an item is classified as a misstatement: an item is
    /// a misstatement when its random roll is below this value.
    pub misstatement_ratio: f64,
    /// Probability that an item is classified as an exception: the band
    /// immediately above `misstatement_ratio` on the same roll.
    pub exception_ratio: f64,
    /// When `true`, samples are also generated for workpapers whose sampling
    /// method is not statistical, provided their population size is non-zero.
    pub generate_for_non_sampling: bool,
}

impl Default for SampleGeneratorConfig {
    /// Returns 15 to 60 items per sample with a 90% / 7% / 3% split between
    /// correct, misstatement and exception items, and no samples for
    /// non-statistical workpapers.
    fn default() -> Self {
        Self {
            items_per_sample: (15, 60),
            correct_ratio: 0.90,
            misstatement_ratio: 0.07,
            exception_ratio: 0.03,
            generate_for_non_sampling: false,
        }
    }
}
45
46pub struct SampleGenerator {
48 rng: ChaCha8Rng,
50 config: SampleGeneratorConfig,
52 item_counter: u64,
54}
55
56impl SampleGenerator {
57 pub fn new(seed: u64) -> Self {
59 Self {
60 rng: seeded_rng(seed, 0),
61 config: SampleGeneratorConfig::default(),
62 item_counter: 0,
63 }
64 }
65
66 pub fn with_config(seed: u64, config: SampleGeneratorConfig) -> Self {
68 Self {
69 rng: seeded_rng(seed, 0),
70 config,
71 item_counter: 0,
72 }
73 }
74
75 pub fn generate_sample(
86 &mut self,
87 workpaper: &Workpaper,
88 engagement_id: Uuid,
89 ) -> Option<AuditSample> {
90 let is_statistical = matches!(
92 workpaper.sampling_method,
93 SamplingMethod::StatisticalRandom | SamplingMethod::MonetaryUnit
94 );
95 let has_population = workpaper.population_size > 0;
96
97 let should_generate =
98 is_statistical || (self.config.generate_for_non_sampling && has_population);
99 if !should_generate {
100 return None;
101 }
102
103 let sample_count = self
104 .rng
105 .random_range(self.config.items_per_sample.0..=self.config.items_per_sample.1);
106
107 let pop_description = format!("{} — sampled population", workpaper.title);
109
110 let mut sample = AuditSample::new(
111 workpaper.workpaper_id,
112 engagement_id,
113 pop_description,
114 workpaper.population_size.max(sample_count as u64),
115 workpaper.sampling_method,
116 sample_count,
117 );
118
119 let pop_value_units: i64 = (workpaper.population_size as i64)
121 .saturating_mul(50_000_i64)
122 .max(100_000);
123 sample.population_value = Some(Decimal::new(pop_value_units, 0));
124
125 sample.tolerable_misstatement = sample.population_value.map(|v| v / Decimal::from(20));
127
128 for _ in 0..sample_count {
130 self.item_counter += 1;
131 let doc_ref = format!("DOC-{:06}", self.item_counter);
132
133 let book_units: i64 = self.rng.random_range(1_000_i64..=500_000_i64);
135 let book_value = Decimal::new(book_units, 0);
136
137 let roll: f64 = self.rng.random();
138 let misstatement_cutoff = self.config.misstatement_ratio;
139 let exception_cutoff = misstatement_cutoff + self.config.exception_ratio;
140
141 let mut item = SampleItem::new(&doc_ref, book_value);
142
143 if roll < misstatement_cutoff {
144 let pct: f64 = self.rng.random_range(0.01..0.15);
146 let diff_units = (book_units as f64 * pct).round() as i64;
147 let diff = Decimal::new(diff_units.max(1), 0);
148 let audited = if self.rng.random::<bool>() {
150 book_value + diff
151 } else {
152 (book_value - diff).max(Decimal::ZERO)
153 };
154 let misstatement = book_value - audited;
155
156 item.audited_value = Some(audited);
157 item.misstatement = Some(misstatement);
158 item.result = SampleItemResult::Misstatement;
159 } else if roll < exception_cutoff {
160 let pct: f64 = self.rng.random_range(0.05..0.20);
162 let diff_units = (book_units as f64 * pct).round() as i64;
163 let diff = Decimal::new(diff_units.max(1), 0);
164 let audited = (book_value - diff).max(Decimal::ZERO);
165 let misstatement = book_value - audited;
166
167 item.audited_value = Some(audited);
168 item.misstatement = Some(misstatement);
169 item.result = SampleItemResult::Exception;
170 } else {
171 item.audited_value = Some(book_value);
173 item.result = SampleItemResult::Correct;
174 }
175
176 sample.add_item(item);
177 }
178
179 sample.conclude();
181
182 Some(sample)
186 }
187}
188
#[cfg(test)]
// Tests unwrap freely: a panic is exactly the failure signal wanted here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use datasynth_core::models::audit::{
        ProcedureType, SampleConclusion, Workpaper, WorkpaperScope, WorkpaperSection,
    };

    /// Builds a generator with the default configuration.
    fn make_gen(seed: u64) -> SampleGenerator {
        SampleGenerator::new(seed)
    }

    /// Builds a substantive-testing workpaper scoped with the given sampling
    /// method (scope figures 1,000 and 50).
    fn sampling_workpaper(method: SamplingMethod) -> Workpaper {
        let base = Workpaper::new(
            Uuid::new_v4(),
            "D-100",
            "Accounts Receivable Testing",
            WorkpaperSection::SubstantiveTesting,
        );
        base.with_procedure("Test AR balances", ProcedureType::SubstantiveTest)
            .with_scope(WorkpaperScope::default(), 1_000, 50, method)
    }

    /// Builds a control-testing workpaper with a judgmental method and
    /// zeroed scope figures.
    fn non_sampling_workpaper() -> Workpaper {
        let base = Workpaper::new(
            Uuid::new_v4(),
            "C-100",
            "Controls Walk-through",
            WorkpaperSection::ControlTesting,
        );
        base.with_scope(WorkpaperScope::default(), 0, 0, SamplingMethod::Judgmental)
    }

    /// A statistical workpaper yields a concluded sample whose size is within
    /// the default bounds.
    #[test]
    fn test_generates_sample() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement = workpaper.engagement_id;
        let mut generator = make_gen(42);
        let sample = generator.generate_sample(&workpaper, engagement).unwrap();

        let (lower, upper) = SampleGeneratorConfig::default().items_per_sample;
        let min = lower as usize;
        let max = upper as usize;
        let item_count = sample.items.len();
        assert!(
            (min..=max).contains(&item_count),
            "expected {min}..={max} items, got {}",
            item_count
        );
        assert!(
            sample.conclusion.is_some(),
            "sample should have a conclusion"
        );
    }

    /// With default settings a judgmental workpaper produces nothing.
    #[test]
    fn test_no_sample_for_non_sampling() {
        let workpaper = non_sampling_workpaper();
        let engagement = workpaper.engagement_id;
        let outcome = make_gen(99).generate_sample(&workpaper, engagement);
        assert!(outcome.is_none(), "expected None for non-sampling workpaper");
    }

    /// Over 300 items the share of correct items stays within 75–100%.
    #[test]
    fn test_item_distribution() {
        let workpaper = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let engagement = workpaper.engagement_id;
        let mut generator = SampleGenerator::with_config(
            77,
            SampleGeneratorConfig {
                items_per_sample: (300, 300),
                correct_ratio: 0.90,
                misstatement_ratio: 0.07,
                exception_ratio: 0.03,
                generate_for_non_sampling: false,
            },
        );
        let sample = generator.generate_sample(&workpaper, engagement).unwrap();

        let clean_items = sample
            .items
            .iter()
            .filter(|entry| entry.result == SampleItemResult::Correct)
            .count();
        let ratio = clean_items as f64 / sample.items.len() as f64;

        assert!(
            (0.75..=1.00).contains(&ratio),
            "correct ratio {ratio:.2} outside expected 75–100%"
        );
    }

    /// Two generators built from the same seed produce matching samples.
    #[test]
    fn test_deterministic() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement = workpaper.engagement_id;

        let mut first_generator = SampleGenerator::new(1234);
        let first = first_generator
            .generate_sample(&workpaper, engagement)
            .unwrap();
        let mut second_generator = SampleGenerator::new(1234);
        let second = second_generator
            .generate_sample(&workpaper, engagement)
            .unwrap();

        assert_eq!(first.items.len(), second.items.len());
        for (left, right) in first.items.iter().zip(second.items.iter()) {
            assert_eq!(left.document_ref, right.document_ref);
            assert_eq!(left.book_value, right.book_value);
            assert_eq!(left.result, right.result);
        }
        assert_eq!(first.conclusion, second.conclusion);
    }

    /// Enabling `generate_for_non_sampling` yields a sample for a judgmental
    /// workpaper once it has a population.
    #[test]
    fn test_generate_for_non_sampling_flag() {
        let mut workpaper = non_sampling_workpaper();
        workpaper.population_size = 500;
        let engagement = workpaper.engagement_id;

        let mut generator = SampleGenerator::with_config(
            55,
            SampleGeneratorConfig {
                generate_for_non_sampling: true,
                ..Default::default()
            },
        );
        let outcome = generator.generate_sample(&workpaper, engagement);
        assert!(
            outcome.is_some(),
            "expected Some when generate_for_non_sampling = true"
        );
    }

    /// Every misstatement item carries a non-zero misstatement amount.
    #[test]
    fn test_misstatement_items_have_amounts() {
        let workpaper = sampling_workpaper(SamplingMethod::StatisticalRandom);
        let engagement = workpaper.engagement_id;
        let mut generator = SampleGenerator::with_config(
            33,
            SampleGeneratorConfig {
                items_per_sample: (200, 200),
                correct_ratio: 0.40,
                misstatement_ratio: 0.50,
                exception_ratio: 0.10,
                generate_for_non_sampling: false,
            },
        );
        let sample = generator.generate_sample(&workpaper, engagement).unwrap();

        let misstated: Vec<_> = sample
            .items
            .iter()
            .filter(|entry| entry.result == SampleItemResult::Misstatement)
            .collect();

        assert!(!misstated.is_empty(), "expected some misstatement items");
        for entry in misstated {
            assert!(
                entry.misstatement.is_some(),
                "misstatement item should have a misstatement amount"
            );
            assert_ne!(
                entry.misstatement.unwrap(),
                Decimal::ZERO,
                "misstatement amount should not be zero"
            );
        }
    }

    /// The generated sample always ends with one of the known conclusions.
    #[test]
    fn test_conclusion_is_set() {
        let workpaper = sampling_workpaper(SamplingMethod::MonetaryUnit);
        let engagement = workpaper.engagement_id;
        let sample = make_gen(12)
            .generate_sample(&workpaper, engagement)
            .unwrap();

        let outcome = sample.conclusion.unwrap();
        let valid = matches!(
            outcome,
            SampleConclusion::ProjectedBelowTolerable
                | SampleConclusion::ProjectedExceedsTolerable
                | SampleConclusion::InsufficientEvidence
        );
        assert!(valid, "unexpected SampleConclusion variant");
    }
}