// Source: datasynth_generators/audit/analytical_procedure_generator.rs

//! Analytical procedure generator per ISA 520.
//!
//! Generates `AnalyticalProcedureResult` records distributed across the three
//! audit phases (Planning, Substantive, FinalReview) with realistic expectation /
//! actual value pairs and conclusion distributions.

use std::collections::HashMap;

use datasynth_core::models::audit::{
    AnalyticalConclusion, AnalyticalMethod, AnalyticalPhase, AnalyticalProcedureResult,
    AnalyticalStatus, AuditEngagement,
};
use datasynth_core::utils::seeded_rng;
use rand::Rng;
use rand_chacha::ChaCha8Rng;
use rand_distr::{Distribution, Normal};
use rust_decimal::Decimal;

/// Tunable settings for [`AnalyticalProcedureGenerator`] (ISA 520 analytical procedures).
///
/// The four `*_ratio` fields describe the intended mix of conclusions; the
/// defaults sum to 1.0.
#[derive(Debug, Clone)]
pub struct AnalyticalProcedureGeneratorConfig {
    /// Inclusive `(min, max)` bounds on how many procedures one engagement receives.
    pub procedures_per_engagement: (u32, u32),
    /// Target share of procedures concluding `Consistent`.
    pub consistent_ratio: f64,
    /// Target share of procedures concluding `ExplainedVariance`.
    pub explained_ratio: f64,
    /// Target share of procedures concluding `FurtherInvestigation`.
    pub further_ratio: f64,
    /// Target share of procedures concluding `PossibleMisstatement`.
    pub misstatement_ratio: f64,
}

impl Default for AnalyticalProcedureGeneratorConfig {
    /// Returns 8–15 procedures per engagement with a 60 / 25 / 10 / 5 percent
    /// split across Consistent / ExplainedVariance / FurtherInvestigation /
    /// PossibleMisstatement.
    fn default() -> Self {
        let procedures_per_engagement = (8, 15);
        let (consistent_ratio, explained_ratio) = (0.60, 0.25);
        let (further_ratio, misstatement_ratio) = (0.10, 0.05);

        Self {
            procedures_per_engagement,
            consistent_ratio,
            explained_ratio,
            further_ratio,
            misstatement_ratio,
        }
    }
}

45/// Generator for `AnalyticalProcedureResult` records per ISA 520.
46pub struct AnalyticalProcedureGenerator {
47    /// Seeded random number generator
48    rng: ChaCha8Rng,
49    /// Configuration
50    config: AnalyticalProcedureGeneratorConfig,
51}
52
53impl AnalyticalProcedureGenerator {
54    /// Create a new generator with the given seed and default configuration.
55    pub fn new(seed: u64) -> Self {
56        Self {
57            rng: seeded_rng(seed, 0),
58            config: AnalyticalProcedureGeneratorConfig::default(),
59        }
60    }
61
62    /// Create a new generator with custom configuration.
63    pub fn with_config(seed: u64, config: AnalyticalProcedureGeneratorConfig) -> Self {
64        Self {
65            rng: seeded_rng(seed, 0),
66            config,
67        }
68    }
69
70    /// Generate analytical procedures for an engagement.
71    ///
72    /// # Arguments
73    /// * `engagement`    — The audit engagement these procedures belong to.
74    /// * `account_codes` — GL account codes (or area names) to associate procedures with.
75    ///   When empty, generic area names are used.
76    pub fn generate_procedures(
77        &mut self,
78        engagement: &AuditEngagement,
79        account_codes: &[String],
80    ) -> Vec<AnalyticalProcedureResult> {
81        let count = self.rng.random_range(
82            self.config.procedures_per_engagement.0..=self.config.procedures_per_engagement.1,
83        ) as usize;
84
85        // Phase distribution: Planning 20%, Substantive 60%, FinalReview 20%.
86        let planning_count = (count as f64 * 0.20).round() as usize;
87        let final_count = (count as f64 * 0.20).round() as usize;
88        let substantive_count = count.saturating_sub(planning_count + final_count).max(1);
89
90        // Build the phase sequence.
91        let mut phases: Vec<AnalyticalPhase> = Vec::with_capacity(count);
92        phases.extend(std::iter::repeat_n(
93            AnalyticalPhase::Planning,
94            planning_count,
95        ));
96        phases.extend(std::iter::repeat_n(
97            AnalyticalPhase::Substantive,
98            substantive_count,
99        ));
100        phases.extend(std::iter::repeat_n(
101            AnalyticalPhase::FinalReview,
102            final_count,
103        ));
104
105        // Fallback area names when no account codes are provided.
106        let default_areas = [
107            "Revenue",
108            "Cost of Sales",
109            "Operating Expenses",
110            "Accounts Receivable",
111            "Inventory",
112            "Payroll Expense",
113            "Interest Expense",
114            "Depreciation",
115            "Accounts Payable",
116            "Income Tax Expense",
117        ];
118
119        let all_methods = [
120            AnalyticalMethod::TrendAnalysis,
121            AnalyticalMethod::RatioAnalysis,
122            AnalyticalMethod::ReasonablenessTest,
123            AnalyticalMethod::Regression,
124            AnalyticalMethod::Comparison,
125        ];
126
127        let mut results = Vec::with_capacity(phases.len());
128
129        for (i, &phase) in phases.iter().enumerate() {
130            // Choose account or area.
131            let account_or_area: String = if !account_codes.is_empty() {
132                let idx = self.rng.random_range(0..account_codes.len());
133                account_codes[idx].clone()
134            } else {
135                let idx = i % default_areas.len();
136                default_areas[idx].to_string()
137            };
138
139            // Analytical method — cycle through available methods.
140            let method = all_methods[i % all_methods.len()];
141
142            // Expectation: $100k – $10M
143            let expect_units: i64 = self.rng.random_range(100_000_i64..=10_000_000_i64);
144            let expectation = Decimal::new(expect_units, 0);
145
146            // Threshold: 5–15% of expectation.
147            let threshold_pct: f64 = self.rng.random_range(0.05..0.15);
148            let threshold_units = (expect_units as f64 * threshold_pct).round() as i64;
149            let threshold = Decimal::new(threshold_units.max(1), 0);
150
151            // Actual value: expectation + normal noise centred at 0, σ = threshold × 0.6.
152            let sigma = (expect_units as f64 * threshold_pct * 0.6).max(1.0);
153            let normal = Normal::new(0.0_f64, sigma)
154                .unwrap_or_else(|_| Normal::new(0.0, 1.0).expect("fallback Normal"));
155            let noise = normal.sample(&mut self.rng);
156            let actual_units = (expect_units as f64 + noise).round() as i64;
157            let actual_units = actual_units.max(0);
158            let actual_value = Decimal::new(actual_units, 0);
159
160            let expectation_basis =
161                format!("Prior year adjusted for growth — {method:?} applied to {account_or_area}");
162            let threshold_basis = format!("{:.0}% of expectation", threshold_pct * 100.0);
163
164            let mut result = AnalyticalProcedureResult::new(
165                engagement.engagement_id,
166                account_or_area.clone(),
167                method,
168                expectation,
169                expectation_basis,
170                threshold,
171                threshold_basis,
172                actual_value,
173            );
174
175            // Override the default phase (constructor sets Substantive).
176            result.procedure_phase = phase;
177
178            // Assign a conclusion according to the configured ratios.
179            let conclusion = self.choose_conclusion(result.requires_investigation);
180            result.conclusion = Some(conclusion);
181            result.status = datasynth_core::models::audit::AnalyticalStatus::Concluded;
182
183            // Add an explanation for non-Consistent conclusions.
184            if !matches!(conclusion, AnalyticalConclusion::Consistent) {
185                result.explanation = Some(self.explanation_text(conclusion, &account_or_area));
186                if matches!(conclusion, AnalyticalConclusion::ExplainedVariance) {
187                    result.explanation_corroborated = Some(true);
188                    result.corroboration_evidence = Some(
189                        "Management provided supporting schedule; figures agreed to source data."
190                            .to_string(),
191                    );
192                }
193            }
194
195            results.push(result);
196        }
197
198        results
199    }
200
201    /// Generate analytical procedures anchored to real account balances.
202    ///
203    /// Behaves identically to [`generate_procedures`] except that, for each
204    /// procedure, `actual_value` is set to the account's real balance (looked
205    /// up in `account_balances`) and `expectation` is derived as
206    /// `actual_value * (1 + noise)` so the variance is small and realistic.
207    ///
208    /// Accounts that do not appear in `account_balances` fall back to a
209    /// default balance of 100,000.
210    pub fn generate_procedures_with_balances(
211        &mut self,
212        engagement: &AuditEngagement,
213        account_codes: &[String],
214        account_balances: &std::collections::HashMap<String, f64>,
215    ) -> Vec<AnalyticalProcedureResult> {
216        let count = self.rng.random_range(
217            self.config.procedures_per_engagement.0..=self.config.procedures_per_engagement.1,
218        ) as usize;
219
220        // Phase distribution: Planning 20%, Substantive 60%, FinalReview 20%.
221        let planning_count = (count as f64 * 0.20).round() as usize;
222        let final_count = (count as f64 * 0.20).round() as usize;
223        let substantive_count = count.saturating_sub(planning_count + final_count).max(1);
224
225        let mut phases: Vec<AnalyticalPhase> = Vec::with_capacity(count);
226        phases.extend(std::iter::repeat_n(
227            AnalyticalPhase::Planning,
228            planning_count,
229        ));
230        phases.extend(std::iter::repeat_n(
231            AnalyticalPhase::Substantive,
232            substantive_count,
233        ));
234        phases.extend(std::iter::repeat_n(
235            AnalyticalPhase::FinalReview,
236            final_count,
237        ));
238
239        let default_areas = [
240            "Revenue",
241            "Cost of Sales",
242            "Operating Expenses",
243            "Accounts Receivable",
244            "Inventory",
245            "Payroll Expense",
246            "Interest Expense",
247            "Depreciation",
248            "Accounts Payable",
249            "Income Tax Expense",
250        ];
251
252        let all_methods = [
253            AnalyticalMethod::TrendAnalysis,
254            AnalyticalMethod::RatioAnalysis,
255            AnalyticalMethod::ReasonablenessTest,
256            AnalyticalMethod::Regression,
257            AnalyticalMethod::Comparison,
258        ];
259
260        let mut results = Vec::with_capacity(phases.len());
261
262        for (i, &phase) in phases.iter().enumerate() {
263            let account_or_area: String = if !account_codes.is_empty() {
264                let idx = self.rng.random_range(0..account_codes.len());
265                account_codes[idx].clone()
266            } else {
267                let idx = i % default_areas.len();
268                default_areas[idx].to_string()
269            };
270
271            let method = all_methods[i % all_methods.len()];
272
273            // Look up the real balance; fall back to a sensible default.
274            let real_balance = account_balances
275                .get(&account_or_area)
276                .copied()
277                .unwrap_or(100_000.0);
278            let actual_units = real_balance.round() as i64;
279            let actual_value = Decimal::new(actual_units.max(0), 0);
280
281            // Threshold: 5-15% of actual value.
282            let threshold_pct: f64 = self.rng.random_range(0.05..0.15);
283            let threshold_units = (actual_units as f64 * threshold_pct).round().abs() as i64;
284            let threshold = Decimal::new(threshold_units.max(1), 0);
285
286            // Expectation = actual_value + small normal noise (σ = threshold * 0.6).
287            let sigma = (actual_units as f64 * threshold_pct * 0.6).abs().max(1.0);
288            let normal = Normal::new(0.0_f64, sigma)
289                .unwrap_or_else(|_| Normal::new(0.0, 1.0).expect("fallback Normal"));
290            let noise = normal.sample(&mut self.rng);
291            let expect_units = (actual_units as f64 + noise).round() as i64;
292            let expect_units = expect_units.max(0);
293            let expectation = Decimal::new(expect_units, 0);
294
295            let expectation_basis =
296                format!("Prior year adjusted for growth — {method:?} applied to {account_or_area}");
297            let threshold_basis = format!("{:.0}% of expectation", threshold_pct * 100.0);
298
299            let mut result = AnalyticalProcedureResult::new(
300                engagement.engagement_id,
301                account_or_area.clone(),
302                method,
303                expectation,
304                expectation_basis,
305                threshold,
306                threshold_basis,
307                actual_value,
308            );
309
310            result.procedure_phase = phase;
311
312            let conclusion = self.choose_conclusion(result.requires_investigation);
313            result.conclusion = Some(conclusion);
314            result.status = datasynth_core::models::audit::AnalyticalStatus::Concluded;
315
316            if !matches!(conclusion, AnalyticalConclusion::Consistent) {
317                result.explanation = Some(self.explanation_text(conclusion, &account_or_area));
318                if matches!(conclusion, AnalyticalConclusion::ExplainedVariance) {
319                    result.explanation_corroborated = Some(true);
320                    result.corroboration_evidence = Some(
321                        "Management provided supporting schedule; figures agreed to source data."
322                            .to_string(),
323                    );
324                }
325            }
326
327            results.push(result);
328        }
329
330        results
331    }
332
333    // -------------------------------------------------------------------------
334    // Private helpers
335    // -------------------------------------------------------------------------
336
337    /// Choose a conclusion according to the configured ratios.
338    ///
339    /// When the variance requires investigation we bias towards the three non-Consistent
340    /// outcomes; otherwise we strongly favour Consistent.
341    fn choose_conclusion(&mut self, requires_investigation: bool) -> AnalyticalConclusion {
342        let roll: f64 = self.rng.random();
343
344        // If investigation is NOT required, use unmodified ratios (consistent should dominate).
345        // If investigation IS required, shift weight away from Consistent.
346        let consistent_ratio = if requires_investigation {
347            self.config.consistent_ratio * 0.3 // much less likely
348        } else {
349            self.config.consistent_ratio
350        };
351
352        let consistent_cutoff = consistent_ratio;
353        let explained_cutoff = consistent_cutoff + self.config.explained_ratio;
354        let further_cutoff = explained_cutoff + self.config.further_ratio;
355
356        if roll < consistent_cutoff {
357            AnalyticalConclusion::Consistent
358        } else if roll < explained_cutoff {
359            AnalyticalConclusion::ExplainedVariance
360        } else if roll < further_cutoff {
361            AnalyticalConclusion::FurtherInvestigation
362        } else {
363            AnalyticalConclusion::PossibleMisstatement
364        }
365    }
366
367    fn explanation_text(&self, conclusion: AnalyticalConclusion, area: &str) -> String {
368        match conclusion {
369            AnalyticalConclusion::ExplainedVariance => {
370                format!(
371                    "Variance in {area} explained by timing of year-end transactions \
372					 and one-off items — management provided reconciliation."
373                )
374            }
375            AnalyticalConclusion::FurtherInvestigation => {
376                format!(
377                    "Variance in {area} exceeds threshold; additional procedures \
378					 required to determine whether a misstatement exists."
379                )
380            }
381            AnalyticalConclusion::PossibleMisstatement => {
382                format!(
383                    "Variance in {area} is unexplained and may indicate a misstatement; \
384					 extend substantive testing to corroborate."
385                )
386            }
387            AnalyticalConclusion::Consistent => String::new(),
388        }
389    }
390}
391
// =============================================================================
// Tests
// =============================================================================

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use crate::audit::test_helpers::create_test_engagement;

    // Locals are named `generator` rather than `gen`: `gen` is a reserved keyword
    // in the Rust 2024 edition.

    /// Generator with the default configuration.
    fn make_generator(seed: u64) -> AnalyticalProcedureGenerator {
        AnalyticalProcedureGenerator::new(seed)
    }

    /// No account codes: forces the generic area names.
    fn empty_accounts() -> Vec<String> {
        Vec::new()
    }

    /// Number of results in the given phase.
    fn count_phase(results: &[AnalyticalProcedureResult], phase: AnalyticalPhase) -> usize {
        results
            .iter()
            .filter(|r| r.procedure_phase == phase)
            .count()
    }

    // -------------------------------------------------------------------------

    /// Count is within the configured (min, max) range.
    #[test]
    fn test_generates_procedures() {
        let engagement = create_test_engagement();
        let mut generator = make_generator(42);
        let results = generator.generate_procedures(&engagement, &empty_accounts());

        let cfg = AnalyticalProcedureGeneratorConfig::default();
        let min = cfg.procedures_per_engagement.0 as usize;
        let max = cfg.procedures_per_engagement.1 as usize;
        assert!(
            (min..=max).contains(&results.len()),
            "expected {min}..={max}, got {}",
            results.len()
        );
    }

    /// Twenty procedures split 20% / 60% / 20% across the three phases.
    #[test]
    fn test_phase_distribution() {
        let engagement = create_test_engagement();
        let config = AnalyticalProcedureGeneratorConfig {
            procedures_per_engagement: (20, 20),
            ..Default::default()
        };
        let mut generator = AnalyticalProcedureGenerator::with_config(10, config);
        let results = generator.generate_procedures(&engagement, &empty_accounts());

        assert_eq!(results.len(), 20, "expected exactly 20 procedures");
        assert_eq!(
            count_phase(&results, AnalyticalPhase::Planning),
            4,
            "expected 4 Planning procedures"
        );
        assert_eq!(
            count_phase(&results, AnalyticalPhase::Substantive),
            12,
            "expected 12 Substantive procedures"
        );
        assert_eq!(
            count_phase(&results, AnalyticalPhase::FinalReview),
            4,
            "expected 4 FinalReview procedures"
        );
    }

    /// With a large count, every result is concluded and Consistent clearly dominates.
    #[test]
    fn test_conclusion_distribution() {
        let engagement = create_test_engagement();
        let config = AnalyticalProcedureGeneratorConfig {
            procedures_per_engagement: (200, 200),
            consistent_ratio: 0.60,
            explained_ratio: 0.25,
            further_ratio: 0.10,
            misstatement_ratio: 0.05,
        };
        let mut generator = AnalyticalProcedureGenerator::with_config(99, config);
        let results = generator.generate_procedures(&engagement, &empty_accounts());

        // All results should have a conclusion.
        let no_conclusion = results.iter().filter(|r| r.conclusion.is_none()).count();
        assert_eq!(no_conclusion, 0, "all results must have a conclusion");

        // Consistent is configured at 60%; allow a wide margin for sampling noise
        // and for the reduced weight applied when investigation is required.
        let consistent_count = results
            .iter()
            .filter(|r| r.conclusion == Some(AnalyticalConclusion::Consistent))
            .count();
        assert!(
            consistent_count * 3 > results.len(),
            "expected more than a third Consistent conclusions, got {consistent_count} of {}",
            results.len()
        );
    }

    /// Same seed produces identical output.
    #[test]
    fn test_deterministic() {
        let engagement = create_test_engagement();
        let accounts = vec!["1000".to_string(), "2000".to_string(), "3000".to_string()];

        let results_a =
            AnalyticalProcedureGenerator::new(1234).generate_procedures(&engagement, &accounts);
        let results_b =
            AnalyticalProcedureGenerator::new(1234).generate_procedures(&engagement, &accounts);

        assert_eq!(
            results_a.len(),
            results_b.len(),
            "lengths differ across identical seeds"
        );
        for (a, b) in results_a.iter().zip(results_b.iter()) {
            assert_eq!(a.account_or_area, b.account_or_area);
            assert_eq!(a.expectation, b.expectation);
            assert_eq!(a.actual_value, b.actual_value);
            assert_eq!(a.conclusion, b.conclusion);
            assert_eq!(a.procedure_phase, b.procedure_phase);
        }
    }

    /// When account_codes is non-empty, results should reference those codes.
    #[test]
    fn test_account_codes_used() {
        let engagement = create_test_engagement();
        let accounts = vec![
            "REV-1000".to_string(),
            "EXP-2000".to_string(),
            "ASS-3000".to_string(),
        ];

        let mut generator = make_generator(55);
        let results = generator.generate_procedures(&engagement, &accounts);

        for result in &results {
            assert!(
                accounts.contains(&result.account_or_area),
                "account_or_area '{}' not in provided list",
                result.account_or_area
            );
        }
    }

    /// Variance and requires_investigation should be consistent with the amounts.
    #[test]
    fn test_variance_fields_consistent() {
        let engagement = create_test_engagement();
        let mut generator = make_generator(88);
        let results = generator.generate_procedures(&engagement, &empty_accounts());

        for r in &results {
            let expected_variance = r.actual_value - r.expectation;
            assert_eq!(
                r.variance, expected_variance,
                "variance mismatch for result_ref {}",
                r.result_ref
            );
            // requires_investigation must be consistent with |variance| > threshold.
            let expected_flag = r.variance.abs() > r.threshold;
            assert_eq!(
                r.requires_investigation, expected_flag,
                "requires_investigation flag mismatch for {}",
                r.result_ref
            );
        }
    }

    /// Balance-anchored generation records the supplied balance as the actual value
    /// and falls back to 100,000 for accounts missing from the map.
    #[test]
    fn test_with_balances_uses_real_balance() {
        let engagement = create_test_engagement();
        let accounts = vec!["1000".to_string(), "2000".to_string()];
        let balances: std::collections::HashMap<String, f64> =
            [("1000".to_string(), 250_000.0)].into_iter().collect();

        let mut generator = make_generator(7);
        let results =
            generator.generate_procedures_with_balances(&engagement, &accounts, &balances);

        assert!(!results.is_empty(), "expected at least one procedure");
        for r in &results {
            let expected_actual = if r.account_or_area == "1000" {
                Decimal::new(250_000, 0)
            } else {
                Decimal::new(100_000, 0)
            };
            assert_eq!(
                r.actual_value, expected_actual,
                "unexpected actual_value for account {}",
                r.account_or_area
            );
            assert!(
                r.conclusion.is_some(),
                "balance-anchored results must be concluded"
            );
        }
    }
}
557}