datasynth_generators/audit/
analytical_procedure_generator.rs

1//! Analytical procedure generator per ISA 520.
2//!
3//! Generates `AnalyticalProcedureResult` records distributed across the three
4//! audit phases (Planning, Substantive, FinalReview) with realistic expectation /
5//! actual value pairs and conclusion distributions.
6
7use datasynth_core::utils::seeded_rng;
8use rand::Rng;
9use rand_chacha::ChaCha8Rng;
10use rand_distr::{Distribution, Normal};
11use rust_decimal::Decimal;
12
13use datasynth_core::models::audit::{
14    AnalyticalConclusion, AnalyticalMethod, AnalyticalPhase, AnalyticalProcedureResult,
15    AuditEngagement,
16};
17
/// Tunable settings for the ISA 520 analytical procedure generator.
///
/// The four `*_ratio` fields describe the intended mix of conclusions; the
/// default values add up to 1.0.
#[derive(Debug, Clone)]
pub struct AnalyticalProcedureGeneratorConfig {
    /// Inclusive `(min, max)` bounds on the number of procedures per engagement.
    pub procedures_per_engagement: (u32, u32),
    /// Share of procedures concluding `Consistent`.
    pub consistent_ratio: f64,
    /// Share of procedures concluding `ExplainedVariance`.
    pub explained_ratio: f64,
    /// Share of procedures concluding `FurtherInvestigation`.
    pub further_ratio: f64,
    /// Share of procedures concluding `PossibleMisstatement`.
    pub misstatement_ratio: f64,
}

impl Default for AnalyticalProcedureGeneratorConfig {
    /// 8–15 procedures per engagement with a 60 / 25 / 10 / 5 percent split across
    /// Consistent, ExplainedVariance, FurtherInvestigation and PossibleMisstatement.
    fn default() -> Self {
        let procedures_per_engagement = (8, 15);
        let (consistent_ratio, explained_ratio) = (0.60, 0.25);
        let (further_ratio, misstatement_ratio) = (0.10, 0.05);

        Self {
            procedures_per_engagement,
            consistent_ratio,
            explained_ratio,
            further_ratio,
            misstatement_ratio,
        }
    }
}
44
/// Generator for `AnalyticalProcedureResult` records per ISA 520.
///
/// Owns a seeded RNG and a configuration; the generating methods take `&mut self`
/// because every call advances the RNG.
pub struct AnalyticalProcedureGenerator {
    /// Seeded random number generator; all random draws made by this generator
    /// go through it.
    rng: ChaCha8Rng,
    /// Count bounds and conclusion ratios that drive generation.
    config: AnalyticalProcedureGeneratorConfig,
}
52
53impl AnalyticalProcedureGenerator {
54    /// Create a new generator with the given seed and default configuration.
55    pub fn new(seed: u64) -> Self {
56        Self {
57            rng: seeded_rng(seed, 0),
58            config: AnalyticalProcedureGeneratorConfig::default(),
59        }
60    }
61
62    /// Create a new generator with custom configuration.
63    pub fn with_config(seed: u64, config: AnalyticalProcedureGeneratorConfig) -> Self {
64        Self {
65            rng: seeded_rng(seed, 0),
66            config,
67        }
68    }
69
70    /// Generate analytical procedures for an engagement.
71    ///
72    /// # Arguments
73    /// * `engagement`    — The audit engagement these procedures belong to.
74    /// * `account_codes` — GL account codes (or area names) to associate procedures with.
75    ///   When empty, generic area names are used.
76    pub fn generate_procedures(
77        &mut self,
78        engagement: &AuditEngagement,
79        account_codes: &[String],
80    ) -> Vec<AnalyticalProcedureResult> {
81        let count = self.rng.random_range(
82            self.config.procedures_per_engagement.0..=self.config.procedures_per_engagement.1,
83        ) as usize;
84
85        // Phase distribution: Planning 20%, Substantive 60%, FinalReview 20%.
86        let planning_count = (count as f64 * 0.20).round() as usize;
87        let final_count = (count as f64 * 0.20).round() as usize;
88        let substantive_count = count.saturating_sub(planning_count + final_count).max(1);
89
90        // Build the phase sequence.
91        let mut phases: Vec<AnalyticalPhase> = Vec::with_capacity(count);
92        phases.extend(std::iter::repeat_n(
93            AnalyticalPhase::Planning,
94            planning_count,
95        ));
96        phases.extend(std::iter::repeat_n(
97            AnalyticalPhase::Substantive,
98            substantive_count,
99        ));
100        phases.extend(std::iter::repeat_n(
101            AnalyticalPhase::FinalReview,
102            final_count,
103        ));
104
105        // Fallback area names when no account codes are provided.
106        let default_areas = [
107            "Revenue",
108            "Cost of Sales",
109            "Operating Expenses",
110            "Accounts Receivable",
111            "Inventory",
112            "Payroll Expense",
113            "Interest Expense",
114            "Depreciation",
115            "Accounts Payable",
116            "Income Tax Expense",
117        ];
118
119        let all_methods = [
120            AnalyticalMethod::TrendAnalysis,
121            AnalyticalMethod::RatioAnalysis,
122            AnalyticalMethod::ReasonablenessTest,
123            AnalyticalMethod::Regression,
124            AnalyticalMethod::Comparison,
125        ];
126
127        let mut results = Vec::with_capacity(phases.len());
128
129        for (i, &phase) in phases.iter().enumerate() {
130            // Choose account or area.
131            let account_or_area: String = if !account_codes.is_empty() {
132                let idx = self.rng.random_range(0..account_codes.len());
133                account_codes[idx].clone()
134            } else {
135                let idx = i % default_areas.len();
136                default_areas[idx].to_string()
137            };
138
139            // Analytical method — cycle through available methods.
140            let method = all_methods[i % all_methods.len()];
141
142            // Expectation: $100k – $10M
143            let expect_units: i64 = self.rng.random_range(100_000_i64..=10_000_000_i64);
144            let expectation = Decimal::new(expect_units, 0);
145
146            // Threshold: 5–15% of expectation.
147            let threshold_pct: f64 = self.rng.random_range(0.05..0.15);
148            let threshold_units = (expect_units as f64 * threshold_pct).round() as i64;
149            let threshold = Decimal::new(threshold_units.max(1), 0);
150
151            // Actual value: expectation + normal noise centred at 0, σ = threshold × 0.6.
152            let sigma = (expect_units as f64 * threshold_pct * 0.6).max(1.0);
153            let normal = Normal::new(0.0_f64, sigma)
154                .unwrap_or_else(|_| Normal::new(0.0, 1.0).expect("fallback Normal"));
155            let noise = normal.sample(&mut self.rng);
156            let actual_units = (expect_units as f64 + noise).round() as i64;
157            let actual_units = actual_units.max(0);
158            let actual_value = Decimal::new(actual_units, 0);
159
160            let expectation_basis =
161                format!("Prior year adjusted for growth — {method:?} applied to {account_or_area}");
162            let threshold_basis = format!("{:.0}% of expectation", threshold_pct * 100.0);
163
164            let mut result = AnalyticalProcedureResult::new(
165                engagement.engagement_id,
166                account_or_area.clone(),
167                method,
168                expectation,
169                expectation_basis,
170                threshold,
171                threshold_basis,
172                actual_value,
173            );
174
175            // Override the default phase (constructor sets Substantive).
176            result.procedure_phase = phase;
177
178            // Assign a conclusion according to the configured ratios.
179            let conclusion = self.choose_conclusion(result.requires_investigation);
180            result.conclusion = Some(conclusion);
181            result.status = datasynth_core::models::audit::AnalyticalStatus::Concluded;
182
183            // Add an explanation for non-Consistent conclusions.
184            if !matches!(conclusion, AnalyticalConclusion::Consistent) {
185                result.explanation = Some(self.explanation_text(conclusion, &account_or_area));
186                if matches!(conclusion, AnalyticalConclusion::ExplainedVariance) {
187                    result.explanation_corroborated = Some(true);
188                    result.corroboration_evidence = Some(
189                        "Management provided supporting schedule; figures agreed to source data."
190                            .to_string(),
191                    );
192                }
193            }
194
195            results.push(result);
196        }
197
198        results
199    }
200
201    // -------------------------------------------------------------------------
202    // Private helpers
203    // -------------------------------------------------------------------------
204
205    /// Choose a conclusion according to the configured ratios.
206    ///
207    /// When the variance requires investigation we bias towards the three non-Consistent
208    /// outcomes; otherwise we strongly favour Consistent.
209    fn choose_conclusion(&mut self, requires_investigation: bool) -> AnalyticalConclusion {
210        let roll: f64 = self.rng.random();
211
212        // If investigation is NOT required, use unmodified ratios (consistent should dominate).
213        // If investigation IS required, shift weight away from Consistent.
214        let consistent_ratio = if requires_investigation {
215            self.config.consistent_ratio * 0.3 // much less likely
216        } else {
217            self.config.consistent_ratio
218        };
219
220        let consistent_cutoff = consistent_ratio;
221        let explained_cutoff = consistent_cutoff + self.config.explained_ratio;
222        let further_cutoff = explained_cutoff + self.config.further_ratio;
223
224        if roll < consistent_cutoff {
225            AnalyticalConclusion::Consistent
226        } else if roll < explained_cutoff {
227            AnalyticalConclusion::ExplainedVariance
228        } else if roll < further_cutoff {
229            AnalyticalConclusion::FurtherInvestigation
230        } else {
231            AnalyticalConclusion::PossibleMisstatement
232        }
233    }
234
235    fn explanation_text(&self, conclusion: AnalyticalConclusion, area: &str) -> String {
236        match conclusion {
237            AnalyticalConclusion::ExplainedVariance => {
238                format!(
239                    "Variance in {area} explained by timing of year-end transactions \
240					 and one-off items — management provided reconciliation."
241                )
242            }
243            AnalyticalConclusion::FurtherInvestigation => {
244                format!(
245                    "Variance in {area} exceeds threshold; additional procedures \
246					 required to determine whether a misstatement exists."
247                )
248            }
249            AnalyticalConclusion::PossibleMisstatement => {
250                format!(
251                    "Variance in {area} is unexplained and may indicate a misstatement; \
252					 extend substantive testing to corroborate."
253                )
254            }
255            AnalyticalConclusion::Consistent => String::new(),
256        }
257    }
258}
259
260// =============================================================================
261// Tests
262// =============================================================================
263
264#[cfg(test)]
265#[allow(clippy::unwrap_used)]
266mod tests {
267    use super::*;
268    use crate::audit::test_helpers::create_test_engagement;
269
270    fn make_gen(seed: u64) -> AnalyticalProcedureGenerator {
271        AnalyticalProcedureGenerator::new(seed)
272    }
273
274    fn empty_accounts() -> Vec<String> {
275        Vec::new()
276    }
277
278    // -------------------------------------------------------------------------
279
280    /// Count is within the configured (min, max) range.
281    #[test]
282    fn test_generates_procedures() {
283        let engagement = create_test_engagement();
284        let mut gen = make_gen(42);
285        let results = gen.generate_procedures(&engagement, &empty_accounts());
286
287        let cfg = AnalyticalProcedureGeneratorConfig::default();
288        let min = cfg.procedures_per_engagement.0 as usize;
289        let max = cfg.procedures_per_engagement.1 as usize;
290        assert!(
291            results.len() >= min && results.len() <= max,
292            "expected {min}..={max}, got {}",
293            results.len()
294        );
295    }
296
297    /// Phase distribution should include all three phases.
298    #[test]
299    fn test_phase_distribution() {
300        let engagement = create_test_engagement();
301        let config = AnalyticalProcedureGeneratorConfig {
302            procedures_per_engagement: (20, 20),
303            ..Default::default()
304        };
305        let mut gen = AnalyticalProcedureGenerator::with_config(10, config);
306        let results = gen.generate_procedures(&engagement, &empty_accounts());
307
308        let has_planning = results
309            .iter()
310            .any(|r| r.procedure_phase == AnalyticalPhase::Planning);
311        let has_substantive = results
312            .iter()
313            .any(|r| r.procedure_phase == AnalyticalPhase::Substantive);
314        let has_final = results
315            .iter()
316            .any(|r| r.procedure_phase == AnalyticalPhase::FinalReview);
317
318        assert!(has_planning, "expected at least one Planning procedure");
319        assert!(
320            has_substantive,
321            "expected at least one Substantive procedure"
322        );
323        assert!(has_final, "expected at least one FinalReview procedure");
324    }
325
326    /// With a large count, conclusion distribution should roughly match config.
327    #[test]
328    fn test_conclusion_distribution() {
329        let engagement = create_test_engagement();
330        let config = AnalyticalProcedureGeneratorConfig {
331            procedures_per_engagement: (200, 200),
332            consistent_ratio: 0.60,
333            explained_ratio: 0.25,
334            further_ratio: 0.10,
335            misstatement_ratio: 0.05,
336        };
337        let mut gen = AnalyticalProcedureGenerator::with_config(99, config);
338        let results = gen.generate_procedures(&engagement, &empty_accounts());
339
340        // All results should have a conclusion.
341        let no_conclusion = results.iter().filter(|r| r.conclusion.is_none()).count();
342        assert_eq!(no_conclusion, 0, "all results must have a conclusion");
343
344        // There should be at least some Consistent results (dominant outcome).
345        let consistent_count = results
346            .iter()
347            .filter(|r| r.conclusion == Some(AnalyticalConclusion::Consistent))
348            .count();
349        assert!(
350            consistent_count > 0,
351            "expected at least some Consistent conclusions, got 0"
352        );
353    }
354
355    /// Same seed produces identical output.
356    #[test]
357    fn test_deterministic() {
358        let engagement = create_test_engagement();
359        let accounts = vec!["1000".to_string(), "2000".to_string(), "3000".to_string()];
360
361        let results_a =
362            AnalyticalProcedureGenerator::new(1234).generate_procedures(&engagement, &accounts);
363        let results_b =
364            AnalyticalProcedureGenerator::new(1234).generate_procedures(&engagement, &accounts);
365
366        assert_eq!(
367            results_a.len(),
368            results_b.len(),
369            "lengths differ across identical seeds"
370        );
371        for (a, b) in results_a.iter().zip(results_b.iter()) {
372            assert_eq!(a.account_or_area, b.account_or_area);
373            assert_eq!(a.expectation, b.expectation);
374            assert_eq!(a.actual_value, b.actual_value);
375            assert_eq!(a.conclusion, b.conclusion);
376            assert_eq!(a.procedure_phase, b.procedure_phase);
377        }
378    }
379
380    /// When account_codes is non-empty, results should reference those codes.
381    #[test]
382    fn test_account_codes_used() {
383        let engagement = create_test_engagement();
384        let accounts = vec![
385            "REV-1000".to_string(),
386            "EXP-2000".to_string(),
387            "ASS-3000".to_string(),
388        ];
389
390        let mut gen = make_gen(55);
391        let results = gen.generate_procedures(&engagement, &accounts);
392
393        for result in &results {
394            assert!(
395                accounts.contains(&result.account_or_area),
396                "account_or_area '{}' not in provided list",
397                result.account_or_area
398            );
399        }
400    }
401
402    /// Variance, variance_percentage, and requires_investigation should be consistent.
403    #[test]
404    fn test_variance_fields_consistent() {
405        let engagement = create_test_engagement();
406        let mut gen = make_gen(88);
407        let results = gen.generate_procedures(&engagement, &empty_accounts());
408
409        for r in &results {
410            let expected_variance = r.actual_value - r.expectation;
411            assert_eq!(
412                r.variance, expected_variance,
413                "variance mismatch for result_ref {}",
414                r.result_ref
415            );
416            // requires_investigation must be consistent with |variance| > threshold.
417            let expected_flag = r.variance.abs() > r.threshold;
418            assert_eq!(
419                r.requires_investigation, expected_flag,
420                "requires_investigation flag mismatch for {}",
421                r.result_ref
422            );
423        }
424    }
425}
datasynth_generators/audit/analytical_procedure_generator.rs

datasynth_generators/audit/
analytical_procedure_generator.rs