Skip to main content

datasynth_generators/audit/
evidence_generator.rs

1//! Evidence generator for audit engagements.
2//!
3//! Generates audit evidence with appropriate reliability assessments,
4//! source classifications, and cross-references per ISA 500.
5
6use chrono::{Duration, NaiveDate};
7use datasynth_core::utils::seeded_rng;
8use rand::Rng;
9use rand_chacha::ChaCha8Rng;
10use uuid::Uuid;
11
12use datasynth_core::models::audit::{
13    Assertion, AuditEngagement, AuditEvidence, EvidenceSource, EvidenceType, ReliabilityAssessment,
14    ReliabilityLevel, Workpaper,
15};
16
/// Tunable parameters that shape the evidence produced by the generator.
///
/// Both range fields are `(min, max)` tuples that are sampled inclusively
/// on both ends; `min` must not exceed `max`.
#[derive(Debug, Clone)]
pub struct EvidenceGeneratorConfig {
    /// Inclusive `(min, max)` number of evidence items created per workpaper.
    pub evidence_per_workpaper: (u32, u32),
    /// Chance that an item is drawn from the external evidence types
    /// (confirmation, bank statement, legal letter, contract) instead of
    /// the internal / auditor-prepared ones.
    pub external_third_party_probability: f64,
    /// Chance that the controls dimension of a reliability assessment is
    /// rated high rather than medium.
    pub high_reliability_probability: f64,
    /// Chance that an item carries AI-extracted terms, a confidence score
    /// and a summary.
    pub ai_extraction_probability: f64,
    /// Inclusive `(min, max)` size, in bytes, recorded for the evidence file.
    pub file_size_range: (u64, u64),
}
31
32impl Default for EvidenceGeneratorConfig {
33    fn default() -> Self {
34        Self {
35            evidence_per_workpaper: (1, 5),
36            external_third_party_probability: 0.20,
37            high_reliability_probability: 0.40,
38            ai_extraction_probability: 0.15,
39            file_size_range: (10_000, 5_000_000),
40        }
41    }
42}
43
44/// Generator for audit evidence.
45pub struct EvidenceGenerator {
46    rng: ChaCha8Rng,
47    config: EvidenceGeneratorConfig,
48    evidence_counter: u32,
49}
50
51impl EvidenceGenerator {
52    /// Create a new generator with the given seed.
53    pub fn new(seed: u64) -> Self {
54        Self {
55            rng: seeded_rng(seed, 0),
56            config: EvidenceGeneratorConfig::default(),
57            evidence_counter: 0,
58        }
59    }
60
61    /// Create a new generator with custom configuration.
62    pub fn with_config(seed: u64, config: EvidenceGeneratorConfig) -> Self {
63        Self {
64            rng: seeded_rng(seed, 0),
65            config,
66            evidence_counter: 0,
67        }
68    }
69
70    /// Generate evidence for a workpaper.
71    pub fn generate_evidence_for_workpaper(
72        &mut self,
73        workpaper: &Workpaper,
74        team_members: &[String],
75        base_date: NaiveDate,
76    ) -> Vec<AuditEvidence> {
77        let count = self
78            .rng
79            .gen_range(self.config.evidence_per_workpaper.0..=self.config.evidence_per_workpaper.1);
80
81        (0..count)
82            .map(|i| {
83                self.generate_evidence(
84                    workpaper.engagement_id,
85                    Some(workpaper.workpaper_id),
86                    &workpaper.assertions_tested,
87                    team_members,
88                    base_date + Duration::days(i as i64),
89                )
90            })
91            .collect()
92    }
93
94    /// Generate a single piece of evidence.
95    pub fn generate_evidence(
96        &mut self,
97        engagement_id: Uuid,
98        workpaper_id: Option<Uuid>,
99        assertions: &[Assertion],
100        team_members: &[String],
101        obtained_date: NaiveDate,
102    ) -> AuditEvidence {
103        self.evidence_counter += 1;
104
105        // Determine evidence type and source
106        let (evidence_type, source_type) = self.select_evidence_type_and_source();
107        let title = self.generate_evidence_title(evidence_type);
108
109        let mut evidence = AuditEvidence::new(engagement_id, evidence_type, source_type, &title);
110
111        evidence.evidence_ref = format!("EV-{:06}", self.evidence_counter);
112
113        // Set description
114        let description = self.generate_evidence_description(evidence_type, source_type);
115        evidence = evidence.with_description(&description);
116
117        // Set obtained by
118        let obtainer = self.select_team_member(team_members);
119        evidence = evidence.with_obtained_by(&obtainer, obtained_date);
120
121        // Set file info
122        let file_size = self
123            .rng
124            .gen_range(self.config.file_size_range.0..=self.config.file_size_range.1);
125        let file_path = self.generate_file_path(evidence_type, self.evidence_counter);
126        let file_hash = format!("sha256:{:064x}", self.rng.gen::<u128>());
127        evidence = evidence.with_file_info(&file_path, &file_hash, file_size);
128
129        // Set reliability assessment
130        let reliability = self.generate_reliability_assessment(source_type);
131        evidence = evidence.with_reliability(reliability);
132
133        // Set assertions
134        if assertions.is_empty() {
135            evidence = evidence.with_assertions(vec![self.random_assertion()]);
136        } else {
137            evidence = evidence.with_assertions(assertions.to_vec());
138        }
139
140        // Link to workpaper if provided
141        if let Some(wp_id) = workpaper_id {
142            evidence.link_workpaper(wp_id);
143        }
144
145        // Maybe add AI extraction
146        if self.rng.gen::<f64>() < self.config.ai_extraction_probability {
147            let terms = self.generate_ai_terms(evidence_type);
148            let confidence = self.rng.gen_range(0.75..0.98);
149            let summary = self.generate_ai_summary(evidence_type);
150            evidence = evidence.with_ai_extraction(terms, confidence, &summary);
151        }
152
153        evidence
154    }
155
156    /// Generate evidence for an entire engagement.
157    pub fn generate_evidence_for_engagement(
158        &mut self,
159        engagement: &AuditEngagement,
160        workpapers: &[Workpaper],
161        team_members: &[String],
162    ) -> Vec<AuditEvidence> {
163        let mut all_evidence = Vec::new();
164
165        for workpaper in workpapers {
166            let evidence = self.generate_evidence_for_workpaper(
167                workpaper,
168                team_members,
169                workpaper.preparer_date,
170            );
171            all_evidence.extend(evidence);
172        }
173
174        // Add some standalone evidence not linked to specific workpapers
175        let standalone_count = self.rng.gen_range(5..15);
176        for i in 0..standalone_count {
177            let date = engagement.fieldwork_start + Duration::days(i as i64 * 3);
178            let evidence =
179                self.generate_evidence(engagement.engagement_id, None, &[], team_members, date);
180            all_evidence.push(evidence);
181        }
182
183        all_evidence
184    }
185
186    /// Select evidence type and source.
187    fn select_evidence_type_and_source(&mut self) -> (EvidenceType, EvidenceSource) {
188        let is_external = self.rng.gen::<f64>() < self.config.external_third_party_probability;
189
190        if is_external {
191            let external_types = [
192                (
193                    EvidenceType::Confirmation,
194                    EvidenceSource::ExternalThirdParty,
195                ),
196                (
197                    EvidenceType::BankStatement,
198                    EvidenceSource::ExternalThirdParty,
199                ),
200                (
201                    EvidenceType::LegalLetter,
202                    EvidenceSource::ExternalThirdParty,
203                ),
204                (
205                    EvidenceType::Contract,
206                    EvidenceSource::ExternalClientProvided,
207                ),
208            ];
209            let idx = self.rng.gen_range(0..external_types.len());
210            external_types[idx]
211        } else {
212            let internal_types = [
213                (
214                    EvidenceType::Document,
215                    EvidenceSource::InternalClientPrepared,
216                ),
217                (
218                    EvidenceType::Invoice,
219                    EvidenceSource::InternalClientPrepared,
220                ),
221                (
222                    EvidenceType::SystemExtract,
223                    EvidenceSource::InternalClientPrepared,
224                ),
225                (EvidenceType::Analysis, EvidenceSource::AuditorPrepared),
226                (EvidenceType::Recalculation, EvidenceSource::AuditorPrepared),
227                (
228                    EvidenceType::MeetingMinutes,
229                    EvidenceSource::InternalClientPrepared,
230                ),
231                (EvidenceType::Email, EvidenceSource::InternalClientPrepared),
232            ];
233            let idx = self.rng.gen_range(0..internal_types.len());
234            internal_types[idx]
235        }
236    }
237
238    /// Generate evidence title.
239    fn generate_evidence_title(&mut self, evidence_type: EvidenceType) -> String {
240        let titles = match evidence_type {
241            EvidenceType::Confirmation => vec![
242                "Bank Confirmation - Primary Account",
243                "AR Confirmation - Major Customer",
244                "AP Confirmation - Key Vendor",
245                "Legal Confirmation",
246                "Investment Confirmation",
247            ],
248            EvidenceType::BankStatement => vec![
249                "Bank Statement - Operating Account",
250                "Bank Statement - Payroll Account",
251                "Bank Statement - Investment Account",
252                "Bank Statement - Foreign Currency",
253            ],
254            EvidenceType::Invoice => vec![
255                "Vendor Invoice Sample",
256                "Customer Invoice Sample",
257                "Intercompany Invoice",
258                "Service Invoice",
259            ],
260            EvidenceType::Contract => vec![
261                "Customer Contract",
262                "Vendor Agreement",
263                "Lease Agreement",
264                "Employment Contract Sample",
265                "Loan Agreement",
266            ],
267            EvidenceType::Document => vec![
268                "Supporting Documentation",
269                "Source Document",
270                "Transaction Support",
271                "Authorization Document",
272            ],
273            EvidenceType::Analysis => vec![
274                "Analytical Review",
275                "Variance Analysis",
276                "Trend Analysis",
277                "Ratio Analysis",
278                "Account Reconciliation Review",
279            ],
280            EvidenceType::SystemExtract => vec![
281                "ERP System Extract",
282                "GL Detail Extract",
283                "Transaction Log Extract",
284                "User Access Report",
285            ],
286            EvidenceType::MeetingMinutes => vec![
287                "Board Meeting Minutes",
288                "Audit Committee Minutes",
289                "Management Meeting Notes",
290            ],
291            EvidenceType::Email => vec![
292                "Management Inquiry Response",
293                "Confirmation Follow-up",
294                "Exception Explanation",
295            ],
296            EvidenceType::Recalculation => vec![
297                "Depreciation Recalculation",
298                "Interest Recalculation",
299                "Tax Provision Recalculation",
300                "Allowance Recalculation",
301            ],
302            EvidenceType::LegalLetter => vec!["Attorney Response Letter", "Litigation Summary"],
303            EvidenceType::ManagementRepresentation => vec![
304                "Management Representation Letter",
305                "Specific Representation",
306            ],
307            EvidenceType::SpecialistReport => vec![
308                "Valuation Specialist Report",
309                "Actuary Report",
310                "IT Specialist Assessment",
311            ],
312            EvidenceType::PhysicalObservation => vec![
313                "Inventory Count Observation",
314                "Fixed Asset Inspection",
315                "Physical Verification",
316            ],
317        };
318
319        let idx = self.rng.gen_range(0..titles.len());
320        titles[idx].to_string()
321    }
322
323    /// Generate evidence description.
324    fn generate_evidence_description(
325        &mut self,
326        evidence_type: EvidenceType,
327        source: EvidenceSource,
328    ) -> String {
329        let source_desc = source.description();
330        match evidence_type {
331            EvidenceType::Confirmation => {
332                format!("External confirmation {}. Response received and agreed to client records.", source_desc)
333            }
334            EvidenceType::BankStatement => {
335                format!("Bank statement {}. Statement obtained for period-end reconciliation.", source_desc)
336            }
337            EvidenceType::Invoice => {
338                "Invoice selected as part of sample testing. Examined for appropriate approval, accuracy, and proper period recording.".into()
339            }
340            EvidenceType::Analysis => {
341                "Auditor-prepared analytical procedure. Expectations developed based on prior year, industry data, and management budgets.".into()
342            }
343            EvidenceType::SystemExtract => {
344                format!("System report {}. Extract validated for completeness and accuracy.", source_desc)
345            }
346            _ => format!("Supporting documentation {}.", source_desc),
347        }
348    }
349
350    /// Generate reliability assessment.
351    fn generate_reliability_assessment(&mut self, source: EvidenceSource) -> ReliabilityAssessment {
352        let base_reliability = source.inherent_reliability();
353
354        let independence = base_reliability;
355        let controls = if self.rng.gen::<f64>() < self.config.high_reliability_probability {
356            ReliabilityLevel::High
357        } else {
358            ReliabilityLevel::Medium
359        };
360        let qualifications = if self.rng.gen::<f64>() < 0.7 {
361            ReliabilityLevel::High
362        } else {
363            ReliabilityLevel::Medium
364        };
365        let objectivity = match source {
366            EvidenceSource::ExternalThirdParty | EvidenceSource::AuditorPrepared => {
367                ReliabilityLevel::High
368            }
369            _ => {
370                if self.rng.gen::<f64>() < 0.5 {
371                    ReliabilityLevel::Medium
372                } else {
373                    ReliabilityLevel::Low
374                }
375            }
376        };
377
378        let notes = match base_reliability {
379            ReliabilityLevel::High => {
380                "Evidence obtained from independent source with high reliability"
381            }
382            ReliabilityLevel::Medium => "Evidence obtained from client with adequate controls",
383            ReliabilityLevel::Low => "Internal evidence requires corroboration",
384        };
385
386        ReliabilityAssessment::new(independence, controls, qualifications, objectivity, notes)
387    }
388
389    /// Generate file path for evidence.
390    fn generate_file_path(&mut self, evidence_type: EvidenceType, counter: u32) -> String {
391        let extension = match evidence_type {
392            EvidenceType::SystemExtract => "xlsx",
393            EvidenceType::Analysis | EvidenceType::Recalculation => "xlsx",
394            EvidenceType::MeetingMinutes | EvidenceType::ManagementRepresentation => "pdf",
395            EvidenceType::Email => "msg",
396            _ => {
397                if self.rng.gen::<f64>() < 0.6 {
398                    "pdf"
399                } else {
400                    "xlsx"
401                }
402            }
403        };
404
405        format!("/evidence/EV-{:06}.{}", counter, extension)
406    }
407
408    /// Select a random team member.
409    fn select_team_member(&mut self, team_members: &[String]) -> String {
410        if team_members.is_empty() {
411            format!("STAFF{:03}", self.rng.gen_range(1..100))
412        } else {
413            let idx = self.rng.gen_range(0..team_members.len());
414            team_members[idx].clone()
415        }
416    }
417
418    /// Generate a random assertion.
419    fn random_assertion(&mut self) -> Assertion {
420        let assertions = [
421            Assertion::Occurrence,
422            Assertion::Completeness,
423            Assertion::Accuracy,
424            Assertion::Cutoff,
425            Assertion::Classification,
426            Assertion::Existence,
427            Assertion::RightsAndObligations,
428            Assertion::ValuationAndAllocation,
429            Assertion::PresentationAndDisclosure,
430        ];
431        let idx = self.rng.gen_range(0..assertions.len());
432        assertions[idx]
433    }
434
435    /// Generate AI-extracted terms.
436    fn generate_ai_terms(
437        &mut self,
438        evidence_type: EvidenceType,
439    ) -> std::collections::HashMap<String, String> {
440        let mut terms = std::collections::HashMap::new();
441
442        match evidence_type {
443            EvidenceType::Invoice => {
444                terms.insert(
445                    "invoice_number".into(),
446                    format!("INV-{:06}", self.rng.gen_range(100000..999999)),
447                );
448                terms.insert(
449                    "amount".into(),
450                    format!("{:.2}", self.rng.gen_range(1000.0..100000.0)),
451                );
452                terms.insert("vendor".into(), "Extracted Vendor Name".into());
453            }
454            EvidenceType::Contract => {
455                terms.insert("effective_date".into(), "2025-01-01".into());
456                terms.insert("term_years".into(), format!("{}", self.rng.gen_range(1..5)));
457                terms.insert(
458                    "total_value".into(),
459                    format!("{:.2}", self.rng.gen_range(50000.0..500000.0)),
460                );
461            }
462            EvidenceType::BankStatement => {
463                terms.insert(
464                    "ending_balance".into(),
465                    format!("{:.2}", self.rng.gen_range(100000.0..10000000.0)),
466                );
467                terms.insert("statement_date".into(), "2025-12-31".into());
468            }
469            _ => {
470                terms.insert("document_date".into(), "2025-12-31".into());
471                terms.insert(
472                    "reference".into(),
473                    format!("REF-{:06}", self.rng.gen_range(100000..999999)),
474                );
475            }
476        }
477
478        terms
479    }
480
481    /// Generate AI summary.
482    fn generate_ai_summary(&mut self, evidence_type: EvidenceType) -> String {
483        match evidence_type {
484            EvidenceType::Invoice => {
485                "Invoice for goods/services with standard payment terms. Amount within expected range.".into()
486            }
487            EvidenceType::Contract => {
488                "Multi-year agreement with standard commercial terms. Key provisions identified.".into()
489            }
490            EvidenceType::BankStatement => {
491                "Month-end bank statement showing reconciled balance. No unusual items noted.".into()
492            }
493            _ => "Document reviewed and key data points extracted.".into(),
494        }
495    }
496}
497
#[cfg(test)]
// `unwrap` is only applied to a hard-coded valid calendar date.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Fixed "obtained" date shared by every test.
    fn obtained_on() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 15).unwrap()
    }

    /// Single-member team shared by every test.
    fn team() -> Vec<String> {
        vec![String::from("STAFF001")]
    }

    #[test]
    fn test_evidence_generation() {
        let members = team();
        let mut generator = EvidenceGenerator::new(42);

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            None,
            &[Assertion::Occurrence],
            &members,
            obtained_on(),
        );

        assert!(!evidence.evidence_ref.is_empty());
        assert!(!evidence.title.is_empty());
        assert!(evidence.file_size.is_some());
    }

    #[test]
    fn test_evidence_reliability() {
        let members = team();
        let mut generator = EvidenceGenerator::new(42);

        // Every generated item must carry a reliability note.
        for _ in 0..10 {
            let evidence =
                generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

            assert!(!evidence.reliability_assessment.notes.is_empty());
        }
    }

    #[test]
    fn test_evidence_with_ai_extraction() {
        let members = team();
        // A probability of 1.0 forces the AI extraction branch.
        let always_extract = EvidenceGeneratorConfig {
            ai_extraction_probability: 1.0,
            ..Default::default()
        };
        let mut generator = EvidenceGenerator::with_config(42, always_extract);

        let evidence =
            generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

        assert!(evidence.ai_extracted_terms.is_some());
        assert!(evidence.ai_confidence.is_some());
        assert!(evidence.ai_summary.is_some());
    }

    #[test]
    fn test_evidence_workpaper_link() {
        let members = team();
        let workpaper_id = Uuid::new_v4();
        let mut generator = EvidenceGenerator::new(42);

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            Some(workpaper_id),
            &[Assertion::Completeness],
            &members,
            obtained_on(),
        );

        assert!(evidence.linked_workpapers.contains(&workpaper_id));
    }
}