Skip to main content

datasynth_generators/audit/
evidence_generator.rs

1//! Evidence generator for audit engagements.
2//!
3//! Generates audit evidence with appropriate reliability assessments,
4//! source classifications, and cross-references per ISA 500.
5
6use chrono::{Datelike, Duration, NaiveDate};
7use datasynth_core::utils::seeded_rng;
8use rand::Rng;
9use rand_chacha::ChaCha8Rng;
10use uuid::Uuid;
11
12use datasynth_core::models::audit::{
13    Assertion, AuditEngagement, AuditEvidence, EvidenceSource, EvidenceType, ReliabilityAssessment,
14    ReliabilityLevel, Workpaper,
15};
16
/// Configuration for evidence generation.
///
/// Every field is read by `EvidenceGenerator` while it builds evidence;
/// `Default` supplies a baseline value for each of them.
#[derive(Debug, Clone)]
pub struct EvidenceGeneratorConfig {
    /// Evidence pieces per workpaper (min, max).
    ///
    /// Sampled as an inclusive range, so both bounds can be produced.
    pub evidence_per_workpaper: (u32, u32),
    /// Probability of drawing the evidence type from the "external" pool
    /// (confirmations, bank statements, legal letters, contracts) rather than
    /// the internal / auditor-prepared pool.
    pub external_third_party_probability: f64,
    /// Probability that the "controls" dimension of the reliability
    /// assessment is rated high (otherwise it is rated medium).
    pub high_reliability_probability: f64,
    /// Probability that a piece of evidence carries AI-extracted terms,
    /// a confidence score and a summary.
    pub ai_extraction_probability: f64,
    /// File size range in bytes (min, max), sampled inclusively.
    pub file_size_range: (u64, u64),
    /// Period end date used for evidence document dates (e.g., statement_date, document_date).
    /// Defaults to 2025-12-31 if not set.
    pub period_end_date: Option<NaiveDate>,
}
34
35impl Default for EvidenceGeneratorConfig {
36    fn default() -> Self {
37        Self {
38            evidence_per_workpaper: (1, 5),
39            external_third_party_probability: 0.20,
40            high_reliability_probability: 0.40,
41            ai_extraction_probability: 0.15,
42            file_size_range: (10_000, 5_000_000),
43            period_end_date: None,
44        }
45    }
46}
47
/// Generator for audit evidence.
///
/// Holds the random source, the active configuration and a running counter
/// that numbers each generated piece of evidence (`EV-000001`, `EV-000002`, ...).
pub struct EvidenceGenerator {
    // Random source used for every sampling decision made by the generator.
    rng: ChaCha8Rng,
    // Tunable probabilities and ranges.
    config: EvidenceGeneratorConfig,
    // Number of evidence items produced so far; incremented before each item is built.
    evidence_counter: u32,
}
54
55impl EvidenceGenerator {
56    /// Create a new generator with the given seed.
57    pub fn new(seed: u64) -> Self {
58        Self {
59            rng: seeded_rng(seed, 0),
60            config: EvidenceGeneratorConfig::default(),
61            evidence_counter: 0,
62        }
63    }
64
65    /// Create a new generator with custom configuration.
66    pub fn with_config(seed: u64, config: EvidenceGeneratorConfig) -> Self {
67        Self {
68            rng: seeded_rng(seed, 0),
69            config,
70            evidence_counter: 0,
71        }
72    }
73
74    /// Generate evidence for a workpaper.
75    pub fn generate_evidence_for_workpaper(
76        &mut self,
77        workpaper: &Workpaper,
78        team_members: &[String],
79        base_date: NaiveDate,
80    ) -> Vec<AuditEvidence> {
81        let count = self.rng.random_range(
82            self.config.evidence_per_workpaper.0..=self.config.evidence_per_workpaper.1,
83        );
84
85        (0..count)
86            .map(|i| {
87                self.generate_evidence(
88                    workpaper.engagement_id,
89                    Some(workpaper.workpaper_id),
90                    &workpaper.assertions_tested,
91                    team_members,
92                    base_date + Duration::days(i as i64),
93                )
94            })
95            .collect()
96    }
97
98    /// Generate a single piece of evidence.
99    pub fn generate_evidence(
100        &mut self,
101        engagement_id: Uuid,
102        workpaper_id: Option<Uuid>,
103        assertions: &[Assertion],
104        team_members: &[String],
105        obtained_date: NaiveDate,
106    ) -> AuditEvidence {
107        self.evidence_counter += 1;
108
109        // Determine evidence type and source
110        let (evidence_type, source_type) = self.select_evidence_type_and_source();
111        let title = self.generate_evidence_title(evidence_type);
112
113        let mut evidence = AuditEvidence::new(engagement_id, evidence_type, source_type, &title);
114
115        evidence.evidence_ref = format!("EV-{:06}", self.evidence_counter);
116
117        // Set description
118        let description = self.generate_evidence_description(evidence_type, source_type);
119        evidence = evidence.with_description(&description);
120
121        // Set obtained by
122        let obtainer = self.select_team_member(team_members);
123        evidence = evidence.with_obtained_by(&obtainer, obtained_date);
124
125        // Set file info
126        let file_size = self
127            .rng
128            .random_range(self.config.file_size_range.0..=self.config.file_size_range.1);
129        let file_path = self.generate_file_path(evidence_type, self.evidence_counter);
130        let file_hash = format!("sha256:{:064x}", self.rng.random::<u128>());
131        evidence = evidence.with_file_info(&file_path, &file_hash, file_size);
132
133        // Set reliability assessment
134        let reliability = self.generate_reliability_assessment(source_type);
135        evidence = evidence.with_reliability(reliability);
136
137        // Set assertions
138        if assertions.is_empty() {
139            evidence = evidence.with_assertions(vec![self.random_assertion()]);
140        } else {
141            evidence = evidence.with_assertions(assertions.to_vec());
142        }
143
144        // Link to workpaper if provided
145        if let Some(wp_id) = workpaper_id {
146            evidence.link_workpaper(wp_id);
147        }
148
149        // Maybe add AI extraction
150        if self.rng.random::<f64>() < self.config.ai_extraction_probability {
151            let terms = self.generate_ai_terms(evidence_type);
152            let confidence = self.rng.random_range(0.75..0.98);
153            let summary = self.generate_ai_summary(evidence_type);
154            evidence = evidence.with_ai_extraction(terms, confidence, &summary);
155        }
156
157        evidence
158    }
159
160    /// Generate evidence for an entire engagement.
161    pub fn generate_evidence_for_engagement(
162        &mut self,
163        engagement: &AuditEngagement,
164        workpapers: &[Workpaper],
165        team_members: &[String],
166    ) -> Vec<AuditEvidence> {
167        let mut all_evidence = Vec::new();
168
169        for workpaper in workpapers {
170            let evidence = self.generate_evidence_for_workpaper(
171                workpaper,
172                team_members,
173                workpaper.preparer_date,
174            );
175            all_evidence.extend(evidence);
176        }
177
178        // Add some standalone evidence not linked to specific workpapers
179        let standalone_count = self.rng.random_range(5..15);
180        for i in 0..standalone_count {
181            let date = engagement.fieldwork_start + Duration::days(i as i64 * 3);
182            let evidence =
183                self.generate_evidence(engagement.engagement_id, None, &[], team_members, date);
184            all_evidence.push(evidence);
185        }
186
187        all_evidence
188    }
189
190    /// Select evidence type and source.
191    fn select_evidence_type_and_source(&mut self) -> (EvidenceType, EvidenceSource) {
192        let is_external = self.rng.random::<f64>() < self.config.external_third_party_probability;
193
194        if is_external {
195            let external_types = [
196                (
197                    EvidenceType::Confirmation,
198                    EvidenceSource::ExternalThirdParty,
199                ),
200                (
201                    EvidenceType::BankStatement,
202                    EvidenceSource::ExternalThirdParty,
203                ),
204                (
205                    EvidenceType::LegalLetter,
206                    EvidenceSource::ExternalThirdParty,
207                ),
208                (
209                    EvidenceType::Contract,
210                    EvidenceSource::ExternalClientProvided,
211                ),
212            ];
213            let idx = self.rng.random_range(0..external_types.len());
214            external_types[idx]
215        } else {
216            let internal_types = [
217                (
218                    EvidenceType::Document,
219                    EvidenceSource::InternalClientPrepared,
220                ),
221                (
222                    EvidenceType::Invoice,
223                    EvidenceSource::InternalClientPrepared,
224                ),
225                (
226                    EvidenceType::SystemExtract,
227                    EvidenceSource::InternalClientPrepared,
228                ),
229                (EvidenceType::Analysis, EvidenceSource::AuditorPrepared),
230                (EvidenceType::Recalculation, EvidenceSource::AuditorPrepared),
231                (
232                    EvidenceType::MeetingMinutes,
233                    EvidenceSource::InternalClientPrepared,
234                ),
235                (EvidenceType::Email, EvidenceSource::InternalClientPrepared),
236            ];
237            let idx = self.rng.random_range(0..internal_types.len());
238            internal_types[idx]
239        }
240    }
241
242    /// Generate evidence title.
243    fn generate_evidence_title(&mut self, evidence_type: EvidenceType) -> String {
244        let titles = match evidence_type {
245            EvidenceType::Confirmation => vec![
246                "Bank Confirmation - Primary Account",
247                "AR Confirmation - Major Customer",
248                "AP Confirmation - Key Vendor",
249                "Legal Confirmation",
250                "Investment Confirmation",
251            ],
252            EvidenceType::BankStatement => vec![
253                "Bank Statement - Operating Account",
254                "Bank Statement - Payroll Account",
255                "Bank Statement - Investment Account",
256                "Bank Statement - Foreign Currency",
257            ],
258            EvidenceType::Invoice => vec![
259                "Vendor Invoice Sample",
260                "Customer Invoice Sample",
261                "Intercompany Invoice",
262                "Service Invoice",
263            ],
264            EvidenceType::Contract => vec![
265                "Customer Contract",
266                "Vendor Agreement",
267                "Lease Agreement",
268                "Employment Contract Sample",
269                "Loan Agreement",
270            ],
271            EvidenceType::Document => vec![
272                "Supporting Documentation",
273                "Source Document",
274                "Transaction Support",
275                "Authorization Document",
276            ],
277            EvidenceType::Analysis => vec![
278                "Analytical Review",
279                "Variance Analysis",
280                "Trend Analysis",
281                "Ratio Analysis",
282                "Account Reconciliation Review",
283            ],
284            EvidenceType::SystemExtract => vec![
285                "ERP System Extract",
286                "GL Detail Extract",
287                "Transaction Log Extract",
288                "User Access Report",
289            ],
290            EvidenceType::MeetingMinutes => vec![
291                "Board Meeting Minutes",
292                "Audit Committee Minutes",
293                "Management Meeting Notes",
294            ],
295            EvidenceType::Email => vec![
296                "Management Inquiry Response",
297                "Confirmation Follow-up",
298                "Exception Explanation",
299            ],
300            EvidenceType::Recalculation => vec![
301                "Depreciation Recalculation",
302                "Interest Recalculation",
303                "Tax Provision Recalculation",
304                "Allowance Recalculation",
305            ],
306            EvidenceType::LegalLetter => vec!["Attorney Response Letter", "Litigation Summary"],
307            EvidenceType::ManagementRepresentation => vec![
308                "Management Representation Letter",
309                "Specific Representation",
310            ],
311            EvidenceType::SpecialistReport => vec![
312                "Valuation Specialist Report",
313                "Actuary Report",
314                "IT Specialist Assessment",
315            ],
316            EvidenceType::PhysicalObservation => vec![
317                "Inventory Count Observation",
318                "Fixed Asset Inspection",
319                "Physical Verification",
320            ],
321        };
322
323        let idx = self.rng.random_range(0..titles.len());
324        titles[idx].to_string()
325    }
326
327    /// Generate evidence description.
328    fn generate_evidence_description(
329        &mut self,
330        evidence_type: EvidenceType,
331        source: EvidenceSource,
332    ) -> String {
333        let source_desc = source.description();
334        match evidence_type {
335            EvidenceType::Confirmation => {
336                format!("External confirmation {}. Response received and agreed to client records.", source_desc)
337            }
338            EvidenceType::BankStatement => {
339                format!("Bank statement {}. Statement obtained for period-end reconciliation.", source_desc)
340            }
341            EvidenceType::Invoice => {
342                "Invoice selected as part of sample testing. Examined for appropriate approval, accuracy, and proper period recording.".into()
343            }
344            EvidenceType::Analysis => {
345                "Auditor-prepared analytical procedure. Expectations developed based on prior year, industry data, and management budgets.".into()
346            }
347            EvidenceType::SystemExtract => {
348                format!("System report {}. Extract validated for completeness and accuracy.", source_desc)
349            }
350            _ => format!("Supporting documentation {}.", source_desc),
351        }
352    }
353
354    /// Generate reliability assessment.
355    fn generate_reliability_assessment(&mut self, source: EvidenceSource) -> ReliabilityAssessment {
356        let base_reliability = source.inherent_reliability();
357
358        let independence = base_reliability;
359        let controls = if self.rng.random::<f64>() < self.config.high_reliability_probability {
360            ReliabilityLevel::High
361        } else {
362            ReliabilityLevel::Medium
363        };
364        let qualifications = if self.rng.random::<f64>() < 0.7 {
365            ReliabilityLevel::High
366        } else {
367            ReliabilityLevel::Medium
368        };
369        let objectivity = match source {
370            EvidenceSource::ExternalThirdParty | EvidenceSource::AuditorPrepared => {
371                ReliabilityLevel::High
372            }
373            _ => {
374                if self.rng.random::<f64>() < 0.5 {
375                    ReliabilityLevel::Medium
376                } else {
377                    ReliabilityLevel::Low
378                }
379            }
380        };
381
382        let notes = match base_reliability {
383            ReliabilityLevel::High => {
384                "Evidence obtained from independent source with high reliability"
385            }
386            ReliabilityLevel::Medium => "Evidence obtained from client with adequate controls",
387            ReliabilityLevel::Low => "Internal evidence requires corroboration",
388        };
389
390        ReliabilityAssessment::new(independence, controls, qualifications, objectivity, notes)
391    }
392
393    /// Generate file path for evidence.
394    fn generate_file_path(&mut self, evidence_type: EvidenceType, counter: u32) -> String {
395        let extension = match evidence_type {
396            EvidenceType::SystemExtract => "xlsx",
397            EvidenceType::Analysis | EvidenceType::Recalculation => "xlsx",
398            EvidenceType::MeetingMinutes | EvidenceType::ManagementRepresentation => "pdf",
399            EvidenceType::Email => "msg",
400            _ => {
401                if self.rng.random::<f64>() < 0.6 {
402                    "pdf"
403                } else {
404                    "xlsx"
405                }
406            }
407        };
408
409        format!("/evidence/EV-{:06}.{}", counter, extension)
410    }
411
412    /// Select a random team member.
413    fn select_team_member(&mut self, team_members: &[String]) -> String {
414        if team_members.is_empty() {
415            format!("STAFF{:03}", self.rng.random_range(1..100))
416        } else {
417            let idx = self.rng.random_range(0..team_members.len());
418            team_members[idx].clone()
419        }
420    }
421
422    /// Generate a random assertion.
423    fn random_assertion(&mut self) -> Assertion {
424        let assertions = [
425            Assertion::Occurrence,
426            Assertion::Completeness,
427            Assertion::Accuracy,
428            Assertion::Cutoff,
429            Assertion::Classification,
430            Assertion::Existence,
431            Assertion::RightsAndObligations,
432            Assertion::ValuationAndAllocation,
433            Assertion::PresentationAndDisclosure,
434        ];
435        let idx = self.rng.random_range(0..assertions.len());
436        assertions[idx]
437    }
438
439    /// Generate AI-extracted terms.
440    fn generate_ai_terms(
441        &mut self,
442        evidence_type: EvidenceType,
443    ) -> std::collections::HashMap<String, String> {
444        let mut terms = std::collections::HashMap::new();
445
446        let default_end = NaiveDate::from_ymd_opt(2025, 12, 31).expect("valid date");
447        let period_end = self.config.period_end_date.unwrap_or(default_end);
448        let period_end_str = period_end.format("%Y-%m-%d").to_string();
449        // Derive a period-start from period_end (beginning of that year)
450        let period_start_str = NaiveDate::from_ymd_opt(period_end.year(), 1, 1)
451            .expect("valid date")
452            .format("%Y-%m-%d")
453            .to_string();
454
455        match evidence_type {
456            EvidenceType::Invoice => {
457                terms.insert(
458                    "invoice_number".into(),
459                    format!("INV-{:06}", self.rng.random_range(100000..999999)),
460                );
461                terms.insert(
462                    "amount".into(),
463                    format!("{:.2}", self.rng.random_range(1000.0..100000.0)),
464                );
465                terms.insert("vendor".into(), "Extracted Vendor Name".into());
466            }
467            EvidenceType::Contract => {
468                terms.insert("effective_date".into(), period_start_str);
469                terms.insert(
470                    "term_years".into(),
471                    format!("{}", self.rng.random_range(1..5)),
472                );
473                terms.insert(
474                    "total_value".into(),
475                    format!("{:.2}", self.rng.random_range(50000.0..500000.0)),
476                );
477            }
478            EvidenceType::BankStatement => {
479                terms.insert(
480                    "ending_balance".into(),
481                    format!("{:.2}", self.rng.random_range(100000.0..10000000.0)),
482                );
483                terms.insert("statement_date".into(), period_end_str);
484            }
485            _ => {
486                terms.insert("document_date".into(), period_end_str);
487                terms.insert(
488                    "reference".into(),
489                    format!("REF-{:06}", self.rng.random_range(100000..999999)),
490                );
491            }
492        }
493
494        terms
495    }
496
497    /// Generate AI summary.
498    fn generate_ai_summary(&mut self, evidence_type: EvidenceType) -> String {
499        match evidence_type {
500            EvidenceType::Invoice => {
501                "Invoice for goods/services with standard payment terms. Amount within expected range.".into()
502            }
503            EvidenceType::Contract => {
504                "Multi-year agreement with standard commercial terms. Key provisions identified.".into()
505            }
506            EvidenceType::BankStatement => {
507                "Month-end bank statement showing reconciled balance. No unusual items noted.".into()
508            }
509            _ => "Document reviewed and key data points extracted.".into(),
510        }
511    }
512}
513
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Seed shared by every test so runs are reproducible.
    const SEED: u64 = 42;

    /// Fixed "obtained" date used by all test cases.
    fn obtained_on() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 15).unwrap()
    }

    /// Single-member team used by all test cases.
    fn team() -> Vec<String> {
        vec!["STAFF001".into()]
    }

    /// A generated item always has a reference, a title and a file size.
    #[test]
    fn test_evidence_generation() {
        let mut generator = EvidenceGenerator::new(SEED);
        let members = team();

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            None,
            &[Assertion::Occurrence],
            &members,
            obtained_on(),
        );

        assert!(!evidence.evidence_ref.is_empty());
        assert!(!evidence.title.is_empty());
        assert!(evidence.file_size.is_some());
    }

    /// Every generated item carries a reliability assessment with notes.
    #[test]
    fn test_evidence_reliability() {
        let mut generator = EvidenceGenerator::new(SEED);
        let members = team();

        for _ in 0..10 {
            let evidence =
                generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

            assert!(!evidence.reliability_assessment.notes.is_empty());
        }
    }

    /// With the AI probability forced to 1.0 the AI fields are always filled.
    #[test]
    fn test_evidence_with_ai_extraction() {
        let always_extract = EvidenceGeneratorConfig {
            ai_extraction_probability: 1.0,
            ..Default::default()
        };
        let mut generator = EvidenceGenerator::with_config(SEED, always_extract);
        let members = team();

        let evidence =
            generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

        assert!(evidence.ai_extracted_terms.is_some());
        assert!(evidence.ai_confidence.is_some());
        assert!(evidence.ai_summary.is_some());
    }

    /// Passing a workpaper id links the evidence to that workpaper.
    #[test]
    fn test_evidence_workpaper_link() {
        let mut generator = EvidenceGenerator::new(SEED);
        let members = team();
        let workpaper_id = Uuid::new_v4();

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            Some(workpaper_id),
            &[Assertion::Completeness],
            &members,
            obtained_on(),
        );

        assert!(evidence.linked_workpapers.contains(&workpaper_id));
    }
}