Skip to main content

datasynth_generators/audit/
evidence_generator.rs

1//! Evidence generator for audit engagements.
2//!
3//! Generates audit evidence with appropriate reliability assessments,
4//! source classifications, and cross-references per ISA 500.
5
6use chrono::{Duration, NaiveDate};
7use rand::{Rng, SeedableRng};
8use rand_chacha::ChaCha8Rng;
9use uuid::Uuid;
10
11use datasynth_core::models::audit::{
12    Assertion, AuditEngagement, AuditEvidence, EvidenceSource, EvidenceType, ReliabilityAssessment,
13    ReliabilityLevel, Workpaper,
14};
15
/// Tunable parameters that steer [`EvidenceGenerator`].
#[derive(Debug, Clone)]
pub struct EvidenceGeneratorConfig {
    /// Inclusive `(min, max)` number of evidence items generated per workpaper.
    pub evidence_per_workpaper: (u32, u32),
    /// Chance that an item is drawn from the external evidence pool
    /// (confirmations, bank statements, legal letters, contracts) rather
    /// than the internal / auditor-prepared pool.
    pub external_third_party_probability: f64,
    /// Chance that the "controls" dimension of the reliability assessment
    /// is rated high instead of medium.
    pub high_reliability_probability: f64,
    /// Chance that an item carries AI-extracted terms, a confidence score
    /// and a summary.
    pub ai_extraction_probability: f64,
    /// Inclusive `(min, max)` file size in bytes.
    pub file_size_range: (u64, u64),
}

impl Default for EvidenceGeneratorConfig {
    /// Defaults: 1-5 items per workpaper, 20% external, 40% high-reliability
    /// controls, 15% AI extraction, files between 10 kB and 5 MB.
    fn default() -> Self {
        let evidence_per_workpaper = (1, 5);
        let file_size_range = (10_000, 5_000_000);

        EvidenceGeneratorConfig {
            evidence_per_workpaper,
            file_size_range,
            external_third_party_probability: 0.20,
            high_reliability_probability: 0.40,
            ai_extraction_probability: 0.15,
        }
    }
}
42
43/// Generator for audit evidence.
44pub struct EvidenceGenerator {
45    rng: ChaCha8Rng,
46    config: EvidenceGeneratorConfig,
47    evidence_counter: u32,
48}
49
50impl EvidenceGenerator {
51    /// Create a new generator with the given seed.
52    pub fn new(seed: u64) -> Self {
53        Self {
54            rng: ChaCha8Rng::seed_from_u64(seed),
55            config: EvidenceGeneratorConfig::default(),
56            evidence_counter: 0,
57        }
58    }
59
60    /// Create a new generator with custom configuration.
61    pub fn with_config(seed: u64, config: EvidenceGeneratorConfig) -> Self {
62        Self {
63            rng: ChaCha8Rng::seed_from_u64(seed),
64            config,
65            evidence_counter: 0,
66        }
67    }
68
69    /// Generate evidence for a workpaper.
70    pub fn generate_evidence_for_workpaper(
71        &mut self,
72        workpaper: &Workpaper,
73        team_members: &[String],
74        base_date: NaiveDate,
75    ) -> Vec<AuditEvidence> {
76        let count = self
77            .rng
78            .gen_range(self.config.evidence_per_workpaper.0..=self.config.evidence_per_workpaper.1);
79
80        (0..count)
81            .map(|i| {
82                self.generate_evidence(
83                    workpaper.engagement_id,
84                    Some(workpaper.workpaper_id),
85                    &workpaper.assertions_tested,
86                    team_members,
87                    base_date + Duration::days(i as i64),
88                )
89            })
90            .collect()
91    }
92
93    /// Generate a single piece of evidence.
94    pub fn generate_evidence(
95        &mut self,
96        engagement_id: Uuid,
97        workpaper_id: Option<Uuid>,
98        assertions: &[Assertion],
99        team_members: &[String],
100        obtained_date: NaiveDate,
101    ) -> AuditEvidence {
102        self.evidence_counter += 1;
103
104        // Determine evidence type and source
105        let (evidence_type, source_type) = self.select_evidence_type_and_source();
106        let title = self.generate_evidence_title(evidence_type);
107
108        let mut evidence = AuditEvidence::new(engagement_id, evidence_type, source_type, &title);
109
110        evidence.evidence_ref = format!("EV-{:06}", self.evidence_counter);
111
112        // Set description
113        let description = self.generate_evidence_description(evidence_type, source_type);
114        evidence = evidence.with_description(&description);
115
116        // Set obtained by
117        let obtainer = self.select_team_member(team_members);
118        evidence = evidence.with_obtained_by(&obtainer, obtained_date);
119
120        // Set file info
121        let file_size = self
122            .rng
123            .gen_range(self.config.file_size_range.0..=self.config.file_size_range.1);
124        let file_path = self.generate_file_path(evidence_type, self.evidence_counter);
125        let file_hash = format!("sha256:{:064x}", self.rng.gen::<u128>());
126        evidence = evidence.with_file_info(&file_path, &file_hash, file_size);
127
128        // Set reliability assessment
129        let reliability = self.generate_reliability_assessment(source_type);
130        evidence = evidence.with_reliability(reliability);
131
132        // Set assertions
133        if assertions.is_empty() {
134            evidence = evidence.with_assertions(vec![self.random_assertion()]);
135        } else {
136            evidence = evidence.with_assertions(assertions.to_vec());
137        }
138
139        // Link to workpaper if provided
140        if let Some(wp_id) = workpaper_id {
141            evidence.link_workpaper(wp_id);
142        }
143
144        // Maybe add AI extraction
145        if self.rng.gen::<f64>() < self.config.ai_extraction_probability {
146            let terms = self.generate_ai_terms(evidence_type);
147            let confidence = self.rng.gen_range(0.75..0.98);
148            let summary = self.generate_ai_summary(evidence_type);
149            evidence = evidence.with_ai_extraction(terms, confidence, &summary);
150        }
151
152        evidence
153    }
154
155    /// Generate evidence for an entire engagement.
156    pub fn generate_evidence_for_engagement(
157        &mut self,
158        engagement: &AuditEngagement,
159        workpapers: &[Workpaper],
160        team_members: &[String],
161    ) -> Vec<AuditEvidence> {
162        let mut all_evidence = Vec::new();
163
164        for workpaper in workpapers {
165            let evidence = self.generate_evidence_for_workpaper(
166                workpaper,
167                team_members,
168                workpaper.preparer_date,
169            );
170            all_evidence.extend(evidence);
171        }
172
173        // Add some standalone evidence not linked to specific workpapers
174        let standalone_count = self.rng.gen_range(5..15);
175        for i in 0..standalone_count {
176            let date = engagement.fieldwork_start + Duration::days(i as i64 * 3);
177            let evidence =
178                self.generate_evidence(engagement.engagement_id, None, &[], team_members, date);
179            all_evidence.push(evidence);
180        }
181
182        all_evidence
183    }
184
185    /// Select evidence type and source.
186    fn select_evidence_type_and_source(&mut self) -> (EvidenceType, EvidenceSource) {
187        let is_external = self.rng.gen::<f64>() < self.config.external_third_party_probability;
188
189        if is_external {
190            let external_types = [
191                (
192                    EvidenceType::Confirmation,
193                    EvidenceSource::ExternalThirdParty,
194                ),
195                (
196                    EvidenceType::BankStatement,
197                    EvidenceSource::ExternalThirdParty,
198                ),
199                (
200                    EvidenceType::LegalLetter,
201                    EvidenceSource::ExternalThirdParty,
202                ),
203                (
204                    EvidenceType::Contract,
205                    EvidenceSource::ExternalClientProvided,
206                ),
207            ];
208            let idx = self.rng.gen_range(0..external_types.len());
209            external_types[idx]
210        } else {
211            let internal_types = [
212                (
213                    EvidenceType::Document,
214                    EvidenceSource::InternalClientPrepared,
215                ),
216                (
217                    EvidenceType::Invoice,
218                    EvidenceSource::InternalClientPrepared,
219                ),
220                (
221                    EvidenceType::SystemExtract,
222                    EvidenceSource::InternalClientPrepared,
223                ),
224                (EvidenceType::Analysis, EvidenceSource::AuditorPrepared),
225                (EvidenceType::Recalculation, EvidenceSource::AuditorPrepared),
226                (
227                    EvidenceType::MeetingMinutes,
228                    EvidenceSource::InternalClientPrepared,
229                ),
230                (EvidenceType::Email, EvidenceSource::InternalClientPrepared),
231            ];
232            let idx = self.rng.gen_range(0..internal_types.len());
233            internal_types[idx]
234        }
235    }
236
237    /// Generate evidence title.
238    fn generate_evidence_title(&mut self, evidence_type: EvidenceType) -> String {
239        let titles = match evidence_type {
240            EvidenceType::Confirmation => vec![
241                "Bank Confirmation - Primary Account",
242                "AR Confirmation - Major Customer",
243                "AP Confirmation - Key Vendor",
244                "Legal Confirmation",
245                "Investment Confirmation",
246            ],
247            EvidenceType::BankStatement => vec![
248                "Bank Statement - Operating Account",
249                "Bank Statement - Payroll Account",
250                "Bank Statement - Investment Account",
251                "Bank Statement - Foreign Currency",
252            ],
253            EvidenceType::Invoice => vec![
254                "Vendor Invoice Sample",
255                "Customer Invoice Sample",
256                "Intercompany Invoice",
257                "Service Invoice",
258            ],
259            EvidenceType::Contract => vec![
260                "Customer Contract",
261                "Vendor Agreement",
262                "Lease Agreement",
263                "Employment Contract Sample",
264                "Loan Agreement",
265            ],
266            EvidenceType::Document => vec![
267                "Supporting Documentation",
268                "Source Document",
269                "Transaction Support",
270                "Authorization Document",
271            ],
272            EvidenceType::Analysis => vec![
273                "Analytical Review",
274                "Variance Analysis",
275                "Trend Analysis",
276                "Ratio Analysis",
277                "Account Reconciliation Review",
278            ],
279            EvidenceType::SystemExtract => vec![
280                "ERP System Extract",
281                "GL Detail Extract",
282                "Transaction Log Extract",
283                "User Access Report",
284            ],
285            EvidenceType::MeetingMinutes => vec![
286                "Board Meeting Minutes",
287                "Audit Committee Minutes",
288                "Management Meeting Notes",
289            ],
290            EvidenceType::Email => vec![
291                "Management Inquiry Response",
292                "Confirmation Follow-up",
293                "Exception Explanation",
294            ],
295            EvidenceType::Recalculation => vec![
296                "Depreciation Recalculation",
297                "Interest Recalculation",
298                "Tax Provision Recalculation",
299                "Allowance Recalculation",
300            ],
301            EvidenceType::LegalLetter => vec!["Attorney Response Letter", "Litigation Summary"],
302            EvidenceType::ManagementRepresentation => vec![
303                "Management Representation Letter",
304                "Specific Representation",
305            ],
306            EvidenceType::SpecialistReport => vec![
307                "Valuation Specialist Report",
308                "Actuary Report",
309                "IT Specialist Assessment",
310            ],
311            EvidenceType::PhysicalObservation => vec![
312                "Inventory Count Observation",
313                "Fixed Asset Inspection",
314                "Physical Verification",
315            ],
316        };
317
318        let idx = self.rng.gen_range(0..titles.len());
319        titles[idx].to_string()
320    }
321
322    /// Generate evidence description.
323    fn generate_evidence_description(
324        &mut self,
325        evidence_type: EvidenceType,
326        source: EvidenceSource,
327    ) -> String {
328        let source_desc = source.description();
329        match evidence_type {
330            EvidenceType::Confirmation => {
331                format!("External confirmation {}. Response received and agreed to client records.", source_desc)
332            }
333            EvidenceType::BankStatement => {
334                format!("Bank statement {}. Statement obtained for period-end reconciliation.", source_desc)
335            }
336            EvidenceType::Invoice => {
337                "Invoice selected as part of sample testing. Examined for appropriate approval, accuracy, and proper period recording.".into()
338            }
339            EvidenceType::Analysis => {
340                "Auditor-prepared analytical procedure. Expectations developed based on prior year, industry data, and management budgets.".into()
341            }
342            EvidenceType::SystemExtract => {
343                format!("System report {}. Extract validated for completeness and accuracy.", source_desc)
344            }
345            _ => format!("Supporting documentation {}.", source_desc),
346        }
347    }
348
349    /// Generate reliability assessment.
350    fn generate_reliability_assessment(&mut self, source: EvidenceSource) -> ReliabilityAssessment {
351        let base_reliability = source.inherent_reliability();
352
353        let independence = base_reliability;
354        let controls = if self.rng.gen::<f64>() < self.config.high_reliability_probability {
355            ReliabilityLevel::High
356        } else {
357            ReliabilityLevel::Medium
358        };
359        let qualifications = if self.rng.gen::<f64>() < 0.7 {
360            ReliabilityLevel::High
361        } else {
362            ReliabilityLevel::Medium
363        };
364        let objectivity = match source {
365            EvidenceSource::ExternalThirdParty | EvidenceSource::AuditorPrepared => {
366                ReliabilityLevel::High
367            }
368            _ => {
369                if self.rng.gen::<f64>() < 0.5 {
370                    ReliabilityLevel::Medium
371                } else {
372                    ReliabilityLevel::Low
373                }
374            }
375        };
376
377        let notes = match base_reliability {
378            ReliabilityLevel::High => {
379                "Evidence obtained from independent source with high reliability"
380            }
381            ReliabilityLevel::Medium => "Evidence obtained from client with adequate controls",
382            ReliabilityLevel::Low => "Internal evidence requires corroboration",
383        };
384
385        ReliabilityAssessment::new(independence, controls, qualifications, objectivity, notes)
386    }
387
388    /// Generate file path for evidence.
389    fn generate_file_path(&mut self, evidence_type: EvidenceType, counter: u32) -> String {
390        let extension = match evidence_type {
391            EvidenceType::SystemExtract => "xlsx",
392            EvidenceType::Analysis | EvidenceType::Recalculation => "xlsx",
393            EvidenceType::MeetingMinutes | EvidenceType::ManagementRepresentation => "pdf",
394            EvidenceType::Email => "msg",
395            _ => {
396                if self.rng.gen::<f64>() < 0.6 {
397                    "pdf"
398                } else {
399                    "xlsx"
400                }
401            }
402        };
403
404        format!("/evidence/EV-{:06}.{}", counter, extension)
405    }
406
407    /// Select a random team member.
408    fn select_team_member(&mut self, team_members: &[String]) -> String {
409        if team_members.is_empty() {
410            format!("STAFF{:03}", self.rng.gen_range(1..100))
411        } else {
412            let idx = self.rng.gen_range(0..team_members.len());
413            team_members[idx].clone()
414        }
415    }
416
417    /// Generate a random assertion.
418    fn random_assertion(&mut self) -> Assertion {
419        let assertions = [
420            Assertion::Occurrence,
421            Assertion::Completeness,
422            Assertion::Accuracy,
423            Assertion::Cutoff,
424            Assertion::Classification,
425            Assertion::Existence,
426            Assertion::RightsAndObligations,
427            Assertion::ValuationAndAllocation,
428            Assertion::PresentationAndDisclosure,
429        ];
430        let idx = self.rng.gen_range(0..assertions.len());
431        assertions[idx]
432    }
433
434    /// Generate AI-extracted terms.
435    fn generate_ai_terms(
436        &mut self,
437        evidence_type: EvidenceType,
438    ) -> std::collections::HashMap<String, String> {
439        let mut terms = std::collections::HashMap::new();
440
441        match evidence_type {
442            EvidenceType::Invoice => {
443                terms.insert(
444                    "invoice_number".into(),
445                    format!("INV-{:06}", self.rng.gen_range(100000..999999)),
446                );
447                terms.insert(
448                    "amount".into(),
449                    format!("{:.2}", self.rng.gen_range(1000.0..100000.0)),
450                );
451                terms.insert("vendor".into(), "Extracted Vendor Name".into());
452            }
453            EvidenceType::Contract => {
454                terms.insert("effective_date".into(), "2025-01-01".into());
455                terms.insert("term_years".into(), format!("{}", self.rng.gen_range(1..5)));
456                terms.insert(
457                    "total_value".into(),
458                    format!("{:.2}", self.rng.gen_range(50000.0..500000.0)),
459                );
460            }
461            EvidenceType::BankStatement => {
462                terms.insert(
463                    "ending_balance".into(),
464                    format!("{:.2}", self.rng.gen_range(100000.0..10000000.0)),
465                );
466                terms.insert("statement_date".into(), "2025-12-31".into());
467            }
468            _ => {
469                terms.insert("document_date".into(), "2025-12-31".into());
470                terms.insert(
471                    "reference".into(),
472                    format!("REF-{:06}", self.rng.gen_range(100000..999999)),
473                );
474            }
475        }
476
477        terms
478    }
479
480    /// Generate AI summary.
481    fn generate_ai_summary(&mut self, evidence_type: EvidenceType) -> String {
482        match evidence_type {
483            EvidenceType::Invoice => {
484                "Invoice for goods/services with standard payment terms. Amount within expected range.".into()
485            }
486            EvidenceType::Contract => {
487                "Multi-year agreement with standard commercial terms. Key provisions identified.".into()
488            }
489            EvidenceType::BankStatement => {
490                "Month-end bank statement showing reconciled balance. No unusual items noted.".into()
491            }
492            _ => "Document reviewed and key data points extracted.".into(),
493        }
494    }
495}
496
#[cfg(test)]
mod tests {
    use super::*;

    /// Seed used by every test so runs are reproducible.
    const SEED: u64 = 42;

    /// Date passed as the obtained date in every test.
    fn obtained_on() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 15).unwrap()
    }

    /// Single-member team used by every test.
    fn team() -> [String; 1] {
        [String::from("STAFF001")]
    }

    #[test]
    fn test_evidence_generation() {
        let mut generator = EvidenceGenerator::new(SEED);

        let item = generator.generate_evidence(
            Uuid::new_v4(),
            None,
            &[Assertion::Occurrence],
            &team(),
            obtained_on(),
        );

        assert!(!item.evidence_ref.is_empty());
        assert!(!item.title.is_empty());
        assert!(item.file_size.is_some());
    }

    #[test]
    fn test_evidence_reliability() {
        let mut generator = EvidenceGenerator::new(SEED);

        // Every generated item must carry a reliability assessment with notes.
        for _ in 0..10 {
            let item =
                generator.generate_evidence(Uuid::new_v4(), None, &[], &team(), obtained_on());

            assert!(!item.reliability_assessment.notes.is_empty());
        }
    }

    #[test]
    fn test_evidence_with_ai_extraction() {
        // Probability 1.0 forces the AI extraction branch.
        let always_extract = EvidenceGeneratorConfig {
            ai_extraction_probability: 1.0,
            ..EvidenceGeneratorConfig::default()
        };
        let mut generator = EvidenceGenerator::with_config(SEED, always_extract);

        let item = generator.generate_evidence(Uuid::new_v4(), None, &[], &team(), obtained_on());

        assert!(item.ai_extracted_terms.is_some());
        assert!(item.ai_confidence.is_some());
        assert!(item.ai_summary.is_some());
    }

    #[test]
    fn test_evidence_workpaper_link() {
        let mut generator = EvidenceGenerator::new(SEED);
        let workpaper_id = Uuid::new_v4();

        let item = generator.generate_evidence(
            Uuid::new_v4(),
            Some(workpaper_id),
            &[Assertion::Completeness],
            &team(),
            obtained_on(),
        );

        assert!(item.linked_workpapers.contains(&workpaper_id));
    }
}