1use chrono::{Datelike, Duration, NaiveDate};
7use datasynth_core::utils::seeded_rng;
8use rand::Rng;
9use rand_chacha::ChaCha8Rng;
10use uuid::Uuid;
11
12use datasynth_core::models::audit::{
13 Assertion, AuditEngagement, AuditEvidence, EvidenceSource, EvidenceType, ReliabilityAssessment,
14 ReliabilityLevel, Workpaper,
15};
16
17#[derive(Debug, Clone)]
19pub struct EvidenceGeneratorConfig {
20 pub evidence_per_workpaper: (u32, u32),
22 pub external_third_party_probability: f64,
24 pub high_reliability_probability: f64,
26 pub ai_extraction_probability: f64,
28 pub file_size_range: (u64, u64),
30 pub period_end_date: Option<NaiveDate>,
33}
34
35impl Default for EvidenceGeneratorConfig {
36 fn default() -> Self {
37 Self {
38 evidence_per_workpaper: (1, 5),
39 external_third_party_probability: 0.20,
40 high_reliability_probability: 0.40,
41 ai_extraction_probability: 0.15,
42 file_size_range: (10_000, 5_000_000),
43 period_end_date: None,
44 }
45 }
46}
47
48pub struct EvidenceGenerator {
50 rng: ChaCha8Rng,
51 config: EvidenceGeneratorConfig,
52 evidence_counter: u32,
53}
54
55impl EvidenceGenerator {
56 pub fn new(seed: u64) -> Self {
58 Self {
59 rng: seeded_rng(seed, 0),
60 config: EvidenceGeneratorConfig::default(),
61 evidence_counter: 0,
62 }
63 }
64
65 pub fn with_config(seed: u64, config: EvidenceGeneratorConfig) -> Self {
67 Self {
68 rng: seeded_rng(seed, 0),
69 config,
70 evidence_counter: 0,
71 }
72 }
73
74 pub fn generate_evidence_for_workpaper(
76 &mut self,
77 workpaper: &Workpaper,
78 team_members: &[String],
79 base_date: NaiveDate,
80 ) -> Vec<AuditEvidence> {
81 let count = self.rng.random_range(
82 self.config.evidence_per_workpaper.0..=self.config.evidence_per_workpaper.1,
83 );
84
85 (0..count)
86 .map(|i| {
87 self.generate_evidence(
88 workpaper.engagement_id,
89 Some(workpaper.workpaper_id),
90 &workpaper.assertions_tested,
91 team_members,
92 base_date + Duration::days(i as i64),
93 )
94 })
95 .collect()
96 }
97
98 pub fn generate_evidence(
100 &mut self,
101 engagement_id: Uuid,
102 workpaper_id: Option<Uuid>,
103 assertions: &[Assertion],
104 team_members: &[String],
105 obtained_date: NaiveDate,
106 ) -> AuditEvidence {
107 self.evidence_counter += 1;
108
109 let (evidence_type, source_type) = self.select_evidence_type_and_source();
111 let title = self.generate_evidence_title(evidence_type);
112
113 let mut evidence = AuditEvidence::new(engagement_id, evidence_type, source_type, &title);
114
115 evidence.evidence_ref = format!("EV-{:06}", self.evidence_counter);
116
117 let description = self.generate_evidence_description(evidence_type, source_type);
119 evidence = evidence.with_description(&description);
120
121 let obtainer = self.select_team_member(team_members);
123 evidence = evidence.with_obtained_by(&obtainer, obtained_date);
124
125 let file_size = self
127 .rng
128 .random_range(self.config.file_size_range.0..=self.config.file_size_range.1);
129 let file_path = self.generate_file_path(evidence_type, self.evidence_counter);
130 let file_hash = format!("sha256:{:064x}", self.rng.random::<u128>());
131 evidence = evidence.with_file_info(&file_path, &file_hash, file_size);
132
133 let reliability = self.generate_reliability_assessment(source_type);
135 evidence = evidence.with_reliability(reliability);
136
137 if assertions.is_empty() {
139 evidence = evidence.with_assertions(vec![self.random_assertion()]);
140 } else {
141 evidence = evidence.with_assertions(assertions.to_vec());
142 }
143
144 if let Some(wp_id) = workpaper_id {
146 evidence.link_workpaper(wp_id);
147 }
148
149 if self.rng.random::<f64>() < self.config.ai_extraction_probability {
151 let terms = self.generate_ai_terms(evidence_type);
152 let confidence = self.rng.random_range(0.75..0.98);
153 let summary = self.generate_ai_summary(evidence_type);
154 evidence = evidence.with_ai_extraction(terms, confidence, &summary);
155 }
156
157 evidence
158 }
159
160 pub fn generate_evidence_for_engagement(
162 &mut self,
163 engagement: &AuditEngagement,
164 workpapers: &[Workpaper],
165 team_members: &[String],
166 ) -> Vec<AuditEvidence> {
167 let mut all_evidence = Vec::new();
168
169 for workpaper in workpapers {
170 let evidence = self.generate_evidence_for_workpaper(
171 workpaper,
172 team_members,
173 workpaper.preparer_date,
174 );
175 all_evidence.extend(evidence);
176 }
177
178 let standalone_count = self.rng.random_range(5..15);
180 for i in 0..standalone_count {
181 let date = engagement.fieldwork_start + Duration::days(i as i64 * 3);
182 let evidence =
183 self.generate_evidence(engagement.engagement_id, None, &[], team_members, date);
184 all_evidence.push(evidence);
185 }
186
187 all_evidence
188 }
189
190 fn select_evidence_type_and_source(&mut self) -> (EvidenceType, EvidenceSource) {
192 let is_external = self.rng.random::<f64>() < self.config.external_third_party_probability;
193
194 if is_external {
195 let external_types = [
196 (
197 EvidenceType::Confirmation,
198 EvidenceSource::ExternalThirdParty,
199 ),
200 (
201 EvidenceType::BankStatement,
202 EvidenceSource::ExternalThirdParty,
203 ),
204 (
205 EvidenceType::LegalLetter,
206 EvidenceSource::ExternalThirdParty,
207 ),
208 (
209 EvidenceType::Contract,
210 EvidenceSource::ExternalClientProvided,
211 ),
212 ];
213 let idx = self.rng.random_range(0..external_types.len());
214 external_types[idx]
215 } else {
216 let internal_types = [
217 (
218 EvidenceType::Document,
219 EvidenceSource::InternalClientPrepared,
220 ),
221 (
222 EvidenceType::Invoice,
223 EvidenceSource::InternalClientPrepared,
224 ),
225 (
226 EvidenceType::SystemExtract,
227 EvidenceSource::InternalClientPrepared,
228 ),
229 (EvidenceType::Analysis, EvidenceSource::AuditorPrepared),
230 (EvidenceType::Recalculation, EvidenceSource::AuditorPrepared),
231 (
232 EvidenceType::MeetingMinutes,
233 EvidenceSource::InternalClientPrepared,
234 ),
235 (EvidenceType::Email, EvidenceSource::InternalClientPrepared),
236 ];
237 let idx = self.rng.random_range(0..internal_types.len());
238 internal_types[idx]
239 }
240 }
241
242 fn generate_evidence_title(&mut self, evidence_type: EvidenceType) -> String {
244 let titles = match evidence_type {
245 EvidenceType::Confirmation => vec![
246 "Bank Confirmation - Primary Account",
247 "AR Confirmation - Major Customer",
248 "AP Confirmation - Key Vendor",
249 "Legal Confirmation",
250 "Investment Confirmation",
251 ],
252 EvidenceType::BankStatement => vec![
253 "Bank Statement - Operating Account",
254 "Bank Statement - Payroll Account",
255 "Bank Statement - Investment Account",
256 "Bank Statement - Foreign Currency",
257 ],
258 EvidenceType::Invoice => vec![
259 "Vendor Invoice Sample",
260 "Customer Invoice Sample",
261 "Intercompany Invoice",
262 "Service Invoice",
263 ],
264 EvidenceType::Contract => vec![
265 "Customer Contract",
266 "Vendor Agreement",
267 "Lease Agreement",
268 "Employment Contract Sample",
269 "Loan Agreement",
270 ],
271 EvidenceType::Document => vec![
272 "Supporting Documentation",
273 "Source Document",
274 "Transaction Support",
275 "Authorization Document",
276 ],
277 EvidenceType::Analysis => vec![
278 "Analytical Review",
279 "Variance Analysis",
280 "Trend Analysis",
281 "Ratio Analysis",
282 "Account Reconciliation Review",
283 ],
284 EvidenceType::SystemExtract => vec![
285 "ERP System Extract",
286 "GL Detail Extract",
287 "Transaction Log Extract",
288 "User Access Report",
289 ],
290 EvidenceType::MeetingMinutes => vec![
291 "Board Meeting Minutes",
292 "Audit Committee Minutes",
293 "Management Meeting Notes",
294 ],
295 EvidenceType::Email => vec![
296 "Management Inquiry Response",
297 "Confirmation Follow-up",
298 "Exception Explanation",
299 ],
300 EvidenceType::Recalculation => vec![
301 "Depreciation Recalculation",
302 "Interest Recalculation",
303 "Tax Provision Recalculation",
304 "Allowance Recalculation",
305 ],
306 EvidenceType::LegalLetter => vec!["Attorney Response Letter", "Litigation Summary"],
307 EvidenceType::ManagementRepresentation => vec![
308 "Management Representation Letter",
309 "Specific Representation",
310 ],
311 EvidenceType::SpecialistReport => vec![
312 "Valuation Specialist Report",
313 "Actuary Report",
314 "IT Specialist Assessment",
315 ],
316 EvidenceType::PhysicalObservation => vec![
317 "Inventory Count Observation",
318 "Fixed Asset Inspection",
319 "Physical Verification",
320 ],
321 };
322
323 let idx = self.rng.random_range(0..titles.len());
324 titles[idx].to_string()
325 }
326
327 fn generate_evidence_description(
329 &mut self,
330 evidence_type: EvidenceType,
331 source: EvidenceSource,
332 ) -> String {
333 let source_desc = source.description();
334 match evidence_type {
335 EvidenceType::Confirmation => {
336 format!("External confirmation {}. Response received and agreed to client records.", source_desc)
337 }
338 EvidenceType::BankStatement => {
339 format!("Bank statement {}. Statement obtained for period-end reconciliation.", source_desc)
340 }
341 EvidenceType::Invoice => {
342 "Invoice selected as part of sample testing. Examined for appropriate approval, accuracy, and proper period recording.".into()
343 }
344 EvidenceType::Analysis => {
345 "Auditor-prepared analytical procedure. Expectations developed based on prior year, industry data, and management budgets.".into()
346 }
347 EvidenceType::SystemExtract => {
348 format!("System report {}. Extract validated for completeness and accuracy.", source_desc)
349 }
350 _ => format!("Supporting documentation {}.", source_desc),
351 }
352 }
353
354 fn generate_reliability_assessment(&mut self, source: EvidenceSource) -> ReliabilityAssessment {
356 let base_reliability = source.inherent_reliability();
357
358 let independence = base_reliability;
359 let controls = if self.rng.random::<f64>() < self.config.high_reliability_probability {
360 ReliabilityLevel::High
361 } else {
362 ReliabilityLevel::Medium
363 };
364 let qualifications = if self.rng.random::<f64>() < 0.7 {
365 ReliabilityLevel::High
366 } else {
367 ReliabilityLevel::Medium
368 };
369 let objectivity = match source {
370 EvidenceSource::ExternalThirdParty | EvidenceSource::AuditorPrepared => {
371 ReliabilityLevel::High
372 }
373 _ => {
374 if self.rng.random::<f64>() < 0.5 {
375 ReliabilityLevel::Medium
376 } else {
377 ReliabilityLevel::Low
378 }
379 }
380 };
381
382 let notes = match base_reliability {
383 ReliabilityLevel::High => {
384 "Evidence obtained from independent source with high reliability"
385 }
386 ReliabilityLevel::Medium => "Evidence obtained from client with adequate controls",
387 ReliabilityLevel::Low => "Internal evidence requires corroboration",
388 };
389
390 ReliabilityAssessment::new(independence, controls, qualifications, objectivity, notes)
391 }
392
393 fn generate_file_path(&mut self, evidence_type: EvidenceType, counter: u32) -> String {
395 let extension = match evidence_type {
396 EvidenceType::SystemExtract => "xlsx",
397 EvidenceType::Analysis | EvidenceType::Recalculation => "xlsx",
398 EvidenceType::MeetingMinutes | EvidenceType::ManagementRepresentation => "pdf",
399 EvidenceType::Email => "msg",
400 _ => {
401 if self.rng.random::<f64>() < 0.6 {
402 "pdf"
403 } else {
404 "xlsx"
405 }
406 }
407 };
408
409 format!("/evidence/EV-{:06}.{}", counter, extension)
410 }
411
412 fn select_team_member(&mut self, team_members: &[String]) -> String {
414 if team_members.is_empty() {
415 format!("STAFF{:03}", self.rng.random_range(1..100))
416 } else {
417 let idx = self.rng.random_range(0..team_members.len());
418 team_members[idx].clone()
419 }
420 }
421
422 fn random_assertion(&mut self) -> Assertion {
424 let assertions = [
425 Assertion::Occurrence,
426 Assertion::Completeness,
427 Assertion::Accuracy,
428 Assertion::Cutoff,
429 Assertion::Classification,
430 Assertion::Existence,
431 Assertion::RightsAndObligations,
432 Assertion::ValuationAndAllocation,
433 Assertion::PresentationAndDisclosure,
434 ];
435 let idx = self.rng.random_range(0..assertions.len());
436 assertions[idx]
437 }
438
439 fn generate_ai_terms(
441 &mut self,
442 evidence_type: EvidenceType,
443 ) -> std::collections::HashMap<String, String> {
444 let mut terms = std::collections::HashMap::new();
445
446 let default_end = NaiveDate::from_ymd_opt(2025, 12, 31).expect("valid date");
447 let period_end = self.config.period_end_date.unwrap_or(default_end);
448 let period_end_str = period_end.format("%Y-%m-%d").to_string();
449 let period_start_str = NaiveDate::from_ymd_opt(period_end.year(), 1, 1)
451 .expect("valid date")
452 .format("%Y-%m-%d")
453 .to_string();
454
455 match evidence_type {
456 EvidenceType::Invoice => {
457 terms.insert(
458 "invoice_number".into(),
459 format!("INV-{:06}", self.rng.random_range(100000..999999)),
460 );
461 terms.insert(
462 "amount".into(),
463 format!("{:.2}", self.rng.random_range(1000.0..100000.0)),
464 );
465 terms.insert("vendor".into(), "Extracted Vendor Name".into());
466 }
467 EvidenceType::Contract => {
468 terms.insert("effective_date".into(), period_start_str);
469 terms.insert(
470 "term_years".into(),
471 format!("{}", self.rng.random_range(1..5)),
472 );
473 terms.insert(
474 "total_value".into(),
475 format!("{:.2}", self.rng.random_range(50000.0..500000.0)),
476 );
477 }
478 EvidenceType::BankStatement => {
479 terms.insert(
480 "ending_balance".into(),
481 format!("{:.2}", self.rng.random_range(100000.0..10000000.0)),
482 );
483 terms.insert("statement_date".into(), period_end_str);
484 }
485 _ => {
486 terms.insert("document_date".into(), period_end_str);
487 terms.insert(
488 "reference".into(),
489 format!("REF-{:06}", self.rng.random_range(100000..999999)),
490 );
491 }
492 }
493
494 terms
495 }
496
497 fn generate_ai_summary(&mut self, evidence_type: EvidenceType) -> String {
499 match evidence_type {
500 EvidenceType::Invoice => {
501 "Invoice for goods/services with standard payment terms. Amount within expected range.".into()
502 }
503 EvidenceType::Contract => {
504 "Multi-year agreement with standard commercial terms. Key provisions identified.".into()
505 }
506 EvidenceType::BankStatement => {
507 "Month-end bank statement showing reconciled balance. No unusual items noted.".into()
508 }
509 _ => "Document reviewed and key data points extracted.".into(),
510 }
511 }
512}
513
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Obtained-date shared by every test case.
    fn obtained_on() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 15).unwrap()
    }

    /// Single-member audit team shared by every test case.
    fn team() -> Vec<String> {
        vec![String::from("STAFF001")]
    }

    /// A generated item carries a reference, a title and a file size.
    #[test]
    fn test_evidence_generation() {
        let mut generator = EvidenceGenerator::new(42);
        let members = team();

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            None,
            &[Assertion::Occurrence],
            &members,
            obtained_on(),
        );

        assert!(!evidence.evidence_ref.is_empty());
        assert!(!evidence.title.is_empty());
        assert!(evidence.file_size.is_some());
    }

    /// Every generated item has non-empty reliability notes.
    #[test]
    fn test_evidence_reliability() {
        let mut generator = EvidenceGenerator::new(42);
        let members = team();

        for _ in 0..10 {
            let evidence =
                generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

            assert!(!evidence.reliability_assessment.notes.is_empty());
        }
    }

    /// With extraction probability 1.0 the AI fields are always populated.
    #[test]
    fn test_evidence_with_ai_extraction() {
        let config = EvidenceGeneratorConfig {
            ai_extraction_probability: 1.0,
            ..Default::default()
        };
        let mut generator = EvidenceGenerator::with_config(42, config);
        let members = team();

        let evidence =
            generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

        assert!(evidence.ai_extracted_terms.is_some());
        assert!(evidence.ai_confidence.is_some());
        assert!(evidence.ai_summary.is_some());
    }

    /// Passing a workpaper id links the evidence to that workpaper.
    #[test]
    fn test_evidence_workpaper_link() {
        let mut generator = EvidenceGenerator::new(42);
        let members = team();
        let workpaper_id = Uuid::new_v4();

        let evidence = generator.generate_evidence(
            Uuid::new_v4(),
            Some(workpaper_id),
            &[Assertion::Completeness],
            &members,
            obtained_on(),
        );

        assert!(evidence.linked_workpapers.contains(&workpaper_id));
    }
}