1use chrono::{Duration, NaiveDate};
7use datasynth_core::utils::seeded_rng;
8use rand::Rng;
9use rand_chacha::ChaCha8Rng;
10use uuid::Uuid;
11
12use datasynth_core::models::audit::{
13 Assertion, AuditEngagement, AuditEvidence, EvidenceSource, EvidenceType, ReliabilityAssessment,
14 ReliabilityLevel, Workpaper,
15};
16
/// Tunable parameters for [`EvidenceGenerator`].
#[derive(Debug, Clone)]
pub struct EvidenceGeneratorConfig {
    /// Inclusive `(min, max)` bounds for the number of evidence items drawn
    /// per workpaper by `generate_evidence_for_workpaper`.
    pub evidence_per_workpaper: (u32, u32),
    /// Chance (compared against a uniform `f64` draw) that an evidence item
    /// is picked from the external type/source table rather than the
    /// internal one.
    pub external_third_party_probability: f64,
    /// Chance that the second argument passed to `ReliabilityAssessment::new`
    /// (the local named `controls`) is `High`; otherwise it is `Medium`.
    pub high_reliability_probability: f64,
    /// Chance that an evidence item gets AI-extraction data (terms,
    /// confidence and summary) attached.
    pub ai_extraction_probability: f64,
    /// Inclusive `(min, max)` bounds for the generated file size
    /// (presumably bytes; confirm against `AuditEvidence::with_file_info`).
    pub file_size_range: (u64, u64),
}
31
32impl Default for EvidenceGeneratorConfig {
33 fn default() -> Self {
34 Self {
35 evidence_per_workpaper: (1, 5),
36 external_third_party_probability: 0.20,
37 high_reliability_probability: 0.40,
38 ai_extraction_probability: 0.15,
39 file_size_range: (10_000, 5_000_000),
40 }
41 }
42}
43
/// Generates synthetic [`AuditEvidence`] records from a seeded random
/// number generator.
pub struct EvidenceGenerator {
    /// Source of every random draw made by the generator.
    rng: ChaCha8Rng,
    /// Probabilities and ranges that steer generation.
    config: EvidenceGeneratorConfig,
    /// Number of evidence items produced so far; incremented once per
    /// `generate_evidence` call and used for the `EV-NNNNNN` reference and
    /// file name.
    evidence_counter: u32,
}
50
51impl EvidenceGenerator {
52 pub fn new(seed: u64) -> Self {
54 Self {
55 rng: seeded_rng(seed, 0),
56 config: EvidenceGeneratorConfig::default(),
57 evidence_counter: 0,
58 }
59 }
60
61 pub fn with_config(seed: u64, config: EvidenceGeneratorConfig) -> Self {
63 Self {
64 rng: seeded_rng(seed, 0),
65 config,
66 evidence_counter: 0,
67 }
68 }
69
70 pub fn generate_evidence_for_workpaper(
72 &mut self,
73 workpaper: &Workpaper,
74 team_members: &[String],
75 base_date: NaiveDate,
76 ) -> Vec<AuditEvidence> {
77 let count = self.rng.random_range(
78 self.config.evidence_per_workpaper.0..=self.config.evidence_per_workpaper.1,
79 );
80
81 (0..count)
82 .map(|i| {
83 self.generate_evidence(
84 workpaper.engagement_id,
85 Some(workpaper.workpaper_id),
86 &workpaper.assertions_tested,
87 team_members,
88 base_date + Duration::days(i as i64),
89 )
90 })
91 .collect()
92 }
93
94 pub fn generate_evidence(
96 &mut self,
97 engagement_id: Uuid,
98 workpaper_id: Option<Uuid>,
99 assertions: &[Assertion],
100 team_members: &[String],
101 obtained_date: NaiveDate,
102 ) -> AuditEvidence {
103 self.evidence_counter += 1;
104
105 let (evidence_type, source_type) = self.select_evidence_type_and_source();
107 let title = self.generate_evidence_title(evidence_type);
108
109 let mut evidence = AuditEvidence::new(engagement_id, evidence_type, source_type, &title);
110
111 evidence.evidence_ref = format!("EV-{:06}", self.evidence_counter);
112
113 let description = self.generate_evidence_description(evidence_type, source_type);
115 evidence = evidence.with_description(&description);
116
117 let obtainer = self.select_team_member(team_members);
119 evidence = evidence.with_obtained_by(&obtainer, obtained_date);
120
121 let file_size = self
123 .rng
124 .random_range(self.config.file_size_range.0..=self.config.file_size_range.1);
125 let file_path = self.generate_file_path(evidence_type, self.evidence_counter);
126 let file_hash = format!("sha256:{:064x}", self.rng.random::<u128>());
127 evidence = evidence.with_file_info(&file_path, &file_hash, file_size);
128
129 let reliability = self.generate_reliability_assessment(source_type);
131 evidence = evidence.with_reliability(reliability);
132
133 if assertions.is_empty() {
135 evidence = evidence.with_assertions(vec![self.random_assertion()]);
136 } else {
137 evidence = evidence.with_assertions(assertions.to_vec());
138 }
139
140 if let Some(wp_id) = workpaper_id {
142 evidence.link_workpaper(wp_id);
143 }
144
145 if self.rng.random::<f64>() < self.config.ai_extraction_probability {
147 let terms = self.generate_ai_terms(evidence_type);
148 let confidence = self.rng.random_range(0.75..0.98);
149 let summary = self.generate_ai_summary(evidence_type);
150 evidence = evidence.with_ai_extraction(terms, confidence, &summary);
151 }
152
153 evidence
154 }
155
156 pub fn generate_evidence_for_engagement(
158 &mut self,
159 engagement: &AuditEngagement,
160 workpapers: &[Workpaper],
161 team_members: &[String],
162 ) -> Vec<AuditEvidence> {
163 let mut all_evidence = Vec::new();
164
165 for workpaper in workpapers {
166 let evidence = self.generate_evidence_for_workpaper(
167 workpaper,
168 team_members,
169 workpaper.preparer_date,
170 );
171 all_evidence.extend(evidence);
172 }
173
174 let standalone_count = self.rng.random_range(5..15);
176 for i in 0..standalone_count {
177 let date = engagement.fieldwork_start + Duration::days(i as i64 * 3);
178 let evidence =
179 self.generate_evidence(engagement.engagement_id, None, &[], team_members, date);
180 all_evidence.push(evidence);
181 }
182
183 all_evidence
184 }
185
186 fn select_evidence_type_and_source(&mut self) -> (EvidenceType, EvidenceSource) {
188 let is_external = self.rng.random::<f64>() < self.config.external_third_party_probability;
189
190 if is_external {
191 let external_types = [
192 (
193 EvidenceType::Confirmation,
194 EvidenceSource::ExternalThirdParty,
195 ),
196 (
197 EvidenceType::BankStatement,
198 EvidenceSource::ExternalThirdParty,
199 ),
200 (
201 EvidenceType::LegalLetter,
202 EvidenceSource::ExternalThirdParty,
203 ),
204 (
205 EvidenceType::Contract,
206 EvidenceSource::ExternalClientProvided,
207 ),
208 ];
209 let idx = self.rng.random_range(0..external_types.len());
210 external_types[idx]
211 } else {
212 let internal_types = [
213 (
214 EvidenceType::Document,
215 EvidenceSource::InternalClientPrepared,
216 ),
217 (
218 EvidenceType::Invoice,
219 EvidenceSource::InternalClientPrepared,
220 ),
221 (
222 EvidenceType::SystemExtract,
223 EvidenceSource::InternalClientPrepared,
224 ),
225 (EvidenceType::Analysis, EvidenceSource::AuditorPrepared),
226 (EvidenceType::Recalculation, EvidenceSource::AuditorPrepared),
227 (
228 EvidenceType::MeetingMinutes,
229 EvidenceSource::InternalClientPrepared,
230 ),
231 (EvidenceType::Email, EvidenceSource::InternalClientPrepared),
232 ];
233 let idx = self.rng.random_range(0..internal_types.len());
234 internal_types[idx]
235 }
236 }
237
238 fn generate_evidence_title(&mut self, evidence_type: EvidenceType) -> String {
240 let titles = match evidence_type {
241 EvidenceType::Confirmation => vec![
242 "Bank Confirmation - Primary Account",
243 "AR Confirmation - Major Customer",
244 "AP Confirmation - Key Vendor",
245 "Legal Confirmation",
246 "Investment Confirmation",
247 ],
248 EvidenceType::BankStatement => vec![
249 "Bank Statement - Operating Account",
250 "Bank Statement - Payroll Account",
251 "Bank Statement - Investment Account",
252 "Bank Statement - Foreign Currency",
253 ],
254 EvidenceType::Invoice => vec![
255 "Vendor Invoice Sample",
256 "Customer Invoice Sample",
257 "Intercompany Invoice",
258 "Service Invoice",
259 ],
260 EvidenceType::Contract => vec![
261 "Customer Contract",
262 "Vendor Agreement",
263 "Lease Agreement",
264 "Employment Contract Sample",
265 "Loan Agreement",
266 ],
267 EvidenceType::Document => vec![
268 "Supporting Documentation",
269 "Source Document",
270 "Transaction Support",
271 "Authorization Document",
272 ],
273 EvidenceType::Analysis => vec![
274 "Analytical Review",
275 "Variance Analysis",
276 "Trend Analysis",
277 "Ratio Analysis",
278 "Account Reconciliation Review",
279 ],
280 EvidenceType::SystemExtract => vec![
281 "ERP System Extract",
282 "GL Detail Extract",
283 "Transaction Log Extract",
284 "User Access Report",
285 ],
286 EvidenceType::MeetingMinutes => vec![
287 "Board Meeting Minutes",
288 "Audit Committee Minutes",
289 "Management Meeting Notes",
290 ],
291 EvidenceType::Email => vec![
292 "Management Inquiry Response",
293 "Confirmation Follow-up",
294 "Exception Explanation",
295 ],
296 EvidenceType::Recalculation => vec![
297 "Depreciation Recalculation",
298 "Interest Recalculation",
299 "Tax Provision Recalculation",
300 "Allowance Recalculation",
301 ],
302 EvidenceType::LegalLetter => vec!["Attorney Response Letter", "Litigation Summary"],
303 EvidenceType::ManagementRepresentation => vec![
304 "Management Representation Letter",
305 "Specific Representation",
306 ],
307 EvidenceType::SpecialistReport => vec![
308 "Valuation Specialist Report",
309 "Actuary Report",
310 "IT Specialist Assessment",
311 ],
312 EvidenceType::PhysicalObservation => vec![
313 "Inventory Count Observation",
314 "Fixed Asset Inspection",
315 "Physical Verification",
316 ],
317 };
318
319 let idx = self.rng.random_range(0..titles.len());
320 titles[idx].to_string()
321 }
322
323 fn generate_evidence_description(
325 &mut self,
326 evidence_type: EvidenceType,
327 source: EvidenceSource,
328 ) -> String {
329 let source_desc = source.description();
330 match evidence_type {
331 EvidenceType::Confirmation => {
332 format!("External confirmation {}. Response received and agreed to client records.", source_desc)
333 }
334 EvidenceType::BankStatement => {
335 format!("Bank statement {}. Statement obtained for period-end reconciliation.", source_desc)
336 }
337 EvidenceType::Invoice => {
338 "Invoice selected as part of sample testing. Examined for appropriate approval, accuracy, and proper period recording.".into()
339 }
340 EvidenceType::Analysis => {
341 "Auditor-prepared analytical procedure. Expectations developed based on prior year, industry data, and management budgets.".into()
342 }
343 EvidenceType::SystemExtract => {
344 format!("System report {}. Extract validated for completeness and accuracy.", source_desc)
345 }
346 _ => format!("Supporting documentation {}.", source_desc),
347 }
348 }
349
350 fn generate_reliability_assessment(&mut self, source: EvidenceSource) -> ReliabilityAssessment {
352 let base_reliability = source.inherent_reliability();
353
354 let independence = base_reliability;
355 let controls = if self.rng.random::<f64>() < self.config.high_reliability_probability {
356 ReliabilityLevel::High
357 } else {
358 ReliabilityLevel::Medium
359 };
360 let qualifications = if self.rng.random::<f64>() < 0.7 {
361 ReliabilityLevel::High
362 } else {
363 ReliabilityLevel::Medium
364 };
365 let objectivity = match source {
366 EvidenceSource::ExternalThirdParty | EvidenceSource::AuditorPrepared => {
367 ReliabilityLevel::High
368 }
369 _ => {
370 if self.rng.random::<f64>() < 0.5 {
371 ReliabilityLevel::Medium
372 } else {
373 ReliabilityLevel::Low
374 }
375 }
376 };
377
378 let notes = match base_reliability {
379 ReliabilityLevel::High => {
380 "Evidence obtained from independent source with high reliability"
381 }
382 ReliabilityLevel::Medium => "Evidence obtained from client with adequate controls",
383 ReliabilityLevel::Low => "Internal evidence requires corroboration",
384 };
385
386 ReliabilityAssessment::new(independence, controls, qualifications, objectivity, notes)
387 }
388
389 fn generate_file_path(&mut self, evidence_type: EvidenceType, counter: u32) -> String {
391 let extension = match evidence_type {
392 EvidenceType::SystemExtract => "xlsx",
393 EvidenceType::Analysis | EvidenceType::Recalculation => "xlsx",
394 EvidenceType::MeetingMinutes | EvidenceType::ManagementRepresentation => "pdf",
395 EvidenceType::Email => "msg",
396 _ => {
397 if self.rng.random::<f64>() < 0.6 {
398 "pdf"
399 } else {
400 "xlsx"
401 }
402 }
403 };
404
405 format!("/evidence/EV-{:06}.{}", counter, extension)
406 }
407
408 fn select_team_member(&mut self, team_members: &[String]) -> String {
410 if team_members.is_empty() {
411 format!("STAFF{:03}", self.rng.random_range(1..100))
412 } else {
413 let idx = self.rng.random_range(0..team_members.len());
414 team_members[idx].clone()
415 }
416 }
417
418 fn random_assertion(&mut self) -> Assertion {
420 let assertions = [
421 Assertion::Occurrence,
422 Assertion::Completeness,
423 Assertion::Accuracy,
424 Assertion::Cutoff,
425 Assertion::Classification,
426 Assertion::Existence,
427 Assertion::RightsAndObligations,
428 Assertion::ValuationAndAllocation,
429 Assertion::PresentationAndDisclosure,
430 ];
431 let idx = self.rng.random_range(0..assertions.len());
432 assertions[idx]
433 }
434
435 fn generate_ai_terms(
437 &mut self,
438 evidence_type: EvidenceType,
439 ) -> std::collections::HashMap<String, String> {
440 let mut terms = std::collections::HashMap::new();
441
442 match evidence_type {
443 EvidenceType::Invoice => {
444 terms.insert(
445 "invoice_number".into(),
446 format!("INV-{:06}", self.rng.random_range(100000..999999)),
447 );
448 terms.insert(
449 "amount".into(),
450 format!("{:.2}", self.rng.random_range(1000.0..100000.0)),
451 );
452 terms.insert("vendor".into(), "Extracted Vendor Name".into());
453 }
454 EvidenceType::Contract => {
455 terms.insert("effective_date".into(), "2025-01-01".into());
456 terms.insert(
457 "term_years".into(),
458 format!("{}", self.rng.random_range(1..5)),
459 );
460 terms.insert(
461 "total_value".into(),
462 format!("{:.2}", self.rng.random_range(50000.0..500000.0)),
463 );
464 }
465 EvidenceType::BankStatement => {
466 terms.insert(
467 "ending_balance".into(),
468 format!("{:.2}", self.rng.random_range(100000.0..10000000.0)),
469 );
470 terms.insert("statement_date".into(), "2025-12-31".into());
471 }
472 _ => {
473 terms.insert("document_date".into(), "2025-12-31".into());
474 terms.insert(
475 "reference".into(),
476 format!("REF-{:06}", self.rng.random_range(100000..999999)),
477 );
478 }
479 }
480
481 terms
482 }
483
484 fn generate_ai_summary(&mut self, evidence_type: EvidenceType) -> String {
486 match evidence_type {
487 EvidenceType::Invoice => {
488 "Invoice for goods/services with standard payment terms. Amount within expected range.".into()
489 }
490 EvidenceType::Contract => {
491 "Multi-year agreement with standard commercial terms. Key provisions identified.".into()
492 }
493 EvidenceType::BankStatement => {
494 "Month-end bank statement showing reconciled balance. No unusual items noted.".into()
495 }
496 _ => "Document reviewed and key data points extracted.".into(),
497 }
498 }
499}
500
#[cfg(test)]
// `unwrap` is acceptable here: the fixture date is a valid literal.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Seed shared by every test.
    const SEED: u64 = 42;

    /// Fixed obtained date used by every test.
    fn obtained_on() -> NaiveDate {
        NaiveDate::from_ymd_opt(2025, 1, 15).unwrap()
    }

    /// Single-member team used by every test.
    fn team() -> Vec<String> {
        vec![String::from("STAFF001")]
    }

    /// A generated item carries a reference, a title and a file size.
    #[test]
    fn test_evidence_generation() {
        let mut generator = EvidenceGenerator::new(SEED);
        let item = generator.generate_evidence(
            Uuid::new_v4(),
            None,
            &[Assertion::Occurrence],
            &team(),
            obtained_on(),
        );

        assert!(!item.evidence_ref.is_empty());
        assert!(!item.title.is_empty());
        assert!(item.file_size.is_some());
    }

    /// Every generated item has non-empty reliability notes.
    #[test]
    fn test_evidence_reliability() {
        let mut generator = EvidenceGenerator::new(SEED);
        let members = team();

        for _ in 0..10 {
            let item =
                generator.generate_evidence(Uuid::new_v4(), None, &[], &members, obtained_on());

            assert!(!item.reliability_assessment.notes.is_empty());
        }
    }

    /// With the AI-extraction probability forced to 1.0, all three AI
    /// fields are populated.
    #[test]
    fn test_evidence_with_ai_extraction() {
        let always_extract = EvidenceGeneratorConfig {
            ai_extraction_probability: 1.0,
            ..Default::default()
        };
        let mut generator = EvidenceGenerator::with_config(SEED, always_extract);

        let item = generator.generate_evidence(Uuid::new_v4(), None, &[], &team(), obtained_on());

        assert!(item.ai_extracted_terms.is_some());
        assert!(item.ai_confidence.is_some());
        assert!(item.ai_summary.is_some());
    }

    /// Passing a workpaper id links the evidence to that workpaper.
    #[test]
    fn test_evidence_workpaper_link() {
        let mut generator = EvidenceGenerator::new(SEED);
        let workpaper_id = Uuid::new_v4();

        let item = generator.generate_evidence(
            Uuid::new_v4(),
            Some(workpaper_id),
            &[Assertion::Completeness],
            &team(),
            obtained_on(),
        );

        assert!(item.linked_workpapers.contains(&workpaper_id));
    }
}