1use chrono::NaiveDate;
8use datasynth_core::distributions::{AmountDistributionConfig, AmountSampler};
9use datasynth_core::models::{PriorYearComparative, PriorYearFinding, PriorYearSummary};
10use datasynth_core::utils::seeded_rng;
11use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use rand_distr::{Distribution, Normal};
15use rust_decimal::Decimal;
16
/// Catalogue of finding descriptions, as `(finding_type, risk_area, description)` tuples.
///
/// `PriorYearGenerator::pick_description` filters this table by finding type
/// and risk area. The table holds exactly one entry for each of the
/// 4 finding types x 5 risk areas (20 entries in total).
const FINDING_DESCRIPTIONS: &[(&str, &str, &str)] = &[
    // --- control_deficiency ---
    (
        "control_deficiency",
        "revenue",
        "Insufficient segregation of duties in revenue posting process",
    ),
    (
        "control_deficiency",
        "receivables",
        "Lack of timely reconciliation of accounts receivable subsidiary ledger",
    ),
    (
        "control_deficiency",
        "payables",
        "Missing secondary approval for vendor master data changes",
    ),
    (
        "control_deficiency",
        "inventory",
        "Cycle count procedures not performed on schedule for high-value items",
    ),
    (
        "control_deficiency",
        "estimates",
        "No formal review process for management's key accounting estimates",
    ),
    // --- misstatement ---
    (
        "misstatement",
        "revenue",
        "Revenue recognised before transfer of control per ASC 606 criteria",
    ),
    (
        "misstatement",
        "receivables",
        "Overstatement of accounts receivable due to improper cutoff at period end",
    ),
    (
        "misstatement",
        "payables",
        "Unrecorded liabilities identified through subsequent disbursement testing",
    ),
    (
        "misstatement",
        "inventory",
        "Inventory obsolescence reserve understated based on ageing analysis",
    ),
    (
        "misstatement",
        "estimates",
        "Fair value measurement for Level 3 assets not supported by observable inputs",
    ),
    // --- significant_deficiency ---
    (
        "significant_deficiency",
        "revenue",
        "Percentage-of-completion estimates lack corroborating project data",
    ),
    (
        "significant_deficiency",
        "receivables",
        "Expected credit loss model uses outdated forward-looking information",
    ),
    (
        "significant_deficiency",
        "payables",
        "Automated three-way match tolerance set above materiality threshold",
    ),
    (
        "significant_deficiency",
        "inventory",
        "Standard cost variances not analysed or allocated on a timely basis",
    ),
    (
        "significant_deficiency",
        "estimates",
        "Inadequate documentation of key assumptions in impairment model",
    ),
    // --- material_weakness ---
    (
        "material_weakness",
        "revenue",
        "Pervasive override of revenue recognition controls by senior management",
    ),
    (
        "material_weakness",
        "receivables",
        "Systematic failure to record allowance for doubtful accounts",
    ),
    (
        "material_weakness",
        "payables",
        "Duplicate payments processed without detection across multiple periods",
    ),
    (
        "material_weakness",
        "inventory",
        "Physical inventory counts not reconciled to perpetual records for the full year",
    ),
    (
        "material_weakness",
        "estimates",
        "Material misstatement in goodwill impairment due to unsubstantiated growth assumptions",
    ),
];
128
/// Pool of key audit matter (KAM) titles.
///
/// `PriorYearGenerator::generate_summary` draws between two and four distinct
/// entries from this pool and reports them in the order they appear here.
const KAM_POOL: &[&str] = &[
    "Revenue recognition",
    "Goodwill impairment",
    "Expected credit losses",
    "Inventory valuation",
    "Provisions and contingencies",
    "Fair value measurement of financial instruments",
    "Business combination purchase price allocation",
    "Going concern assessment",
    "Tax provisions and uncertain tax positions",
    "Lease accounting transition",
];
142
/// Finding types paired with *cumulative* probability thresholds.
///
/// Consumed by `weighted_pick`, which returns the first entry whose threshold
/// exceeds a uniform roll in `[0, 1)`. The effective weights are therefore
/// 40% / 30% / 20% / 10% in listed order.
const FINDING_TYPES: &[(&str, f64)] = &[
    ("control_deficiency", 0.40),
    ("misstatement", 0.70),
    ("significant_deficiency", 0.90),
    ("material_weakness", 1.00),
];
150
/// Finding statuses paired with *cumulative* probability thresholds.
///
/// Consumed by `weighted_pick`; the effective weights are
/// 50% / 20% / 20% / 10% in listed order.
const FINDING_STATUSES: &[(&str, f64)] = &[
    ("remediated", 0.50),
    ("open", 0.70),
    ("partially_remediated", 0.90),
    ("recurring", 1.00),
];
158
/// Risk areas paired with *cumulative* probability thresholds.
///
/// Consumed by `weighted_pick`; the effective weights are
/// 30% / 20% / 20% / 15% / 15% in listed order.
const RISK_AREAS: &[(&str, f64)] = &[
    ("revenue", 0.30),
    ("receivables", 0.50),
    ("estimates", 0.70),
    ("payables", 0.85),
    ("inventory", 1.00),
];
167
/// Seeded generator for prior-year audit data: account comparatives,
/// prior-year findings and the summary that bundles both.
pub struct PriorYearGenerator {
    /// Random source for growth rates, weighted picks, counts and shuffles.
    rng: ChaCha8Rng,
    /// Supplies the `finding_id` of every generated finding.
    uuid_factory: DeterministicUuidFactory,
    /// Supplies the `original_amount` of findings that carry an amount.
    amount_sampler: AmountSampler,
}
174
175impl PriorYearGenerator {
176 pub fn new(seed: u64) -> Self {
178 Self {
179 rng: seeded_rng(seed, 0x4E00),
180 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::PriorYear),
181 amount_sampler: AmountSampler::with_benford(
182 seed.wrapping_add(0x4E01),
183 AmountDistributionConfig::default(),
184 ),
185 }
186 }
187
188 pub fn generate_comparatives(
194 &mut self,
195 entity_code: &str,
196 fiscal_year: i32,
197 current_balances: &[(String, String, Decimal)],
198 ) -> Vec<PriorYearComparative> {
199 let normal = Normal::new(0.03_f64, 0.12_f64).expect("valid normal params");
200 let period = format!("{}-12", fiscal_year);
201
202 current_balances
203 .iter()
204 .map(|(code, name, current)| {
205 let growth: f64 = normal.sample(&mut self.rng);
208 let divisor = 1.0 + growth;
209 let current_f64 = decimal_to_f64(*current);
210
211 let raw_prior = if divisor.abs() < 1e-10 {
213 current_f64
214 } else {
215 current_f64 / divisor
216 };
217
218 let prior_f64 = if raw_prior.abs() > 10.0 && self.rng.random_bool(0.30) {
224 benford_first_digit_adjust(raw_prior, &mut self.rng)
225 } else {
226 raw_prior
227 };
228
229 let prior = f64_to_decimal(prior_f64);
230 let variance = *current - prior;
231 let variance_pct = if prior.is_zero() {
232 0.0
233 } else {
234 let prior_abs_f64 = decimal_to_f64(prior).abs();
235 if prior_abs_f64 < 1e-10 {
236 0.0
237 } else {
238 decimal_to_f64(variance) / prior_abs_f64 * 100.0
239 }
240 };
241
242 PriorYearComparative {
243 account_code: code.clone(),
244 account_name: name.clone(),
245 current_year_amount: *current,
246 prior_year_amount: prior,
247 variance,
248 variance_pct,
249 entity_code: entity_code.to_string(),
250 period: period.clone(),
251 }
252 })
253 .collect()
254 }
255
256 pub fn generate_findings(
261 &mut self,
262 entity_code: &str,
263 fiscal_year: i32,
264 ) -> Vec<PriorYearFinding> {
265 let count = self.rng.random_range(3..=8_usize);
266 let prior_year = fiscal_year - 1;
267
268 (0..count)
269 .map(|_| {
270 let finding_type = weighted_pick(&mut self.rng, FINDING_TYPES);
271 let status = weighted_pick(&mut self.rng, FINDING_STATUSES);
272 let risk_area = weighted_pick(&mut self.rng, RISK_AREAS);
273
274 let description = self.pick_description(finding_type, risk_area);
275
276 let follow_up_required = status == "open" || status == "recurring";
278
279 let remediation_date = if status == "remediated" || status == "partially_remediated"
281 {
282 let day_offset = self.rng.random_range(30..=270_i64);
285 NaiveDate::from_ymd_opt(prior_year, 12, 31)
286 .and_then(|d| d.checked_add_signed(chrono::Duration::days(day_offset)))
287 } else {
288 None
289 };
290
291 let has_amount = finding_type == "misstatement"
294 || finding_type == "material_weakness"
295 || self.rng.random_bool(0.3);
296 let original_amount = if has_amount {
297 Some(self.amount_sampler.sample())
298 } else {
299 None
300 };
301
302 let _entity = entity_code; PriorYearFinding {
304 finding_id: self.uuid_factory.next(),
305 fiscal_year: prior_year,
306 finding_type: finding_type.to_string(),
307 description,
308 status: status.to_string(),
309 risk_area: risk_area.to_string(),
310 original_amount,
311 remediation_date,
312 follow_up_required,
313 }
314 })
315 .collect()
316 }
317
318 pub fn generate_summary(
321 &mut self,
322 entity_code: &str,
323 fiscal_year: i32,
324 current_balances: &[(String, String, Decimal)],
325 ) -> PriorYearSummary {
326 let comparatives = self.generate_comparatives(entity_code, fiscal_year, current_balances);
327 let findings = self.generate_findings(entity_code, fiscal_year);
328 let open = findings
329 .iter()
330 .filter(|f| f.status == "open" || f.status == "recurring")
331 .count();
332
333 let opinion_roll: f64 = self.rng.random();
335 let opinion_type = if opinion_roll < 0.90 {
336 "unmodified"
337 } else if opinion_roll < 0.98 {
338 "qualified"
339 } else {
340 "adverse"
341 };
342
343 let total_abs: f64 = current_balances
346 .iter()
347 .map(|(_, _, amt)| decimal_to_f64(*amt).abs())
348 .sum();
349 let materiality_pct = 0.01 + self.rng.random::<f64>() * 0.01; let materiality = f64_to_decimal(total_abs * materiality_pct);
351
352 let kam_count = self.rng.random_range(2..=4_usize).min(KAM_POOL.len());
354 let mut kam_indices: Vec<usize> = (0..KAM_POOL.len()).collect();
355 kam_indices.shuffle(&mut self.rng);
356 kam_indices.truncate(kam_count);
357 kam_indices.sort_unstable();
358 let key_audit_matters: Vec<String> = kam_indices
359 .iter()
360 .map(|&i| KAM_POOL[i].to_string())
361 .collect();
362
363 PriorYearSummary {
364 fiscal_year: fiscal_year - 1,
365 entity_code: entity_code.to_string(),
366 opinion_type: opinion_type.to_string(),
367 materiality,
368 total_findings: findings.len(),
369 open_findings: open,
370 key_audit_matters,
371 comparatives,
372 findings,
373 }
374 }
375
376 fn pick_description(&mut self, finding_type: &str, risk_area: &str) -> String {
378 let matches: Vec<&str> = FINDING_DESCRIPTIONS
380 .iter()
381 .filter(|(ft, ra, _)| *ft == finding_type && *ra == risk_area)
382 .map(|(_, _, desc)| *desc)
383 .collect();
384
385 if matches.is_empty() {
386 let type_matches: Vec<&str> = FINDING_DESCRIPTIONS
388 .iter()
389 .filter(|(ft, _, _)| *ft == finding_type)
390 .map(|(_, _, desc)| *desc)
391 .collect();
392 if type_matches.is_empty() {
393 return format!("Prior-year {} in {} area", finding_type, risk_area);
394 }
395 let idx = self.rng.random_range(0..type_matches.len());
396 return type_matches[idx].to_string();
397 }
398
399 let idx = self.rng.random_range(0..matches.len());
400 matches[idx].to_string()
401 }
402}
403
404fn weighted_pick<'a>(rng: &mut ChaCha8Rng, items: &[(&'a str, f64)]) -> &'a str {
410 let roll: f64 = rng.random();
411 for (item, threshold) in items {
412 if roll < *threshold {
413 return item;
414 }
415 }
416 items.last().map(|(item, _)| *item).unwrap_or("unknown")
417}
418
/// Benford's-law probabilities for leading digits 1 through 9.
///
/// Index `i` holds the probability of digit `i + 1`, i.e. `log10(1 + 1/d)`
/// rounded to three decimals. The nine values sum to 1.0.
const BENFORD_PROBS: [f64; 9] = [
    0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
];
423
424fn sample_benford_digit(rng: &mut ChaCha8Rng) -> u32 {
426 let roll: f64 = rng.random();
427 let mut cumulative = 0.0;
428 for (i, &p) in BENFORD_PROBS.iter().enumerate() {
429 cumulative += p;
430 if roll < cumulative {
431 return (i + 1) as u32;
432 }
433 }
434 9
435}
436
437fn benford_first_digit_adjust(raw: f64, rng: &mut ChaCha8Rng) -> f64 {
443 let abs_raw = raw.abs();
444 if abs_raw < 1.0 {
445 return raw;
446 }
447
448 let magnitude = abs_raw.log10().floor() as i32;
449 let scale = 10_f64.powi(magnitude);
450
451 let normalised = abs_raw / scale; let current_first = normalised.floor() as u32;
454
455 let benford_digit = sample_benford_digit(rng);
457
458 let fractional = normalised - current_first as f64; let adjusted = (benford_digit as f64 + fractional) * scale;
461
462 if raw < 0.0 {
463 -adjusted
464 } else {
465 adjusted
466 }
467}
468
469fn decimal_to_f64(d: Decimal) -> f64 {
470 use std::str::FromStr;
471 f64::from_str(&d.to_string()).unwrap_or(0.0)
472}
473
474fn f64_to_decimal(v: f64) -> Decimal {
475 use rust_decimal::prelude::FromPrimitive;
476 Decimal::from_f64(v).unwrap_or(Decimal::ZERO).round_dp(2)
477}
478
#[cfg(test)]
// Tests may unwrap freely: a panic is the desired failure signal here.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;
    use std::collections::HashMap;

    /// Eight representative current-year balances as `(code, name, amount)`.
    fn sample_balances() -> Vec<(String, String, Decimal)> {
        vec![
            ("1100".into(), "Accounts Receivable".into(), dec!(500_000)),
            ("1200".into(), "Inventory".into(), dec!(300_000)),
            ("2000".into(), "Accounts Payable".into(), dec!(200_000)),
            ("4000".into(), "Revenue".into(), dec!(1_500_000)),
            ("5000".into(), "Cost of Goods Sold".into(), dec!(900_000)),
            ("1000".into(), "Cash".into(), dec!(150_000)),
            ("3000".into(), "Retained Earnings".into(), dec!(400_000)),
            ("6000".into(), "Operating Expenses".into(), dec!(250_000)),
        ]
    }

    /// One comparative per input balance, tagged with entity and period.
    #[test]
    fn test_comparatives_generated() {
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let comps = generator.generate_comparatives("C001", 2025, &balances);

        assert_eq!(comps.len(), balances.len());
        for comp in &comps {
            assert_eq!(comp.entity_code, "C001");
            assert_eq!(comp.period, "2025-12");
            assert!(!comp.account_code.is_empty());
            assert!(!comp.account_name.is_empty());
        }
    }

    /// Year-over-year variances should mostly stay within +/-50%.
    #[test]
    fn test_variance_distribution() {
        let mut generator = PriorYearGenerator::new(123);
        let balances = sample_balances();

        let mut all_pcts = Vec::new();
        for _ in 0..50 {
            let comps = generator.generate_comparatives("C001", 2025, &balances);
            for c in &comps {
                all_pcts.push(c.variance_pct);
            }
        }

        let within_50 = all_pcts.iter().filter(|p| p.abs() < 50.0).count();
        let ratio = within_50 as f64 / all_pcts.len() as f64;
        assert!(
            ratio > 0.40,
            "Expected >40% of variances within 50%, got {:.1}%",
            ratio * 100.0
        );

        let mut sorted = all_pcts.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let median = sorted[sorted.len() / 2];
        assert!(
            median.abs() < 50.0,
            "Expected median variance within 50%, got {:.2}%",
            median
        );
    }

    /// `variance` and `variance_pct` must agree with the stored amounts.
    #[test]
    fn test_comparatives_arithmetic() {
        let mut generator = PriorYearGenerator::new(77);
        let balances = sample_balances();
        let comps = generator.generate_comparatives("C001", 2025, &balances);

        for comp in &comps {
            let expected_variance = comp.current_year_amount - comp.prior_year_amount;
            assert_eq!(
                comp.variance, expected_variance,
                "Variance mismatch for account {}",
                comp.account_code
            );

            if !comp.prior_year_amount.is_zero() {
                let prior_abs_f64 = decimal_to_f64(comp.prior_year_amount).abs();
                if prior_abs_f64 > 1e-10 {
                    let expected_pct = decimal_to_f64(comp.variance) / prior_abs_f64 * 100.0;
                    let diff = (comp.variance_pct - expected_pct).abs();
                    assert!(
                        diff < 0.01,
                        "Variance pct mismatch for {}: got {}, expected {}",
                        comp.account_code,
                        comp.variance_pct,
                        expected_pct
                    );
                }
            }
        }
    }

    /// Findings come in batches of 3-8 and refer to the prior fiscal year.
    #[test]
    fn test_findings_generated() {
        let mut generator = PriorYearGenerator::new(42);
        let findings = generator.generate_findings("C001", 2025);

        assert!(
            findings.len() >= 3 && findings.len() <= 8,
            "Expected 3-8 findings, got {}",
            findings.len()
        );

        for f in &findings {
            assert_eq!(f.fiscal_year, 2024);
            assert!(!f.finding_type.is_empty());
            assert!(!f.description.is_empty());
            assert!(!f.status.is_empty());
            assert!(!f.risk_area.is_empty());
        }
    }

    /// Across many seeds the common statuses must all show up.
    #[test]
    fn test_finding_status_distribution() {
        let mut status_counts: HashMap<String, usize> = HashMap::new();
        for seed in 0..50_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            let findings = generator.generate_findings("C001", 2025);
            for f in &findings {
                *status_counts.entry(f.status.clone()).or_insert(0) += 1;
            }
        }

        assert!(
            status_counts.contains_key("remediated"),
            "Missing 'remediated' status"
        );
        assert!(status_counts.contains_key("open"), "Missing 'open' status");

        assert!(
            status_counts.len() >= 2,
            "Expected at least 2 distinct statuses, got {}",
            status_counts.len()
        );
    }

    /// The summary's counters and flags must match its embedded findings.
    #[test]
    fn test_summary_consistent() {
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = generator.generate_summary("C001", 2025, &balances);

        assert_eq!(summary.fiscal_year, 2024);
        assert_eq!(summary.entity_code, "C001");
        assert_eq!(summary.total_findings, summary.findings.len());

        let actual_open = summary
            .findings
            .iter()
            .filter(|f| f.status == "open" || f.status == "recurring")
            .count();
        assert_eq!(
            summary.open_findings, actual_open,
            "open_findings {} doesn't match actual open/recurring count {}",
            summary.open_findings, actual_open
        );

        assert_eq!(summary.comparatives.len(), balances.len());

        assert!(!summary.key_audit_matters.is_empty());

        let valid_opinions = ["unmodified", "qualified", "adverse", "disclaimer"];
        assert!(
            valid_opinions.contains(&summary.opinion_type.as_str()),
            "Invalid opinion type: {}",
            summary.opinion_type
        );

        for f in &summary.findings {
            if f.status == "open" || f.status == "recurring" {
                assert!(
                    f.follow_up_required,
                    "Open/recurring finding {} should have follow_up_required=true",
                    f.finding_id
                );
            }
        }

        for f in &summary.findings {
            if f.status == "remediated" {
                assert!(
                    f.remediation_date.is_some(),
                    "Remediated finding {} should have a remediation_date",
                    f.finding_id
                );
            }
        }
    }

    /// Leading digits of prior-year amounts should roughly follow Benford's law.
    #[test]
    fn test_prior_year_amounts_benford() {
        // Index 0 is unused so that a digit indexes its own counter.
        let mut digit_counts = [0_usize; 10];
        for seed in 0..100_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            let balances = sample_balances();
            let comps = generator.generate_comparatives("C001", 2025, &balances);
            for c in &comps {
                let abs_str = decimal_to_f64(c.prior_year_amount).abs().to_string();
                if let Some(first_char) = abs_str
                    .chars()
                    .find(|ch| ch.is_ascii_digit() && *ch != '0')
                {
                    let digit = first_char.to_digit(10).unwrap_or(0) as usize;
                    if (1..=9).contains(&digit) {
                        digit_counts[digit] += 1;
                    }
                }
            }
        }

        let total: usize = digit_counts[1..].iter().sum();
        // Fail loudly instead of silently passing on a too-small sample.
        assert!(
            total >= 50,
            "Too few samples ({}) to assess the Benford distribution",
            total
        );

        let benford_expected = [
            0.0, 0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
        ];

        let freq_1 = digit_counts[1] as f64 / total as f64;
        assert!(
            freq_1 > 0.15,
            "Digit 1 frequency {:.3} is too low for Benford (expected ~{:.3})",
            freq_1,
            benford_expected[1]
        );

        // Mean absolute deviation over digits 1..=9 (index 0 is skipped).
        let mad = digit_counts
            .iter()
            .zip(benford_expected.iter())
            .skip(1)
            .map(|(&count, &expected)| (count as f64 / total as f64 - expected).abs())
            .sum::<f64>()
            / 9.0;

        assert!(
            mad < 0.06,
            "Benford MAD {:.4} is too high (expected < 0.06)",
            mad
        );
    }

    /// A summary must survive a JSON round trip with its key fields intact.
    #[test]
    fn test_serialization_roundtrip() {
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = generator.generate_summary("C001", 2025, &balances);

        let json = serde_json::to_string(&summary).expect("serialize");
        let parsed: PriorYearSummary = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(summary.fiscal_year, parsed.fiscal_year);
        assert_eq!(summary.entity_code, parsed.entity_code);
        assert_eq!(summary.opinion_type, parsed.opinion_type);
        assert_eq!(summary.total_findings, parsed.total_findings);
        assert_eq!(summary.open_findings, parsed.open_findings);
        assert_eq!(summary.comparatives.len(), parsed.comparatives.len());
        assert_eq!(summary.findings.len(), parsed.findings.len());

        for (orig, rt) in summary.findings.iter().zip(parsed.findings.iter()) {
            assert_eq!(orig.finding_id, rt.finding_id);
            assert_eq!(orig.finding_type, rt.finding_type);
            assert_eq!(orig.status, rt.status);
        }
    }
}