1use chrono::NaiveDate;
8use datasynth_core::distributions::{AmountDistributionConfig, AmountSampler};
9use datasynth_core::models::{PriorYearComparative, PriorYearFinding, PriorYearSummary};
10use datasynth_core::utils::seeded_rng;
11use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use rand_distr::{Distribution, Normal};
15use rust_decimal::Decimal;
16
/// Canned narrative texts for prior-year findings.
///
/// Every entry is a `(finding_type, risk_area, description)` triple. The table
/// holds one narrative for each combination of the four finding types and the
/// five risk areas, grouped below by finding type.
const FINDING_DESCRIPTIONS: &[(&str, &str, &str)] = &[
    // ---- control_deficiency ----
    (
        "control_deficiency",
        "revenue",
        "Insufficient segregation of duties in revenue posting process",
    ),
    (
        "control_deficiency",
        "receivables",
        "Lack of timely reconciliation of accounts receivable subsidiary ledger",
    ),
    (
        "control_deficiency",
        "payables",
        "Missing secondary approval for vendor master data changes",
    ),
    (
        "control_deficiency",
        "inventory",
        "Cycle count procedures not performed on schedule for high-value items",
    ),
    (
        "control_deficiency",
        "estimates",
        "No formal review process for management's key accounting estimates",
    ),
    // ---- misstatement ----
    (
        "misstatement",
        "revenue",
        "Revenue recognised before transfer of control per ASC 606 criteria",
    ),
    (
        "misstatement",
        "receivables",
        "Overstatement of accounts receivable due to improper cutoff at period end",
    ),
    (
        "misstatement",
        "payables",
        "Unrecorded liabilities identified through subsequent disbursement testing",
    ),
    (
        "misstatement",
        "inventory",
        "Inventory obsolescence reserve understated based on ageing analysis",
    ),
    (
        "misstatement",
        "estimates",
        "Fair value measurement for Level 3 assets not supported by observable inputs",
    ),
    // ---- significant_deficiency ----
    (
        "significant_deficiency",
        "revenue",
        "Percentage-of-completion estimates lack corroborating project data",
    ),
    (
        "significant_deficiency",
        "receivables",
        "Expected credit loss model uses outdated forward-looking information",
    ),
    (
        "significant_deficiency",
        "payables",
        "Automated three-way match tolerance set above materiality threshold",
    ),
    (
        "significant_deficiency",
        "inventory",
        "Standard cost variances not analysed or allocated on a timely basis",
    ),
    (
        "significant_deficiency",
        "estimates",
        "Inadequate documentation of key assumptions in impairment model",
    ),
    // ---- material_weakness ----
    (
        "material_weakness",
        "revenue",
        "Pervasive override of revenue recognition controls by senior management",
    ),
    (
        "material_weakness",
        "receivables",
        "Systematic failure to record allowance for doubtful accounts",
    ),
    (
        "material_weakness",
        "payables",
        "Duplicate payments processed without detection across multiple periods",
    ),
    (
        "material_weakness",
        "inventory",
        "Physical inventory counts not reconciled to perpetual records for the full year",
    ),
    (
        "material_weakness",
        "estimates",
        "Material misstatement in goodwill impairment due to unsubstantiated growth assumptions",
    ),
];
128
/// Titles from which the key audit matters of a prior-year summary are drawn.
///
/// `generate_summary` selects between two and four of these per summary and
/// reports them in the order they appear here.
const KAM_POOL: &[&str] = &[
    "Revenue recognition",
    "Goodwill impairment",
    "Expected credit losses",
    "Inventory valuation",
    "Provisions and contingencies",
    "Fair value measurement of financial instruments",
    "Business combination purchase price allocation",
    "Going concern assessment",
    "Tax provisions and uncertain tax positions",
    "Lease accounting transition",
];
142
/// Finding types paired with *cumulative* probability thresholds.
///
/// `weighted_pick` returns the first entry whose threshold exceeds its roll,
/// so the implied weights are 40% / 30% / 20% / 10% in listed order.
const FINDING_TYPES: &[(&str, f64)] = &[
    ("control_deficiency", 0.40),
    ("misstatement", 0.70),
    ("significant_deficiency", 0.90),
    ("material_weakness", 1.00),
];
150
/// Finding statuses paired with *cumulative* probability thresholds.
///
/// `weighted_pick` returns the first entry whose threshold exceeds its roll,
/// so the implied weights are 50% / 20% / 20% / 10% in listed order.
const FINDING_STATUSES: &[(&str, f64)] = &[
    ("remediated", 0.50),
    ("open", 0.70),
    ("partially_remediated", 0.90),
    ("recurring", 1.00),
];
158
/// Risk areas paired with *cumulative* probability thresholds.
///
/// `weighted_pick` returns the first entry whose threshold exceeds its roll,
/// so the implied weights are 30% / 20% / 20% / 15% / 15% in listed order.
const RISK_AREAS: &[(&str, f64)] = &[
    ("revenue", 0.30),
    ("receivables", 0.50),
    ("estimates", 0.70),
    ("payables", 0.85),
    ("inventory", 1.00),
];
167
168pub struct PriorYearGenerator {
170 rng: ChaCha8Rng,
171 uuid_factory: DeterministicUuidFactory,
172 amount_sampler: AmountSampler,
173}
174
175impl PriorYearGenerator {
176 pub fn new(seed: u64) -> Self {
178 Self {
179 rng: seeded_rng(seed, 0x4E00),
180 uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::PriorYear),
181 amount_sampler: AmountSampler::with_benford(
182 seed.wrapping_add(0x4E01),
183 AmountDistributionConfig::default(),
184 ),
185 }
186 }
187
188 pub fn generate_comparatives(
194 &mut self,
195 entity_code: &str,
196 fiscal_year: i32,
197 current_balances: &[(String, String, Decimal)],
198 ) -> Vec<PriorYearComparative> {
199 let normal = Normal::new(0.03_f64, 0.12_f64).expect("valid normal params");
200 let period = format!("{}-12", fiscal_year);
201
202 current_balances
203 .iter()
204 .map(|(code, name, current)| {
205 let growth: f64 = normal.sample(&mut self.rng);
208 let divisor = 1.0 + growth;
209 let current_f64 = decimal_to_f64(*current);
210
211 let raw_prior = if divisor.abs() < 1e-10 {
213 current_f64
214 } else {
215 current_f64 / divisor
216 };
217
218 let prior_f64 = if raw_prior.abs() > 10.0 && self.rng.random_bool(0.30) {
224 benford_first_digit_adjust(raw_prior, &mut self.rng)
225 } else {
226 raw_prior
227 };
228
229 let prior = f64_to_decimal(prior_f64);
230 let variance = *current - prior;
231 let variance_pct = if prior.is_zero() {
232 0.0
233 } else {
234 let prior_abs_f64 = decimal_to_f64(prior).abs();
235 if prior_abs_f64 < 1e-10 {
236 0.0
237 } else {
238 decimal_to_f64(variance) / prior_abs_f64 * 100.0
239 }
240 };
241
242 PriorYearComparative {
243 account_code: code.clone(),
244 account_name: name.clone(),
245 current_year_amount: *current,
246 prior_year_amount: prior,
247 variance,
248 variance_pct,
249 entity_code: entity_code.to_string(),
250 period: period.clone(),
251 }
252 })
253 .collect()
254 }
255
256 pub fn generate_findings(
261 &mut self,
262 entity_code: &str,
263 fiscal_year: i32,
264 ) -> Vec<PriorYearFinding> {
265 let count = self.rng.random_range(3..=8_usize);
266 let prior_year = fiscal_year - 1;
267
268 (0..count)
269 .map(|_| {
270 let finding_type = weighted_pick(&mut self.rng, FINDING_TYPES);
271 let status = weighted_pick(&mut self.rng, FINDING_STATUSES);
272 let risk_area = weighted_pick(&mut self.rng, RISK_AREAS);
273
274 let description = self.pick_description(finding_type, risk_area);
275
276 let follow_up_required = status == "open" || status == "recurring";
278
279 let remediation_date = if status == "remediated" || status == "partially_remediated"
281 {
282 let day_offset = self.rng.random_range(30..=270_i64);
285 NaiveDate::from_ymd_opt(prior_year, 12, 31)
286 .and_then(|d| d.checked_add_signed(chrono::Duration::days(day_offset)))
287 } else {
288 None
289 };
290
291 let has_amount = finding_type == "misstatement"
294 || finding_type == "material_weakness"
295 || self.rng.random_bool(0.3);
296 let original_amount = if has_amount {
297 Some(self.amount_sampler.sample())
298 } else {
299 None
300 };
301
302 let _entity = entity_code; PriorYearFinding {
304 finding_id: self.uuid_factory.next(),
305 fiscal_year: prior_year,
306 finding_type: finding_type.to_string(),
307 description,
308 status: status.to_string(),
309 risk_area: risk_area.to_string(),
310 original_amount,
311 remediation_date,
312 follow_up_required,
313 }
314 })
315 .collect()
316 }
317
318 pub fn generate_summary(
321 &mut self,
322 entity_code: &str,
323 fiscal_year: i32,
324 current_balances: &[(String, String, Decimal)],
325 ) -> PriorYearSummary {
326 let comparatives = self.generate_comparatives(entity_code, fiscal_year, current_balances);
327 let findings = self.generate_findings(entity_code, fiscal_year);
328 let open = findings
329 .iter()
330 .filter(|f| f.status == "open" || f.status == "recurring")
331 .count();
332
333 let opinion_roll: f64 = self.rng.random();
335 let opinion_type = if opinion_roll < 0.90 {
336 "unmodified"
337 } else if opinion_roll < 0.98 {
338 "qualified"
339 } else {
340 "adverse"
341 };
342
343 let total_abs: f64 = current_balances
346 .iter()
347 .map(|(_, _, amt)| decimal_to_f64(*amt).abs())
348 .sum();
349 let materiality_pct = 0.01 + self.rng.random::<f64>() * 0.01; let materiality = f64_to_decimal(total_abs * materiality_pct);
351
352 let kam_count = self.rng.random_range(2..=4_usize).min(KAM_POOL.len());
354 let mut kam_indices: Vec<usize> = (0..KAM_POOL.len()).collect();
355 kam_indices.shuffle(&mut self.rng);
356 kam_indices.truncate(kam_count);
357 kam_indices.sort_unstable();
358 let key_audit_matters: Vec<String> = kam_indices
359 .iter()
360 .map(|&i| KAM_POOL[i].to_string())
361 .collect();
362
363 PriorYearSummary {
364 fiscal_year: fiscal_year - 1,
365 entity_code: entity_code.to_string(),
366 opinion_type: opinion_type.to_string(),
367 materiality,
368 total_findings: findings.len(),
369 open_findings: open,
370 key_audit_matters,
371 comparatives,
372 findings,
373 }
374 }
375
376 fn pick_description(&mut self, finding_type: &str, risk_area: &str) -> String {
378 let matches: Vec<&str> = FINDING_DESCRIPTIONS
380 .iter()
381 .filter(|(ft, ra, _)| *ft == finding_type && *ra == risk_area)
382 .map(|(_, _, desc)| *desc)
383 .collect();
384
385 if matches.is_empty() {
386 let type_matches: Vec<&str> = FINDING_DESCRIPTIONS
388 .iter()
389 .filter(|(ft, _, _)| *ft == finding_type)
390 .map(|(_, _, desc)| *desc)
391 .collect();
392 if type_matches.is_empty() {
393 return format!("Prior-year {} in {} area", finding_type, risk_area);
394 }
395 let idx = self.rng.random_range(0..type_matches.len());
396 return type_matches[idx].to_string();
397 }
398
399 let idx = self.rng.random_range(0..matches.len());
400 matches[idx].to_string()
401 }
402}
403
404fn weighted_pick<'a>(rng: &mut ChaCha8Rng, items: &[(&'a str, f64)]) -> &'a str {
410 let roll: f64 = rng.random();
411 for (item, threshold) in items {
412 if roll < *threshold {
413 return item;
414 }
415 }
416 items.last().map(|(item, _)| *item).unwrap_or("unknown")
417}
418
/// Probability of each leading digit 1 through 9 (index 0 holds digit 1),
/// rounded to three decimals; used by `sample_benford_digit`.
const BENFORD_PROBS: [f64; 9] = [
    0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
];
423
424fn sample_benford_digit(rng: &mut ChaCha8Rng) -> u32 {
426 let roll: f64 = rng.random();
427 let mut cumulative = 0.0;
428 for (i, &p) in BENFORD_PROBS.iter().enumerate() {
429 cumulative += p;
430 if roll < cumulative {
431 return (i + 1) as u32;
432 }
433 }
434 9
435}
436
437fn benford_first_digit_adjust(raw: f64, rng: &mut ChaCha8Rng) -> f64 {
443 let abs_raw = raw.abs();
444 if abs_raw < 1.0 {
445 return raw;
446 }
447
448 let magnitude = abs_raw.log10().floor() as i32;
449 let scale = 10_f64.powi(magnitude);
450
451 let normalised = abs_raw / scale; let current_first = normalised.floor() as u32;
454
455 let benford_digit = sample_benford_digit(rng);
457
458 let fractional = normalised - current_first as f64; let adjusted = (benford_digit as f64 + fractional) * scale;
461
462 if raw < 0.0 {
463 -adjusted
464 } else {
465 adjusted
466 }
467}
468
469fn decimal_to_f64(d: Decimal) -> f64 {
470 use std::str::FromStr;
471 f64::from_str(&d.to_string()).unwrap_or(0.0)
472}
473
474fn f64_to_decimal(v: f64) -> Decimal {
475 use rust_decimal::prelude::FromPrimitive;
476 Decimal::from_f64(v).unwrap_or(Decimal::ZERO).round_dp(2)
477}
478
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;
    use std::collections::HashMap;

    /// Eight trial-balance lines shared by the tests below.
    fn sample_balances() -> Vec<(String, String, Decimal)> {
        vec![
            ("1100", "Accounts Receivable", dec!(500_000)),
            ("1200", "Inventory", dec!(300_000)),
            ("2000", "Accounts Payable", dec!(200_000)),
            ("4000", "Revenue", dec!(1_500_000)),
            ("5000", "Cost of Goods Sold", dec!(900_000)),
            ("1000", "Cash", dec!(150_000)),
            ("3000", "Retained Earnings", dec!(400_000)),
            ("6000", "Operating Expenses", dec!(250_000)),
        ]
        .into_iter()
        .map(|(code, name, amount)| (code.to_string(), name.to_string(), amount))
        .collect()
    }

    /// One comparative per balance, each stamped with entity and period.
    #[test]
    fn test_comparatives_generated() {
        let balances = sample_balances();
        let mut generator = PriorYearGenerator::new(42);
        let comps = generator.generate_comparatives("C001", 2025, &balances);

        assert_eq!(comps.len(), balances.len());
        for comp in &comps {
            assert_eq!(comp.entity_code, "C001");
            assert_eq!(comp.period, "2025-12");
            assert!(!comp.account_code.is_empty());
            assert!(!comp.account_name.is_empty());
        }
    }

    /// Variance percentages are mostly moderate and centred within ±50%.
    #[test]
    fn test_variance_distribution() {
        let balances = sample_balances();
        let mut generator = PriorYearGenerator::new(123);

        let all_pcts: Vec<_> = (0..50)
            .flat_map(|_| generator.generate_comparatives("C001", 2025, &balances))
            .map(|comp| comp.variance_pct)
            .collect();

        let within_50 = all_pcts.iter().filter(|pct| pct.abs() < 50.0).count();
        let ratio = within_50 as f64 / all_pcts.len() as f64;
        assert!(
            ratio > 0.40,
            "Expected >40% of variances within 50%, got {:.1}%",
            ratio * 100.0
        );

        let mut sorted = all_pcts.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let median = sorted[sorted.len() / 2];
        assert!(
            median.abs() < 50.0,
            "Expected median variance within 50%, got {:.2}%",
            median
        );
    }

    /// `variance` and `variance_pct` agree with the stored amounts.
    #[test]
    fn test_comparatives_arithmetic() {
        let balances = sample_balances();
        let mut generator = PriorYearGenerator::new(77);

        for comp in generator.generate_comparatives("C001", 2025, &balances) {
            let expected_variance = comp.current_year_amount - comp.prior_year_amount;
            assert_eq!(
                comp.variance, expected_variance,
                "Variance mismatch for account {}",
                comp.account_code
            );

            if comp.prior_year_amount.is_zero() {
                continue;
            }
            let prior_abs_f64 = decimal_to_f64(comp.prior_year_amount).abs();
            if prior_abs_f64 > 1e-10 {
                let expected_pct = decimal_to_f64(comp.variance) / prior_abs_f64 * 100.0;
                let diff = (comp.variance_pct - expected_pct).abs();
                assert!(
                    diff < 0.01,
                    "Variance pct mismatch for {}: got {}, expected {}",
                    comp.account_code,
                    comp.variance_pct,
                    expected_pct
                );
            }
        }
    }

    /// Three to eight findings, all dated to the prior year and fully populated.
    #[test]
    fn test_findings_generated() {
        let mut generator = PriorYearGenerator::new(42);
        let findings = generator.generate_findings("C001", 2025);

        assert!(
            findings.len() >= 3 && findings.len() <= 8,
            "Expected 3-8 findings, got {}",
            findings.len()
        );

        for finding in &findings {
            assert_eq!(finding.fiscal_year, 2024);
            assert!(!finding.finding_type.is_empty());
            assert!(!finding.description.is_empty());
            assert!(!finding.status.is_empty());
            assert!(!finding.risk_area.is_empty());
        }
    }

    /// Across fifty seeds both `remediated` and `open` statuses show up.
    #[test]
    fn test_finding_status_distribution() {
        let mut status_counts: HashMap<String, usize> = HashMap::new();
        for seed in 0..50_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            for finding in generator.generate_findings("C001", 2025) {
                *status_counts.entry(finding.status.clone()).or_insert(0) += 1;
            }
        }

        assert!(
            status_counts.contains_key("remediated"),
            "Missing 'remediated' status"
        );
        assert!(status_counts.contains_key("open"), "Missing 'open' status");

        assert!(
            status_counts.len() >= 2,
            "Expected at least 2 distinct statuses, got {}",
            status_counts.len()
        );
    }

    /// The summary's counters and flags are consistent with its contents.
    #[test]
    fn test_summary_consistent() {
        let balances = sample_balances();
        let mut generator = PriorYearGenerator::new(42);
        let summary = generator.generate_summary("C001", 2025, &balances);

        assert_eq!(summary.fiscal_year, 2024);
        assert_eq!(summary.entity_code, "C001");
        assert_eq!(summary.total_findings, summary.findings.len());

        let actual_open = summary
            .findings
            .iter()
            .filter(|finding| finding.status == "open" || finding.status == "recurring")
            .count();
        assert_eq!(
            summary.open_findings, actual_open,
            "open_findings {} doesn't match actual open/recurring count {}",
            summary.open_findings, actual_open
        );

        assert_eq!(summary.comparatives.len(), balances.len());

        assert!(!summary.key_audit_matters.is_empty());

        let valid_opinions = ["unmodified", "qualified", "adverse", "disclaimer"];
        assert!(
            valid_opinions.contains(&summary.opinion_type.as_str()),
            "Invalid opinion type: {}",
            summary.opinion_type
        );

        let outstanding = summary
            .findings
            .iter()
            .filter(|finding| finding.status == "open" || finding.status == "recurring");
        for finding in outstanding {
            assert!(
                finding.follow_up_required,
                "Open/recurring finding {} should have follow_up_required=true",
                finding.finding_id
            );
        }

        let remediated = summary
            .findings
            .iter()
            .filter(|finding| finding.status == "remediated");
        for finding in remediated {
            assert!(
                finding.remediation_date.is_some(),
                "Remediated finding {} should have a remediation_date",
                finding.finding_id
            );
        }
    }

    /// Leading digits of prior-year amounts lean towards Benford's distribution.
    #[test]
    fn test_prior_year_amounts_benford() {
        // Index 0 is unused; indices 1..=9 count leading digits.
        let mut digit_counts = [0_usize; 10];
        for seed in 0..100_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            let balances = sample_balances();
            for comp in generator.generate_comparatives("C001", 2025, &balances) {
                let rendered = decimal_to_f64(comp.prior_year_amount).abs().to_string();
                let leading = rendered
                    .chars()
                    .find(|ch| ch.is_ascii_digit() && *ch != '0')
                    .and_then(|ch| ch.to_digit(10));
                if let Some(digit @ 1..=9) = leading {
                    digit_counts[digit as usize] += 1;
                }
            }
        }

        let total: usize = digit_counts[1..].iter().sum();
        if total < 50 {
            // Too few samples for a meaningful distribution check.
            return;
        }

        let benford_expected = [
            0.0, 0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
        ];

        let freq_1 = digit_counts[1] as f64 / total as f64;
        assert!(
            freq_1 > 0.15,
            "Digit 1 frequency {:.3} is too low for Benford (expected ~{:.3})",
            freq_1,
            benford_expected[1]
        );

        // Mean absolute deviation between observed and expected frequencies.
        let deviation_sum: f64 = (1..=9)
            .map(|d| (digit_counts[d] as f64 / total as f64 - benford_expected[d]).abs())
            .sum();
        let mad = deviation_sum / 9.0;

        assert!(
            mad < 0.06,
            "Benford MAD {:.4} is too high (expected < 0.06)",
            mad
        );
    }

    /// A summary survives a JSON round trip with its key fields intact.
    #[test]
    fn test_serialization_roundtrip() {
        let balances = sample_balances();
        let mut generator = PriorYearGenerator::new(42);
        let summary = generator.generate_summary("C001", 2025, &balances);

        let json = serde_json::to_string(&summary).expect("serialize");
        let parsed: PriorYearSummary = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(summary.fiscal_year, parsed.fiscal_year);
        assert_eq!(summary.entity_code, parsed.entity_code);
        assert_eq!(summary.opinion_type, parsed.opinion_type);
        assert_eq!(summary.total_findings, parsed.total_findings);
        assert_eq!(summary.open_findings, parsed.open_findings);
        assert_eq!(summary.comparatives.len(), parsed.comparatives.len());
        assert_eq!(summary.findings.len(), parsed.findings.len());

        for (original, round_tripped) in summary.findings.iter().zip(parsed.findings.iter()) {
            assert_eq!(original.finding_id, round_tripped.finding_id);
            assert_eq!(original.finding_type, round_tripped.finding_type);
            assert_eq!(original.status, round_tripped.status);
        }
    }
}