// datasynth_generators/prior_year_generator.rs

//! Prior-year comparative data generator (WI-2).
//!
//! Generates prior-year balances, audit findings, and engagement summaries
//! from current-year account data. Supports ISA 315 (risk assessment via
//! year-over-year comparison) and ISA 520 (analytical procedures).
6
7use chrono::NaiveDate;
8use datasynth_core::distributions::{AmountDistributionConfig, AmountSampler};
9use datasynth_core::models::{PriorYearComparative, PriorYearFinding, PriorYearSummary};
10use datasynth_core::utils::seeded_rng;
11use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use rand_distr::{Distribution, Normal};
15use rust_decimal::Decimal;
16
17// ---------------------------------------------------------------------------
18// Finding description templates
19// ---------------------------------------------------------------------------
20
/// Description templates for prior-year findings.
///
/// Every entry is `(finding_type, risk_area, description)`. The table holds
/// exactly one description for each combination of the four finding types and
/// the five risk areas, grouped by finding type.
const FINDING_DESCRIPTIONS: &[(&str, &str, &str)] = &[
    // --- control_deficiency ---
    (
        "control_deficiency",
        "revenue",
        "Insufficient segregation of duties in revenue posting process",
    ),
    (
        "control_deficiency",
        "receivables",
        "Lack of timely reconciliation of accounts receivable subsidiary ledger",
    ),
    (
        "control_deficiency",
        "payables",
        "Missing secondary approval for vendor master data changes",
    ),
    (
        "control_deficiency",
        "inventory",
        "Cycle count procedures not performed on schedule for high-value items",
    ),
    (
        "control_deficiency",
        "estimates",
        "No formal review process for management's key accounting estimates",
    ),
    // --- misstatement ---
    (
        "misstatement",
        "revenue",
        "Revenue recognised before transfer of control per ASC 606 criteria",
    ),
    (
        "misstatement",
        "receivables",
        "Overstatement of accounts receivable due to improper cutoff at period end",
    ),
    (
        "misstatement",
        "payables",
        "Unrecorded liabilities identified through subsequent disbursement testing",
    ),
    (
        "misstatement",
        "inventory",
        "Inventory obsolescence reserve understated based on ageing analysis",
    ),
    (
        "misstatement",
        "estimates",
        "Fair value measurement for Level 3 assets not supported by observable inputs",
    ),
    // --- significant_deficiency ---
    (
        "significant_deficiency",
        "revenue",
        "Percentage-of-completion estimates lack corroborating project data",
    ),
    (
        "significant_deficiency",
        "receivables",
        "Expected credit loss model uses outdated forward-looking information",
    ),
    (
        "significant_deficiency",
        "payables",
        "Automated three-way match tolerance set above materiality threshold",
    ),
    (
        "significant_deficiency",
        "inventory",
        "Standard cost variances not analysed or allocated on a timely basis",
    ),
    (
        "significant_deficiency",
        "estimates",
        "Inadequate documentation of key assumptions in impairment model",
    ),
    // --- material_weakness ---
    (
        "material_weakness",
        "revenue",
        "Pervasive override of revenue recognition controls by senior management",
    ),
    (
        "material_weakness",
        "receivables",
        "Systematic failure to record allowance for doubtful accounts",
    ),
    (
        "material_weakness",
        "payables",
        "Duplicate payments processed without detection across multiple periods",
    ),
    (
        "material_weakness",
        "inventory",
        "Physical inventory counts not reconciled to perpetual records for the full year",
    ),
    (
        "material_weakness",
        "estimates",
        "Material misstatement in goodwill impairment due to unsubstantiated growth assumptions",
    ),
];
128
/// Pool of key audit matter (KAM) titles from which a summary's KAMs are drawn.
const KAM_POOL: &[&str] = &[
    "Revenue recognition",
    "Goodwill impairment",
    "Expected credit losses",
    "Inventory valuation",
    "Provisions and contingencies",
    "Fair value measurement of financial instruments",
    "Business combination purchase price allocation",
    "Going concern assessment",
    "Tax provisions and uncertain tax positions",
    "Lease accounting transition",
];
142
/// Finding types paired with their *cumulative* selection threshold.
///
/// The resulting shares are 40% control deficiency, 30% misstatement,
/// 20% significant deficiency and 10% material weakness.
const FINDING_TYPES: &[(&str, f64)] = &[
    ("control_deficiency", 0.40),
    ("misstatement", 0.70),
    ("significant_deficiency", 0.90),
    ("material_weakness", 1.00),
];
150
/// Finding statuses paired with their *cumulative* selection threshold.
///
/// The resulting shares are 50% remediated, 20% open, 20% partially
/// remediated and 10% recurring.
const FINDING_STATUSES: &[(&str, f64)] = &[
    ("remediated", 0.50),
    ("open", 0.70),
    ("partially_remediated", 0.90),
    ("recurring", 1.00),
];
158
/// Risk areas paired with their *cumulative* selection threshold.
///
/// The resulting shares are 30% revenue, 20% receivables, 20% estimates,
/// 15% payables and 15% inventory.
const RISK_AREAS: &[(&str, f64)] = &[
    ("revenue", 0.30),
    ("receivables", 0.50),
    ("estimates", 0.70),
    ("payables", 0.85),
    ("inventory", 1.00),
];
167
168/// Generates prior-year comparative data from current-year balances.
169pub struct PriorYearGenerator {
170    rng: ChaCha8Rng,
171    uuid_factory: DeterministicUuidFactory,
172    amount_sampler: AmountSampler,
173}
174
175impl PriorYearGenerator {
176    /// Create a new generator with the given seed.
177    pub fn new(seed: u64) -> Self {
178        Self {
179            rng: seeded_rng(seed, 0x4E00),
180            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::PriorYear),
181            amount_sampler: AmountSampler::with_benford(
182                seed.wrapping_add(0x4E01),
183                AmountDistributionConfig::default(),
184            ),
185        }
186    }
187
188    /// Generate prior-year comparative data from current-year account balances.
189    ///
190    /// For each account the prior-year amount is derived by applying a realistic
191    /// year-over-year growth factor drawn from N(0.03, 0.12). The prior-year
192    /// amount is then adjusted to follow Benford's law on its first digit.
193    pub fn generate_comparatives(
194        &mut self,
195        entity_code: &str,
196        fiscal_year: i32,
197        current_balances: &[(String, String, Decimal)],
198    ) -> Vec<PriorYearComparative> {
199        let normal = Normal::new(0.03_f64, 0.12_f64).expect("valid normal params");
200        let period = format!("{}-12", fiscal_year);
201
202        current_balances
203            .iter()
204            .map(|(code, name, current)| {
205                // Derive prior year: prior = current / (1 + growth)
206                // where growth ~ N(0.03, 0.12)
207                let growth: f64 = normal.sample(&mut self.rng);
208                let divisor = 1.0 + growth;
209                let current_f64 = decimal_to_f64(*current);
210
211                // Compute raw prior-year amount
212                let raw_prior = if divisor.abs() < 1e-10 {
213                    current_f64
214                } else {
215                    current_f64 / divisor
216                };
217
218                // Apply Benford-compliant first-digit nudge.
219                // With 30% probability, replace the leading digit with a
220                // Benford-sampled digit to ensure the aggregate distribution
221                // conforms to Benford's law. The remaining 70% are left
222                // as-is (log-normal variance already trends Benford).
223                let prior_f64 = if raw_prior.abs() > 10.0 && self.rng.random_bool(0.30) {
224                    benford_first_digit_adjust(raw_prior, &mut self.rng)
225                } else {
226                    raw_prior
227                };
228
229                let prior = f64_to_decimal(prior_f64);
230                let variance = *current - prior;
231                let variance_pct = if prior.is_zero() {
232                    0.0
233                } else {
234                    let prior_abs_f64 = decimal_to_f64(prior).abs();
235                    if prior_abs_f64 < 1e-10 {
236                        0.0
237                    } else {
238                        decimal_to_f64(variance) / prior_abs_f64 * 100.0
239                    }
240                };
241
242                PriorYearComparative {
243                    account_code: code.clone(),
244                    account_name: name.clone(),
245                    current_year_amount: *current,
246                    prior_year_amount: prior,
247                    variance,
248                    variance_pct,
249                    entity_code: entity_code.to_string(),
250                    period: period.clone(),
251                }
252            })
253            .collect()
254    }
255
256    /// Generate prior-year audit findings.
257    ///
258    /// Produces 3-8 findings with realistic distributions across finding types,
259    /// statuses, and risk areas.
260    pub fn generate_findings(
261        &mut self,
262        entity_code: &str,
263        fiscal_year: i32,
264    ) -> Vec<PriorYearFinding> {
265        let count = self.rng.random_range(3..=8_usize);
266        let prior_year = fiscal_year - 1;
267
268        (0..count)
269            .map(|_| {
270                let finding_type = weighted_pick(&mut self.rng, FINDING_TYPES);
271                let status = weighted_pick(&mut self.rng, FINDING_STATUSES);
272                let risk_area = weighted_pick(&mut self.rng, RISK_AREAS);
273
274                let description = self.pick_description(finding_type, risk_area);
275
276                // Open and recurring findings require follow-up
277                let follow_up_required = status == "open" || status == "recurring";
278
279                // Remediated findings get a remediation date
280                let remediation_date = if status == "remediated" || status == "partially_remediated"
281                {
282                    // Remediation happened between the prior year-end and
283                    // 6 months into the current year
284                    let day_offset = self.rng.random_range(30..=270_i64);
285                    NaiveDate::from_ymd_opt(prior_year, 12, 31)
286                        .and_then(|d| d.checked_add_signed(chrono::Duration::days(day_offset)))
287                } else {
288                    None
289                };
290
291                // Misstatements and material weaknesses always have an amount;
292                // other finding types have a 30% chance.
293                let has_amount = finding_type == "misstatement"
294                    || finding_type == "material_weakness"
295                    || self.rng.random_bool(0.3);
296                let original_amount = if has_amount {
297                    Some(self.amount_sampler.sample())
298                } else {
299                    None
300                };
301
302                let _entity = entity_code; // used for context
303                PriorYearFinding {
304                    finding_id: self.uuid_factory.next(),
305                    fiscal_year: prior_year,
306                    finding_type: finding_type.to_string(),
307                    description,
308                    status: status.to_string(),
309                    risk_area: risk_area.to_string(),
310                    original_amount,
311                    remediation_date,
312                    follow_up_required,
313                }
314            })
315            .collect()
316    }
317
318    /// Generate a complete prior-year summary including comparatives, findings,
319    /// and the prior-year engagement metadata.
320    pub fn generate_summary(
321        &mut self,
322        entity_code: &str,
323        fiscal_year: i32,
324        current_balances: &[(String, String, Decimal)],
325    ) -> PriorYearSummary {
326        let comparatives = self.generate_comparatives(entity_code, fiscal_year, current_balances);
327        let findings = self.generate_findings(entity_code, fiscal_year);
328        let open = findings
329            .iter()
330            .filter(|f| f.status == "open" || f.status == "recurring")
331            .count();
332
333        // Opinion type: 90% unmodified, 8% qualified, 2% adverse/disclaimer
334        let opinion_roll: f64 = self.rng.random();
335        let opinion_type = if opinion_roll < 0.90 {
336            "unmodified"
337        } else if opinion_roll < 0.98 {
338            "qualified"
339        } else {
340            "adverse"
341        };
342
343        // Derive materiality from the total absolute current-year amounts
344        // (roughly 1-2% of total revenue/assets)
345        let total_abs: f64 = current_balances
346            .iter()
347            .map(|(_, _, amt)| decimal_to_f64(*amt).abs())
348            .sum();
349        let materiality_pct = 0.01 + self.rng.random::<f64>() * 0.01; // 1-2%
350        let materiality = f64_to_decimal(total_abs * materiality_pct);
351
352        // Pick 2-4 KAMs
353        let kam_count = self.rng.random_range(2..=4_usize).min(KAM_POOL.len());
354        let mut kam_indices: Vec<usize> = (0..KAM_POOL.len()).collect();
355        kam_indices.shuffle(&mut self.rng);
356        kam_indices.truncate(kam_count);
357        kam_indices.sort_unstable();
358        let key_audit_matters: Vec<String> = kam_indices
359            .iter()
360            .map(|&i| KAM_POOL[i].to_string())
361            .collect();
362
363        PriorYearSummary {
364            fiscal_year: fiscal_year - 1,
365            entity_code: entity_code.to_string(),
366            opinion_type: opinion_type.to_string(),
367            materiality,
368            total_findings: findings.len(),
369            open_findings: open,
370            key_audit_matters,
371            comparatives,
372            findings,
373        }
374    }
375
376    /// Pick a finding description that matches the given type and risk area.
377    fn pick_description(&mut self, finding_type: &str, risk_area: &str) -> String {
378        // Find all matching templates
379        let matches: Vec<&str> = FINDING_DESCRIPTIONS
380            .iter()
381            .filter(|(ft, ra, _)| *ft == finding_type && *ra == risk_area)
382            .map(|(_, _, desc)| *desc)
383            .collect();
384
385        if matches.is_empty() {
386            // Fallback: pick any description for the finding type
387            let type_matches: Vec<&str> = FINDING_DESCRIPTIONS
388                .iter()
389                .filter(|(ft, _, _)| *ft == finding_type)
390                .map(|(_, _, desc)| *desc)
391                .collect();
392            if type_matches.is_empty() {
393                return format!("Prior-year {} in {} area", finding_type, risk_area);
394            }
395            let idx = self.rng.random_range(0..type_matches.len());
396            return type_matches[idx].to_string();
397        }
398
399        let idx = self.rng.random_range(0..matches.len());
400        matches[idx].to_string()
401    }
402}
403
404// ---------------------------------------------------------------------------
405// Helpers
406// ---------------------------------------------------------------------------
407
408/// Pick from a weighted list using cumulative weights.
409fn weighted_pick<'a>(rng: &mut ChaCha8Rng, items: &[(&'a str, f64)]) -> &'a str {
410    let roll: f64 = rng.random();
411    for (item, threshold) in items {
412        if roll < *threshold {
413            return item;
414        }
415    }
416    items.last().map(|(item, _)| *item).unwrap_or("unknown")
417}
418
/// First-digit probabilities under Benford's law for the digits 1 through 9
/// (index 0 holds digit 1), rounded to three decimals.
const BENFORD_PROBS: [f64; 9] = [
    0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
];
423
424/// Sample a first digit (1-9) according to Benford's law.
425fn sample_benford_digit(rng: &mut ChaCha8Rng) -> u32 {
426    let roll: f64 = rng.random();
427    let mut cumulative = 0.0;
428    for (i, &p) in BENFORD_PROBS.iter().enumerate() {
429        cumulative += p;
430        if roll < cumulative {
431            return (i + 1) as u32;
432        }
433    }
434    9
435}
436
437/// Adjust the first significant digit of a value to follow Benford's law.
438///
439/// This preserves the order of magnitude and the lower digits, making only
440/// a small perturbation that keeps the prior-year amount close to the raw
441/// variance-derived value.
442fn benford_first_digit_adjust(raw: f64, rng: &mut ChaCha8Rng) -> f64 {
443    let abs_raw = raw.abs();
444    if abs_raw < 1.0 {
445        return raw;
446    }
447
448    let magnitude = abs_raw.log10().floor() as i32;
449    let scale = 10_f64.powi(magnitude);
450
451    // Current first digit (1-9)
452    let normalised = abs_raw / scale; // value in [1.0, 10.0)
453    let current_first = normalised.floor() as u32;
454
455    // Sample a Benford-distributed first digit
456    let benford_digit = sample_benford_digit(rng);
457
458    // Replace the first digit while preserving the fractional part
459    let fractional = normalised - current_first as f64; // in [0.0, 1.0)
460    let adjusted = (benford_digit as f64 + fractional) * scale;
461
462    if raw < 0.0 {
463        -adjusted
464    } else {
465        adjusted
466    }
467}
468
469fn decimal_to_f64(d: Decimal) -> f64 {
470    use std::str::FromStr;
471    f64::from_str(&d.to_string()).unwrap_or(0.0)
472}
473
474fn f64_to_decimal(v: f64) -> Decimal {
475    use rust_decimal::prelude::FromPrimitive;
476    Decimal::from_f64(v).unwrap_or(Decimal::ZERO).round_dp(2)
477}
478
479// ---------------------------------------------------------------------------
480// Tests
481// ---------------------------------------------------------------------------
482
#[cfg(test)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;
    use std::collections::HashMap;

    /// Eight representative current-year balances shared by the tests.
    fn sample_balances() -> Vec<(String, String, Decimal)> {
        vec![
            ("1100".into(), "Accounts Receivable".into(), dec!(500_000)),
            ("1200".into(), "Inventory".into(), dec!(300_000)),
            ("2000".into(), "Accounts Payable".into(), dec!(200_000)),
            ("4000".into(), "Revenue".into(), dec!(1_500_000)),
            ("5000".into(), "Cost of Goods Sold".into(), dec!(900_000)),
            ("1000".into(), "Cash".into(), dec!(150_000)),
            ("3000".into(), "Retained Earnings".into(), dec!(400_000)),
            ("6000".into(), "Operating Expenses".into(), dec!(250_000)),
        ]
    }

    #[test]
    fn test_comparatives_generated() {
        // `gen` is a reserved keyword from edition 2024 on, hence `generator`.
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let comps = generator.generate_comparatives("C001", 2025, &balances);

        assert_eq!(comps.len(), balances.len());
        for comp in &comps {
            assert_eq!(comp.entity_code, "C001");
            assert_eq!(comp.period, "2025-12");
            assert!(!comp.account_code.is_empty());
            assert!(!comp.account_name.is_empty());
        }
    }

    #[test]
    fn test_variance_distribution() {
        // Generate many comparatives and verify:
        // 1. At least 40% of the variances are within +/- 50%
        // 2. The median variance is moderate (within +/- 50%)
        //
        // Note: ~30% of prior-year amounts get a Benford first-digit
        // adjustment, which can shift values significantly (e.g. first
        // digit 1 -> 5). Unadjusted amounts follow N(3%, 12%).
        let mut generator = PriorYearGenerator::new(123);
        let balances = sample_balances();

        let mut all_pcts = Vec::new();
        for _ in 0..50 {
            let comps = generator.generate_comparatives("C001", 2025, &balances);
            all_pcts.extend(comps.iter().map(|c| c.variance_pct));
        }

        let within_50 = all_pcts.iter().filter(|p| p.abs() < 50.0).count();
        let ratio = within_50 as f64 / all_pcts.len() as f64;
        assert!(
            ratio > 0.40,
            "Expected >40% of variances within 50%, got {:.1}%",
            ratio * 100.0
        );

        let mut sorted = all_pcts.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let median = sorted[sorted.len() / 2];
        assert!(
            median.abs() < 50.0,
            "Expected median variance within 50%, got {:.2}%",
            median
        );
    }

    #[test]
    fn test_comparatives_arithmetic() {
        let mut generator = PriorYearGenerator::new(77);
        let balances = sample_balances();
        let comps = generator.generate_comparatives("C001", 2025, &balances);

        for comp in &comps {
            // variance = current - prior
            let expected_variance = comp.current_year_amount - comp.prior_year_amount;
            assert_eq!(
                comp.variance, expected_variance,
                "Variance mismatch for account {}",
                comp.account_code
            );

            // variance_pct = (current - prior) / |prior| * 100
            if !comp.prior_year_amount.is_zero() {
                let prior_abs_f64 = decimal_to_f64(comp.prior_year_amount).abs();
                if prior_abs_f64 > 1e-10 {
                    let expected_pct = decimal_to_f64(comp.variance) / prior_abs_f64 * 100.0;
                    let diff = (comp.variance_pct - expected_pct).abs();
                    assert!(
                        diff < 0.01,
                        "Variance pct mismatch for {}: got {}, expected {}",
                        comp.account_code,
                        comp.variance_pct,
                        expected_pct
                    );
                }
            }
        }
    }

    #[test]
    fn test_findings_generated() {
        let mut generator = PriorYearGenerator::new(42);
        let findings = generator.generate_findings("C001", 2025);

        assert!(
            findings.len() >= 3 && findings.len() <= 8,
            "Expected 3-8 findings, got {}",
            findings.len()
        );

        for f in &findings {
            assert_eq!(f.fiscal_year, 2024);
            assert!(!f.finding_type.is_empty());
            assert!(!f.description.is_empty());
            assert!(!f.status.is_empty());
            assert!(!f.risk_area.is_empty());
        }
    }

    #[test]
    fn test_finding_status_distribution() {
        // Run many seeds and check that every status shows up.
        let mut status_counts: HashMap<String, usize> = HashMap::new();
        for seed in 0..50_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            let findings = generator.generate_findings("C001", 2025);
            for f in &findings {
                *status_counts.entry(f.status.clone()).or_insert(0) += 1;
            }
        }

        // 50 runs yield at least 150 findings; even the rarest status (10%)
        // is then missing with probability below 1e-6.
        for expected in ["remediated", "open", "partially_remediated", "recurring"] {
            assert!(
                status_counts.contains_key(expected),
                "Missing '{}' status",
                expected
            );
        }
    }

    #[test]
    fn test_summary_consistent() {
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = generator.generate_summary("C001", 2025, &balances);

        assert_eq!(summary.fiscal_year, 2024);
        assert_eq!(summary.entity_code, "C001");
        assert_eq!(summary.total_findings, summary.findings.len());

        // open_findings should match actual open/recurring count
        let actual_open = summary
            .findings
            .iter()
            .filter(|f| f.status == "open" || f.status == "recurring")
            .count();
        assert_eq!(
            summary.open_findings, actual_open,
            "open_findings {} doesn't match actual open/recurring count {}",
            summary.open_findings, actual_open
        );

        // Comparatives should match input size
        assert_eq!(summary.comparatives.len(), balances.len());

        // The generator picks between 2 and 4 key audit matters
        assert!(
            (2..=4).contains(&summary.key_audit_matters.len()),
            "Expected 2-4 key audit matters, got {}",
            summary.key_audit_matters.len()
        );

        // Opinion should be a valid type
        let valid_opinions = ["unmodified", "qualified", "adverse", "disclaimer"];
        assert!(
            valid_opinions.contains(&summary.opinion_type.as_str()),
            "Invalid opinion type: {}",
            summary.opinion_type
        );

        for f in &summary.findings {
            let is_open = f.status == "open" || f.status == "recurring";

            // Follow-up is required exactly for open/recurring findings
            assert_eq!(
                f.follow_up_required, is_open,
                "Finding {} with status {} has follow_up_required={}",
                f.finding_id, f.status, f.follow_up_required
            );

            // Fully or partially remediated findings carry a remediation
            // date; open and recurring ones do not
            if f.status == "remediated" || f.status == "partially_remediated" {
                assert!(
                    f.remediation_date.is_some(),
                    "Remediated finding {} should have a remediation_date",
                    f.finding_id
                );
            } else {
                assert!(
                    f.remediation_date.is_none(),
                    "Finding {} with status {} should not have a remediation_date",
                    f.finding_id,
                    f.status
                );
            }
        }
    }

    #[test]
    fn test_prior_year_amounts_benford() {
        // Check that prior-year amounts roughly follow Benford's first-digit
        // law. We generate many comparatives and tally the first digit.
        let mut digit_counts = [0_usize; 10]; // index 0 unused

        for seed in 0..100_u64 {
            let mut generator = PriorYearGenerator::new(seed);
            let balances = sample_balances();
            let comps = generator.generate_comparatives("C001", 2025, &balances);
            for comp in &comps {
                let abs_str = decimal_to_f64(comp.prior_year_amount).abs().to_string();
                let first_digit = abs_str
                    .chars()
                    .find(|ch| ch.is_ascii_digit() && *ch != '0')
                    .and_then(|ch| ch.to_digit(10));
                if let Some(digit) = first_digit {
                    digit_counts[digit as usize] += 1;
                }
            }
        }

        // Fail loudly instead of silently passing when too little data exists.
        let total: usize = digit_counts[1..].iter().sum();
        assert!(
            total >= 50,
            "Not enough non-zero prior-year amounts to test Benford conformity: {}",
            total
        );

        // Benford expected frequencies
        let benford_expected = [
            0.0, 0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
        ];

        // Digit 1 must not be under-represented (basic Benford sanity)
        let freq_1 = digit_counts[1] as f64 / total as f64;
        assert!(
            freq_1 > 0.15,
            "Digit 1 frequency {:.3} is too low for Benford (expected ~{:.3})",
            freq_1,
            benford_expected[1]
        );

        // Check mean absolute deviation (MAD) is reasonable
        // Benford conformity: MAD < 0.015 is close, < 0.04 is acceptable
        let mad = digit_counts[1..]
            .iter()
            .zip(benford_expected[1..].iter())
            .map(|(&count, &expected)| (count as f64 / total as f64 - expected).abs())
            .sum::<f64>()
            / 9.0;

        // Use a generous threshold since we're adjusting rather than directly
        // sampling Benford; 0.06 allows for some variance in small samples
        assert!(
            mad < 0.06,
            "Benford MAD {:.4} is too high (expected < 0.06)",
            mad
        );
    }

    #[test]
    fn test_serialization_roundtrip() {
        let mut generator = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = generator.generate_summary("C001", 2025, &balances);

        let json = serde_json::to_string(&summary).expect("serialize");
        let parsed: PriorYearSummary = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(summary.fiscal_year, parsed.fiscal_year);
        assert_eq!(summary.entity_code, parsed.entity_code);
        assert_eq!(summary.opinion_type, parsed.opinion_type);
        assert_eq!(summary.total_findings, parsed.total_findings);
        assert_eq!(summary.open_findings, parsed.open_findings);
        assert_eq!(summary.comparatives.len(), parsed.comparatives.len());
        assert_eq!(summary.findings.len(), parsed.findings.len());

        for (orig, rt) in summary.findings.iter().zip(parsed.findings.iter()) {
            assert_eq!(orig.finding_id, rt.finding_id);
            assert_eq!(orig.finding_type, rt.finding_type);
            assert_eq!(orig.status, rt.status);
        }
    }
}
// datasynth_generators/prior_year_generator.rs