datasynth_generators/
prior_year_generator.rs

1//! Prior-year comparative data generator (WI-2).
2//!
3//! Generates prior-year balances, audit findings, and engagement summaries
4//! from current-year account data. Supports ISA 315 (risk assessment via
5//! year-over-year comparison) and ISA 520 (analytical procedures).
6
7use chrono::NaiveDate;
8use datasynth_core::distributions::{AmountDistributionConfig, AmountSampler};
9use datasynth_core::models::{PriorYearComparative, PriorYearFinding, PriorYearSummary};
10use datasynth_core::utils::seeded_rng;
11use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
12use rand::prelude::*;
13use rand_chacha::ChaCha8Rng;
14use rand_distr::{Distribution, Normal};
15use rust_decimal::Decimal;
16
17// ---------------------------------------------------------------------------
18// Finding description templates
19// ---------------------------------------------------------------------------
20
/// Description templates for generated findings.
///
/// Each entry is `(finding_type, risk_area, description)`. The table holds
/// exactly one template for every combination of the four finding types and
/// the five risk areas used by this module (20 entries in total).
const FINDING_DESCRIPTIONS: &[(&str, &str, &str)] = &[
    // control_deficiency
    (
        "control_deficiency",
        "revenue",
        "Insufficient segregation of duties in revenue posting process",
    ),
    (
        "control_deficiency",
        "receivables",
        "Lack of timely reconciliation of accounts receivable subsidiary ledger",
    ),
    (
        "control_deficiency",
        "payables",
        "Missing secondary approval for vendor master data changes",
    ),
    (
        "control_deficiency",
        "inventory",
        "Cycle count procedures not performed on schedule for high-value items",
    ),
    (
        "control_deficiency",
        "estimates",
        "No formal review process for management's key accounting estimates",
    ),
    // misstatement
    (
        "misstatement",
        "revenue",
        "Revenue recognised before transfer of control per ASC 606 criteria",
    ),
    (
        "misstatement",
        "receivables",
        "Overstatement of accounts receivable due to improper cutoff at period end",
    ),
    (
        "misstatement",
        "payables",
        "Unrecorded liabilities identified through subsequent disbursement testing",
    ),
    (
        "misstatement",
        "inventory",
        "Inventory obsolescence reserve understated based on ageing analysis",
    ),
    (
        "misstatement",
        "estimates",
        "Fair value measurement for Level 3 assets not supported by observable inputs",
    ),
    // significant_deficiency
    (
        "significant_deficiency",
        "revenue",
        "Percentage-of-completion estimates lack corroborating project data",
    ),
    (
        "significant_deficiency",
        "receivables",
        "Expected credit loss model uses outdated forward-looking information",
    ),
    (
        "significant_deficiency",
        "payables",
        "Automated three-way match tolerance set above materiality threshold",
    ),
    (
        "significant_deficiency",
        "inventory",
        "Standard cost variances not analysed or allocated on a timely basis",
    ),
    (
        "significant_deficiency",
        "estimates",
        "Inadequate documentation of key assumptions in impairment model",
    ),
    // material_weakness
    (
        "material_weakness",
        "revenue",
        "Pervasive override of revenue recognition controls by senior management",
    ),
    (
        "material_weakness",
        "receivables",
        "Systematic failure to record allowance for doubtful accounts",
    ),
    (
        "material_weakness",
        "payables",
        "Duplicate payments processed without detection across multiple periods",
    ),
    (
        "material_weakness",
        "inventory",
        "Physical inventory counts not reconciled to perpetual records for the full year",
    ),
    (
        "material_weakness",
        "estimates",
        "Material misstatement in goodwill impairment due to unsubstantiated growth assumptions",
    ),
];
128
/// Key audit matter (KAM) titles.
///
/// `generate_summary` selects 2-4 distinct entries from this pool and reports
/// them in the order in which they are listed here.
const KAM_POOL: &[&str] = &[
    "Revenue recognition",
    "Goodwill impairment",
    "Expected credit losses",
    "Inventory valuation",
    "Provisions and contingencies",
    "Fair value measurement of financial instruments",
    "Business combination purchase price allocation",
    "Going concern assessment",
    "Tax provisions and uncertain tax positions",
    "Lease accounting transition",
];
142
/// Finding types paired with *cumulative* probability thresholds.
///
/// The table is consumed by `weighted_pick`, which returns the first entry
/// whose threshold exceeds a uniform roll. The implied individual weights are
/// 40% / 30% / 20% / 10%; thresholds must therefore stay in ascending order.
const FINDING_TYPES: &[(&str, f64)] = &[
    ("control_deficiency", 0.40),
    ("misstatement", 0.70),
    ("significant_deficiency", 0.90),
    ("material_weakness", 1.00),
];
150
/// Finding statuses paired with *cumulative* probability thresholds.
///
/// The table is consumed by `weighted_pick`. The implied individual weights
/// are 50% remediated, 20% open, 20% partially remediated and 10% recurring;
/// thresholds must stay in ascending order.
const FINDING_STATUSES: &[(&str, f64)] = &[
    ("remediated", 0.50),
    ("open", 0.70),
    ("partially_remediated", 0.90),
    ("recurring", 1.00),
];
158
/// Risk areas paired with *cumulative* probability thresholds.
///
/// The second tuple element is a running total, not an individual weight:
/// the implied weights are 30% revenue, 20% receivables, 20% estimates,
/// 15% payables and 15% inventory. The table is consumed by `weighted_pick`,
/// so thresholds must stay in ascending order.
const RISK_AREAS: &[(&str, f64)] = &[
    ("revenue", 0.30),
    ("receivables", 0.50),
    ("estimates", 0.70),
    ("payables", 0.85),
    ("inventory", 1.00),
];
167
/// Generates prior-year comparative data from current-year balances.
///
/// All three components are initialised from the single seed passed to
/// `PriorYearGenerator::new`.
pub struct PriorYearGenerator {
    /// Random stream used for growth rates, weighted picks, date offsets,
    /// the opinion roll, the materiality percentage and KAM selection.
    rng: ChaCha8Rng,
    /// Source of the `finding_id` assigned to each generated finding.
    uuid_factory: DeterministicUuidFactory,
    /// Sampler that supplies the optional monetary amount on a finding.
    amount_sampler: AmountSampler,
}
174
175impl PriorYearGenerator {
176    /// Create a new generator with the given seed.
177    pub fn new(seed: u64) -> Self {
178        Self {
179            rng: seeded_rng(seed, 0x4E00),
180            uuid_factory: DeterministicUuidFactory::new(seed, GeneratorType::PriorYear),
181            amount_sampler: AmountSampler::with_benford(
182                seed.wrapping_add(0x4E01),
183                AmountDistributionConfig::default(),
184            ),
185        }
186    }
187
188    /// Generate prior-year comparative data from current-year account balances.
189    ///
190    /// For each account the prior-year amount is derived by applying a realistic
191    /// year-over-year growth factor drawn from N(0.03, 0.12). The prior-year
192    /// amount is then adjusted to follow Benford's law on its first digit.
193    pub fn generate_comparatives(
194        &mut self,
195        entity_code: &str,
196        fiscal_year: i32,
197        current_balances: &[(String, String, Decimal)],
198    ) -> Vec<PriorYearComparative> {
199        let normal = Normal::new(0.03_f64, 0.12_f64).expect("valid normal params");
200        let period = format!("{}-12", fiscal_year);
201
202        current_balances
203            .iter()
204            .map(|(code, name, current)| {
205                // Derive prior year: prior = current / (1 + growth)
206                // where growth ~ N(0.03, 0.12)
207                let growth: f64 = normal.sample(&mut self.rng);
208                let divisor = 1.0 + growth;
209                let current_f64 = decimal_to_f64(*current);
210
211                // Compute raw prior-year amount
212                let raw_prior = if divisor.abs() < 1e-10 {
213                    current_f64
214                } else {
215                    current_f64 / divisor
216                };
217
218                // Apply Benford-compliant first-digit nudge.
219                // With 30% probability, replace the leading digit with a
220                // Benford-sampled digit to ensure the aggregate distribution
221                // conforms to Benford's law. The remaining 70% are left
222                // as-is (log-normal variance already trends Benford).
223                let prior_f64 = if raw_prior.abs() > 10.0 && self.rng.random_bool(0.30) {
224                    benford_first_digit_adjust(raw_prior, &mut self.rng)
225                } else {
226                    raw_prior
227                };
228
229                let prior = f64_to_decimal(prior_f64);
230                let variance = *current - prior;
231                let variance_pct = if prior.is_zero() {
232                    0.0
233                } else {
234                    let prior_abs_f64 = decimal_to_f64(prior).abs();
235                    if prior_abs_f64 < 1e-10 {
236                        0.0
237                    } else {
238                        decimal_to_f64(variance) / prior_abs_f64 * 100.0
239                    }
240                };
241
242                PriorYearComparative {
243                    account_code: code.clone(),
244                    account_name: name.clone(),
245                    current_year_amount: *current,
246                    prior_year_amount: prior,
247                    variance,
248                    variance_pct,
249                    entity_code: entity_code.to_string(),
250                    period: period.clone(),
251                }
252            })
253            .collect()
254    }
255
256    /// Generate prior-year audit findings.
257    ///
258    /// Produces 3-8 findings with realistic distributions across finding types,
259    /// statuses, and risk areas.
260    pub fn generate_findings(
261        &mut self,
262        entity_code: &str,
263        fiscal_year: i32,
264    ) -> Vec<PriorYearFinding> {
265        let count = self.rng.random_range(3..=8_usize);
266        let prior_year = fiscal_year - 1;
267
268        (0..count)
269            .map(|_| {
270                let finding_type = weighted_pick(&mut self.rng, FINDING_TYPES);
271                let status = weighted_pick(&mut self.rng, FINDING_STATUSES);
272                let risk_area = weighted_pick(&mut self.rng, RISK_AREAS);
273
274                let description = self.pick_description(finding_type, risk_area);
275
276                // Open and recurring findings require follow-up
277                let follow_up_required = status == "open" || status == "recurring";
278
279                // Remediated findings get a remediation date
280                let remediation_date = if status == "remediated" || status == "partially_remediated"
281                {
282                    // Remediation happened between the prior year-end and
283                    // 6 months into the current year
284                    let day_offset = self.rng.random_range(30..=270_i64);
285                    NaiveDate::from_ymd_opt(prior_year, 12, 31)
286                        .and_then(|d| d.checked_add_signed(chrono::Duration::days(day_offset)))
287                } else {
288                    None
289                };
290
291                // Misstatements and material weaknesses always have an amount;
292                // other finding types have a 30% chance.
293                let has_amount = finding_type == "misstatement"
294                    || finding_type == "material_weakness"
295                    || self.rng.random_bool(0.3);
296                let original_amount = if has_amount {
297                    Some(self.amount_sampler.sample())
298                } else {
299                    None
300                };
301
302                let _entity = entity_code; // used for context
303                PriorYearFinding {
304                    finding_id: self.uuid_factory.next(),
305                    fiscal_year: prior_year,
306                    finding_type: finding_type.to_string(),
307                    description,
308                    status: status.to_string(),
309                    risk_area: risk_area.to_string(),
310                    original_amount,
311                    remediation_date,
312                    follow_up_required,
313                }
314            })
315            .collect()
316    }
317
318    /// Generate a complete prior-year summary including comparatives, findings,
319    /// and the prior-year engagement metadata.
320    pub fn generate_summary(
321        &mut self,
322        entity_code: &str,
323        fiscal_year: i32,
324        current_balances: &[(String, String, Decimal)],
325    ) -> PriorYearSummary {
326        let comparatives = self.generate_comparatives(entity_code, fiscal_year, current_balances);
327        let findings = self.generate_findings(entity_code, fiscal_year);
328        let open = findings
329            .iter()
330            .filter(|f| f.status == "open" || f.status == "recurring")
331            .count();
332
333        // Opinion type: 90% unmodified, 8% qualified, 2% adverse/disclaimer
334        let opinion_roll: f64 = self.rng.random();
335        let opinion_type = if opinion_roll < 0.90 {
336            "unmodified"
337        } else if opinion_roll < 0.98 {
338            "qualified"
339        } else {
340            "adverse"
341        };
342
343        // Derive materiality from the total absolute current-year amounts
344        // (roughly 1-2% of total revenue/assets)
345        let total_abs: f64 = current_balances
346            .iter()
347            .map(|(_, _, amt)| decimal_to_f64(*amt).abs())
348            .sum();
349        let materiality_pct = 0.01 + self.rng.random::<f64>() * 0.01; // 1-2%
350        let materiality = f64_to_decimal(total_abs * materiality_pct);
351
352        // Pick 2-4 KAMs
353        let kam_count = self.rng.random_range(2..=4_usize).min(KAM_POOL.len());
354        let mut kam_indices: Vec<usize> = (0..KAM_POOL.len()).collect();
355        kam_indices.shuffle(&mut self.rng);
356        kam_indices.truncate(kam_count);
357        kam_indices.sort_unstable();
358        let key_audit_matters: Vec<String> = kam_indices
359            .iter()
360            .map(|&i| KAM_POOL[i].to_string())
361            .collect();
362
363        PriorYearSummary {
364            fiscal_year: fiscal_year - 1,
365            entity_code: entity_code.to_string(),
366            opinion_type: opinion_type.to_string(),
367            materiality,
368            total_findings: findings.len(),
369            open_findings: open,
370            key_audit_matters,
371            comparatives,
372            findings,
373        }
374    }
375
376    /// Pick a finding description that matches the given type and risk area.
377    fn pick_description(&mut self, finding_type: &str, risk_area: &str) -> String {
378        // Find all matching templates
379        let matches: Vec<&str> = FINDING_DESCRIPTIONS
380            .iter()
381            .filter(|(ft, ra, _)| *ft == finding_type && *ra == risk_area)
382            .map(|(_, _, desc)| *desc)
383            .collect();
384
385        if matches.is_empty() {
386            // Fallback: pick any description for the finding type
387            let type_matches: Vec<&str> = FINDING_DESCRIPTIONS
388                .iter()
389                .filter(|(ft, _, _)| *ft == finding_type)
390                .map(|(_, _, desc)| *desc)
391                .collect();
392            if type_matches.is_empty() {
393                return format!("Prior-year {} in {} area", finding_type, risk_area);
394            }
395            let idx = self.rng.random_range(0..type_matches.len());
396            return type_matches[idx].to_string();
397        }
398
399        let idx = self.rng.random_range(0..matches.len());
400        matches[idx].to_string()
401    }
402}
403
404// ---------------------------------------------------------------------------
405// Helpers
406// ---------------------------------------------------------------------------
407
408/// Pick from a weighted list using cumulative weights.
409fn weighted_pick<'a>(rng: &mut ChaCha8Rng, items: &[(&'a str, f64)]) -> &'a str {
410    let roll: f64 = rng.random();
411    for (item, threshold) in items {
412        if roll < *threshold {
413            return item;
414        }
415    }
416    items.last().map(|(item, _)| *item).unwrap_or("unknown")
417}
418
/// Benford's law first-digit probabilities, rounded to three decimals.
///
/// Index `i` holds the probability of leading digit `i + 1`, so the array
/// covers digits 1-9. The rounded values sum to 1.000.
const BENFORD_PROBS: [f64; 9] = [
    0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
];
423
424/// Sample a first digit (1-9) according to Benford's law.
425fn sample_benford_digit(rng: &mut ChaCha8Rng) -> u32 {
426    let roll: f64 = rng.random();
427    let mut cumulative = 0.0;
428    for (i, &p) in BENFORD_PROBS.iter().enumerate() {
429        cumulative += p;
430        if roll < cumulative {
431            return (i + 1) as u32;
432        }
433    }
434    9
435}
436
437/// Adjust the first significant digit of a value to follow Benford's law.
438///
439/// This preserves the order of magnitude and the lower digits, making only
440/// a small perturbation that keeps the prior-year amount close to the raw
441/// variance-derived value.
442fn benford_first_digit_adjust(raw: f64, rng: &mut ChaCha8Rng) -> f64 {
443    let abs_raw = raw.abs();
444    if abs_raw < 1.0 {
445        return raw;
446    }
447
448    let magnitude = abs_raw.log10().floor() as i32;
449    let scale = 10_f64.powi(magnitude);
450
451    // Current first digit (1-9)
452    let normalised = abs_raw / scale; // value in [1.0, 10.0)
453    let current_first = normalised.floor() as u32;
454
455    // Sample a Benford-distributed first digit
456    let benford_digit = sample_benford_digit(rng);
457
458    // Replace the first digit while preserving the fractional part
459    let fractional = normalised - current_first as f64; // in [0.0, 1.0)
460    let adjusted = (benford_digit as f64 + fractional) * scale;
461
462    if raw < 0.0 {
463        -adjusted
464    } else {
465        adjusted
466    }
467}
468
469fn decimal_to_f64(d: Decimal) -> f64 {
470    use std::str::FromStr;
471    f64::from_str(&d.to_string()).unwrap_or(0.0)
472}
473
474fn f64_to_decimal(v: f64) -> Decimal {
475    use rust_decimal::prelude::FromPrimitive;
476    Decimal::from_f64(v).unwrap_or(Decimal::ZERO).round_dp(2)
477}
478
479// ---------------------------------------------------------------------------
480// Tests
481// ---------------------------------------------------------------------------
482
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rust_decimal_macros::dec;
    use std::collections::HashMap;

    /// Eight representative `(code, name, amount)` balances shared by the tests.
    fn sample_balances() -> Vec<(String, String, Decimal)> {
        vec![
            ("1100".into(), "Accounts Receivable".into(), dec!(500_000)),
            ("1200".into(), "Inventory".into(), dec!(300_000)),
            ("2000".into(), "Accounts Payable".into(), dec!(200_000)),
            ("4000".into(), "Revenue".into(), dec!(1_500_000)),
            ("5000".into(), "Cost of Goods Sold".into(), dec!(900_000)),
            ("1000".into(), "Cash".into(), dec!(150_000)),
            ("3000".into(), "Retained Earnings".into(), dec!(400_000)),
            ("6000".into(), "Operating Expenses".into(), dec!(250_000)),
        ]
    }

    /// One comparative per input balance, each tagged with entity and period.
    #[test]
    fn test_comparatives_generated() {
        let mut gen = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let comps = gen.generate_comparatives("C001", 2025, &balances);

        assert_eq!(comps.len(), balances.len());
        for comp in &comps {
            assert_eq!(comp.entity_code, "C001");
            assert_eq!(comp.period, "2025-12");
            assert!(!comp.account_code.is_empty());
            assert!(!comp.account_name.is_empty());
        }
    }

    /// Loose sanity bounds on the spread of generated variance percentages.
    #[test]
    fn test_variance_distribution() {
        // Generate many comparatives and verify:
        // 1. Most variances are within a reasonable range (< 50%)
        // 2. The median variance is moderate (near 0)
        //
        // Note: ~30% of prior-year amounts get a Benford first-digit
        // adjustment, which can shift values significantly (e.g. first
        // digit 1 → 5). Unadjusted amounts follow N(3%, 12%).
        let mut gen = PriorYearGenerator::new(123);
        let balances = sample_balances();

        let mut all_pcts = Vec::new();
        for _ in 0..50 {
            let comps = gen.generate_comparatives("C001", 2025, &balances);
            for c in &comps {
                all_pcts.push(c.variance_pct);
            }
        }

        // At least 40% should be within 50%
        let within_50 = all_pcts.iter().filter(|p| p.abs() < 50.0).count();
        let ratio = within_50 as f64 / all_pcts.len() as f64;
        assert!(
            ratio > 0.40,
            "Expected >40% of variances within 50%, got {:.1}%",
            ratio * 100.0
        );

        // Median should be moderate (within +/- 50%)
        let mut sorted = all_pcts.clone();
        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        let median = sorted[sorted.len() / 2];
        assert!(
            median.abs() < 50.0,
            "Expected median variance within 50%, got {:.2}%",
            median
        );
    }

    /// `variance` and `variance_pct` agree with the stored amounts.
    #[test]
    fn test_comparatives_arithmetic() {
        let mut gen = PriorYearGenerator::new(77);
        let balances = sample_balances();
        let comps = gen.generate_comparatives("C001", 2025, &balances);

        for comp in &comps {
            // variance = current - prior
            let expected_variance = comp.current_year_amount - comp.prior_year_amount;
            assert_eq!(
                comp.variance, expected_variance,
                "Variance mismatch for account {}",
                comp.account_code
            );

            // variance_pct = (current - prior) / |prior| * 100
            if !comp.prior_year_amount.is_zero() {
                let prior_abs_f64 = decimal_to_f64(comp.prior_year_amount).abs();
                if prior_abs_f64 > 1e-10 {
                    let expected_pct = decimal_to_f64(comp.variance) / prior_abs_f64 * 100.0;
                    let diff = (comp.variance_pct - expected_pct).abs();
                    assert!(
                        diff < 0.01,
                        "Variance pct mismatch for {}: got {}, expected {}",
                        comp.account_code,
                        comp.variance_pct,
                        expected_pct
                    );
                }
            }
        }
    }

    /// Finding count is 3-8 and every finding is dated to the prior year.
    #[test]
    fn test_findings_generated() {
        let mut gen = PriorYearGenerator::new(42);
        let findings = gen.generate_findings("C001", 2025);

        assert!(
            findings.len() >= 3 && findings.len() <= 8,
            "Expected 3-8 findings, got {}",
            findings.len()
        );

        for f in &findings {
            assert_eq!(f.fiscal_year, 2024);
            assert!(!f.finding_type.is_empty());
            assert!(!f.description.is_empty());
            assert!(!f.status.is_empty());
            assert!(!f.risk_area.is_empty());
        }
    }

    /// A mix of statuses shows up when many seeds are sampled.
    #[test]
    fn test_finding_status_distribution() {
        // Run many times and check that we see a mix of statuses
        let mut status_counts: HashMap<String, usize> = HashMap::new();
        for seed in 0..50_u64 {
            let mut gen = PriorYearGenerator::new(seed);
            let findings = gen.generate_findings("C001", 2025);
            for f in &findings {
                *status_counts.entry(f.status.clone()).or_insert(0) += 1;
            }
        }

        // All four statuses are expected across 50 runs, but only the
        // presence of "remediated" and "open" is asserted here.
        assert!(
            status_counts.contains_key("remediated"),
            "Missing 'remediated' status"
        );
        assert!(status_counts.contains_key("open"), "Missing 'open' status");

        // At least 2 distinct statuses (very conservative)
        assert!(
            status_counts.len() >= 2,
            "Expected at least 2 distinct statuses, got {}",
            status_counts.len()
        );
    }

    /// Summary metadata is internally consistent with its findings.
    #[test]
    fn test_summary_consistent() {
        let mut gen = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = gen.generate_summary("C001", 2025, &balances);

        assert_eq!(summary.fiscal_year, 2024);
        assert_eq!(summary.entity_code, "C001");
        assert_eq!(summary.total_findings, summary.findings.len());

        // open_findings should match actual open/recurring count
        let actual_open = summary
            .findings
            .iter()
            .filter(|f| f.status == "open" || f.status == "recurring")
            .count();
        assert_eq!(
            summary.open_findings, actual_open,
            "open_findings {} doesn't match actual open/recurring count {}",
            summary.open_findings, actual_open
        );

        // Comparatives should match input size
        assert_eq!(summary.comparatives.len(), balances.len());

        // Key audit matters should be non-empty
        assert!(!summary.key_audit_matters.is_empty());

        // Opinion should be a valid type
        let valid_opinions = ["unmodified", "qualified", "adverse", "disclaimer"];
        assert!(
            valid_opinions.contains(&summary.opinion_type.as_str()),
            "Invalid opinion type: {}",
            summary.opinion_type
        );

        // Open findings must have follow_up_required = true
        for f in &summary.findings {
            if f.status == "open" || f.status == "recurring" {
                assert!(
                    f.follow_up_required,
                    "Open/recurring finding {} should have follow_up_required=true",
                    f.finding_id
                );
            }
        }

        // Remediated findings should have remediation_date set
        for f in &summary.findings {
            if f.status == "remediated" {
                assert!(
                    f.remediation_date.is_some(),
                    "Remediated finding {} should have a remediation_date",
                    f.finding_id
                );
            }
        }
    }

    /// First digits of prior-year amounts stay loosely Benford-shaped.
    #[test]
    fn test_prior_year_amounts_benford() {
        // Check that prior-year amounts follow Benford's first-digit law.
        // We generate many comparatives and tally the first digit.
        let mut digit_counts = [0_usize; 10]; // index 0 unused

        for seed in 0..100_u64 {
            let mut gen = PriorYearGenerator::new(seed);
            let balances = sample_balances();
            let comps = gen.generate_comparatives("C001", 2025, &balances);
            for c in &comps {
                // First non-zero digit of the printed absolute value.
                let abs_str = decimal_to_f64(c.prior_year_amount).abs().to_string();
                if let Some(first_char) = abs_str.chars().find(|c| c.is_ascii_digit() && *c != '0')
                {
                    let digit = first_char.to_digit(10).unwrap_or(0) as usize;
                    if (1..=9).contains(&digit) {
                        digit_counts[digit] += 1;
                    }
                }
            }
        }

        let total: usize = digit_counts[1..].iter().sum();
        if total < 50 {
            // Not enough data to test
            return;
        }

        // Benford expected frequencies
        let benford_expected = [
            0.0, 0.301, 0.176, 0.125, 0.097, 0.079, 0.067, 0.058, 0.051, 0.046,
        ];

        // Check that digit 1 clears a loose 15% floor (basic Benford sanity).
        // This does not verify that digit 1 is the most frequent digit.
        let freq_1 = digit_counts[1] as f64 / total as f64;
        assert!(
            freq_1 > 0.15,
            "Digit 1 frequency {:.3} is too low for Benford (expected ~{:.3})",
            freq_1,
            benford_expected[1]
        );

        // Check mean absolute deviation (MAD) is reasonable
        // Benford conformity: MAD < 0.015 is close, < 0.04 is acceptable
        let mut mad = 0.0;
        for d in 1..=9 {
            let observed = digit_counts[d] as f64 / total as f64;
            mad += (observed - benford_expected[d]).abs();
        }
        mad /= 9.0;

        // Use a generous threshold since we're adjusting rather than directly
        // sampling Benford — 0.06 allows for some variance in small samples
        assert!(
            mad < 0.06,
            "Benford MAD {:.4} is too high (expected < 0.06)",
            mad
        );
    }

    /// A summary survives a JSON serialize/deserialize round trip.
    #[test]
    fn test_serialization_roundtrip() {
        let mut gen = PriorYearGenerator::new(42);
        let balances = sample_balances();
        let summary = gen.generate_summary("C001", 2025, &balances);

        let json = serde_json::to_string(&summary).expect("serialize");
        let parsed: PriorYearSummary = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(summary.fiscal_year, parsed.fiscal_year);
        assert_eq!(summary.entity_code, parsed.entity_code);
        assert_eq!(summary.opinion_type, parsed.opinion_type);
        assert_eq!(summary.total_findings, parsed.total_findings);
        assert_eq!(summary.open_findings, parsed.open_findings);
        assert_eq!(summary.comparatives.len(), parsed.comparatives.len());
        assert_eq!(summary.findings.len(), parsed.findings.len());

        for (orig, rt) in summary.findings.iter().zip(parsed.findings.iter()) {
            assert_eq!(orig.finding_id, rt.finding_id);
            assert_eq!(orig.finding_type, rt.finding_type);
            assert_eq!(orig.status, rt.status);
        }
    }
}
datasynth_generators/prior_year_generator.rs

datasynth_generators/
prior_year_generator.rs