// datasynth_generators/industry_benchmark_generator.rs

//! Industry benchmark generator.
//!
//! Generates synthetic industry-average metrics for comparative analysis.
//! Auditors use these benchmarks under ISA 520 (Analytical Procedures) to
//! evaluate an entity's financial ratios against sector norms.
6
7use datasynth_core::models::IndustryBenchmark;
8use datasynth_core::utils::seeded_rng;
9use rand::prelude::*;
10use rand_chacha::ChaCha8Rng;
11use rust_decimal::Decimal;
12
/// Template describing one benchmark metric and how far it may be perturbed.
///
/// [`IndustryBenchmarkGenerator::generate`] draws a uniform value `u` in
/// `[-1.0, 1.0)` and emits `base * (1.0 + u * sigma)`, clamped at zero.
/// `sigma` is therefore the maximum *relative* deviation from `base`
/// (`0.30` means roughly ±30 %), not a standard deviation expressed in the
/// metric's own units.
#[derive(Debug, Clone, Copy, PartialEq)]
struct MetricDef {
    /// Metric identifier, copied verbatim into the generated record.
    name: &'static str,
    /// Central value the generated figure is perturbed around.
    base: f64,
    /// Maximum relative deviation from `base`, as a fraction (not a percent).
    sigma: f64,
}
19
20/// Retail industry benchmark definitions.
21const RETAIL_METRICS: &[MetricDef] = &[
22    MetricDef {
23        name: "median_revenue",
24        base: 50_000_000.0,
25        sigma: 0.30,
26    },
27    MetricDef {
28        name: "gross_margin_pct",
29        base: 0.35,
30        sigma: 0.05,
31    },
32    MetricDef {
33        name: "net_margin_pct",
34        base: 0.05,
35        sigma: 0.02,
36    },
37    MetricDef {
38        name: "current_ratio",
39        base: 1.5,
40        sigma: 0.30,
41    },
42    MetricDef {
43        name: "debt_to_equity",
44        base: 0.8,
45        sigma: 0.20,
46    },
47    MetricDef {
48        name: "revenue_growth_pct",
49        base: 0.03,
50        sigma: 0.02,
51    },
52    MetricDef {
53        name: "inventory_turnover",
54        base: 8.0,
55        sigma: 2.0,
56    },
57    MetricDef {
58        name: "interest_rate_pct",
59        base: 0.045,
60        sigma: 0.01,
61    },
62    MetricDef {
63        name: "return_on_assets_pct",
64        base: 0.06,
65        sigma: 0.02,
66    },
67    MetricDef {
68        name: "days_sales_outstanding",
69        base: 35.0,
70        sigma: 8.0,
71    },
72];
73
74/// Manufacturing industry benchmark definitions.
75const MANUFACTURING_METRICS: &[MetricDef] = &[
76    MetricDef {
77        name: "median_revenue",
78        base: 100_000_000.0,
79        sigma: 0.30,
80    },
81    MetricDef {
82        name: "gross_margin_pct",
83        base: 0.30,
84        sigma: 0.05,
85    },
86    MetricDef {
87        name: "net_margin_pct",
88        base: 0.07,
89        sigma: 0.02,
90    },
91    MetricDef {
92        name: "current_ratio",
93        base: 1.8,
94        sigma: 0.30,
95    },
96    MetricDef {
97        name: "debt_to_equity",
98        base: 0.6,
99        sigma: 0.20,
100    },
101    MetricDef {
102        name: "revenue_growth_pct",
103        base: 0.04,
104        sigma: 0.02,
105    },
106    MetricDef {
107        name: "inventory_turnover",
108        base: 5.0,
109        sigma: 1.5,
110    },
111    MetricDef {
112        name: "interest_rate_pct",
113        base: 0.04,
114        sigma: 0.01,
115    },
116    MetricDef {
117        name: "return_on_assets_pct",
118        base: 0.07,
119        sigma: 0.02,
120    },
121    MetricDef {
122        name: "asset_turnover",
123        base: 1.2,
124        sigma: 0.3,
125    },
126];
127
128/// Financial services industry benchmark definitions.
129const FINANCIAL_SERVICES_METRICS: &[MetricDef] = &[
130    MetricDef {
131        name: "median_revenue",
132        base: 200_000_000.0,
133        sigma: 0.30,
134    },
135    MetricDef {
136        name: "net_interest_margin_pct",
137        base: 0.03,
138        sigma: 0.005,
139    },
140    MetricDef {
141        name: "net_margin_pct",
142        base: 0.20,
143        sigma: 0.05,
144    },
145    MetricDef {
146        name: "tier1_capital_ratio",
147        base: 0.12,
148        sigma: 0.02,
149    },
150    MetricDef {
151        name: "cost_to_income_ratio",
152        base: 0.55,
153        sigma: 0.08,
154    },
155    MetricDef {
156        name: "loan_to_deposit_ratio",
157        base: 0.80,
158        sigma: 0.10,
159    },
160    MetricDef {
161        name: "return_on_equity_pct",
162        base: 0.10,
163        sigma: 0.03,
164    },
165    MetricDef {
166        name: "non_performing_loan_pct",
167        base: 0.02,
168        sigma: 0.01,
169    },
170    MetricDef {
171        name: "interest_rate_pct",
172        base: 0.05,
173        sigma: 0.01,
174    },
175    MetricDef {
176        name: "revenue_growth_pct",
177        base: 0.05,
178        sigma: 0.03,
179    },
180];
181
182/// Generic fallback benchmark definitions for unrecognized industries.
183const GENERIC_METRICS: &[MetricDef] = &[
184    MetricDef {
185        name: "median_revenue",
186        base: 75_000_000.0,
187        sigma: 0.30,
188    },
189    MetricDef {
190        name: "gross_margin_pct",
191        base: 0.40,
192        sigma: 0.08,
193    },
194    MetricDef {
195        name: "net_margin_pct",
196        base: 0.08,
197        sigma: 0.03,
198    },
199    MetricDef {
200        name: "current_ratio",
201        base: 1.6,
202        sigma: 0.30,
203    },
204    MetricDef {
205        name: "debt_to_equity",
206        base: 0.7,
207        sigma: 0.20,
208    },
209    MetricDef {
210        name: "revenue_growth_pct",
211        base: 0.04,
212        sigma: 0.02,
213    },
214    MetricDef {
215        name: "return_on_assets_pct",
216        base: 0.06,
217        sigma: 0.02,
218    },
219    MetricDef {
220        name: "interest_rate_pct",
221        base: 0.045,
222        sigma: 0.01,
223    },
224];
225
226/// Generates [`IndustryBenchmark`] records with industry-specific metrics
227/// perturbed around realistic base values.
228pub struct IndustryBenchmarkGenerator {
229    rng: ChaCha8Rng,
230}
231
232impl IndustryBenchmarkGenerator {
233    /// Create a new generator with the given seed.
234    pub fn new(seed: u64) -> Self {
235        Self {
236            rng: seeded_rng(seed, 0),
237        }
238    }
239
240    /// Generate benchmarks for the given industry and fiscal year.
241    ///
242    /// Returns 8-10 metrics depending on the industry, each perturbed
243    /// around a realistic base value.
244    pub fn generate(&mut self, industry: &str, fiscal_year: i32) -> Vec<IndustryBenchmark> {
245        let metrics = match industry.to_lowercase().as_str() {
246            "retail" => RETAIL_METRICS,
247            "manufacturing" => MANUFACTURING_METRICS,
248            "financial_services" | "financial services" => FINANCIAL_SERVICES_METRICS,
249            _ => GENERIC_METRICS,
250        };
251
252        let period = format!("FY{fiscal_year}");
253
254        metrics
255            .iter()
256            .map(|def| {
257                let noise: f64 = self.rng.random_range(-1.0..1.0) * def.sigma;
258                let raw = def.base * (1.0 + noise);
259                // Clamp to non-negative
260                let raw = if raw < 0.0 { 0.0 } else { raw };
261                let value = if raw.is_finite() {
262                    Decimal::from_f64_retain(raw)
263                        .unwrap_or(Decimal::ZERO)
264                        .round_dp(4)
265                } else {
266                    Decimal::ZERO
267                };
268
269                IndustryBenchmark {
270                    industry: industry.to_string(),
271                    metric: def.name.to_string(),
272                    value,
273                    source: "Industry Average (Synthetic)".to_string(),
274                    period: period.clone(),
275                }
276            })
277            .collect()
278    }
279}
280
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
284
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // Locals are named `generator` rather than `gen`: `gen` is a reserved
    // keyword from the Rust 2024 edition onwards.

    /// Collect the distinct metric names present in `benchmarks`.
    fn metric_names(benchmarks: &[IndustryBenchmark]) -> HashSet<&str> {
        benchmarks.iter().map(|b| b.metric.as_str()).collect()
    }

    #[test]
    fn test_generates_non_empty_output() {
        let mut generator = IndustryBenchmarkGenerator::new(42);
        let benchmarks = generator.generate("retail", 2025);
        assert!(!benchmarks.is_empty(), "should produce benchmarks");
        assert!(benchmarks.len() >= 8, "should produce at least 8 metrics");
    }

    #[test]
    fn test_industry_specific_content_differs() {
        let mut retail_gen = IndustryBenchmarkGenerator::new(42);
        let retail = retail_gen.generate("retail", 2025);

        let mut mfg_gen = IndustryBenchmarkGenerator::new(42);
        let manufacturing = mfg_gen.generate("manufacturing", 2025);

        // Different industries should have at least some different metric names.
        assert_ne!(
            metric_names(&retail),
            metric_names(&manufacturing),
            "retail and manufacturing metrics should differ"
        );
    }

    #[test]
    fn test_financial_services_has_unique_metrics() {
        let mut generator = IndustryBenchmarkGenerator::new(99);
        let fs = generator.generate("financial_services", 2025);

        let names = metric_names(&fs);
        assert!(
            names.contains("net_interest_margin_pct"),
            "financial services should include net interest margin"
        );
        assert!(
            names.contains("tier1_capital_ratio"),
            "financial services should include tier-1 capital ratio"
        );
    }

    #[test]
    fn test_industry_match_is_case_insensitive() {
        // Same seed and same metric table must give identical draws,
        // whatever the capitalization of the industry name.
        let lower = IndustryBenchmarkGenerator::new(7).generate("retail", 2025);
        let upper = IndustryBenchmarkGenerator::new(7).generate("RETAIL", 2025);

        assert_eq!(lower.len(), upper.len());
        for (l, u) in lower.iter().zip(upper.iter()) {
            assert_eq!(l.metric, u.metric);
            assert_eq!(l.value, u.value);
            // The caller's spelling is preserved in the record.
            assert_eq!(u.industry, "RETAIL");
        }

        let spaced = IndustryBenchmarkGenerator::new(7).generate("Financial Services", 2025);
        assert!(
            metric_names(&spaced).contains("tier1_capital_ratio"),
            "space-separated, mixed-case name should select financial services"
        );
    }

    #[test]
    fn test_source_is_synthetic() {
        let mut generator = IndustryBenchmarkGenerator::new(1);
        for b in &generator.generate("retail", 2025) {
            assert_eq!(b.source, "Industry Average (Synthetic)");
        }
    }

    #[test]
    fn test_period_label() {
        let mut generator = IndustryBenchmarkGenerator::new(1);
        for b in &generator.generate("retail", 2026) {
            assert_eq!(b.period, "FY2026");
        }
    }

    #[test]
    fn test_deterministic_with_same_seed() {
        let mut first_gen = IndustryBenchmarkGenerator::new(555);
        let first = first_gen.generate("manufacturing", 2025);

        let mut second_gen = IndustryBenchmarkGenerator::new(555);
        let second = second_gen.generate("manufacturing", 2025);

        assert_eq!(first.len(), second.len());
        for (a, b) in first.iter().zip(second.iter()) {
            assert_eq!(a.metric, b.metric);
            assert_eq!(a.value, b.value);
        }
    }

    #[test]
    fn test_values_are_non_negative() {
        // One generator is reused across industries so later industries are
        // exercised with a different part of the random stream.
        let mut generator = IndustryBenchmarkGenerator::new(42);
        let industries = [
            "retail",
            "manufacturing",
            "financial_services",
            "healthcare",
        ];
        for industry in industries.iter() {
            for b in &generator.generate(industry, 2025) {
                assert!(
                    b.value >= Decimal::ZERO,
                    "benchmark value should be non-negative: {} = {}",
                    b.metric,
                    b.value
                );
            }
        }
    }

    #[test]
    fn test_serialization_roundtrip() {
        let mut generator = IndustryBenchmarkGenerator::new(42);
        let benchmarks = generator.generate("retail", 2025);

        let json = serde_json::to_string(&benchmarks).expect("serialize");
        let parsed: Vec<IndustryBenchmark> = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(benchmarks.len(), parsed.len());
        for (orig, rt) in benchmarks.iter().zip(parsed.iter()) {
            assert_eq!(orig.metric, rt.metric);
            assert_eq!(orig.value, rt.value);
            assert_eq!(orig.industry, rt.industry);
        }
    }

    #[test]
    fn test_unknown_industry_falls_back_to_generic() {
        let mut generator = IndustryBenchmarkGenerator::new(42);
        let benchmarks = generator.generate("space_exploration", 2025);
        assert!(
            !benchmarks.is_empty(),
            "unknown industry should still produce output"
        );
        // Generic set has 8 metrics.
        assert_eq!(benchmarks.len(), 8);
    }
}
412}