1use datasynth_core::models::IndustryBenchmark;
8use datasynth_core::utils::seeded_rng;
9use rand::prelude::*;
10use rand_chacha::ChaCha8Rng;
11use rust_decimal::Decimal;
12
/// One row of a per-industry benchmark table.
///
/// The generator turns each row into a single benchmark value computed as
/// `base * (1 + u * sigma)`, where `u` is drawn uniformly from `[-1, 1)`.
#[derive(Debug, Clone, Copy, PartialEq)]
struct MetricDef {
    /// Metric identifier, copied verbatim into the emitted benchmark record.
    name: &'static str,
    /// Centre value around which the synthetic benchmark is perturbed.
    base: f64,
    /// Relative half-width of the perturbation, as a fraction of `base`
    /// (`0.30` means the result lies within roughly ±30 % of `base`).
    ///
    /// Values of `1.0` or more let the perturbed value go negative, which the
    /// generator then clamps to zero.
    sigma: f64,
}
19
20const RETAIL_METRICS: &[MetricDef] = &[
22 MetricDef {
23 name: "median_revenue",
24 base: 50_000_000.0,
25 sigma: 0.30,
26 },
27 MetricDef {
28 name: "gross_margin_pct",
29 base: 0.35,
30 sigma: 0.05,
31 },
32 MetricDef {
33 name: "net_margin_pct",
34 base: 0.05,
35 sigma: 0.02,
36 },
37 MetricDef {
38 name: "current_ratio",
39 base: 1.5,
40 sigma: 0.30,
41 },
42 MetricDef {
43 name: "debt_to_equity",
44 base: 0.8,
45 sigma: 0.20,
46 },
47 MetricDef {
48 name: "revenue_growth_pct",
49 base: 0.03,
50 sigma: 0.02,
51 },
52 MetricDef {
53 name: "inventory_turnover",
54 base: 8.0,
55 sigma: 2.0,
56 },
57 MetricDef {
58 name: "interest_rate_pct",
59 base: 0.045,
60 sigma: 0.01,
61 },
62 MetricDef {
63 name: "return_on_assets_pct",
64 base: 0.06,
65 sigma: 0.02,
66 },
67 MetricDef {
68 name: "days_sales_outstanding",
69 base: 35.0,
70 sigma: 8.0,
71 },
72];
73
74const MANUFACTURING_METRICS: &[MetricDef] = &[
76 MetricDef {
77 name: "median_revenue",
78 base: 100_000_000.0,
79 sigma: 0.30,
80 },
81 MetricDef {
82 name: "gross_margin_pct",
83 base: 0.30,
84 sigma: 0.05,
85 },
86 MetricDef {
87 name: "net_margin_pct",
88 base: 0.07,
89 sigma: 0.02,
90 },
91 MetricDef {
92 name: "current_ratio",
93 base: 1.8,
94 sigma: 0.30,
95 },
96 MetricDef {
97 name: "debt_to_equity",
98 base: 0.6,
99 sigma: 0.20,
100 },
101 MetricDef {
102 name: "revenue_growth_pct",
103 base: 0.04,
104 sigma: 0.02,
105 },
106 MetricDef {
107 name: "inventory_turnover",
108 base: 5.0,
109 sigma: 1.5,
110 },
111 MetricDef {
112 name: "interest_rate_pct",
113 base: 0.04,
114 sigma: 0.01,
115 },
116 MetricDef {
117 name: "return_on_assets_pct",
118 base: 0.07,
119 sigma: 0.02,
120 },
121 MetricDef {
122 name: "asset_turnover",
123 base: 1.2,
124 sigma: 0.3,
125 },
126];
127
128const FINANCIAL_SERVICES_METRICS: &[MetricDef] = &[
130 MetricDef {
131 name: "median_revenue",
132 base: 200_000_000.0,
133 sigma: 0.30,
134 },
135 MetricDef {
136 name: "net_interest_margin_pct",
137 base: 0.03,
138 sigma: 0.005,
139 },
140 MetricDef {
141 name: "net_margin_pct",
142 base: 0.20,
143 sigma: 0.05,
144 },
145 MetricDef {
146 name: "tier1_capital_ratio",
147 base: 0.12,
148 sigma: 0.02,
149 },
150 MetricDef {
151 name: "cost_to_income_ratio",
152 base: 0.55,
153 sigma: 0.08,
154 },
155 MetricDef {
156 name: "loan_to_deposit_ratio",
157 base: 0.80,
158 sigma: 0.10,
159 },
160 MetricDef {
161 name: "return_on_equity_pct",
162 base: 0.10,
163 sigma: 0.03,
164 },
165 MetricDef {
166 name: "non_performing_loan_pct",
167 base: 0.02,
168 sigma: 0.01,
169 },
170 MetricDef {
171 name: "interest_rate_pct",
172 base: 0.05,
173 sigma: 0.01,
174 },
175 MetricDef {
176 name: "revenue_growth_pct",
177 base: 0.05,
178 sigma: 0.03,
179 },
180];
181
182const GENERIC_METRICS: &[MetricDef] = &[
184 MetricDef {
185 name: "median_revenue",
186 base: 75_000_000.0,
187 sigma: 0.30,
188 },
189 MetricDef {
190 name: "gross_margin_pct",
191 base: 0.40,
192 sigma: 0.08,
193 },
194 MetricDef {
195 name: "net_margin_pct",
196 base: 0.08,
197 sigma: 0.03,
198 },
199 MetricDef {
200 name: "current_ratio",
201 base: 1.6,
202 sigma: 0.30,
203 },
204 MetricDef {
205 name: "debt_to_equity",
206 base: 0.7,
207 sigma: 0.20,
208 },
209 MetricDef {
210 name: "revenue_growth_pct",
211 base: 0.04,
212 sigma: 0.02,
213 },
214 MetricDef {
215 name: "return_on_assets_pct",
216 base: 0.06,
217 sigma: 0.02,
218 },
219 MetricDef {
220 name: "interest_rate_pct",
221 base: 0.045,
222 sigma: 0.01,
223 },
224];
225
226pub struct IndustryBenchmarkGenerator {
229 rng: ChaCha8Rng,
230}
231
232impl IndustryBenchmarkGenerator {
233 pub fn new(seed: u64) -> Self {
235 Self {
236 rng: seeded_rng(seed, 0),
237 }
238 }
239
240 pub fn generate(&mut self, industry: &str, fiscal_year: i32) -> Vec<IndustryBenchmark> {
245 let metrics = match industry.to_lowercase().as_str() {
246 "retail" => RETAIL_METRICS,
247 "manufacturing" => MANUFACTURING_METRICS,
248 "financial_services" | "financial services" => FINANCIAL_SERVICES_METRICS,
249 _ => GENERIC_METRICS,
250 };
251
252 let period = format!("FY{fiscal_year}");
253
254 metrics
255 .iter()
256 .map(|def| {
257 let noise: f64 = self.rng.random_range(-1.0..1.0) * def.sigma;
258 let raw = def.base * (1.0 + noise);
259 let raw = if raw < 0.0 { 0.0 } else { raw };
261 let value = if raw.is_finite() {
262 Decimal::from_f64_retain(raw)
263 .unwrap_or(Decimal::ZERO)
264 .round_dp(4)
265 } else {
266 Decimal::ZERO
267 };
268
269 IndustryBenchmark {
270 industry: industry.to_string(),
271 metric: def.name.to_string(),
272 value,
273 source: "Industry Average (Synthetic)".to_string(),
274 period: period.clone(),
275 }
276 })
277 .collect()
278 }
279}
280
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// Collects the metric names of a benchmark list into a set.
    fn metric_set(benchmarks: &[IndustryBenchmark]) -> HashSet<&str> {
        benchmarks.iter().map(|b| b.metric.as_str()).collect()
    }

    /// A known industry yields a populated list with at least eight metrics.
    #[test]
    fn test_generates_non_empty_output() {
        let benchmarks = IndustryBenchmarkGenerator::new(42).generate("retail", 2025);
        assert!(!benchmarks.is_empty(), "should produce benchmarks");
        assert!(benchmarks.len() >= 8, "should produce at least 8 metrics");
    }

    /// Retail and manufacturing expose different sets of metric names.
    #[test]
    fn test_industry_specific_content_differs() {
        let retail = IndustryBenchmarkGenerator::new(42).generate("retail", 2025);
        let manufacturing = IndustryBenchmarkGenerator::new(42).generate("manufacturing", 2025);

        let retail_metrics = metric_set(&retail);
        let mfg_metrics = metric_set(&manufacturing);

        assert_ne!(
            retail_metrics, mfg_metrics,
            "retail and manufacturing metrics should differ"
        );
    }

    /// Financial services carries its banking-specific metrics.
    #[test]
    fn test_financial_services_has_unique_metrics() {
        let fs = IndustryBenchmarkGenerator::new(99).generate("financial_services", 2025);
        let has_metric = |wanted: &str| fs.iter().any(|b| b.metric == wanted);

        assert!(
            has_metric("net_interest_margin_pct"),
            "financial services should include net interest margin"
        );
        assert!(
            has_metric("tier1_capital_ratio"),
            "financial services should include tier-1 capital ratio"
        );
    }

    /// Every record is labelled with the synthetic source string.
    #[test]
    fn test_source_is_synthetic() {
        let benchmarks = IndustryBenchmarkGenerator::new(1).generate("retail", 2025);
        for record in benchmarks.iter() {
            assert_eq!(record.source, "Industry Average (Synthetic)");
        }
    }

    /// The period label is `FY` followed by the requested fiscal year.
    #[test]
    fn test_period_label() {
        let benchmarks = IndustryBenchmarkGenerator::new(1).generate("retail", 2026);
        for record in benchmarks.iter() {
            assert_eq!(record.period, "FY2026");
        }
    }

    /// Two generators with the same seed produce identical metrics and values.
    #[test]
    fn test_deterministic_with_same_seed() {
        let first = IndustryBenchmarkGenerator::new(555).generate("manufacturing", 2025);
        let second = IndustryBenchmarkGenerator::new(555).generate("manufacturing", 2025);

        assert_eq!(first.len(), second.len());
        for (lhs, rhs) in first.iter().zip(second.iter()) {
            assert_eq!(lhs.metric, rhs.metric);
            assert_eq!(lhs.value, rhs.value);
        }
    }

    /// No industry ever yields a negative benchmark value.
    #[test]
    fn test_values_are_non_negative() {
        // A single generator is shared on purpose so the random stream keeps
        // advancing across industries.
        let mut gen = IndustryBenchmarkGenerator::new(42);
        let industries = [
            "retail",
            "manufacturing",
            "financial_services",
            "healthcare",
        ];
        for industry in industries.iter() {
            for b in gen.generate(industry, 2025).iter() {
                assert!(
                    b.value >= Decimal::ZERO,
                    "benchmark value should be non-negative: {} = {}",
                    b.metric,
                    b.value
                );
            }
        }
    }

    /// Records survive a JSON serialize/deserialize round trip unchanged.
    #[test]
    fn test_serialization_roundtrip() {
        let benchmarks = IndustryBenchmarkGenerator::new(42).generate("retail", 2025);

        let json = serde_json::to_string(&benchmarks).expect("serialize");
        let parsed: Vec<IndustryBenchmark> = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(benchmarks.len(), parsed.len());
        for (orig, rt) in benchmarks.iter().zip(parsed.iter()) {
            assert_eq!(orig.metric, rt.metric);
            assert_eq!(orig.value, rt.value);
            assert_eq!(orig.industry, rt.industry);
        }
    }

    /// An unrecognised industry name falls back to the eight generic metrics.
    #[test]
    fn test_unknown_industry_falls_back_to_generic() {
        let benchmarks = IndustryBenchmarkGenerator::new(42).generate("space_exploration", 2025);
        assert!(
            !benchmarks.is_empty(),
            "unknown industry should still produce output"
        );
        assert_eq!(benchmarks.len(), 8);
    }
}