datasynth_core/distributions/
amount.rs1use rand::prelude::*;
7use rand_chacha::ChaCha8Rng;
8use rand_distr::{Distribution, LogNormal};
9use rust_decimal::Decimal;
10use serde::{Deserialize, Serialize};
11
12use super::benford::{BenfordSampler, FraudAmountGenerator, FraudAmountPattern, ThresholdConfig};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct AmountDistributionConfig {
17 pub min_amount: f64,
19 pub max_amount: f64,
21 pub lognormal_mu: f64,
23 pub lognormal_sigma: f64,
25 pub decimal_places: u8,
27 pub round_number_probability: f64,
29 pub nice_number_probability: f64,
31}
32
33impl Default for AmountDistributionConfig {
34 fn default() -> Self {
35 Self {
36 min_amount: 0.01,
37 max_amount: 100_000_000.0, lognormal_mu: 9.85,
49 lognormal_sigma: 2.3,
52 decimal_places: 2,
53 round_number_probability: 0.25, nice_number_probability: 0.15, }
56 }
57}
58
59impl AmountDistributionConfig {
60 pub fn small_transactions() -> Self {
62 Self {
63 min_amount: 0.01,
64 max_amount: 10_000.0,
65 lognormal_mu: 4.0, lognormal_sigma: 1.5,
67 decimal_places: 2,
68 round_number_probability: 0.30,
69 nice_number_probability: 0.20,
70 }
71 }
72
73 pub fn medium_transactions() -> Self {
75 Self {
76 min_amount: 100.0,
77 max_amount: 1_000_000.0,
78 lognormal_mu: 8.5, lognormal_sigma: 2.0,
80 decimal_places: 2,
81 round_number_probability: 0.20,
82 nice_number_probability: 0.15,
83 }
84 }
85
86 pub fn large_transactions() -> Self {
88 Self {
89 min_amount: 1000.0,
90 max_amount: 100_000_000.0,
91 lognormal_mu: 10.0, lognormal_sigma: 2.5,
93 decimal_places: 2,
94 round_number_probability: 0.15,
95 nice_number_probability: 0.10,
96 }
97 }
98}
99
100pub struct AmountSampler {
102 rng: ChaCha8Rng,
104 config: AmountDistributionConfig,
106 lognormal: LogNormal<f64>,
108 decimal_multiplier: f64,
110 benford_sampler: Option<BenfordSampler>,
112 fraud_generator: Option<FraudAmountGenerator>,
114 benford_enabled: bool,
116}
117
118impl AmountSampler {
119 pub fn new(seed: u64) -> Self {
121 Self::with_config(seed, AmountDistributionConfig::default())
122 }
123
124 pub fn with_config(seed: u64, config: AmountDistributionConfig) -> Self {
126 let lognormal = LogNormal::new(config.lognormal_mu, config.lognormal_sigma)
127 .expect("Invalid log-normal parameters");
128 let decimal_multiplier = 10_f64.powi(config.decimal_places as i32);
129
130 Self {
131 rng: ChaCha8Rng::seed_from_u64(seed),
132 config,
133 lognormal,
134 decimal_multiplier,
135 benford_sampler: None,
136 fraud_generator: None,
137 benford_enabled: false,
138 }
139 }
140
141 pub fn with_benford(seed: u64, config: AmountDistributionConfig) -> Self {
143 let lognormal = LogNormal::new(config.lognormal_mu, config.lognormal_sigma)
144 .expect("Invalid log-normal parameters");
145 let decimal_multiplier = 10_f64.powi(config.decimal_places as i32);
146
147 Self {
148 rng: ChaCha8Rng::seed_from_u64(seed),
149 benford_sampler: Some(BenfordSampler::new(seed + 100, config.clone())),
150 fraud_generator: Some(FraudAmountGenerator::new(
151 seed + 200,
152 config.clone(),
153 ThresholdConfig::default(),
154 )),
155 config,
156 lognormal,
157 decimal_multiplier,
158 benford_enabled: true,
159 }
160 }
161
162 pub fn with_fraud_config(
164 seed: u64,
165 config: AmountDistributionConfig,
166 threshold_config: ThresholdConfig,
167 benford_enabled: bool,
168 ) -> Self {
169 let lognormal = LogNormal::new(config.lognormal_mu, config.lognormal_sigma)
170 .expect("Invalid log-normal parameters");
171 let decimal_multiplier = 10_f64.powi(config.decimal_places as i32);
172
173 Self {
174 rng: ChaCha8Rng::seed_from_u64(seed),
175 benford_sampler: if benford_enabled {
176 Some(BenfordSampler::new(seed + 100, config.clone()))
177 } else {
178 None
179 },
180 fraud_generator: Some(FraudAmountGenerator::new(
181 seed + 200,
182 config.clone(),
183 threshold_config,
184 )),
185 config,
186 lognormal,
187 decimal_multiplier,
188 benford_enabled,
189 }
190 }
191
192 pub fn set_benford_enabled(&mut self, enabled: bool) {
194 self.benford_enabled = enabled;
195 if enabled && self.benford_sampler.is_none() {
196 let seed = self.rng.random();
198 self.benford_sampler = Some(BenfordSampler::new(seed, self.config.clone()));
199 }
200 }
201
202 pub fn is_benford_enabled(&self) -> bool {
204 self.benford_enabled
205 }
206
207 #[inline]
212 pub fn sample(&mut self) -> Decimal {
213 if self.benford_enabled {
215 if let Some(ref mut benford) = self.benford_sampler {
216 return benford.sample();
217 }
218 }
219
220 self.sample_lognormal()
222 }
223
224 #[inline]
226 pub fn sample_lognormal(&mut self) -> Decimal {
227 let mut amount = self.lognormal.sample(&mut self.rng);
228
229 amount = amount.clamp(self.config.min_amount, self.config.max_amount);
231
232 let p: f64 = self.rng.random();
234 if p < self.config.round_number_probability {
235 amount = (amount / 100.0).round() * 100.0;
237 } else if p < self.config.round_number_probability + self.config.nice_number_probability {
238 amount = (amount / 5.0).round() * 5.0;
240 }
241
242 amount = (amount * self.decimal_multiplier).round() / self.decimal_multiplier;
244
245 amount = amount.max(self.config.min_amount);
247
248 let cents = (amount * 100.0).round() as i64;
252 Decimal::new(cents, 2)
253 }
254
255 pub fn sample_fraud(&mut self, pattern: FraudAmountPattern) -> Decimal {
259 if let Some(ref mut fraud_gen) = self.fraud_generator {
260 fraud_gen.sample(pattern)
261 } else {
262 self.sample()
264 }
265 }
266
267 pub fn sample_summing_to(&mut self, count: usize, total: Decimal) -> Vec<Decimal> {
274 use rust_decimal::prelude::ToPrimitive;
275
276 let min_amount = Decimal::new(1, 2); if count == 0 {
279 return Vec::new();
280 }
281 if count == 1 {
282 return vec![total];
283 }
284
285 let total_f64 = total.to_f64().unwrap_or(0.0);
286
287 let mut weights: Vec<f64> = (0..count)
289 .map(|_| self.rng.random::<f64>().max(0.01))
290 .collect();
291 let sum: f64 = weights.iter().sum();
292 weights.iter_mut().for_each(|w| *w /= sum);
293
294 let mut amounts: Vec<Decimal> = weights
296 .iter()
297 .map(|w| {
298 let amount = total_f64 * w;
299 let rounded = (amount * self.decimal_multiplier).round() / self.decimal_multiplier;
300 let cents = (rounded * 100.0).round() as i64;
302 Decimal::new(cents, 2)
303 })
304 .collect();
305
306 let current_sum: Decimal = amounts.iter().copied().sum();
308 let diff = total - current_sum;
309 let last_idx = amounts.len() - 1;
310 amounts[last_idx] += diff;
311
312 if amounts[last_idx] < Decimal::ZERO {
314 let mut remaining = amounts[last_idx].abs();
315 amounts[last_idx] = Decimal::ZERO;
316
317 for amt in amounts.iter_mut().take(last_idx).rev() {
319 if remaining <= Decimal::ZERO {
320 break;
321 }
322 let take = remaining.min(*amt);
323 *amt -= take;
324 remaining -= take;
325 }
326
327 if remaining > Decimal::ZERO {
329 for amt in amounts.iter_mut() {
330 if *amt > Decimal::ZERO {
331 *amt -= remaining;
332 break;
333 }
334 }
335 }
336 }
337
338 if total >= min_amount * Decimal::from(count as u32) {
342 loop {
343 let zero_idx = amounts.iter().position(|a| *a == Decimal::ZERO);
345 let Some(zi) = zero_idx else { break };
346
347 let donor = amounts
349 .iter()
350 .enumerate()
351 .filter(|&(j, _)| j != zi)
352 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
353 .map(|(j, _)| j);
354
355 if let Some(di) = donor {
356 if amounts[di] > min_amount {
357 amounts[zi] = min_amount;
358 amounts[di] -= min_amount;
359 } else {
360 break; }
362 } else {
363 break;
364 }
365 }
366 }
367
368 amounts
369 }
370
371 pub fn sample_in_range(&mut self, min: Decimal, max: Decimal) -> Decimal {
373 let min_f64 = min.to_string().parse::<f64>().unwrap_or(0.0);
374 let max_f64 = max.to_string().parse::<f64>().unwrap_or(1000000.0);
375
376 let range = max_f64 - min_f64;
377 let amount = min_f64 + self.rng.random::<f64>() * range;
378
379 let rounded = (amount * self.decimal_multiplier).round() / self.decimal_multiplier;
380 Decimal::from_f64_retain(rounded).unwrap_or(min)
381 }
382
383 pub fn reset(&mut self, seed: u64) {
385 self.rng = ChaCha8Rng::seed_from_u64(seed);
386 }
387
388 pub fn set_lognormal_sigma(&mut self, sigma: f64) {
394 if sigma > 0.0 {
395 self.config.lognormal_sigma = sigma;
396 if let Ok(dist) = LogNormal::new(self.config.lognormal_mu, sigma) {
397 self.lognormal = dist;
398 }
399 }
400 }
401
402 pub fn set_round_number_probability(&mut self, p: f64) {
406 self.config.round_number_probability = p.clamp(0.0, 1.0);
407 }
408
409 pub fn lognormal_sigma(&self) -> f64 {
411 self.config.lognormal_sigma
412 }
413
414 pub fn round_number_probability(&self) -> f64 {
416 self.config.round_number_probability
417 }
418}
419
420pub struct ExchangeRateSampler {
422 rng: ChaCha8Rng,
423 base_rates: std::collections::HashMap<String, f64>,
425 volatility: f64,
427}
428
429impl ExchangeRateSampler {
430 pub fn new(seed: u64) -> Self {
432 let mut base_rates = std::collections::HashMap::new();
433 base_rates.insert("EUR".to_string(), 0.92);
435 base_rates.insert("GBP".to_string(), 0.79);
436 base_rates.insert("CHF".to_string(), 0.88);
437 base_rates.insert("JPY".to_string(), 149.0);
438 base_rates.insert("CNY".to_string(), 7.24);
439 base_rates.insert("CAD".to_string(), 1.36);
440 base_rates.insert("AUD".to_string(), 1.53);
441 base_rates.insert("INR".to_string(), 83.0);
442 base_rates.insert("USD".to_string(), 1.0);
443
444 Self {
445 rng: ChaCha8Rng::seed_from_u64(seed),
446 base_rates,
447 volatility: 0.005, }
449 }
450
451 pub fn get_rate(&mut self, from: &str, to: &str) -> Decimal {
453 let from_usd = self.base_rates.get(from).copied().unwrap_or(1.0);
454 let to_usd = self.base_rates.get(to).copied().unwrap_or(1.0);
455
456 let base_rate = to_usd / from_usd;
458
459 let variation = 1.0 + (self.rng.random::<f64>() - 0.5) * 2.0 * self.volatility;
461 let rate = base_rate * variation;
462
463 let rounded = (rate * 1_000_000.0).round() / 1_000_000.0;
465 Decimal::from_f64_retain(rounded).unwrap_or(Decimal::ONE)
466 }
467}
468
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts a `Decimal` to `f64` through its string form for range checks.
    fn as_f64(value: Decimal) -> f64 {
        value.to_string().parse().unwrap()
    }

    /// Two samplers built from the same seed must produce identical sequences.
    #[test]
    fn test_amount_sampler_determinism() {
        let mut first = AmountSampler::new(42);
        let mut second = AmountSampler::new(42);

        for _ in 0..100 {
            assert_eq!(first.sample(), second.sample());
        }
    }

    /// Every sample must respect the configured minimum and maximum.
    #[test]
    fn test_amount_sampler_range() {
        let mut sampler = AmountSampler::with_config(
            42,
            AmountDistributionConfig {
                min_amount: 100.0,
                max_amount: 1000.0,
                ..Default::default()
            },
        );

        for _ in 0..1000 {
            let amount = sampler.sample();
            let amount_f64 = as_f64(amount);
            assert!(amount_f64 >= 100.0, "Amount {} below minimum", amount);
            assert!(amount_f64 <= 1000.0, "Amount {} above maximum", amount);
        }
    }

    /// A split must have the requested length and sum exactly to the total.
    #[test]
    fn test_summing_amounts() {
        let total = Decimal::from(10000);
        let amounts = AmountSampler::new(42).sample_summing_to(5, total);

        assert_eq!(amounts.len(), 5);

        let sum: Decimal = amounts.iter().sum();
        assert_eq!(sum, total, "Sum {} doesn't match total {}", sum, total);
    }

    /// Rates stay close to the base table and same-currency rates stay near 1.
    #[test]
    fn test_exchange_rate() {
        let mut sampler = ExchangeRateSampler::new(42);

        let eur_f64 = as_f64(sampler.get_rate("EUR", "USD"));
        assert!(
            eur_f64 > 0.8 && eur_f64 < 1.2,
            "EUR/USD rate {} out of range",
            eur_f64
        );

        let usd_f64 = as_f64(sampler.get_rate("USD", "USD"));
        assert!(
            (usd_f64 - 1.0).abs() < 0.01,
            "USD/USD rate {} should be ~1.0",
            usd_f64
        );
    }
}