use rand::prelude::*;
use rand_chacha::ChaCha8Rng;
use rust_decimal::Decimal;
use serde::{Deserialize, Serialize};
use super::AmountDistributionConfig;
/// First-digit probabilities under Benford's law, `log10(1 + 1/d)`.
///
/// Index `i` holds the probability for leading digit `i + 1` (digits 1 through 9).
// 0.30103 is log10(2) to five places, which clippy's `approx_constant` lint would flag.
#[allow(clippy::approx_constant)]
pub const BENFORD_PROBABILITIES: [f64; 9] = [
0.30103, 0.17609, 0.12494, 0.09691, 0.07918, 0.06695, 0.05799, 0.05115, 0.04576, ];
/// Cumulative first-digit distribution: running sum of [`BENFORD_PROBABILITIES`].
///
/// Index `i` holds the probability that the leading digit is at most `i + 1`;
/// the final entry is 1.0.
// 0.30103 is log10(2) to five places, which clippy's `approx_constant` lint would flag.
#[allow(clippy::approx_constant)]
pub const BENFORD_CDF: [f64; 9] = [
0.30103, 0.47712, 0.60206, 0.69897, 0.77815, 0.84510, 0.90309, 0.95424, 1.00000, ];
/// Second-digit probabilities used for Benford-style second-digit sampling.
///
/// Index `i` holds the probability that the second significant digit is `i`
/// (digits 0 through 9). The entries sum to 1.0.
#[allow(clippy::approx_constant)]
pub const BENFORD_SECOND_DIGIT_PROBABILITIES: [f64; 10] = [
0.11968, 0.11389, 0.10882, 0.10433, 0.10031, 0.09668, 0.09337, 0.09035, 0.08757, 0.08500, ];
/// Cumulative second-digit distribution: running sum of
/// [`BENFORD_SECOND_DIGIT_PROBABILITIES`].
///
/// Index `i` holds the probability that the second significant digit is at
/// most `i`; the final entry is 1.0.
pub const BENFORD_SECOND_DIGIT_CDF: [f64; 10] = [
0.11968, 0.23357, 0.34239, 0.44672, 0.54703, 0.64371, 0.73708, 0.82743, 0.91500, 1.00000,
];
/// Benford probability that a number starts with the digit pair `d1`, `d2`.
///
/// Evaluates `log10(1 + 1/n)` with `n = 10 * d1 + d2`.
///
/// Returns `0.0` when `d1` is outside `1..=9` or `d2` is greater than 9.
pub fn benford_first_two_probability(d1: u8, d2: u8) -> f64 {
    match (d1, d2) {
        (1..=9, 0..=9) => {
            let n = (d1 as f64) * 10.0 + (d2 as f64);
            (1.0 + 1.0 / n).log10()
        }
        // Not a valid leading digit pair.
        _ => 0.0,
    }
}
/// Builds the table of Benford probabilities for every leading digit pair.
///
/// Entry `(d1 - 1) * 10 + d2` holds the probability for first digit `d1`
/// (1 through 9) followed by second digit `d2` (0 through 9), so index 0 is
/// the pair "10" and index 89 is the pair "99".
pub fn benford_first_two_probabilities() -> [f64; 90] {
    let mut table = [0.0; 90];
    for (slot, entry) in table.iter_mut().enumerate() {
        // Decode the flat index back into its two digits.
        let lead = (slot / 10 + 1) as u8;
        let follow = (slot % 10) as u8;
        *entry = benford_first_two_probability(lead, follow);
    }
    table
}
/// First-digit probabilities that deliberately depart from Benford's law.
///
/// Index `i` holds the probability for leading digit `i + 1`. Digits 5, 7 and
/// 9 carry the most weight while 1 through 3 are rare. The entries sum to 1.0.
pub const ANTI_BENFORD_PROBABILITIES: [f64; 9] = [
0.05, 0.05, 0.05, 0.10, 0.25, 0.10, 0.20, 0.05, 0.15, ];
/// Selects which amount-generation strategy `FraudAmountGenerator::sample` uses.
///
/// Serialized in `snake_case` (for example `threshold_adjacent`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FraudAmountPattern {
/// Amount whose first digit is drawn from the Benford distribution (the default).
#[default]
Normal,
/// Amount whose first digit is drawn from `ANTI_BENFORD_PROBABILITIES`.
StatisticallyImprobable,
/// Multiples of 1000 / 5000 / 10000, or such a multiple minus 0.01.
ObviousRoundNumbers,
/// Amount placed slightly below one of the configured thresholds.
ThresholdAdjacent,
}
/// Settings for generating amounts that sit just below a threshold.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThresholdConfig {
/// Candidate thresholds; one is picked at random per sample.
pub thresholds: Vec<f64>,
/// Smallest fraction below the threshold (0.01 means 1% below).
pub min_below_pct: f64,
/// Upper bound (exclusive) on the fraction below the threshold.
pub max_below_pct: f64,
}
impl Default for ThresholdConfig {
    /// Six thresholds from 1,000 to 100,000, with amounts landing between 1%
    /// and 15% below the chosen threshold.
    fn default() -> Self {
        let thresholds = vec![1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0];
        Self {
            thresholds,
            min_below_pct: 0.01,
            max_below_pct: 0.15,
        }
    }
}
/// Seeded generator of amounts whose leading digit follows Benford's law.
pub struct BenfordSampler {
// Deterministic RNG; re-created from a seed by `new` and `reset`.
rng: ChaCha8Rng,
// Amount bounds, rounding probabilities and decimal precision.
config: AmountDistributionConfig,
}
impl BenfordSampler {
    /// Creates a sampler with a ChaCha8 RNG seeded from `seed` and the given
    /// amount configuration.
    pub fn new(seed: u64, config: AmountDistributionConfig) -> Self {
        Self {
            rng: ChaCha8Rng::seed_from_u64(seed),
            config,
        }
    }

    /// Draws a leading digit in `1..=9` by looking up one uniform draw in
    /// [`BENFORD_CDF`].
    fn sample_benford_first_digit(&mut self) -> u8 {
        let p: f64 = self.rng.random();
        BENFORD_CDF
            .iter()
            .position(|&cumulative| p < cumulative)
            // Fall back to 9 if no table entry exceeds `p`.
            .map_or(9, |i| (i + 1) as u8)
    }

    /// Draws a leading digit in `1..=9` from [`ANTI_BENFORD_PROBABILITIES`],
    /// accumulating the probabilities on the fly.
    fn sample_anti_benford_first_digit(&mut self) -> u8 {
        let p: f64 = self.rng.random();
        let mut cumulative = 0.0;
        for (i, &prob) in ANTI_BENFORD_PROBABILITIES.iter().enumerate() {
            cumulative += prob;
            if p < cumulative {
                return (i + 1) as u8;
            }
        }
        // Floating-point accumulation may stop marginally short of 1.0.
        9
    }

    /// Samples an amount whose leading digit is drawn from the Benford
    /// distribution.
    pub fn sample(&mut self) -> Decimal {
        let first_digit = self.sample_benford_first_digit();
        self.sample_with_first_digit(first_digit)
    }

    /// Samples an amount that starts out with the given leading digit.
    ///
    /// `first_digit` is clamped to `1..=9`. A power of ten is chosen uniformly
    /// between the magnitudes of `min_amount` and `max_amount`, a uniform
    /// fraction is added to the digit, and the product is clamped to the
    /// configured range. The amount may then be rounded to a multiple of 100
    /// or of 5 (per the configured probabilities) and is finally rounded to
    /// `decimal_places`.
    ///
    /// Clamping and rounding can change the leading digit of the returned
    /// value. The result always lies within `[min_amount, max_amount]`.
    ///
    /// Returns `Decimal::ONE` if the value cannot be converted to a `Decimal`.
    ///
    /// # Panics
    ///
    /// Panics if `min_amount > max_amount` or either bound is NaN
    /// (`f64::clamp` contract).
    pub fn sample_with_first_digit(&mut self, first_digit: u8) -> Decimal {
        let first_digit = first_digit.clamp(1, 9);
        let min_amount = self.config.min_amount;
        let max_amount = self.config.max_amount;

        let min_magnitude = min_amount.log10().floor() as i32;
        let max_magnitude = max_amount.log10().floor() as i32;
        let magnitude = self.rng.random_range(min_magnitude..=max_magnitude);
        let base = 10_f64.powi(magnitude);

        let remaining: f64 = self.rng.random();
        let mantissa = first_digit as f64 + remaining;
        let mut amount = (mantissa * base).clamp(min_amount, max_amount);

        // One draw decides between "round", "nice" and unrounded amounts.
        let p: f64 = self.rng.random();
        if p < self.config.round_number_probability {
            amount = (amount / 100.0).round() * 100.0;
        } else if p < self.config.round_number_probability + self.config.nice_number_probability {
            amount = (amount / 5.0).round() * 5.0;
        }

        let decimal_multiplier = 10_f64.powi(self.config.decimal_places as i32);
        amount = (amount * decimal_multiplier).round() / decimal_multiplier;

        // Rounding can move the value outside the configured range in either
        // direction (down to 0, or up past a `max_amount` that is not a
        // multiple of the rounding step), so re-apply both bounds.
        amount = amount.max(min_amount).min(max_amount);

        Decimal::from_f64_retain(amount).unwrap_or(Decimal::ONE)
    }

    /// Replaces the RNG with a fresh one seeded from `seed`.
    pub fn reset(&mut self, seed: u64) {
        self.rng = ChaCha8Rng::seed_from_u64(seed);
    }
}
/// Seeded generator of amounts following a chosen [`FraudAmountPattern`].
pub struct FraudAmountGenerator {
// RNG used by the round-number and threshold-adjacent strategies.
rng: ChaCha8Rng,
// Separately seeded sampler used for Benford and anti-Benford amounts.
benford_sampler: BenfordSampler,
// Thresholds and below-threshold fractions for `ThresholdAdjacent`.
threshold_config: ThresholdConfig,
// Amount bounds applied to the generated values.
config: AmountDistributionConfig,
}
impl FraudAmountGenerator {
    /// Creates a generator seeded from `seed`.
    ///
    /// The internal [`BenfordSampler`] is seeded with `seed + 1` (wrapping on
    /// overflow, so `u64::MAX` is a valid seed in every build profile).
    pub fn new(
        seed: u64,
        config: AmountDistributionConfig,
        threshold_config: ThresholdConfig,
    ) -> Self {
        Self {
            rng: ChaCha8Rng::seed_from_u64(seed),
            benford_sampler: BenfordSampler::new(seed.wrapping_add(1), config.clone()),
            threshold_config,
            config,
        }
    }

    /// Samples one amount using the strategy selected by `pattern`.
    pub fn sample(&mut self, pattern: FraudAmountPattern) -> Decimal {
        match pattern {
            FraudAmountPattern::Normal => self.benford_sampler.sample(),
            FraudAmountPattern::StatisticallyImprobable => self.sample_anti_benford(),
            FraudAmountPattern::ObviousRoundNumbers => self.sample_obvious_round(),
            FraudAmountPattern::ThresholdAdjacent => self.sample_threshold_adjacent(),
        }
    }

    /// Samples an amount whose leading digit comes from the anti-Benford table.
    fn sample_anti_benford(&mut self) -> Decimal {
        let first_digit = self.benford_sampler.sample_anti_benford_first_digit();
        self.benford_sampler.sample_with_first_digit(first_digit)
    }

    /// Samples a conspicuously round amount, or one cent below a round amount.
    ///
    /// One of five shapes is chosen uniformly, then the value is clamped to
    /// the configured amount range.
    fn sample_obvious_round(&mut self) -> Decimal {
        let pattern_choice = self.rng.random_range(0..5);
        let amount = match pattern_choice {
            // Multiple of 1,000 up to 99,000.
            0 => {
                let multiplier = self.rng.random_range(1..100);
                multiplier as f64 * 1000.0
            }
            // One cent below a multiple of 10,000 (9,999.99 ... 89,999.99).
            1 => {
                let base = self.rng.random_range(1..10) as f64 * 10000.0;
                base - 0.01
            }
            // Multiple of 10,000 up to 190,000.
            2 => {
                let multiplier = self.rng.random_range(1..20);
                multiplier as f64 * 10000.0
            }
            // Multiple of 5,000 up to 195,000.
            3 => {
                let multiplier = self.rng.random_range(1..40);
                multiplier as f64 * 5000.0
            }
            // One cent below a multiple of 1,000.
            _ => {
                let base = self.rng.random_range(1..100) as f64 * 1000.0;
                base - 0.01
            }
        };
        let clamped = amount.clamp(self.config.min_amount, self.config.max_amount);
        Decimal::from_f64_retain(clamped).unwrap_or(Decimal::ONE)
    }

    /// Samples an amount slightly below a randomly chosen threshold.
    ///
    /// The amount is `threshold * (1 - pct)` with `pct` drawn from
    /// `[min_below_pct, max_below_pct)`, perturbed by up to ±0.5%, rounded to
    /// cents, capped at `threshold - 0.01`, and clamped to the configured
    /// amount range. Uses 10,000 when no thresholds are configured. If the
    /// configured fraction range is empty (`min_below_pct >= max_below_pct`),
    /// `min_below_pct` is used directly instead of panicking.
    fn sample_threshold_adjacent(&mut self) -> Decimal {
        let threshold = self
            .threshold_config
            .thresholds
            .choose(&mut self.rng)
            .copied()
            .unwrap_or(10000.0);

        let min_pct = self.threshold_config.min_below_pct;
        let max_pct = self.threshold_config.max_below_pct;
        // `random_range` panics on an empty range; this config is
        // user-supplied, so degrade gracefully instead.
        let pct_below = if min_pct < max_pct {
            self.rng.random_range(min_pct..max_pct)
        } else {
            min_pct
        };

        let base_amount = threshold * (1.0 - pct_below);
        let noise_factor = 1.0 + self.rng.random_range(-0.005..0.005);
        let amount = base_amount * noise_factor;
        let rounded = (amount * 100.0).round() / 100.0;
        // Never reach the threshold itself, whatever the noise did.
        let final_amount = rounded.min(threshold - 0.01);
        let clamped = final_amount.clamp(self.config.min_amount, self.config.max_amount);
        Decimal::from_f64_retain(clamped).unwrap_or(Decimal::ONE)
    }

    /// Re-seeds this generator and its inner sampler, mirroring `new`.
    pub fn reset(&mut self, seed: u64) {
        self.rng = ChaCha8Rng::seed_from_u64(seed);
        self.benford_sampler.reset(seed.wrapping_add(1));
    }
}
/// Returns the first non-zero digit in the decimal rendering of `amount`.
///
/// Sign, decimal point and leading zeros are skipped. Returns `None` when the
/// rendering contains no non-zero digit (for example zero itself).
pub fn get_first_digit(amount: Decimal) -> Option<u8> {
    amount
        .to_string()
        .chars()
        // `to_digit(10)` is `Some` only for the ASCII digits '0'..='9'.
        .filter_map(|c| c.to_digit(10))
        .find(|&d| d != 0)
        .map(|d| d as u8)
}
/// Returns the first non-zero digit of `amount` and the digit that follows it.
///
/// Works on the decimal rendering of the absolute value; the decimal point is
/// ignored, so `0.00123` yields `(1, 2)`. Returns `None` when there is no
/// non-zero digit or no digit after it in the rendering (for example `5`).
pub fn get_first_two_digits(amount: Decimal) -> Option<(u8, u8)> {
    let rendered = amount.abs().to_string();
    let mut significant = rendered
        .chars()
        // Keeps ASCII digits only, which drops the decimal point.
        .filter_map(|c| c.to_digit(10))
        .skip_while(|&d| d == 0)
        .map(|d| d as u8);
    let leading = significant.next()?;
    let following = significant.next()?;
    Some((leading, following))
}
/// Configuration for [`EnhancedBenfordSampler`].
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EnhancedBenfordConfig {
/// Amount bounds and decimal precision for generated values.
pub amount_config: AmountDistributionConfig,
/// When true (and `first_two_digit_compliance` is false), the second digit
/// is drawn from `BENFORD_SECOND_DIGIT_CDF` rather than uniformly.
/// Defaults to false when absent from the serialized form.
#[serde(default)]
pub second_digit_compliance: bool,
/// When true, both leading digits are drawn jointly from the first-two-digit
/// Benford distribution; takes precedence over `second_digit_compliance`.
/// Defaults to false when absent from the serialized form.
#[serde(default)]
pub first_two_digit_compliance: bool,
}
/// Seeded amount sampler that controls the first two significant digits.
pub struct EnhancedBenfordSampler {
// Deterministic RNG; re-created from a seed by `new` and `reset`.
rng: ChaCha8Rng,
// Compliance flags plus the amount configuration.
config: EnhancedBenfordConfig,
// Cumulative distribution over the 90 leading digit pairs "10".."99",
// built once in `new`.
first_two_cdf: [f64; 90],
}
impl EnhancedBenfordSampler {
    /// Creates a sampler seeded from `seed` and precomputes the cumulative
    /// distribution over the 90 leading digit pairs.
    pub fn new(seed: u64, config: EnhancedBenfordConfig) -> Self {
        let pair_probs = benford_first_two_probabilities();
        let mut first_two_cdf = [0.0; 90];
        let mut running = 0.0;
        for (slot, &prob) in first_two_cdf.iter_mut().zip(pair_probs.iter()) {
            running += prob;
            *slot = running;
        }
        Self {
            rng: ChaCha8Rng::seed_from_u64(seed),
            config,
            first_two_cdf,
        }
    }

    /// Draws both leading digits jointly from the precomputed pair CDF.
    fn sample_first_two_digits(&mut self) -> (u8, u8) {
        let p: f64 = self.rng.random();
        match self.first_two_cdf.iter().position(|&cdf| p < cdf) {
            // Flat index -> (first digit 1..=9, second digit 0..=9).
            Some(i) => ((i / 10 + 1) as u8, (i % 10) as u8),
            // The accumulated CDF may end marginally below 1.0.
            None => (9, 9),
        }
    }

    /// Draws a second digit (0 through 9) from [`BENFORD_SECOND_DIGIT_CDF`].
    fn sample_second_digit(&mut self) -> u8 {
        let p: f64 = self.rng.random();
        BENFORD_SECOND_DIGIT_CDF
            .iter()
            .position(|&cdf| p < cdf)
            .map_or(9, |i| i as u8)
    }

    /// Draws a first digit (1 through 9) from [`BENFORD_CDF`].
    fn sample_first_digit(&mut self) -> u8 {
        let p: f64 = self.rng.random();
        BENFORD_CDF
            .iter()
            .position(|&cdf| p < cdf)
            .map_or(9, |i| (i + 1) as u8)
    }

    /// Samples an amount, choosing the two leading digits according to the
    /// configured compliance flags.
    ///
    /// `first_two_digit_compliance` wins over `second_digit_compliance`; with
    /// neither set, the first digit is Benford-distributed and the second is
    /// uniform over 0 through 9.
    pub fn sample(&mut self) -> Decimal {
        let (lead, follow) = if self.config.first_two_digit_compliance {
            self.sample_first_two_digits()
        } else {
            // The first digit is always drawn before the second.
            let lead = self.sample_first_digit();
            let follow = if self.config.second_digit_compliance {
                self.sample_second_digit()
            } else {
                self.rng.random_range(0..10) as u8
            };
            (lead, follow)
        };
        self.sample_with_digits(lead, follow)
    }

    /// Builds an amount from the two given leading digits.
    ///
    /// A power of ten is chosen uniformly between the magnitudes of the
    /// configured bounds, a uniform fraction is appended after the two digits,
    /// and the value is clamped to the bounds and rounded to `decimal_places`.
    /// Returns `Decimal::ONE` if the value cannot be converted.
    fn sample_with_digits(&mut self, first_digit: u8, second_digit: u8) -> Decimal {
        let lead = first_digit.clamp(1, 9);
        let follow = second_digit.clamp(0, 9);

        let lower = self.config.amount_config.min_amount;
        let upper = self.config.amount_config.max_amount;
        let lowest_magnitude = lower.log10().floor() as i32;
        let highest_magnitude = upper.log10().floor() as i32;
        let magnitude = self.rng.random_range(lowest_magnitude..=highest_magnitude);

        // The mantissa spans [10, 100), hence one power of ten less here.
        let scale = 10_f64.powi(magnitude - 1);
        let fraction: f64 = self.rng.random();
        let mantissa = (lead as f64) * 10.0 + (follow as f64) + fraction;

        let bounded = (mantissa * scale).clamp(lower, upper);
        let precision = 10_f64.powi(self.config.amount_config.decimal_places as i32);
        let rounded = (bounded * precision).round() / precision;
        Decimal::from_f64_retain(rounded).unwrap_or(Decimal::ONE)
    }

    /// Replaces the RNG with a fresh one seeded from `seed`.
    pub fn reset(&mut self, seed: u64) {
        self.rng = ChaCha8Rng::seed_from_u64(seed);
    }
}
/// Kind of departure from Benford's law produced by `BenfordDeviationSampler`.
///
/// Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum BenfordDeviationType {
/// Over-represents leading digits 1 and 5 (the default).
#[default]
RoundNumberBias,
/// Places amounts slightly below one of the configured thresholds.
ThresholdClustering,
/// Draws the leading digit uniformly from 1 through 9.
UniformFirstDigit,
/// Favours the given leading digit (clamped to 1..=9 by the sampler).
DigitBias { digit: u8 },
/// Rounds a Benford-sampled amount to a whole number.
TrailingZeros,
}
/// Configuration for `BenfordDeviationSampler`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BenfordDeviationConfig {
/// Which deviation to inject.
pub deviation_type: BenfordDeviationType,
/// Compared against a uniform draw on each sample to decide whether to
/// deviate: 1.0 always deviates; lower values fall back to plain Benford
/// sampling more often. Defaults to 0.5 when absent from the serialized form.
#[serde(default = "default_intensity")]
pub intensity: f64,
/// Amount bounds and rounding settings for generated values.
pub amount_config: AmountDistributionConfig,
/// Thresholds used by `ThresholdClustering`; defaults to six values from
/// 1,000 to 100,000 when absent from the serialized form.
#[serde(default = "default_thresholds")]
pub thresholds: Vec<f64>,
}
/// Serde default for `BenfordDeviationConfig::intensity`.
fn default_intensity() -> f64 {
    const HALF: f64 = 0.5;
    HALF
}
/// Serde default for `BenfordDeviationConfig::thresholds`.
fn default_thresholds() -> Vec<f64> {
    [1000.0, 5000.0, 10000.0, 25000.0, 50000.0, 100000.0].to_vec()
}
impl Default for BenfordDeviationConfig {
fn default() -> Self {
Self {
deviation_type: BenfordDeviationType::RoundNumberBias,
intensity: 0.5,
amount_config: AmountDistributionConfig::default(),
thresholds: default_thresholds(),
}
}
}
/// Seeded sampler that mixes Benford-compliant amounts with a configured
/// deviation.
pub struct BenfordDeviationSampler {
// RNG for the deviate-or-not decision and the deviation strategies.
rng: ChaCha8Rng,
// Deviation type, intensity, amount configuration and thresholds.
config: BenfordDeviationConfig,
// Separately seeded sampler used for compliant amounts and as the amount
// builder for digit-based deviations.
benford_sampler: BenfordSampler,
}
impl BenfordDeviationSampler {
    /// Creates a sampler seeded from `seed`.
    ///
    /// The internal [`BenfordSampler`] is seeded with `seed + 100` (wrapping
    /// on overflow, so seeds near `u64::MAX` are valid in every build profile).
    pub fn new(seed: u64, config: BenfordDeviationConfig) -> Self {
        Self {
            rng: ChaCha8Rng::seed_from_u64(seed),
            benford_sampler: BenfordSampler::new(
                seed.wrapping_add(100),
                config.amount_config.clone(),
            ),
            config,
        }
    }

    /// Samples one amount.
    ///
    /// A uniform draw `p` is compared with `intensity`: when `p >= intensity`
    /// a plain Benford amount is returned, otherwise the configured deviation
    /// is applied. An intensity of 0.0 therefore never deviates and an
    /// intensity of 1.0 always does.
    pub fn sample(&mut self) -> Decimal {
        let p: f64 = self.rng.random();
        // `>=` rather than `>` so that intensity 0.0 cannot deviate when the
        // draw happens to be exactly 0.0.
        if p >= self.config.intensity {
            return self.benford_sampler.sample();
        }
        match self.config.deviation_type {
            BenfordDeviationType::RoundNumberBias => self.sample_round_bias(),
            BenfordDeviationType::ThresholdClustering => self.sample_threshold_cluster(),
            BenfordDeviationType::UniformFirstDigit => self.sample_uniform_first_digit(),
            BenfordDeviationType::DigitBias { digit } => self.sample_digit_bias(digit),
            BenfordDeviationType::TrailingZeros => self.sample_trailing_zeros(),
        }
    }

    /// Samples an amount whose leading digit is biased towards 1 and 5.
    ///
    /// With probability 0.6 the digit is 1 (70%) or 5 (30%); otherwise it is
    /// uniform over 1 through 9.
    fn sample_round_bias(&mut self) -> Decimal {
        let first_digit = if self.rng.random_bool(0.6) {
            if self.rng.random_bool(0.7) {
                1
            } else {
                5
            }
        } else {
            self.rng.random_range(1..=9)
        };
        // NOTE(review): this second-digit choice is never applied to the
        // amount, because `BenfordSampler` only accepts a first digit. The
        // draws are kept so the RNG stream, and therefore the output for a
        // given seed, stays unchanged.
        let _second_digit = if self.rng.random_bool(0.5) {
            if self.rng.random_bool(0.6) {
                0
            } else {
                5
            }
        } else {
            self.rng.random_range(0..=9)
        };
        self.benford_sampler.sample_with_first_digit(first_digit)
    }

    /// Samples an amount 1% to 15% below a randomly chosen threshold, with up
    /// to ±0.5% noise, rounded to cents and clamped to the amount range.
    ///
    /// Uses 10,000 when no thresholds are configured.
    fn sample_threshold_cluster(&mut self) -> Decimal {
        let threshold = self
            .config
            .thresholds
            .choose(&mut self.rng)
            .copied()
            .unwrap_or(10000.0);
        let pct_below = self.rng.random_range(0.01..0.15);
        let amount = threshold * (1.0 - pct_below);
        let noise = 1.0 + self.rng.random_range(-0.005..0.005);
        let final_amount = (amount * noise * 100.0).round() / 100.0;
        Decimal::from_f64_retain(final_amount.clamp(
            self.config.amount_config.min_amount,
            self.config.amount_config.max_amount,
        ))
        .unwrap_or(Decimal::ONE)
    }

    /// Samples an amount whose leading digit is uniform over 1 through 9.
    fn sample_uniform_first_digit(&mut self) -> Decimal {
        let first_digit = self.rng.random_range(1..=9);
        self.benford_sampler.sample_with_first_digit(first_digit)
    }

    /// Samples an amount whose leading digit is `target_digit` (clamped to
    /// `1..=9`) with probability 0.7 and uniform over 1 through 9 otherwise.
    fn sample_digit_bias(&mut self, target_digit: u8) -> Decimal {
        let digit = target_digit.clamp(1, 9);
        let first_digit = if self.rng.random_bool(0.7) {
            digit
        } else {
            self.rng.random_range(1..=9)
        };
        self.benford_sampler.sample_with_first_digit(first_digit)
    }

    /// Samples a Benford amount and rounds it to the nearest whole number,
    /// then clamps it to the configured amount range.
    fn sample_trailing_zeros(&mut self) -> Decimal {
        let amount = self.benford_sampler.sample();
        // Round-trips through the string rendering to obtain an `f64`; an
        // unparsable rendering is treated as 0.0.
        let amount_f64: f64 = amount.to_string().parse().unwrap_or(0.0);
        let rounded = amount_f64.round();
        Decimal::from_f64_retain(rounded.clamp(
            self.config.amount_config.min_amount,
            self.config.amount_config.max_amount,
        ))
        .unwrap_or(Decimal::ONE)
    }

    /// Re-seeds this sampler and its inner Benford sampler, mirroring `new`.
    pub fn reset(&mut self, seed: u64) {
        self.rng = ChaCha8Rng::seed_from_u64(seed);
        self.benford_sampler.reset(seed.wrapping_add(100));
    }
}
// Unit tests for the probability tables, the samplers and the digit helpers.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// The first-digit table must be a probability distribution.
#[test]
fn test_benford_probabilities_sum_to_one() {
let sum: f64 = BENFORD_PROBABILITIES.iter().sum();
assert!(
(sum - 1.0).abs() < 0.001,
"Benford probabilities sum to {}, expected 1.0",
sum
);
}
// The last CDF entry must be 1.0 so every uniform draw maps to a digit.
#[test]
fn test_benford_cdf_ends_at_one() {
assert!(
(BENFORD_CDF[8] - 1.0).abs() < 0.0001,
"CDF should end at 1.0"
);
}
// The anti-Benford table must also be a probability distribution.
#[test]
fn test_anti_benford_probabilities_sum_to_one() {
let sum: f64 = ANTI_BENFORD_PROBABILITIES.iter().sum();
assert!(
(sum - 1.0).abs() < 0.001,
"Anti-Benford probabilities sum to {}, expected 1.0",
sum
);
}
// Two samplers with the same seed and config must produce identical streams.
#[test]
fn test_benford_sampler_determinism() {
let config = AmountDistributionConfig::default();
let mut sampler1 = BenfordSampler::new(42, config.clone());
let mut sampler2 = BenfordSampler::new(42, config);
for _ in 0..100 {
assert_eq!(sampler1.sample(), sampler2.sample());
}
}
// Loose check that leading digits 1 and 9 appear with roughly Benford frequency.
#[test]
fn test_benford_first_digit_distribution() {
let config = AmountDistributionConfig::default();
let mut sampler = BenfordSampler::new(12345, config);
let mut digit_counts = [0u32; 9];
let iterations = 10_000;
for _ in 0..iterations {
let amount = sampler.sample();
if let Some(digit) = get_first_digit(amount) {
if (1..=9).contains(&digit) {
digit_counts[(digit - 1) as usize] += 1;
}
}
}
// Wide tolerance bands: clamping and rounding in the sampler distort the digits.
let digit_1_pct = digit_counts[0] as f64 / iterations as f64;
assert!(
digit_1_pct > 0.15 && digit_1_pct < 0.50,
"Digit 1 should be ~30%, got {:.1}%",
digit_1_pct * 100.0
);
let digit_9_pct = digit_counts[8] as f64 / iterations as f64;
assert!(
digit_9_pct > 0.02 && digit_9_pct < 0.10,
"Digit 9 should be ~5%, got {:.1}%",
digit_9_pct * 100.0
);
}
// Threshold-adjacent amounts must stay below the threshold but close to it.
#[test]
fn test_threshold_adjacent_below_threshold() {
let config = AmountDistributionConfig::default();
let threshold_config = ThresholdConfig {
thresholds: vec![10000.0],
min_below_pct: 0.01,
max_below_pct: 0.15,
};
let mut gen = FraudAmountGenerator::new(42, config, threshold_config);
for _ in 0..100 {
let amount = gen.sample(FraudAmountPattern::ThresholdAdjacent);
let f = amount.to_string().parse::<f64>().unwrap();
assert!(f < 10000.0, "Amount {} should be below threshold 10000", f);
// 15% below plus 0.5% noise gives a floor a little above 8400.
assert!(
f >= 8400.0,
"Amount {} should be approximately within 15% of threshold",
f
);
}
}
// Smoke test for the obvious-round-number pattern.
#[test]
fn test_obvious_round_numbers() {
let config = AmountDistributionConfig::default();
let threshold_config = ThresholdConfig::default();
let mut gen = FraudAmountGenerator::new(42, config, threshold_config);
for _ in 0..100 {
let amount = gen.sample(FraudAmountPattern::ObviousRoundNumbers);
let f = amount.to_string().parse::<f64>().unwrap();
let is_round = f % 1000.0 == 0.0 || f % 5000.0 == 0.0;
let is_just_under = (f + 0.01) % 1000.0 < 0.02 || (f + 0.01) % 10000.0 < 0.02;
// NOTE(review): the `|| f > 0.0` disjunct makes this pass for any positive
// amount, so roundness is not actually enforced here.
assert!(
is_round || is_just_under || f > 0.0,
"Amount {} should be a suspicious round number",
f
);
}
}
// First-digit extraction skips leading zeros and the decimal point.
#[test]
fn test_get_first_digit() {
assert_eq!(get_first_digit(Decimal::from(123)), Some(1));
assert_eq!(get_first_digit(Decimal::from(999)), Some(9));
assert_eq!(get_first_digit(Decimal::from(50000)), Some(5));
assert_eq!(
get_first_digit(Decimal::from_str_exact("0.00123").unwrap()),
Some(1)
);
}
// The second-digit table must be a probability distribution.
#[test]
fn test_second_digit_probabilities_sum_to_one() {
let sum: f64 = BENFORD_SECOND_DIGIT_PROBABILITIES.iter().sum();
assert!(
(sum - 1.0).abs() < 0.001,
"Second digit probabilities sum to {}, expected 1.0",
sum
);
}
// Spot-checks the pair probabilities for "10" and "99" and the table total.
#[test]
fn test_first_two_probability() {
let p10 = benford_first_two_probability(1, 0);
assert!((p10 - 0.0414).abs() < 0.001);
let p99 = benford_first_two_probability(9, 9);
assert!((p99 - 0.00436).abs() < 0.0001);
let probs = benford_first_two_probabilities();
let sum: f64 = probs.iter().sum();
assert!((sum - 1.0).abs() < 0.001);
}
// Two-digit extraction skips leading zeros and the decimal point.
#[test]
fn test_get_first_two_digits() {
assert_eq!(get_first_two_digits(Decimal::from(123)), Some((1, 2)));
assert_eq!(get_first_two_digits(Decimal::from(999)), Some((9, 9)));
assert_eq!(get_first_two_digits(Decimal::from(50000)), Some((5, 0)));
assert_eq!(
get_first_two_digits(Decimal::from_str_exact("0.00123").unwrap()),
Some((1, 2))
);
}
// With second-digit compliance on, most samples expose two digits and no
// single second digit dominates.
#[test]
fn test_enhanced_benford_sampler() {
let config = EnhancedBenfordConfig {
amount_config: AmountDistributionConfig::default(),
second_digit_compliance: true,
first_two_digit_compliance: false,
};
let mut sampler = EnhancedBenfordSampler::new(42, config);
let mut digit_counts = [0u32; 10];
for _ in 0..10000 {
let amount = sampler.sample();
if let Some((_, d2)) = get_first_two_digits(amount) {
digit_counts[d2 as usize] += 1;
}
}
let total_valid = digit_counts.iter().sum::<u32>();
assert!(
total_valid > 9000,
"Most samples should have valid first two digits"
);
let max_count = *digit_counts.iter().max().unwrap();
// NOTE(review): `_min_count` is computed but never asserted on.
let _min_count = *digit_counts.iter().min().unwrap();
assert!(
max_count < total_valid / 2,
"Second digits should have some variety, max count: {}",
max_count
);
}
// At intensity 1.0, threshold clustering keeps every amount just below the
// single configured threshold.
#[test]
fn test_benford_deviation_sampler() {
let config = BenfordDeviationConfig {
deviation_type: BenfordDeviationType::ThresholdClustering,
intensity: 1.0,
amount_config: AmountDistributionConfig::default(),
thresholds: vec![10000.0],
};
let mut sampler = BenfordDeviationSampler::new(42, config);
for _ in 0..100 {
let amount = sampler.sample();
let f: f64 = amount.to_string().parse().unwrap();
assert!(f < 10000.0, "Amount {} should be below 10000", f);
assert!(f > 8000.0, "Amount {} should be near threshold 10000", f);
}
}
// At intensity 1.0, round-number bias should over-represent leading 1 or 5.
#[test]
fn test_benford_deviation_round_bias() {
let config = BenfordDeviationConfig {
deviation_type: BenfordDeviationType::RoundNumberBias,
intensity: 1.0,
amount_config: AmountDistributionConfig::default(),
thresholds: vec![],
};
let mut sampler = BenfordDeviationSampler::new(42, config);
let mut digit_counts = [0u32; 9];
for _ in 0..1000 {
let amount = sampler.sample();
if let Some(d) = get_first_digit(amount) {
if (1..=9).contains(&d) {
digit_counts[(d - 1) as usize] += 1;
}
}
}
let d1_pct = digit_counts[0] as f64 / 1000.0;
let d5_pct = digit_counts[4] as f64 / 1000.0;
assert!(d1_pct > 0.35 || d5_pct > 0.10);
}
}