use chrono::NaiveDate;
use datasynth_core::utils::seeded_rng;
use rand::RngExt;
use rand_chacha::ChaCha8Rng;
use rust_decimal::Decimal;
use std::collections::HashMap;
use tracing::debug;
use datasynth_core::models::{
AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
RelationalAnomalyType,
};
use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
use super::context::{
AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
EntityAwareInjector, VendorContext,
};
use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
use super::difficulty::DifficultyCalculator;
use super::near_miss::{NearMissConfig, NearMissGenerator};
use super::patterns::{
should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
TemporalPattern,
};
use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
use super::schemes::{SchemeAction, SchemeContext};
use super::strategies::{DuplicationStrategy, StrategyCollection};
use super::types::AnomalyTypeSelector;
/// Top-level configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
/// Injection rates. `total_rate` is the base per-entry injection rate; the
/// per-category rates (`fraud_rate`, `error_rate`, `process_issue_rate`,
/// `statistical_rate`) drive the category draw, with the remainder going to
/// relational anomalies.
pub rates: AnomalyRateConfig,
/// Pattern settings: clustering, entity targeting and the temporal pattern.
pub patterns: AnomalyPatternConfig,
/// Seed for the injector's RNG and its deterministic UUID factory.
pub seed: u64,
/// When `false`, strategies are still applied to entries but no label is
/// produced and the entry header is not flagged.
pub generate_labels: bool,
/// Allows a duplicate entry to be created after a `DuplicateEntry` /
/// `DuplicatePayment` anomaly.
pub allow_duplicates: bool,
/// Entries whose document already carries this many anomalies are skipped.
pub max_anomalies_per_document: usize,
/// Company codes eligible for injection; an empty list means all companies.
pub target_companies: Vec<String>,
/// Optional inclusive `(start, end)` posting-date window for injection.
pub date_range: Option<(NaiveDate, NaiveDate)>,
/// Optional "enhanced" features (schemes, near misses, context awareness, ...).
pub enhanced: EnhancedInjectionConfig,
}
/// Switches and parameters for the optional enhanced injection features.
///
/// The derived `Default` leaves every flag `false` and every number `0.0`;
/// nested configs use their own `Default` implementations.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
/// Creates a `SchemeAdvancer` when `true`.
pub multi_stage_schemes_enabled: bool,
/// Embezzlement scheme probability; revenue-manipulation and kickback
/// schemes use half of this value.
pub scheme_probability: f64,
/// Creates an `AnomalyCoOccurrence` handler that queues correlated anomalies.
pub correlated_injection_enabled: bool,
/// Creates a `TemporalClusterGenerator` whose active clusters scale the rate.
pub temporal_clustering_enabled: bool,
/// Set by the builder alongside temporal clustering.
/// NOTE(review): not read anywhere in this part of the file - confirm where it is consumed.
pub period_end_multiplier: f64,
/// Creates a `NearMissGenerator` when `true`.
pub near_miss_enabled: bool,
/// Forwarded to `NearMissConfig::proportion`.
pub near_miss_proportion: f64,
/// Approval thresholds passed to the near-miss check.
pub approval_thresholds: Vec<Decimal>,
/// Creates a `DifficultyCalculator` that annotates labels when `true`.
pub difficulty_classification_enabled: bool,
/// Creates an `EntityAwareInjector`; also a precondition for the behavioral baseline.
pub context_aware_enabled: bool,
/// Baseline settings; the baseline is only built when this config's `enabled`
/// flag and `context_aware_enabled` are both `true`.
pub behavioral_baseline_config: BehavioralBaselineConfig,
/// Behavioral biases applied to entries that receive a fraud anomaly.
pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
}
/// Probabilities of the forensic "behavioral" tweaks applied to fraud entries.
///
/// Each bias is evaluated independently; a value of `0.0` disables that bias.
#[derive(Debug, Clone)]
pub struct FraudBehavioralBiasConfig {
/// Master switch; when `false` no bias is applied.
pub enabled: bool,
/// Probability that the posting date is moved to a weekend.
pub weekend_bias: f64,
/// Probability that a two-line entry's amount is snapped to a round target.
pub round_dollar_bias: f64,
/// Probability that `created_at` is moved to an off-hours time (22:00-05:59 UTC).
pub off_hours_bias: f64,
/// Probability that the entry is flagged as posted after period close.
pub post_close_bias: f64,
}
impl Default for FraudBehavioralBiasConfig {
/// Biases are enabled by default with probabilities between 25% and 40%.
fn default() -> Self {
Self {
enabled: true,
// 30% of fraud entries are moved to a weekend.
weekend_bias: 0.30,
// 40% are considered for round-dollar snapping.
round_dollar_bias: 0.40,
// 35% get an off-hours creation timestamp.
off_hours_bias: 0.35,
// 25% are flagged as post-close postings.
post_close_bias: 0.25,
}
}
}
impl Default for AnomalyInjectorConfig {
/// Default configuration: seed 42, labels and duplicates enabled, at most two
/// anomalies per document, no company or date filter, enhanced features at
/// their own defaults.
fn default() -> Self {
Self {
rates: AnomalyRateConfig::default(),
patterns: AnomalyPatternConfig::default(),
seed: 42,
generate_labels: true,
allow_duplicates: true,
max_anomalies_per_document: 2,
// Empty list = every company is eligible.
target_companies: Vec::new(),
// No posting-date restriction.
date_range: None,
enhanced: EnhancedInjectionConfig::default(),
}
}
}
/// Outcome of one [`AnomalyInjector::process_entries`] call.
///
/// Several fields are snapshots of injector-wide state and therefore accumulate
/// across calls until the injector is reset.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
/// Copy of the injector's running processed-entry counter.
pub entries_processed: usize,
/// Copy of the injector's running injected-anomaly counter.
pub anomalies_injected: usize,
/// Number of duplicate entries created during this call.
pub duplicates_created: usize,
/// Clone of all labels held by the injector at the end of the call.
pub labels: Vec<LabeledAnomaly>,
/// Summary computed from `labels`.
pub summary: AnomalySummary,
/// Document numbers of entries that received a labeled anomaly in this call.
pub modified_documents: Vec<String>,
/// Clone of all near-miss labels held by the injector.
pub near_miss_labels: Vec<NearMissLabel>,
/// Clone of all scheme actions recorded by the injector.
pub scheme_actions: Vec<SchemeAction>,
/// Count of labels per detection-difficulty class.
pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
/// Stateful engine that injects anomalies into journal entries and records
/// the corresponding ground-truth labels.
pub struct AnomalyInjector {
/// Configuration the injector was built with.
config: AnomalyInjectorConfig,
/// Seeded RNG driving every random decision made by the injector.
rng: ChaCha8Rng,
/// Deterministic UUID source handed to the duplication strategy.
uuid_factory: DeterministicUuidFactory,
/// Picks a concrete anomaly type within a category.
type_selector: AnomalyTypeSelector,
/// Strategies that actually mutate entries.
strategies: StrategyCollection,
/// Assigns anomalies to clusters.
cluster_manager: ClusterManager,
/// Chooses the entity an anomaly is attributed to.
entity_targeting: EntityTargetingManager,
/// Number of anomalies applied per document number.
document_anomaly_counts: HashMap<String, usize>,
/// All labels produced so far.
labels: Vec<LabeledAnomaly>,
/// Running counters.
stats: InjectorStats,
/// Present only when multi-stage schemes are enabled.
scheme_advancer: Option<SchemeAdvancer>,
/// Present only when near misses are enabled.
near_miss_generator: Option<NearMissGenerator>,
/// Near-miss labels produced so far.
near_miss_labels: Vec<NearMissLabel>,
/// Present only when correlated injection is enabled.
co_occurrence_handler: Option<AnomalyCoOccurrence>,
/// Correlated anomalies waiting for an entry dated on/after their earliest date.
queued_co_occurrences: Vec<QueuedAnomaly>,
/// Present only when temporal clustering is enabled.
temporal_cluster_generator: Option<TemporalClusterGenerator>,
/// Present only when difficulty classification is enabled.
difficulty_calculator: Option<DifficultyCalculator>,
/// Present only when context-aware injection is enabled.
entity_aware_injector: Option<EntityAwareInjector>,
/// Present only when context-aware injection and the baseline config are both enabled.
behavioral_baseline: Option<BehavioralBaseline>,
/// Scheme actions returned by `advance_schemes` so far.
scheme_actions: Vec<SchemeAction>,
/// Count of labels per detection-difficulty class.
difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
/// Vendor contexts, looked up by the entry header's `reference`.
vendor_contexts: HashMap<String, VendorContext>,
/// Employee contexts, looked up by the entry header's `created_by`.
employee_contexts: HashMap<String, EmployeeContext>,
/// Account contexts, looked up by the first line's `gl_account`.
account_contexts: HashMap<String, AccountContext>,
}
/// Running counters maintained by [`AnomalyInjector`].
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
/// Entries seen by `process_entries`.
pub total_processed: usize,
/// Labeled anomalies injected by `process_entries`.
pub total_injected: usize,
/// Applied anomalies per category name.
pub by_category: HashMap<String, usize>,
/// Applied anomalies per type name.
pub by_type: HashMap<String, usize>,
/// Applied anomalies per company code.
pub by_company: HashMap<String, usize>,
/// NOTE(review): never incremented in this part of the file - confirm whether it is still used.
pub skipped_rate: usize,
/// Entries skipped because their posting date is outside the configured range.
pub skipped_date: usize,
/// Entries skipped because their company is not targeted.
pub skipped_company: usize,
/// Entries skipped because their document reached the per-document anomaly cap.
pub skipped_max_per_doc: usize,
/// Times the weekend bias moved a fraud entry's posting date.
pub fraud_weekend_bias_applied: usize,
/// Times the round-dollar bias rewrote a fraud entry's amounts.
pub fraud_round_dollar_bias_applied: usize,
/// Times the off-hours bias rewrote a fraud entry's creation timestamp.
pub fraud_off_hours_bias_applied: usize,
/// Times the post-close bias flagged a fraud entry.
pub fraud_post_close_bias_applied: usize,
}
/// A correlated anomaly scheduled for injection into a later entry.
struct QueuedAnomaly {
/// Anomaly type to inject.
anomaly_type: AnomalyType,
/// Entity carried over from the triggering anomaly, if the correlation is same-entity.
target_entity: Option<String>,
/// First posting date on which the anomaly may be injected.
earliest_date: NaiveDate,
/// Description of the correlation, stored in the label metadata.
description: String,
}
impl AnomalyInjector {
/// Builds an injector from `config`.
///
/// The RNG is seeded from `config.seed`. Optional components are only created
/// when their feature flag in `config.enhanced` is set. Seeds for the scheme
/// advancer and the near-miss generator are drawn from the injector RNG, in
/// that order, and only for the components that are enabled.
pub fn new(config: AnomalyInjectorConfig) -> Self {
    let mut rng = seeded_rng(config.seed, 0);
    let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
    let entity_targeting = EntityTargetingManager::new(config.patterns.entity_targeting.clone());

    let enhanced = &config.enhanced;

    // Drawn first so the RNG stream matches the scheme -> near-miss order.
    let scheme_advancer = enhanced.multi_stage_schemes_enabled.then(|| {
        SchemeAdvancer::new(SchemeAdvancerConfig {
            embezzlement_probability: enhanced.scheme_probability,
            revenue_manipulation_probability: enhanced.scheme_probability * 0.5,
            kickback_probability: enhanced.scheme_probability * 0.5,
            seed: rng.random(),
            ..Default::default()
        })
    });
    let near_miss_generator = enhanced.near_miss_enabled.then(|| {
        NearMissGenerator::new(NearMissConfig {
            proportion: enhanced.near_miss_proportion,
            seed: rng.random(),
            ..Default::default()
        })
    });

    let co_occurrence_handler = enhanced
        .correlated_injection_enabled
        .then(|| AnomalyCoOccurrence::new());
    let temporal_cluster_generator = enhanced
        .temporal_clustering_enabled
        .then(|| TemporalClusterGenerator::new());
    let difficulty_calculator = enhanced
        .difficulty_classification_enabled
        .then(|| DifficultyCalculator::new());
    let entity_aware_injector = enhanced
        .context_aware_enabled
        .then(|| EntityAwareInjector::default());

    // The baseline needs both the context-aware flag and its own enable flag.
    let baseline_wanted =
        enhanced.context_aware_enabled && enhanced.behavioral_baseline_config.enabled;
    let behavioral_baseline = baseline_wanted
        .then(|| BehavioralBaseline::new(enhanced.behavioral_baseline_config.clone()));

    let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);

    Self {
        config,
        rng,
        uuid_factory,
        type_selector: AnomalyTypeSelector::new(),
        strategies: StrategyCollection::default(),
        cluster_manager,
        entity_targeting,
        document_anomaly_counts: HashMap::new(),
        labels: Vec::new(),
        stats: InjectorStats::default(),
        scheme_advancer,
        near_miss_generator,
        near_miss_labels: Vec::new(),
        co_occurrence_handler,
        queued_co_occurrences: Vec::new(),
        temporal_cluster_generator,
        difficulty_calculator,
        entity_aware_injector,
        behavioral_baseline,
        scheme_actions: Vec::new(),
        difficulty_distribution: HashMap::new(),
        vendor_contexts: HashMap::new(),
        employee_contexts: HashMap::new(),
        account_contexts: HashMap::new(),
    }
}
pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
debug!(
entry_count = entries.len(),
total_rate = self.config.rates.total_rate,
seed = self.config.seed,
"Injecting anomalies into journal entries"
);
let mut modified_documents = Vec::new();
let mut duplicates = Vec::new();
for entry in entries.iter_mut() {
self.stats.total_processed += 1;
if let Some(ref mut baseline) = self.behavioral_baseline {
use super::context::Observation;
let entity_id = entry.header.created_by.clone();
let observation =
Observation::new(entry.posting_date()).with_amount(entry.total_debit());
baseline.record_observation(&entity_id, observation);
}
if !self.should_process(entry) {
continue;
}
let entry_date = entry.posting_date();
let ready_indices: Vec<usize> = self
.queued_co_occurrences
.iter()
.enumerate()
.filter(|(_, q)| entry_date >= q.earliest_date)
.map(|(i, _)| i)
.collect();
if let Some(&idx) = ready_indices.first() {
let queued = self.queued_co_occurrences.remove(idx);
if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
label = label.with_metadata("co_occurrence", "true");
label = label.with_metadata("co_occurrence_description", &queued.description);
if let Some(ref target) = queued.target_entity {
label = label.with_related_entity(target);
label = label.with_metadata("co_occurrence_target", target);
}
modified_documents.push(entry.document_number().clone());
self.labels.push(label);
self.stats.total_injected += 1;
}
continue; }
let base_rate = self.config.rates.total_rate;
let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
let employee_id = &entry.header.created_by;
let first_account = entry
.lines
.first()
.map(|l| l.gl_account.as_str())
.unwrap_or("");
let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
let vendor_ctx = self.vendor_contexts.get(vendor_ref);
let employee_ctx = self.employee_contexts.get(employee_id);
let account_ctx = self.account_contexts.get(first_account);
let multiplier =
injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
(base_rate * multiplier).min(1.0)
} else {
self.calculate_context_rate_multiplier(entry) * base_rate
};
if let Some(ref tcg) = self.temporal_cluster_generator {
let temporal_multiplier = tcg
.get_active_clusters(entry_date)
.iter()
.map(|c| c.rate_multiplier)
.fold(1.0_f64, f64::max);
effective_rate = (effective_rate * temporal_multiplier).min(1.0);
}
if should_inject_anomaly(
effective_rate,
entry_date,
&self.config.patterns.temporal_pattern,
&mut self.rng,
) {
if let Some(ref mut near_miss_gen) = self.near_miss_generator {
let account = entry
.lines
.first()
.map(|l| l.gl_account.clone())
.unwrap_or_default();
near_miss_gen.record_transaction(
entry.document_number().clone(),
entry_date,
entry.total_debit(),
&account,
None,
);
if let Some(near_miss_label) = near_miss_gen.check_near_miss(
entry.document_number().clone(),
entry_date,
entry.total_debit(),
&account,
None,
&self.config.enhanced.approval_thresholds,
) {
self.near_miss_labels.push(near_miss_label);
continue; }
}
let anomaly_type = self.select_anomaly_category();
let target_entity = {
let mut candidates: Vec<String> =
self.vendor_contexts.keys().cloned().collect();
candidates.extend(self.employee_contexts.keys().cloned());
if candidates.is_empty() {
if let Some(ref r) = entry.header.reference {
candidates.push(r.clone());
}
}
self.entity_targeting
.select_entity(&candidates, &mut self.rng)
};
if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
if let Some(ref entity_id) = target_entity {
label = label.with_metadata("entity_target", entity_id);
label = label.with_related_entity(entity_id);
label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
target_type: "Entity".to_string(),
target_id: entity_id.clone(),
});
}
if let Some(ref calculator) = self.difficulty_calculator {
let difficulty = calculator.calculate(&label);
label =
label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
label = label.with_metadata(
"difficulty_score",
&difficulty.difficulty_score().to_string(),
);
*self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
}
modified_documents.push(entry.document_number().clone());
self.labels.push(label);
self.stats.total_injected += 1;
if let Some(ref co_occ) = self.co_occurrence_handler {
let correlated =
co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
for result in correlated {
self.queued_co_occurrences.push(QueuedAnomaly {
anomaly_type: result.anomaly_type,
target_entity: if result.same_entity {
target_entity.clone()
} else {
None
},
earliest_date: entry_date
+ chrono::Duration::days(i64::from(result.lag_days)),
description: result.description,
});
}
}
}
if self.config.allow_duplicates
&& matches!(
self.labels.last().map(|l| &l.anomaly_type),
Some(AnomalyType::Error(ErrorType::DuplicateEntry))
| Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
)
{
let dup_strategy = DuplicationStrategy::default();
let duplicate =
dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
duplicates.push(duplicate);
}
}
}
let duplicates_created = duplicates.len();
let summary = AnomalySummary::from_anomalies(&self.labels);
InjectionBatchResult {
entries_processed: self.stats.total_processed,
anomalies_injected: self.stats.total_injected,
duplicates_created,
labels: self.labels.clone(),
summary,
modified_documents,
near_miss_labels: self.near_miss_labels.clone(),
scheme_actions: self.scheme_actions.clone(),
difficulty_distribution: self.difficulty_distribution.clone(),
}
}
/// Applies the company, date-range and per-document filters to `entry`.
///
/// Returns `false` (and bumps the matching `skipped_*` counter) at the first
/// filter that rejects the entry; returns `true` when all filters pass.
fn should_process(&mut self, entry: &JournalEntry) -> bool {
    // An empty target list means every company is eligible.
    let targets = &self.config.target_companies;
    let company_targeted =
        targets.is_empty() || targets.iter().any(|c| c == entry.company_code());
    if !company_targeted {
        self.stats.skipped_company += 1;
        return false;
    }

    // The configured window is inclusive on both ends.
    if let Some((start, end)) = self.config.date_range {
        let posted = entry.posting_date();
        if !(start..=end).contains(&posted) {
            self.stats.skipped_date += 1;
            return false;
        }
    }

    let already_applied = self
        .document_anomaly_counts
        .get(&entry.document_number())
        .map_or(0, |count| *count);
    if already_applied >= self.config.max_anomalies_per_document {
        self.stats.skipped_max_per_doc += 1;
        return false;
    }

    true
}
/// Draws an anomaly category from the configured per-category rates and lets
/// the type selector pick a concrete type within it.
///
/// One uniform `f64` is drawn and compared against the cumulative rates in
/// the order fraud, error, process issue, statistical; anything beyond the
/// last threshold becomes a relational anomaly.
fn select_anomaly_category(&mut self) -> AnomalyType {
    let roll = self.rng.random::<f64>();

    let rates = &self.config.rates;
    let fraud_cut = rates.fraud_rate;
    let error_cut = fraud_cut + rates.error_rate;
    let process_cut = error_cut + rates.process_issue_rate;
    let statistical_cut = process_cut + rates.statistical_rate;

    if roll < fraud_cut {
        self.type_selector.select_fraud(&mut self.rng)
    } else if roll < error_cut {
        self.type_selector.select_error(&mut self.rng)
    } else if roll < process_cut {
        self.type_selector.select_process_issue(&mut self.rng)
    } else if roll < statistical_cut {
        self.type_selector.select_statistical(&mut self.rng)
    } else {
        self.type_selector.select_relational(&mut self.rng)
    }
}
/// Applies the strategy for `anomaly_type` to `entry` and builds its label.
///
/// Returns `None` when no strategy can be applied, when the strategy reports
/// failure, or when label generation is disabled (in that last case the entry
/// has still been modified and the per-document / per-type counters updated).
///
/// On success the entry header is flagged as anomalous. For fraud types the
/// fraud flags are set and the behavioral biases are applied; each bias that
/// fires yields a secondary process-issue label which is pushed directly onto
/// `self.labels` and linked to the returned parent label.
///
/// The returned parent label is NOT pushed here; the caller decides whether to
/// store it. Its ID is derived from the number of labels stored so far.
fn inject_anomaly(
    &mut self,
    entry: &mut JournalEntry,
    anomaly_type: AnomalyType,
) -> Option<LabeledAnomaly> {
    if !self.strategies.can_apply(entry, &anomaly_type) {
        return None;
    }
    let result = self
        .strategies
        .apply_strategy(entry, &anomaly_type, &mut self.rng);
    if !result.success {
        return None;
    }

    // Bookkeeping happens for every applied strategy, labeled or not.
    *self
        .document_anomaly_counts
        .entry(entry.document_number().clone())
        .or_insert(0) += 1;
    let category = anomaly_type.category().to_string();
    let type_name = anomaly_type.type_name();
    *self.stats.by_category.entry(category).or_insert(0) += 1;
    *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
    *self
        .stats
        .by_company
        .entry(entry.company_code().to_string())
        .or_insert(0) += 1;

    if !self.config.generate_labels {
        return None;
    }

    let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
    entry.header.is_anomaly = true;
    entry.header.anomaly_id = Some(anomaly_id.clone());
    entry.header.anomaly_type = Some(type_name.clone());

    let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
        Vec::new();
    if let AnomalyType::Fraud(ref ft) = anomaly_type {
        entry.header.is_fraud = true;
        entry.header.fraud_type = Some(*ft);
        // May move the posting date / amounts, so it runs before the label is built.
        secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
    }

    let mut label = LabeledAnomaly::new(
        anomaly_id,
        anomaly_type.clone(),
        entry.document_number().clone(),
        "JE".to_string(),
        entry.company_code().to_string(),
        entry.posting_date(),
    )
    .with_description(&result.description)
    .with_injection_strategy(&type_name);

    let causal_reason = AnomalyCausalReason::RandomRate {
        base_rate: self.config.rates.total_rate,
    };
    label = label.with_causal_reason(causal_reason);

    // Record the context multiplier only when it actually changed the rate.
    let context_multiplier = self.calculate_context_rate_multiplier(entry);
    if (context_multiplier - 1.0).abs() > f64::EPSILON {
        label = label.with_metadata(
            "entity_context_multiplier",
            &format!("{context_multiplier:.3}"),
        );
        label = label.with_metadata(
            "effective_rate",
            &format!(
                "{:.6}",
                (self.config.rates.total_rate * context_multiplier).min(1.0)
            ),
        );
    }

    if let Some(impact) = result.monetary_impact {
        label = label.with_monetary_impact(impact);
    }
    for entity in &result.related_entities {
        label = label.with_related_entity(entity);
    }
    for (key, value) in &result.metadata {
        label = label.with_metadata(key, value);
    }

    if let Some(cluster_id) =
        self.cluster_manager
            .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
    {
        label = label.with_cluster(&cluster_id);
        label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
            cluster_id: cluster_id.clone(),
        });
    }

    for issue_type in &secondary_process_issues {
        // The parent already claimed `len + 1` but is pushed by the caller
        // only after this function returns, so children start at `len + 2`.
        // Using `len + 1` here would give the first child the parent's ID.
        let child_id = format!("ANO{:08}", self.labels.len() + 2);
        let child = LabeledAnomaly::new(
            child_id,
            AnomalyType::ProcessIssue(*issue_type),
            entry.document_number().clone(),
            "JE".to_string(),
            entry.company_code().to_string(),
            entry.posting_date(),
        )
        .with_description("Forensic pattern from fraud behavioral bias")
        .with_injection_strategy("behavioral_bias")
        .with_parent_anomaly(&label.anomaly_id);
        self.labels.push(child);
    }

    Some(label)
}
/// Injects exactly `anomaly_type` into `entry`, bypassing rate and filter checks.
///
/// Thin public wrapper around the internal injection routine; the returned
/// label is not stored in the injector's label list.
pub fn inject_specific(
    &mut self,
    entry: &mut JournalEntry,
    anomaly_type: AnomalyType,
) -> Option<LabeledAnomaly> {
    let label = self.inject_anomaly(entry, anomaly_type);
    label
}
/// Marks `entry` as a self-approval fraud committed by `user_id`.
///
/// Builds the label, flags the entry header as anomalous and fraudulent, sets
/// the entry's creator to `user_id`, stores the label and returns a copy of
/// it. Always returns `Some`.
pub fn create_self_approval(
    &mut self,
    entry: &mut JournalEntry,
    user_id: &str,
) -> Option<LabeledAnomaly> {
    let next_id = format!("ANO{:08}", self.labels.len() + 1);
    let description = format!("User {user_id} approved their own transaction");
    let targeting = AnomalyCausalReason::EntityTargeting {
        target_type: "User".to_string(),
        target_id: user_id.to_string(),
    };

    let label = LabeledAnomaly::new(
        next_id,
        AnomalyType::Fraud(FraudType::SelfApproval),
        entry.document_number().clone(),
        "JE".to_string(),
        entry.company_code().to_string(),
        entry.posting_date(),
    )
    .with_description(&description)
    .with_related_entity(user_id)
    .with_injection_strategy("ManualSelfApproval")
    .with_causal_reason(targeting);

    let header = &mut entry.header;
    header.is_anomaly = true;
    header.is_fraud = true;
    header.anomaly_id = Some(label.anomaly_id.clone());
    header.anomaly_type = Some("SelfApproval".to_string());
    header.fraud_type = Some(FraudType::SelfApproval);
    header.created_by = user_id.to_string();

    self.labels.push(label.clone());
    Some(label)
}
/// Marks `entry` as a segregation-of-duties violation by `user_id`.
///
/// `conflicting_duties` names the two duties the user performed; both are
/// recorded in the description and as `duty1` / `duty2` metadata. The label is
/// stored and a copy is returned. Always returns `Some`.
pub fn create_sod_violation(
    &mut self,
    entry: &mut JournalEntry,
    user_id: &str,
    conflicting_duties: (&str, &str),
) -> Option<LabeledAnomaly> {
    let (first_duty, second_duty) = conflicting_duties;
    let next_id = format!("ANO{:08}", self.labels.len() + 1);
    let description = format!(
        "User {} performed conflicting duties: {} and {}",
        user_id, first_duty, second_duty
    );
    let targeting = AnomalyCausalReason::EntityTargeting {
        target_type: "User".to_string(),
        target_id: user_id.to_string(),
    };

    let label = LabeledAnomaly::new(
        next_id,
        AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation),
        entry.document_number().clone(),
        "JE".to_string(),
        entry.company_code().to_string(),
        entry.posting_date(),
    )
    .with_description(&description)
    .with_related_entity(user_id)
    .with_metadata("duty1", first_duty)
    .with_metadata("duty2", second_duty)
    .with_injection_strategy("ManualSoDViolation")
    .with_causal_reason(targeting);

    let header = &mut entry.header;
    header.is_anomaly = true;
    header.is_fraud = true;
    header.anomaly_id = Some(label.anomaly_id.clone());
    header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
    header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);

    self.labels.push(label.clone());
    Some(label)
}
/// Marks `entry` as an unmatched intercompany posting against `matching_company`.
///
/// The monetary impact is `actual_amount - expected_amount`; both amounts are
/// also stored as metadata. The entry is flagged as anomalous (not as fraud),
/// the label is stored and a copy is returned. Always returns `Some`.
pub fn create_ic_mismatch(
    &mut self,
    entry: &mut JournalEntry,
    matching_company: &str,
    expected_amount: Decimal,
    actual_amount: Decimal,
) -> Option<LabeledAnomaly> {
    let next_id = format!("ANO{:08}", self.labels.len() + 1);
    let description = format!(
        "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
    );
    let difference = actual_amount - expected_amount;
    let targeting = AnomalyCausalReason::EntityTargeting {
        target_type: "Intercompany".to_string(),
        target_id: matching_company.to_string(),
    };

    let label = LabeledAnomaly::new(
        next_id,
        AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany),
        entry.document_number().clone(),
        "JE".to_string(),
        entry.company_code().to_string(),
        entry.posting_date(),
    )
    .with_description(&description)
    .with_related_entity(matching_company)
    .with_monetary_impact(difference)
    .with_metadata("expected_amount", &expected_amount.to_string())
    .with_metadata("actual_amount", &actual_amount.to_string())
    .with_injection_strategy("ManualICMismatch")
    .with_causal_reason(targeting);

    let header = &mut entry.header;
    header.is_anomaly = true;
    header.anomaly_id = Some(label.anomaly_id.clone());
    header.anomaly_type = Some("UnmatchedIntercompany".to_string());

    self.labels.push(label.clone());
    Some(label)
}
/// Returns all labels stored by the injector so far.
pub fn get_labels(&self) -> &[LabeledAnomaly] {
    self.labels.as_slice()
}
/// Builds a fresh summary from the labels stored so far.
pub fn get_summary(&self) -> AnomalySummary {
    let stored = &self.labels;
    AnomalySummary::from_anomalies(stored)
}
pub fn get_stats(&self) -> &InjectorStats {
&self.stats
}
/// Clears accumulated results so the injector can be reused for a new run.
///
/// Labels, near-miss labels, scheme actions, per-document counts, statistics,
/// the difficulty distribution and any still-pending co-occurrences are
/// dropped; the cluster manager and (if present) the behavioral baseline are
/// rebuilt from the configuration. The RNG, the entity contexts and the other
/// optional components are left untouched.
pub fn reset(&mut self) {
    self.labels.clear();
    self.document_anomaly_counts.clear();
    self.stats = InjectorStats::default();
    self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
    self.near_miss_labels.clear();
    self.scheme_actions.clear();
    self.difficulty_distribution.clear();
    // Pending correlated anomalies belong to the previous run; leaving them
    // queued would inject them into the first eligible entries of the next run.
    self.queued_co_occurrences.clear();
    if let Some(ref mut baseline) = self.behavioral_baseline {
        *baseline =
            BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
    }
}
/// Returns the number of clusters reported by the cluster manager.
pub fn cluster_count(&self) -> usize {
    let manager = &self.cluster_manager;
    manager.cluster_count()
}
/// Replaces the vendor, employee and account context maps in one call.
///
/// Any previously stored contexts are discarded.
pub fn set_entity_contexts(
    &mut self,
    vendors: HashMap<String, VendorContext>,
    employees: HashMap<String, EmployeeContext>,
    accounts: HashMap<String, AccountContext>,
) {
    (
        self.vendor_contexts,
        self.employee_contexts,
        self.account_contexts,
    ) = (vendors, employees, accounts);
}
pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
&self.vendor_contexts
}
pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
&self.employee_contexts
}
pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
&self.account_contexts
}
/// Computes the rate multiplier implied by the entry's vendor, employee and
/// account contexts.
///
/// Returns `1.0` when no contexts are registered at all. Otherwise the factors
/// below are multiplied together for every flag that is set:
/// - vendor (looked up by header reference): new x2.0, dormant reactivation x1.5
/// - employee (looked up by creator): new x1.5, volume fatigued x1.3, overtime x1.2
/// - account (looked up by first line's GL account): high risk x2.0
fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
    let nothing_registered = self.vendor_contexts.is_empty()
        && self.employee_contexts.is_empty()
        && self.account_contexts.is_empty();
    if nothing_registered {
        return 1.0;
    }

    let vendor = entry
        .header
        .reference
        .as_ref()
        .and_then(|reference| self.vendor_contexts.get(reference));
    let employee = self.employee_contexts.get(&entry.header.created_by);
    let account = entry
        .lines
        .first()
        .and_then(|line| self.account_contexts.get(&line.gl_account));

    let mut factor = 1.0;
    if let Some(v) = vendor {
        if v.is_new {
            factor *= 2.0;
        }
        if v.is_dormant_reactivation {
            factor *= 1.5;
        }
    }
    if let Some(e) = employee {
        if e.is_new {
            factor *= 1.5;
        }
        if e.is_volume_fatigued {
            factor *= 1.3;
        }
        if e.is_overtime {
            factor *= 1.2;
        }
    }
    if let Some(a) = account {
        if a.is_high_risk {
            factor *= 2.0;
        }
    }
    factor
}
/// Applies the configured behavioral biases to a fraud entry.
///
/// Each bias is rolled independently against its configured probability:
/// - **weekend**: moves the posting date forward to the coming Saturday or
///   Sunday (an entry already on a weekend stays on a weekend);
/// - **round dollar**: for two-line entries with a positive amount, replaces
///   the amount with the nearest value from a fixed list of round targets;
/// - **off hours**: rewrites `created_at` to a UTC time between 22:00 and
///   05:59 on the posting date;
/// - **post close**: sets the `is_post_close` flag if it is not already set.
///
/// Returns the process-issue types for the weekend, off-hours and post-close
/// biases that fired (the round-dollar bias has no associated issue type).
/// Returns an empty list when the bias config is disabled.
fn apply_fraud_behavioral_bias(
    &mut self,
    entry: &mut JournalEntry,
) -> Vec<datasynth_core::models::ProcessIssueType> {
    use chrono::{Datelike, Duration, TimeZone, Utc, Weekday};
    use datasynth_core::models::ProcessIssueType;

    let mut fired: Vec<ProcessIssueType> = Vec::new();
    let cfg = &self.config.enhanced.fraud_behavioral_bias;
    if !cfg.enabled {
        return fired;
    }

    if cfg.weekend_bias > 0.0 && self.rng.random::<f64>() < cfg.weekend_bias {
        let original = entry.header.posting_date;
        let weekday = original.weekday();
        let days_to_weekend = match weekday {
            Weekday::Mon => 5,
            Weekday::Tue => 4,
            Weekday::Wed => 3,
            Weekday::Thu => 2,
            Weekday::Fri => 1,
            Weekday::Sat | Weekday::Sun => 0,
        };
        // Coin flip between Saturday (+0) and Sunday (+1). The draw is always
        // made so the RNG stream does not depend on the weekday.
        let coin = if self.rng.random_bool(0.5) { 0 } else { 1 };
        // A Sunday posting must not spill over into Monday.
        let extra = if weekday == Weekday::Sun { 0 } else { coin };
        entry.header.posting_date = original + Duration::days(days_to_weekend + extra);
        self.stats.fraud_weekend_bias_applied += 1;
        fired.push(ProcessIssueType::WeekendPosting);
    }

    if cfg.round_dollar_bias > 0.0 && self.rng.random::<f64>() < cfg.round_dollar_bias {
        const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
        // Only simple debit/credit pairs are rewritten so the entry stays balanced.
        if entry.lines.len() == 2 {
            let (debit_idx, credit_idx) = if entry.lines[0].is_debit() {
                (0, 1)
            } else {
                (1, 0)
            };
            let current = entry.lines[debit_idx]
                .debit_amount
                .max(entry.lines[credit_idx].credit_amount);
            if current > Decimal::ZERO {
                let current_f64: f64 = current.try_into().unwrap_or(0.0);
                // Nearest round target by absolute distance.
                let target = ROUND_TARGETS
                    .iter()
                    .min_by(|a, b| {
                        let da = (**a as f64 - current_f64).abs();
                        let db = (**b as f64 - current_f64).abs();
                        da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
                    })
                    .copied()
                    .unwrap_or(1_000);
                let rounded = Decimal::from(target);
                entry.lines[debit_idx].debit_amount = rounded;
                entry.lines[debit_idx].credit_amount = Decimal::ZERO;
                entry.lines[credit_idx].debit_amount = Decimal::ZERO;
                entry.lines[credit_idx].credit_amount = rounded;
                self.stats.fraud_round_dollar_bias_applied += 1;
            }
        }
    }

    if cfg.off_hours_bias > 0.0 && self.rng.random::<f64>() < cfg.off_hours_bias {
        // Late evening (22-23h) or early morning (0-5h), equally likely.
        let hour: u32 = if self.rng.random_bool(0.5) {
            self.rng.random_range(22..24)
        } else {
            self.rng.random_range(0..6)
        };
        let minute: u32 = self.rng.random_range(0..60);
        let second: u32 = self.rng.random_range(0..60);
        // Uses the (possibly weekend-shifted) posting date as the calendar day.
        if let chrono::LocalResult::Single(new_ts) = Utc.with_ymd_and_hms(
            entry.header.posting_date.year(),
            entry.header.posting_date.month(),
            entry.header.posting_date.day(),
            hour,
            minute,
            second,
        ) {
            entry.header.created_at = new_ts;
            self.stats.fraud_off_hours_bias_applied += 1;
            fired.push(ProcessIssueType::AfterHoursPosting);
        }
    }

    if cfg.post_close_bias > 0.0
        && self.rng.random::<f64>() < cfg.post_close_bias
        && !entry.header.is_post_close
    {
        entry.header.is_post_close = true;
        self.stats.fraud_post_close_bias_applied += 1;
        fired.push(ProcessIssueType::PostClosePosting);
    }

    fired
}
pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
if let Some(ref mut advancer) = self.scheme_advancer {
let context = SchemeContext::new(date, company_code);
let actions = advancer.advance_all(&context);
self.scheme_actions.extend(actions.clone());
actions
} else {
Vec::new()
}
}
pub fn maybe_start_scheme(
&mut self,
date: NaiveDate,
company_code: &str,
available_users: Vec<String>,
available_accounts: Vec<String>,
available_counterparties: Vec<String>,
) -> Option<uuid::Uuid> {
if let Some(ref mut advancer) = self.scheme_advancer {
let mut context = SchemeContext::new(date, company_code);
context.available_users = available_users;
context.available_accounts = available_accounts;
context.available_counterparties = available_counterparties;
advancer.maybe_start_scheme(&context)
} else {
None
}
}
/// Returns all near-miss labels recorded so far.
pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
    self.near_miss_labels.as_slice()
}
/// Returns all scheme actions recorded so far.
pub fn get_scheme_actions(&self) -> &[SchemeAction] {
    self.scheme_actions.as_slice()
}
pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
&self.difficulty_distribution
}
pub fn check_behavioral_deviations(
&self,
entity_id: &str,
observation: &super::context::Observation,
) -> Vec<super::context::BehavioralDeviation> {
if let Some(ref baseline) = self.behavioral_baseline {
baseline.check_deviation(entity_id, observation)
} else {
Vec::new()
}
}
pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
if let Some(ref baseline) = self.behavioral_baseline {
baseline.get_baseline(entity_id)
} else {
None
}
}
pub fn active_scheme_count(&self) -> usize {
if let Some(ref advancer) = self.scheme_advancer {
advancer.active_scheme_count()
} else {
0
}
}
/// Reports whether any enhanced-injection component is active.
///
/// Covers every optional component created from `EnhancedInjectionConfig`:
/// scheme advancer, near-miss generator, co-occurrence handler, temporal
/// cluster generator, difficulty calculator and entity-aware injector. The
/// behavioral baseline needs no separate check because it only exists
/// together with the entity-aware injector.
pub fn has_enhanced_features(&self) -> bool {
    self.scheme_advancer.is_some()
        || self.near_miss_generator.is_some()
        || self.co_occurrence_handler.is_some()
        || self.temporal_cluster_generator.is_some()
        || self.difficulty_calculator.is_some()
        || self.entity_aware_injector.is_some()
}
}
/// Fluent builder for [`AnomalyInjectorConfig`], starting from its defaults.
pub struct AnomalyInjectorConfigBuilder {
/// Configuration being assembled.
config: AnomalyInjectorConfig,
}
impl AnomalyInjectorConfigBuilder {
    /// Starts a builder holding the default configuration.
    pub fn new() -> Self {
        let config = AnomalyInjectorConfig::default();
        Self { config }
    }

    /// Applies `change` to the configuration under construction and hands the
    /// builder back for chaining.
    fn edit(mut self, change: impl FnOnce(&mut AnomalyInjectorConfig)) -> Self {
        change(&mut self.config);
        self
    }

    /// Sets the base per-entry injection rate.
    pub fn with_total_rate(self, rate: f64) -> Self {
        self.edit(|cfg| cfg.rates.total_rate = rate)
    }

    /// Sets the fraud share used when drawing the anomaly category.
    pub fn with_fraud_rate(self, rate: f64) -> Self {
        self.edit(|cfg| cfg.rates.fraud_rate = rate)
    }

    /// Sets the error share used when drawing the anomaly category.
    pub fn with_error_rate(self, rate: f64) -> Self {
        self.edit(|cfg| cfg.rates.error_rate = rate)
    }

    /// Sets the seed for the injector's RNG and UUID factory.
    pub fn with_seed(self, seed: u64) -> Self {
        self.edit(|cfg| cfg.seed = seed)
    }

    /// Sets the temporal pattern consulted when deciding whether to inject.
    pub fn with_temporal_pattern(self, pattern: TemporalPattern) -> Self {
        self.edit(|cfg| cfg.patterns.temporal_pattern = pattern)
    }

    /// Enables or disables label generation.
    pub fn with_labels(self, generate: bool) -> Self {
        self.edit(|cfg| cfg.generate_labels = generate)
    }

    /// Restricts injection to the given company codes (empty = all).
    pub fn with_target_companies(self, companies: Vec<String>) -> Self {
        self.edit(|cfg| cfg.target_companies = companies)
    }

    /// Restricts injection to the inclusive posting-date window `start..=end`.
    pub fn with_date_range(self, start: NaiveDate, end: NaiveDate) -> Self {
        self.edit(|cfg| cfg.date_range = Some((start, end)))
    }

    /// Toggles multi-stage schemes and sets their base probability.
    pub fn with_multi_stage_schemes(self, enabled: bool, probability: f64) -> Self {
        self.edit(|cfg| {
            cfg.enhanced.multi_stage_schemes_enabled = enabled;
            cfg.enhanced.scheme_probability = probability;
        })
    }

    /// Toggles near-miss generation and sets its proportion.
    pub fn with_near_misses(self, enabled: bool, proportion: f64) -> Self {
        self.edit(|cfg| {
            cfg.enhanced.near_miss_enabled = enabled;
            cfg.enhanced.near_miss_proportion = proportion;
        })
    }

    /// Sets the approval thresholds passed to the near-miss check.
    pub fn with_approval_thresholds(self, thresholds: Vec<Decimal>) -> Self {
        self.edit(|cfg| cfg.enhanced.approval_thresholds = thresholds)
    }

    /// Toggles correlated (co-occurring) anomaly injection.
    pub fn with_correlated_injection(self, enabled: bool) -> Self {
        self.edit(|cfg| cfg.enhanced.correlated_injection_enabled = enabled)
    }

    /// Toggles temporal clustering and sets the period-end multiplier.
    pub fn with_temporal_clustering(self, enabled: bool, multiplier: f64) -> Self {
        self.edit(|cfg| {
            cfg.enhanced.temporal_clustering_enabled = enabled;
            cfg.enhanced.period_end_multiplier = multiplier;
        })
    }

    /// Toggles detection-difficulty classification of labels.
    pub fn with_difficulty_classification(self, enabled: bool) -> Self {
        self.edit(|cfg| cfg.enhanced.difficulty_classification_enabled = enabled)
    }

    /// Toggles context-aware injection.
    pub fn with_context_aware_injection(self, enabled: bool) -> Self {
        self.edit(|cfg| cfg.enhanced.context_aware_enabled = enabled)
    }

    /// Replaces the behavioral baseline configuration.
    pub fn with_behavioral_baseline(self, config: BehavioralBaselineConfig) -> Self {
        self.edit(|cfg| cfg.enhanced.behavioral_baseline_config = config)
    }

    /// Switches on every enhanced feature with preset parameters
    /// (scheme probability 0.02, period-end multiplier 2.5, near-miss
    /// proportion 0.30).
    pub fn with_all_enhanced_features(self) -> Self {
        self.edit(|cfg| {
            let enhanced = &mut cfg.enhanced;
            enhanced.multi_stage_schemes_enabled = true;
            enhanced.scheme_probability = 0.02;
            enhanced.correlated_injection_enabled = true;
            enhanced.temporal_clustering_enabled = true;
            enhanced.period_end_multiplier = 2.5;
            enhanced.near_miss_enabled = true;
            enhanced.near_miss_proportion = 0.30;
            enhanced.difficulty_classification_enabled = true;
            enhanced.context_aware_enabled = true;
            enhanced.behavioral_baseline_config.enabled = true;
        })
    }

    /// Finishes the builder and returns the assembled configuration.
    pub fn build(self) -> AnomalyInjectorConfig {
        let Self { config } = self;
        config
    }
}
impl Default for AnomalyInjectorConfigBuilder {
/// Equivalent to [`AnomalyInjectorConfigBuilder::new`].
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
use chrono::NaiveDate;
use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
use rust_decimal_macros::dec;
/// Builds a balanced two-line test entry (debit 5000 / credit 1000, amount
/// 1000) for company "1000", posted on 2024-06-15.
fn create_test_entry(doc_num: &str) -> JournalEntry {
    let posting_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let mut entry = JournalEntry::new_simple(
        doc_num.to_string(),
        "1000".to_string(),
        posting_date,
        "Test Entry".to_string(),
    );

    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".to_string(),
        debit_amount: dec!(1000),
        ..Default::default()
    };
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".to_string(),
        credit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(debit_line);
    entry.add_line(credit_line);

    entry
}
/// With a 50% rate over 100 entries, some anomalies and labels must appear,
/// and there are at least as many labels as injected anomalies.
#[test]
fn test_anomaly_injector_basic() {
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42)
        .build();
    let mut injector = AnomalyInjector::new(config);

    let mut entries = Vec::new();
    for i in 0..100 {
        entries.push(create_test_entry(&format!("JE{i:04}")));
    }

    let result = injector.process_entries(&mut entries);

    assert!(result.anomalies_injected > 0);
    assert!(!result.labels.is_empty());
    assert!(result.labels.len() >= result.anomalies_injected);
}
/// Injecting a specific statistical anomaly yields a label tied to the entry.
#[test]
fn test_specific_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let requested = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let outcome = injector.inject_specific(&mut entry, requested);

    assert!(outcome.is_some());
    let label = outcome.unwrap();
    assert!(!label.document_id.is_empty());
    assert_eq!(label.document_id, entry.document_number());
}
/// `create_self_approval` returns a `Fraud(SelfApproval)` label that lists
/// the given user among its related entities.
#[test]
fn test_self_approval_injection() {
    let approver = "USER001";
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let outcome = injector.create_self_approval(&mut entry, approver);

    assert!(outcome.is_some());
    let label = outcome.unwrap();
    assert!(matches!(
        label.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    ));
    assert!(label.related_entities.contains(&approver.to_string()));
}
/// With target companies restricted to "2000" and a total rate of 1.0, the
/// fixture entries receive no anomalies.
///
/// NOTE(review): the fixtures pass "1000" as the second argument of
/// `JournalEntry::new_simple`; presumably that is the company code the
/// filter compares against -- confirm.
#[test]
fn test_company_filtering() {
    let only_company_2000 = vec!["2000".to_string()];
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0)
        .with_target_companies(only_company_2000)
        .build();
    let mut injector = AnomalyInjector::new(config);

    let mut entries: Vec<JournalEntry> = ["JE001", "JE002"]
        .iter()
        .map(|doc_num| create_test_entry(doc_num))
        .collect();

    let result = injector.process_entries(&mut entries);

    assert_eq!(result.anomalies_injected, 0);
}
/// Builds a two-line journal entry fixture (dated 2024-06-15) carrying entity
/// references in its header.
///
/// * `vendor_ref` is stored in `header.reference`.
/// * `employee_id` is stored in `header.created_by`.
/// * `gl_account` is the account of the 1000 debit line; the matching 1000
///   credit line always uses account "1000".
fn create_test_entry_with_context(
    doc_num: &str,
    vendor_ref: Option<&str>,
    employee_id: &str,
    gl_account: &str,
) -> JournalEntry {
    let entry_date = NaiveDate::from_ymd_opt(2024, 6, 15).unwrap();
    let mut entry = JournalEntry::new_simple(
        String::from(doc_num),
        String::from("1000"),
        entry_date,
        String::from("Test Entry"),
    );

    entry.header.reference = vendor_ref.map(String::from);
    entry.header.created_by = String::from(employee_id);

    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: String::from(gl_account),
        debit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(debit_line);

    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: String::from("1000"),
        credit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(credit_line);

    entry
}
/// `set_entity_contexts` stores the supplied vendor, employee and account
/// maps, which are then visible through the corresponding accessors.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before anything is set, all three context maps are empty.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    // Exactly one context of each kind, stored under the expected key.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
/// Without any entity contexts, a 0.5 total rate over 200 entries (seed 42)
/// yields an observed injection rate strictly between 20% and 80%.
#[test]
fn test_default_behavior_no_contexts() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries: Vec<JournalEntry> = (0..200)
        .map(|idx| format!("JE{:04}", idx))
        .map(|doc_num| create_test_entry(&doc_num))
        .collect();

    let result = injector.process_entries(&mut entries);
    assert!(result.anomalies_injected > 0);

    let injected = result.anomalies_injected as f64;
    let processed = result.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        rate > 0.2 && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
/// Two injectors with identical config (rate 0.10, seed 123) process the same
/// 500 entries; the one that was given flagged vendor, employee and account
/// contexts must inject strictly more anomalies than the one without.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share the same configuration and the same entry set.
    let make_config = || {
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(base_rate)
            .with_seed(123)
            .build()
    };
    let make_entries = || -> Vec<JournalEntry> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no entity contexts registered.
    let mut injector_no_ctx = AnomalyInjector::new(make_config());
    let mut entries_no_ctx = make_entries();
    let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);

    // Run 2: contexts registered for the vendor, employee and account that
    // every entry references.
    let mut injector_ctx = AnomalyInjector::new(make_config());
    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector_ctx.set_entity_contexts(vendors, employees, accounts);
    let mut entries_ctx = make_entries();
    let result_ctx = injector_ctx.process_entries(&mut entries_ctx);

    assert!(
        result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        result_ctx.anomalies_injected,
        result_no_ctx.anomalies_injected,
    );
}
/// Checks the values returned by `calculate_context_rate_multiplier`:
/// 1.0 with no contexts registered, 4.0 when both the flagged vendor and the
/// flagged account match, 2.0 when only the vendor matches, and 1.0 when the
/// entry references neither.
#[test]
fn test_risk_score_multiplication() {
    // Exact-equality check for f64 within machine epsilon.
    let near = |actual: f64, expected: f64| (actual - expected).abs() < f64::EPSILON;

    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // No contexts registered yet.
    let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let baseline = injector.calculate_context_rate_multiplier(&entry_plain);
    assert!(near(baseline, 1.0));

    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Vendor and account both match.
    let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
    assert!(
        near(multiplier, 4.0),
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Only the vendor matches.
    let entry_vendor_only =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
    assert!(
        near(multiplier_vendor, 2.0),
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor nor the account is registered.
    let entry_no_match =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
    assert!(
        near(multiplier_none, 1.0),
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
/// An employee flagged as new, volume-fatigued and on overtime is expected to
/// produce a multiplier of 1.5 * 1.3 * 1.2 (within 0.01).
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employees = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
/// Vendor contexts registered on the injector are still present after
/// `reset()` is called.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    injector.reset();

    assert_eq!(injector.vendor_contexts().len(), 1);
}
/// Calling `set_entity_contexts` with empty maps replaces previously
/// registered vendor contexts, leaving none.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // Second call with nothing in any map.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
/// A vendor flagged only as a dormant reactivation is expected to yield a
/// 1.5 multiplier for entries that reference it.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let deviation = (multiplier - 1.5).abs();
    assert!(
        deviation < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
/// With every fraud behavioral bias set to 1.0, injecting a fraud anomaly
/// into an entry built for 2024-06-10 (a Monday) with a 1237 total must leave
/// the entry with a weekend posting date, a balanced round-dollar total, an
/// off-hours `created_at`, the post-close flag set, and each bias counter at 1.
#[test]
fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
    use chrono::{Datelike, Timelike, Weekday};
    use datasynth_core::models::FraudType;

    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut injector = AnomalyInjector::new(config);

    // Weekday date and a non-round amount, so each bias has something to change.
    let weekday_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        "JE001".to_string(),
        "1000".to_string(),
        weekday_date,
        "Test Entry".to_string(),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".to_string(),
        debit_amount: dec!(1237),
        ..Default::default()
    };
    entry.add_line(debit_line);
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".to_string(),
        credit_amount: dec!(1237),
        ..Default::default()
    };
    entry.add_line(credit_line);

    let _ =
        injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    // Weekend bias.
    let posted_on = entry.header.posting_date.weekday();
    assert!(
        matches!(posted_on, Weekday::Sat | Weekday::Sun),
        "expected weekend posting date, got {:?}",
        posted_on
    );

    // Round-dollar bias: totals stay balanced and land on an allowed target.
    let debit_total = entry
        .lines
        .iter()
        .map(|line| line.debit_amount)
        .sum::<Decimal>();
    let credit_total = entry
        .lines
        .iter()
        .map(|line| line.credit_amount)
        .sum::<Decimal>();
    assert_eq!(debit_total, credit_total, "entry must remain balanced");
    let round_targets = [
        dec!(1_000),
        dec!(5_000),
        dec!(10_000),
        dec!(25_000),
        dec!(50_000),
        dec!(100_000),
    ];
    assert!(
        round_targets.contains(&debit_total),
        "expected round-dollar total, got {}",
        debit_total
    );

    // Off-hours bias: hour must fall outside 06:00..22:00.
    let hour = entry.header.created_at.hour();
    assert!(
        !(6..22).contains(&hour),
        "expected off-hours timestamp, got hour {}",
        hour
    );

    // Post-close bias.
    assert!(entry.header.is_post_close);

    // Each bias counter is incremented exactly once.
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 1);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
    assert_eq!(stats.fraud_off_hours_bias_applied, 1);
    assert_eq!(stats.fraud_post_close_bias_applied, 1);
}
/// With the bias feature enabled but every bias rate at 0.0, a fraud
/// injection leaves the posting date and post-close flag untouched and all
/// four bias counters at zero.
#[test]
fn fraud_behavioral_bias_rate_zero_applies_nothing() {
    use datasynth_core::models::FraudType;

    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 0.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 0.0,
        post_close_bias: 0.0,
    };
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let _ =
        injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    assert_eq!(entry.header.posting_date, original_date);
    assert!(!entry.header.is_post_close);

    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
    assert_eq!(stats.fraud_off_hours_bias_applied, 0);
    assert_eq!(stats.fraud_post_close_bias_applied, 0);
}
/// Even with every fraud bias at 1.0, injecting a statistical (non-fraud)
/// anomaly leaves the posting date unchanged and the weekend-bias counter
/// at zero.
#[test]
fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let non_fraud = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let _ = injector.inject_specific(&mut entry, non_fraud);

    assert_eq!(entry.header.posting_date, original_date);

    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
}
/// With weekend, off-hours and post-close biases at 1.0 (round-dollar at 0.0),
/// a fraud injection returns the primary fraud label and leaves exactly three
/// labels in `get_labels()`: `WeekendPosting`, `AfterHoursPosting` and
/// `PostClosePosting` process issues.
#[test]
fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
    use datasynth_core::models::{FraudType, ProcessIssueType};

    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut injector = AnomalyInjector::new(config);

    let entry_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry =
        JournalEntry::new_simple("JE001".into(), "1000".into(), entry_date, "Test".into());
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".into(),
        debit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(debit_line);
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".into(),
        credit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(credit_line);

    let primary = injector
        .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
        .expect("fraud label should be produced");

    let labels = injector.get_labels();
    assert_eq!(
        labels.len(),
        3,
        "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
    );

    // True when some stored label carries the requested anomaly type.
    let has_label =
        |wanted: AnomalyType| labels.iter().any(|label| label.anomaly_type == wanted);
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::WeekendPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::AfterHoursPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::PostClosePosting
    )));

    assert_eq!(
        primary.anomaly_type,
        AnomalyType::Fraud(FraudType::FictitiousEntry)
    );
}
}