1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
23use datasynth_core::models::{
24 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
25 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
26 RelationalAnomalyType,
27};
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29
30use super::context::{
31 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
32 EntityAwareInjector, VendorContext,
33};
34use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
35use super::difficulty::DifficultyCalculator;
36use super::near_miss::{NearMissConfig, NearMissGenerator};
37use super::patterns::{
38 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
39 TemporalPattern,
40};
41use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
42use super::schemes::{SchemeAction, SchemeContext};
43use super::strategies::{DuplicationStrategy, StrategyCollection};
44use super::types::AnomalyTypeSelector;
45
/// Configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Rate settings. `total_rate` is the base per-entry injection rate; the
    /// per-category rates (`fraud_rate`, `error_rate`, ...) drive category selection.
    pub rates: AnomalyRateConfig,
    /// Pattern settings (temporal pattern, clustering, entity targeting).
    pub patterns: AnomalyPatternConfig,
    /// Seed used for the injector's RNG and its deterministic UUID factory.
    pub seed: u64,
    /// When `false`, strategies are still applied to entries, but the entry header is
    /// not flagged and no label is returned.
    pub generate_labels: bool,
    /// Whether duplicate-type anomalies may produce a duplicated entry.
    pub allow_duplicates: bool,
    /// Maximum number of anomalies injected into a single document.
    pub max_anomalies_per_document: usize,
    /// Company codes eligible for injection; an empty list means every company.
    pub target_companies: Vec<String>,
    /// Inclusive posting-date window eligible for injection; `None` means no restriction.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Opt-in enhanced injection features.
    pub enhanced: EnhancedInjectionConfig,
}
68
/// Opt-in features layered on top of basic rate-driven injection.
///
/// The derived `Default` leaves every flag `false` and every number at zero.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Enables the multi-stage scheme advancer.
    pub multi_stage_schemes_enabled: bool,
    /// Embezzlement scheme probability; revenue-manipulation and kickback schemes use
    /// half of this value.
    pub scheme_probability: f64,
    /// Enables queuing of correlated (co-occurring) anomalies after an injection.
    pub correlated_injection_enabled: bool,
    /// Enables the temporal cluster generator, whose active clusters scale the rate.
    pub temporal_clustering_enabled: bool,
    /// Period-end rate multiplier.
    // NOTE(review): not read anywhere in the visible part of this file — confirm where
    // it is consumed.
    pub period_end_multiplier: f64,
    /// Enables near-miss generation.
    pub near_miss_enabled: bool,
    /// Forwarded to `NearMissConfig::proportion`.
    pub near_miss_proportion: f64,
    /// Approval thresholds handed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Enables detection-difficulty metadata on labels.
    pub difficulty_classification_enabled: bool,
    /// Enables the entity-aware injector; also a precondition for the behavioral baseline.
    pub context_aware_enabled: bool,
    /// Behavioral baseline settings; only used when both `context_aware_enabled` and
    /// this config's own `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
    /// Bias settings applied to entries that receive a fraud anomaly.
    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
}
100
101impl Default for AnomalyInjectorConfig {
102 fn default() -> Self {
103 Self {
104 rates: AnomalyRateConfig::default(),
105 patterns: AnomalyPatternConfig::default(),
106 seed: 42,
107 generate_labels: true,
108 allow_duplicates: true,
109 max_anomalies_per_document: 2,
110 target_companies: Vec::new(),
111 date_range: None,
112 enhanced: EnhancedInjectionConfig::default(),
113 }
114 }
115}
116
/// Result returned by [`AnomalyInjector::process_entries`].
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Entries processed so far (taken from the injector's cumulative stats).
    pub entries_processed: usize,
    /// Anomalies injected so far (taken from the injector's cumulative stats).
    pub anomalies_injected: usize,
    /// Number of duplicate entries created during this call.
    pub duplicates_created: usize,
    /// Snapshot of all labels held by the injector.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed over `labels`.
    pub summary: AnomalySummary,
    /// Document numbers of entries modified during this call.
    pub modified_documents: Vec<String>,
    /// Snapshot of all near-miss labels held by the injector.
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Snapshot of all scheme actions held by the injector.
    pub scheme_actions: Vec<SchemeAction>,
    /// Snapshot of the injector's label count per detection difficulty.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
139
/// Injects anomalies into journal entries and records labels for them.
pub struct AnomalyInjector {
    /// Configuration supplied at construction.
    config: AnomalyInjectorConfig,
    /// RNG created via `seeded_rng(config.seed, 0)`; shared by all random decisions.
    rng: ChaCha8Rng,
    /// Deterministic UUID factory handed to the duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Checks applicability of, and applies, injection strategies.
    strategies: StrategyCollection,
    /// Assigns cluster ids to injected anomalies.
    cluster_manager: ClusterManager,
    /// Selects the entity an injected anomaly is attributed to.
    entity_targeting: EntityTargetingManager,
    /// Anomalies injected per document number, checked against
    /// `max_anomalies_per_document`.
    document_anomaly_counts: HashMap<String, usize>,
    /// All labels accumulated so far.
    labels: Vec<LabeledAnomaly>,
    /// Running counters.
    stats: InjectorStats,
    /// Present when multi-stage schemes are enabled.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Present when near-miss generation is enabled.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels accumulated so far.
    near_miss_labels: Vec<NearMissLabel>,
    /// Present when correlated injection is enabled.
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Correlated anomalies waiting for an entry dated on or after their `earliest_date`.
    queued_co_occurrences: Vec<QueuedAnomaly>,
    /// Present when temporal clustering is enabled.
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Present when difficulty classification is enabled.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Present when context-aware injection is enabled.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Present when context-aware injection and the baseline config are both enabled.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions accumulated by `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Number of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Vendor contexts, looked up by the entry header's `reference`.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts, looked up by the entry header's `created_by`.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts, looked up by the GL account of the entry's first line.
    account_contexts: HashMap<String, AccountContext>,
}
187
/// Running counters maintained by [`AnomalyInjector`].
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`, including ones that were skipped.
    pub total_processed: usize,
    /// Anomalies for which `process_entries` recorded a label.
    pub total_injected: usize,
    /// Applied anomalies per category name.
    pub by_category: HashMap<String, usize>,
    /// Applied anomalies per type name.
    pub by_type: HashMap<String, usize>,
    /// Applied anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Presumably entries skipped by the random rate check.
    // NOTE(review): no code visible in this file increments this counter — confirm.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date is outside `date_range`.
    pub skipped_date: usize,
    /// Entries skipped because their company is not in `target_companies`.
    pub skipped_company: usize,
    /// Entries skipped because the document already reached `max_anomalies_per_document`.
    pub skipped_max_per_doc: usize,
    /// Times the fraud bias reported a `WeekendPosting` issue.
    pub fraud_weekend_bias_applied: usize,
    /// Times a fraud entry's largest line amount equalled one of the round-dollar
    /// targets while round-dollar bias was enabled.
    pub fraud_round_dollar_bias_applied: usize,
    /// Times the fraud bias reported an `AfterHoursPosting` issue.
    pub fraud_off_hours_bias_applied: usize,
    /// Times the fraud bias reported a `PostClosePosting` issue.
    pub fraud_post_close_bias_applied: usize,
}
218
/// A correlated anomaly waiting to be injected into a later entry.
struct QueuedAnomaly {
    /// Anomaly type to inject.
    anomaly_type: AnomalyType,
    /// Entity of the triggering anomaly, kept only when the correlation is same-entity.
    target_entity: Option<String>,
    /// First posting date on which this anomaly may be injected (trigger date + lag).
    earliest_date: NaiveDate,
    /// Description of the correlation, stored as label metadata on injection.
    description: String,
}
230
231impl AnomalyInjector {
232 pub fn new(config: AnomalyInjectorConfig) -> Self {
234 let mut rng = seeded_rng(config.seed, 0);
235 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
236 let entity_targeting =
237 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
238
239 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
241 let scheme_config = SchemeAdvancerConfig {
242 embezzlement_probability: config.enhanced.scheme_probability,
243 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
244 kickback_probability: config.enhanced.scheme_probability * 0.5,
245 seed: rng.random(),
246 ..Default::default()
247 };
248 Some(SchemeAdvancer::new(scheme_config))
249 } else {
250 None
251 };
252
253 let near_miss_generator = if config.enhanced.near_miss_enabled {
254 let near_miss_config = NearMissConfig {
255 proportion: config.enhanced.near_miss_proportion,
256 seed: rng.random(),
257 ..Default::default()
258 };
259 Some(NearMissGenerator::new(near_miss_config))
260 } else {
261 None
262 };
263
264 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
265 Some(AnomalyCoOccurrence::new())
266 } else {
267 None
268 };
269
270 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
271 Some(TemporalClusterGenerator::new())
272 } else {
273 None
274 };
275
276 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
277 Some(DifficultyCalculator::new())
278 } else {
279 None
280 };
281
282 let entity_aware_injector = if config.enhanced.context_aware_enabled {
283 Some(EntityAwareInjector::default())
284 } else {
285 None
286 };
287
288 let behavioral_baseline = if config.enhanced.context_aware_enabled
289 && config.enhanced.behavioral_baseline_config.enabled
290 {
291 Some(BehavioralBaseline::new(
292 config.enhanced.behavioral_baseline_config.clone(),
293 ))
294 } else {
295 None
296 };
297
298 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
299
300 Self {
301 config,
302 rng,
303 uuid_factory,
304 type_selector: AnomalyTypeSelector::new(),
305 strategies: StrategyCollection::default(),
306 cluster_manager,
307 entity_targeting,
308 document_anomaly_counts: HashMap::new(),
309 labels: Vec::new(),
310 stats: InjectorStats::default(),
311 scheme_advancer,
312 near_miss_generator,
313 near_miss_labels: Vec::new(),
314 co_occurrence_handler,
315 queued_co_occurrences: Vec::new(),
316 temporal_cluster_generator,
317 difficulty_calculator,
318 entity_aware_injector,
319 behavioral_baseline,
320 scheme_actions: Vec::new(),
321 difficulty_distribution: HashMap::new(),
322 vendor_contexts: HashMap::new(),
323 employee_contexts: HashMap::new(),
324 account_contexts: HashMap::new(),
325 }
326 }
327
328 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
330 debug!(
331 entry_count = entries.len(),
332 total_rate = self.config.rates.total_rate,
333 seed = self.config.seed,
334 "Injecting anomalies into journal entries"
335 );
336
337 let mut modified_documents = Vec::new();
338 let mut duplicates = Vec::new();
339
340 for entry in entries.iter_mut() {
341 self.stats.total_processed += 1;
342
343 if let Some(ref mut baseline) = self.behavioral_baseline {
345 use super::context::Observation;
346 let entity_id = entry.header.created_by.clone();
348 let observation =
349 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
350 baseline.record_observation(&entity_id, observation);
351 }
352
353 if !self.should_process(entry) {
355 continue;
356 }
357
358 let entry_date = entry.posting_date();
360 let ready_indices: Vec<usize> = self
361 .queued_co_occurrences
362 .iter()
363 .enumerate()
364 .filter(|(_, q)| entry_date >= q.earliest_date)
365 .map(|(i, _)| i)
366 .collect();
367
368 if let Some(&idx) = ready_indices.first() {
369 let queued = self.queued_co_occurrences.remove(idx);
370 if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
371 label = label.with_metadata("co_occurrence", "true");
372 label = label.with_metadata("co_occurrence_description", &queued.description);
373 if let Some(ref target) = queued.target_entity {
374 label = label.with_related_entity(target);
375 label = label.with_metadata("co_occurrence_target", target);
376 }
377 modified_documents.push(entry.document_number().clone());
378 self.labels.push(label);
379 self.stats.total_injected += 1;
380 }
381 continue; }
383
384 let base_rate = self.config.rates.total_rate;
386
387 let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
389 let employee_id = &entry.header.created_by;
390 let first_account = entry
391 .lines
392 .first()
393 .map(|l| l.gl_account.as_str())
394 .unwrap_or("");
395 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
397
398 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
399 let employee_ctx = self.employee_contexts.get(employee_id);
400 let account_ctx = self.account_contexts.get(first_account);
401
402 let multiplier =
403 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
404 (base_rate * multiplier).min(1.0)
405 } else {
406 self.calculate_context_rate_multiplier(entry) * base_rate
408 };
409
410 if let Some(ref tcg) = self.temporal_cluster_generator {
412 let temporal_multiplier = tcg
413 .get_active_clusters(entry_date)
414 .iter()
415 .map(|c| c.rate_multiplier)
416 .fold(1.0_f64, f64::max);
417 effective_rate = (effective_rate * temporal_multiplier).min(1.0);
418 }
419
420 if should_inject_anomaly(
422 effective_rate,
423 entry_date,
424 &self.config.patterns.temporal_pattern,
425 &mut self.rng,
426 ) {
427 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
429 let account = entry
431 .lines
432 .first()
433 .map(|l| l.gl_account.clone())
434 .unwrap_or_default();
435 near_miss_gen.record_transaction(
436 entry.document_number().clone(),
437 entry_date,
438 entry.total_debit(),
439 &account,
440 None,
441 );
442
443 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
445 entry.document_number().clone(),
446 entry_date,
447 entry.total_debit(),
448 &account,
449 None,
450 &self.config.enhanced.approval_thresholds,
451 ) {
452 self.near_miss_labels.push(near_miss_label);
453 continue; }
455 }
456
457 let anomaly_type = self.select_anomaly_category();
459
460 let target_entity = {
462 let mut candidates: Vec<String> =
463 self.vendor_contexts.keys().cloned().collect();
464 candidates.extend(self.employee_contexts.keys().cloned());
465 if candidates.is_empty() {
466 if let Some(ref r) = entry.header.reference {
468 candidates.push(r.clone());
469 }
470 }
471 self.entity_targeting
472 .select_entity(&candidates, &mut self.rng)
473 };
474
475 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
477 if let Some(ref entity_id) = target_entity {
479 label = label.with_metadata("entity_target", entity_id);
480 label = label.with_related_entity(entity_id);
481 label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
482 target_type: "Entity".to_string(),
483 target_id: entity_id.clone(),
484 });
485 }
486
487 if let Some(ref calculator) = self.difficulty_calculator {
489 let difficulty = calculator.calculate(&label);
490
491 label =
493 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
494 label = label.with_metadata(
495 "difficulty_score",
496 &difficulty.difficulty_score().to_string(),
497 );
498
499 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
501 }
502
503 modified_documents.push(entry.document_number().clone());
504 self.labels.push(label);
505 self.stats.total_injected += 1;
506
507 if let Some(ref co_occ) = self.co_occurrence_handler {
509 let correlated =
510 co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
511 for result in correlated {
512 self.queued_co_occurrences.push(QueuedAnomaly {
513 anomaly_type: result.anomaly_type,
514 target_entity: if result.same_entity {
515 target_entity.clone()
516 } else {
517 None
518 },
519 earliest_date: entry_date
520 + chrono::Duration::days(i64::from(result.lag_days)),
521 description: result.description,
522 });
523 }
524 }
525 }
526
527 if self.config.allow_duplicates
539 && entry.header.ic_pair_id.is_none()
540 && matches!(
541 self.labels.last().map(|l| &l.anomaly_type),
542 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
543 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
544 )
545 {
546 let dup_strategy = DuplicationStrategy::default();
547 let duplicate =
548 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
549 duplicates.push(duplicate);
550 }
551 }
552 }
553
554 let duplicates_created = duplicates.len();
556
557 let summary = AnomalySummary::from_anomalies(&self.labels);
559
560 InjectionBatchResult {
561 entries_processed: self.stats.total_processed,
562 anomalies_injected: self.stats.total_injected,
563 duplicates_created,
564 labels: self.labels.clone(),
565 summary,
566 modified_documents,
567 near_miss_labels: self.near_miss_labels.clone(),
568 scheme_actions: self.scheme_actions.clone(),
569 difficulty_distribution: self.difficulty_distribution.clone(),
570 }
571 }
572
573 fn should_process(&mut self, entry: &JournalEntry) -> bool {
575 if !self.config.target_companies.is_empty()
577 && !self
578 .config
579 .target_companies
580 .iter()
581 .any(|c| c == entry.company_code())
582 {
583 self.stats.skipped_company += 1;
584 return false;
585 }
586
587 if let Some((start, end)) = self.config.date_range {
589 if entry.posting_date() < start || entry.posting_date() > end {
590 self.stats.skipped_date += 1;
591 return false;
592 }
593 }
594
595 let current_count = self
597 .document_anomaly_counts
598 .get(&entry.document_number())
599 .copied()
600 .unwrap_or(0);
601 if current_count >= self.config.max_anomalies_per_document {
602 self.stats.skipped_max_per_doc += 1;
603 return false;
604 }
605
606 true
607 }
608
609 fn select_anomaly_category(&mut self) -> AnomalyType {
611 let r = self.rng.random::<f64>();
612 let rates = &self.config.rates;
613
614 let mut cumulative = 0.0;
615
616 cumulative += rates.fraud_rate;
617 if r < cumulative {
618 return self.type_selector.select_fraud(&mut self.rng);
619 }
620
621 cumulative += rates.error_rate;
622 if r < cumulative {
623 return self.type_selector.select_error(&mut self.rng);
624 }
625
626 cumulative += rates.process_issue_rate;
627 if r < cumulative {
628 return self.type_selector.select_process_issue(&mut self.rng);
629 }
630
631 cumulative += rates.statistical_rate;
632 if r < cumulative {
633 return self.type_selector.select_statistical(&mut self.rng);
634 }
635
636 self.type_selector.select_relational(&mut self.rng)
637 }
638
639 fn inject_anomaly(
641 &mut self,
642 entry: &mut JournalEntry,
643 anomaly_type: AnomalyType,
644 ) -> Option<LabeledAnomaly> {
645 if !self.strategies.can_apply(entry, &anomaly_type) {
647 return None;
648 }
649
650 let result = self
652 .strategies
653 .apply_strategy(entry, &anomaly_type, &mut self.rng);
654
655 if !result.success {
656 return None;
657 }
658
659 *self
661 .document_anomaly_counts
662 .entry(entry.document_number().clone())
663 .or_insert(0) += 1;
664
665 let category = anomaly_type.category().to_string();
667 let type_name = anomaly_type.type_name();
668
669 *self.stats.by_category.entry(category).or_insert(0) += 1;
670 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
671 *self
672 .stats
673 .by_company
674 .entry(entry.company_code().to_string())
675 .or_insert(0) += 1;
676
677 if self.config.generate_labels {
679 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
680
681 entry.header.is_anomaly = true;
683 entry.header.anomaly_id = Some(anomaly_id.clone());
684 entry.header.anomaly_type = Some(type_name.clone());
685
686 let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
688 Vec::new();
689 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
690 entry.header.is_fraud = true;
691 if let AnomalyType::Fraud(ref ft) = anomaly_type {
692 entry.header.fraud_type = Some(*ft);
693 }
694 secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
700 }
701
702 let mut label = LabeledAnomaly::new(
703 anomaly_id,
704 anomaly_type.clone(),
705 entry.document_number().clone(),
706 "JE".to_string(),
707 entry.company_code().to_string(),
708 entry.posting_date(),
709 )
710 .with_description(&result.description)
711 .with_injection_strategy(&type_name);
712
713 let causal_reason = AnomalyCausalReason::RandomRate {
715 base_rate: self.config.rates.total_rate,
716 };
717 label = label.with_causal_reason(causal_reason);
718
719 let context_multiplier = self.calculate_context_rate_multiplier(entry);
721 if (context_multiplier - 1.0).abs() > f64::EPSILON {
722 label = label.with_metadata(
723 "entity_context_multiplier",
724 &format!("{context_multiplier:.3}"),
725 );
726 label = label.with_metadata(
727 "effective_rate",
728 &format!(
729 "{:.6}",
730 (self.config.rates.total_rate * context_multiplier).min(1.0)
731 ),
732 );
733 }
734
735 if let Some(impact) = result.monetary_impact {
737 label = label.with_monetary_impact(impact);
738 }
739
740 for entity in &result.related_entities {
742 label = label.with_related_entity(entity);
743 }
744
745 for (key, value) in &result.metadata {
747 label = label.with_metadata(key, value);
748 }
749
750 if let Some(cluster_id) =
752 self.cluster_manager
753 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
754 {
755 label = label.with_cluster(&cluster_id);
756 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
758 cluster_id: cluster_id.clone(),
759 });
760 }
761
762 for issue_type in &secondary_process_issues {
769 let child_id = format!("ANO{:08}", self.labels.len() + 1);
770 let child = LabeledAnomaly::new(
771 child_id,
772 AnomalyType::ProcessIssue(*issue_type),
773 entry.document_number().clone(),
774 "JE".to_string(),
775 entry.company_code().to_string(),
776 entry.posting_date(),
777 )
778 .with_description("Forensic pattern from fraud behavioral bias")
779 .with_injection_strategy("behavioral_bias")
780 .with_parent_anomaly(&label.anomaly_id);
781 self.labels.push(child);
782 }
783
784 return Some(label);
785 }
786
787 None
788 }
789
    /// Injects the given anomaly type into `entry`, bypassing rate and eligibility
    /// checks. Returns `None` when the injection is not applied or labels are disabled.
    // NOTE(review): unlike the `create_*` helpers, the returned label is not pushed
    // onto `self.labels` here — confirm this is intended.
    pub fn inject_specific(
        &mut self,
        entry: &mut JournalEntry,
        anomaly_type: AnomalyType,
    ) -> Option<LabeledAnomaly> {
        self.inject_anomaly(entry, anomaly_type)
    }
798
799 pub fn create_self_approval(
801 &mut self,
802 entry: &mut JournalEntry,
803 user_id: &str,
804 ) -> Option<LabeledAnomaly> {
805 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
806
807 let label = LabeledAnomaly::new(
808 format!("ANO{:08}", self.labels.len() + 1),
809 anomaly_type,
810 entry.document_number().clone(),
811 "JE".to_string(),
812 entry.company_code().to_string(),
813 entry.posting_date(),
814 )
815 .with_description(&format!("User {user_id} approved their own transaction"))
816 .with_related_entity(user_id)
817 .with_injection_strategy("ManualSelfApproval")
818 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
819 target_type: "User".to_string(),
820 target_id: user_id.to_string(),
821 });
822
823 entry.header.is_anomaly = true;
825 entry.header.is_fraud = true;
826 entry.header.anomaly_id = Some(label.anomaly_id.clone());
827 entry.header.anomaly_type = Some("SelfApproval".to_string());
828 entry.header.fraud_type = Some(FraudType::SelfApproval);
829
830 entry.header.created_by = user_id.to_string();
832
833 self.apply_fraud_behavioral_bias(entry);
836
837 self.labels.push(label.clone());
838 Some(label)
839 }
840
841 pub fn create_sod_violation(
843 &mut self,
844 entry: &mut JournalEntry,
845 user_id: &str,
846 conflicting_duties: (&str, &str),
847 ) -> Option<LabeledAnomaly> {
848 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
849
850 let label = LabeledAnomaly::new(
851 format!("ANO{:08}", self.labels.len() + 1),
852 anomaly_type,
853 entry.document_number().clone(),
854 "JE".to_string(),
855 entry.company_code().to_string(),
856 entry.posting_date(),
857 )
858 .with_description(&format!(
859 "User {} performed conflicting duties: {} and {}",
860 user_id, conflicting_duties.0, conflicting_duties.1
861 ))
862 .with_related_entity(user_id)
863 .with_metadata("duty1", conflicting_duties.0)
864 .with_metadata("duty2", conflicting_duties.1)
865 .with_injection_strategy("ManualSoDViolation")
866 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
867 target_type: "User".to_string(),
868 target_id: user_id.to_string(),
869 });
870
871 entry.header.is_anomaly = true;
873 entry.header.is_fraud = true;
874 entry.header.anomaly_id = Some(label.anomaly_id.clone());
875 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
876 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
877
878 self.apply_fraud_behavioral_bias(entry);
880
881 self.labels.push(label.clone());
882 Some(label)
883 }
884
885 pub fn create_ic_mismatch(
887 &mut self,
888 entry: &mut JournalEntry,
889 matching_company: &str,
890 expected_amount: Decimal,
891 actual_amount: Decimal,
892 ) -> Option<LabeledAnomaly> {
893 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
894
895 let label = LabeledAnomaly::new(
896 format!("ANO{:08}", self.labels.len() + 1),
897 anomaly_type,
898 entry.document_number().clone(),
899 "JE".to_string(),
900 entry.company_code().to_string(),
901 entry.posting_date(),
902 )
903 .with_description(&format!(
904 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
905 ))
906 .with_related_entity(matching_company)
907 .with_monetary_impact(actual_amount - expected_amount)
908 .with_metadata("expected_amount", &expected_amount.to_string())
909 .with_metadata("actual_amount", &actual_amount.to_string())
910 .with_injection_strategy("ManualICMismatch")
911 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
912 target_type: "Intercompany".to_string(),
913 target_id: matching_company.to_string(),
914 });
915
916 entry.header.is_anomaly = true;
918 entry.header.anomaly_id = Some(label.anomaly_id.clone());
919 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
920
921 self.labels.push(label.clone());
922 Some(label)
923 }
924
    /// Returns all labels accumulated so far.
    pub fn get_labels(&self) -> &[LabeledAnomaly] {
        &self.labels
    }
929
    /// Builds a fresh summary from the labels accumulated so far.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
934
    /// Returns the running injection counters.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
939
940 pub fn reset(&mut self) {
942 self.labels.clear();
943 self.document_anomaly_counts.clear();
944 self.stats = InjectorStats::default();
945 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
946
947 self.near_miss_labels.clear();
949 self.scheme_actions.clear();
950 self.difficulty_distribution.clear();
951
952 if let Some(ref mut baseline) = self.behavioral_baseline {
953 *baseline =
954 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
955 }
956 }
957
    /// Returns the number of clusters reported by the cluster manager.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
962
    /// Replaces all three entity context maps.
    ///
    /// Vendor contexts are looked up by the entry header's `reference`, employee
    /// contexts by `created_by`, and account contexts by the GL account of the entry's
    /// first line. Previously stored contexts are discarded.
    pub fn set_entity_contexts(
        &mut self,
        vendors: HashMap<String, VendorContext>,
        employees: HashMap<String, EmployeeContext>,
        accounts: HashMap<String, AccountContext>,
    ) {
        self.vendor_contexts = vendors;
        self.employee_contexts = employees;
        self.account_contexts = accounts;
    }
985
    /// Returns the stored vendor contexts.
    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
        &self.vendor_contexts
    }
990
    /// Returns the stored employee contexts.
    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
        &self.employee_contexts
    }
995
    /// Returns the stored account contexts.
    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
        &self.account_contexts
    }
1000
1001 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1010 if self.vendor_contexts.is_empty()
1011 && self.employee_contexts.is_empty()
1012 && self.account_contexts.is_empty()
1013 {
1014 return 1.0;
1015 }
1016
1017 let mut multiplier = 1.0;
1018
1019 if let Some(ref vendor_ref) = entry.header.reference {
1021 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1022 if ctx.is_new {
1024 multiplier *= 2.0;
1025 }
1026 if ctx.is_dormant_reactivation {
1027 multiplier *= 1.5;
1028 }
1029 }
1030 }
1031
1032 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1034 if ctx.is_new {
1035 multiplier *= 1.5;
1036 }
1037 if ctx.is_volume_fatigued {
1038 multiplier *= 1.3;
1039 }
1040 if ctx.is_overtime {
1041 multiplier *= 1.2;
1042 }
1043 }
1044
1045 if let Some(first_line) = entry.lines.first() {
1047 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1048 if ctx.is_high_risk {
1049 multiplier *= 2.0;
1050 }
1051 }
1052 }
1053
1054 multiplier
1055 }
1056
1057 fn apply_fraud_behavioral_bias(
1066 &mut self,
1067 entry: &mut JournalEntry,
1068 ) -> Vec<datasynth_core::models::ProcessIssueType> {
1069 use datasynth_core::models::ProcessIssueType;
1070
1071 let cfg = self.config.enhanced.fraud_behavioral_bias;
1072 let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1073 for issue in &fired {
1074 match issue {
1075 ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1076 ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1077 ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1078 _ => {}
1079 }
1080 }
1081 if cfg.round_dollar_bias > 0.0 {
1086 const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1087 let max_amt: Decimal = entry
1088 .lines
1089 .iter()
1090 .map(|l| l.debit_amount.max(l.credit_amount))
1091 .max()
1092 .unwrap_or(Decimal::ZERO);
1093 if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1094 self.stats.fraud_round_dollar_bias_applied += 1;
1095 }
1096 }
1097 fired
1098 }
1099
1100 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1109 if let Some(ref mut advancer) = self.scheme_advancer {
1110 let context = SchemeContext::new(date, company_code);
1111 let actions = advancer.advance_all(&context);
1112 self.scheme_actions.extend(actions.clone());
1113 actions
1114 } else {
1115 Vec::new()
1116 }
1117 }
1118
1119 pub fn maybe_start_scheme(
1125 &mut self,
1126 date: NaiveDate,
1127 company_code: &str,
1128 available_users: Vec<String>,
1129 available_accounts: Vec<String>,
1130 available_counterparties: Vec<String>,
1131 ) -> Option<uuid::Uuid> {
1132 if let Some(ref mut advancer) = self.scheme_advancer {
1133 let mut context = SchemeContext::new(date, company_code);
1134 context.available_users = available_users;
1135 context.available_accounts = available_accounts;
1136 context.available_counterparties = available_counterparties;
1137
1138 advancer.maybe_start_scheme(&context)
1139 } else {
1140 None
1141 }
1142 }
1143
    /// Returns the near-miss labels accumulated so far.
    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
        &self.near_miss_labels
    }
1148
    /// Returns the scheme actions accumulated by `advance_schemes`.
    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
        &self.scheme_actions
    }
1153
    /// Returns the number of labels recorded per detection difficulty.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
1158
1159 pub fn check_behavioral_deviations(
1161 &self,
1162 entity_id: &str,
1163 observation: &super::context::Observation,
1164 ) -> Vec<super::context::BehavioralDeviation> {
1165 if let Some(ref baseline) = self.behavioral_baseline {
1166 baseline.check_deviation(entity_id, observation)
1167 } else {
1168 Vec::new()
1169 }
1170 }
1171
1172 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1174 if let Some(ref baseline) = self.behavioral_baseline {
1175 baseline.get_baseline(entity_id)
1176 } else {
1177 None
1178 }
1179 }
1180
1181 pub fn active_scheme_count(&self) -> usize {
1183 if let Some(ref advancer) = self.scheme_advancer {
1184 advancer.active_scheme_count()
1185 } else {
1186 0
1187 }
1188 }
1189
1190 pub fn has_enhanced_features(&self) -> bool {
1192 self.scheme_advancer.is_some()
1193 || self.near_miss_generator.is_some()
1194 || self.difficulty_calculator.is_some()
1195 || self.entity_aware_injector.is_some()
1196 }
1197}
1198
/// Builder for [`AnomalyInjectorConfig`], starting from the default configuration.
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration being assembled; returned by `build`.
    config: AnomalyInjectorConfig,
}
1203
1204impl AnomalyInjectorConfigBuilder {
1205 pub fn new() -> Self {
1207 Self {
1208 config: AnomalyInjectorConfig::default(),
1209 }
1210 }
1211
1212 pub fn with_total_rate(mut self, rate: f64) -> Self {
1214 self.config.rates.total_rate = rate;
1215 self
1216 }
1217
1218 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1220 self.config.rates.fraud_rate = rate;
1221 self
1222 }
1223
1224 pub fn with_error_rate(mut self, rate: f64) -> Self {
1226 self.config.rates.error_rate = rate;
1227 self
1228 }
1229
1230 pub fn with_seed(mut self, seed: u64) -> Self {
1232 self.config.seed = seed;
1233 self
1234 }
1235
1236 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1238 self.config.patterns.temporal_pattern = pattern;
1239 self
1240 }
1241
1242 pub fn with_labels(mut self, generate: bool) -> Self {
1244 self.config.generate_labels = generate;
1245 self
1246 }
1247
1248 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1250 self.config.target_companies = companies;
1251 self
1252 }
1253
1254 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1256 self.config.date_range = Some((start, end));
1257 self
1258 }
1259
1260 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1266 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1267 self.config.enhanced.scheme_probability = probability;
1268 self
1269 }
1270
1271 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1273 self.config.enhanced.near_miss_enabled = enabled;
1274 self.config.enhanced.near_miss_proportion = proportion;
1275 self
1276 }
1277
1278 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1280 self.config.enhanced.approval_thresholds = thresholds;
1281 self
1282 }
1283
1284 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1286 self.config.enhanced.correlated_injection_enabled = enabled;
1287 self
1288 }
1289
1290 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1292 self.config.enhanced.temporal_clustering_enabled = enabled;
1293 self.config.enhanced.period_end_multiplier = multiplier;
1294 self
1295 }
1296
1297 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1299 self.config.enhanced.difficulty_classification_enabled = enabled;
1300 self
1301 }
1302
1303 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1305 self.config.enhanced.context_aware_enabled = enabled;
1306 self
1307 }
1308
1309 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1311 self.config.enhanced.behavioral_baseline_config = config;
1312 self
1313 }
1314
1315 pub fn with_all_enhanced_features(mut self) -> Self {
1317 self.config.enhanced.multi_stage_schemes_enabled = true;
1318 self.config.enhanced.scheme_probability = 0.02;
1319 self.config.enhanced.correlated_injection_enabled = true;
1320 self.config.enhanced.temporal_clustering_enabled = true;
1321 self.config.enhanced.period_end_multiplier = 2.5;
1322 self.config.enhanced.near_miss_enabled = true;
1323 self.config.enhanced.near_miss_proportion = 0.30;
1324 self.config.enhanced.difficulty_classification_enabled = true;
1325 self.config.enhanced.context_aware_enabled = true;
1326 self.config.enhanced.behavioral_baseline_config.enabled = true;
1327 self
1328 }
1329
1330 pub fn build(self) -> AnomalyInjectorConfig {
1332 self.config
1333 }
1334}
1335
1336impl Default for AnomalyInjectorConfigBuilder {
1337 fn default() -> Self {
1338 Self::new()
1339 }
1340}
1341
1342#[cfg(test)]
1343mod tests {
1344 use super::*;
1345 use chrono::NaiveDate;
1346 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1347 use rust_decimal_macros::dec;
1348
1349 fn create_test_entry(doc_num: &str) -> JournalEntry {
1350 let mut entry = JournalEntry::new_simple(
1351 doc_num.to_string(),
1352 "1000".to_string(),
1353 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1354 "Test Entry".to_string(),
1355 );
1356
1357 entry.add_line(JournalEntryLine {
1358 line_number: 1,
1359 gl_account: "5000".to_string(),
1360 debit_amount: dec!(1000),
1361 ..Default::default()
1362 });
1363
1364 entry.add_line(JournalEntryLine {
1365 line_number: 2,
1366 gl_account: "1000".to_string(),
1367 credit_amount: dec!(1000),
1368 ..Default::default()
1369 });
1370
1371 entry
1372 }
1373
1374 #[test]
1375 fn test_anomaly_injector_basic() {
1376 let config = AnomalyInjectorConfigBuilder::new()
1377 .with_total_rate(0.5) .with_seed(42)
1379 .build();
1380
1381 let mut injector = AnomalyInjector::new(config);
1382
1383 let mut entries: Vec<_> = (0..100)
1384 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1385 .collect();
1386
1387 let result = injector.process_entries(&mut entries);
1388
1389 assert!(result.anomalies_injected > 0);
1391 assert!(!result.labels.is_empty());
1392 assert!(result.labels.len() >= result.anomalies_injected);
1397 }
1398
1399 #[test]
1400 fn test_specific_injection() {
1401 let config = AnomalyInjectorConfig::default();
1402 let mut injector = AnomalyInjector::new(config);
1403
1404 let mut entry = create_test_entry("JE001");
1405 let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1406
1407 let label = injector.inject_specific(&mut entry, anomaly_type);
1408
1409 assert!(label.is_some());
1410 let label = label.unwrap();
1411 assert!(!label.document_id.is_empty());
1413 assert_eq!(label.document_id, entry.document_number());
1414 }
1415
1416 #[test]
1417 fn test_self_approval_injection() {
1418 let config = AnomalyInjectorConfig::default();
1419 let mut injector = AnomalyInjector::new(config);
1420
1421 let mut entry = create_test_entry("JE001");
1422 let label = injector.create_self_approval(&mut entry, "USER001");
1423
1424 assert!(label.is_some());
1425 let label = label.unwrap();
1426 assert!(matches!(
1427 label.anomaly_type,
1428 AnomalyType::Fraud(FraudType::SelfApproval)
1429 ));
1430 assert!(label.related_entities.contains(&"USER001".to_string()));
1431 }
1432
1433 #[test]
1434 fn test_company_filtering() {
1435 let config = AnomalyInjectorConfigBuilder::new()
1436 .with_total_rate(1.0) .with_target_companies(vec!["2000".to_string()])
1438 .build();
1439
1440 let mut injector = AnomalyInjector::new(config);
1441
1442 let mut entries = vec![
1443 create_test_entry("JE001"), create_test_entry("JE002"), ];
1446
1447 let result = injector.process_entries(&mut entries);
1448
1449 assert_eq!(result.anomalies_injected, 0);
1451 }
1452
1453 fn create_test_entry_with_context(
1459 doc_num: &str,
1460 vendor_ref: Option<&str>,
1461 employee_id: &str,
1462 gl_account: &str,
1463 ) -> JournalEntry {
1464 let mut entry = JournalEntry::new_simple(
1465 doc_num.to_string(),
1466 "1000".to_string(),
1467 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1468 "Test Entry".to_string(),
1469 );
1470
1471 entry.header.reference = vendor_ref.map(|v| v.to_string());
1472 entry.header.created_by = employee_id.to_string();
1473
1474 entry.add_line(JournalEntryLine {
1475 line_number: 1,
1476 gl_account: gl_account.to_string(),
1477 debit_amount: dec!(1000),
1478 ..Default::default()
1479 });
1480
1481 entry.add_line(JournalEntryLine {
1482 line_number: 2,
1483 gl_account: "1000".to_string(),
1484 credit_amount: dec!(1000),
1485 ..Default::default()
1486 });
1487
1488 entry
1489 }
1490
1491 #[test]
1492 fn test_set_entity_contexts() {
1493 let config = AnomalyInjectorConfig::default();
1494 let mut injector = AnomalyInjector::new(config);
1495
1496 assert!(injector.vendor_contexts().is_empty());
1498 assert!(injector.employee_contexts().is_empty());
1499 assert!(injector.account_contexts().is_empty());
1500
1501 let mut vendors = HashMap::new();
1503 vendors.insert(
1504 "V001".to_string(),
1505 VendorContext {
1506 vendor_id: "V001".to_string(),
1507 is_new: true,
1508 ..Default::default()
1509 },
1510 );
1511
1512 let mut employees = HashMap::new();
1513 employees.insert(
1514 "EMP001".to_string(),
1515 EmployeeContext {
1516 employee_id: "EMP001".to_string(),
1517 is_new: true,
1518 ..Default::default()
1519 },
1520 );
1521
1522 let mut accounts = HashMap::new();
1523 accounts.insert(
1524 "8100".to_string(),
1525 AccountContext {
1526 account_code: "8100".to_string(),
1527 is_high_risk: true,
1528 ..Default::default()
1529 },
1530 );
1531
1532 injector.set_entity_contexts(vendors, employees, accounts);
1533
1534 assert_eq!(injector.vendor_contexts().len(), 1);
1535 assert_eq!(injector.employee_contexts().len(), 1);
1536 assert_eq!(injector.account_contexts().len(), 1);
1537 assert!(injector.vendor_contexts().contains_key("V001"));
1538 assert!(injector.employee_contexts().contains_key("EMP001"));
1539 assert!(injector.account_contexts().contains_key("8100"));
1540 }
1541
1542 #[test]
1543 fn test_default_behavior_no_contexts() {
1544 let config = AnomalyInjectorConfigBuilder::new()
1546 .with_total_rate(0.5)
1547 .with_seed(42)
1548 .build();
1549
1550 let mut injector = AnomalyInjector::new(config);
1551
1552 let mut entries: Vec<_> = (0..200)
1553 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1554 .collect();
1555
1556 let result = injector.process_entries(&mut entries);
1557
1558 assert!(result.anomalies_injected > 0);
1561 let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1562 assert!(
1563 rate > 0.2 && rate < 0.8,
1564 "Expected ~50% rate, got {:.2}%",
1565 rate * 100.0
1566 );
1567 }
1568
1569 #[test]
1570 fn test_entity_context_increases_injection_rate() {
1571 let base_rate = 0.10; let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1577 .with_total_rate(base_rate)
1578 .with_seed(123)
1579 .build();
1580
1581 let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1582
1583 let mut entries_no_ctx: Vec<_> = (0..500)
1584 .map(|i| {
1585 create_test_entry_with_context(
1586 &format!("JE{:04}", i),
1587 Some("V001"),
1588 "EMP001",
1589 "8100",
1590 )
1591 })
1592 .collect();
1593
1594 let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1595
1596 let config_ctx = AnomalyInjectorConfigBuilder::new()
1598 .with_total_rate(base_rate)
1599 .with_seed(123)
1600 .build();
1601
1602 let mut injector_ctx = AnomalyInjector::new(config_ctx);
1603
1604 let mut vendors = HashMap::new();
1606 vendors.insert(
1607 "V001".to_string(),
1608 VendorContext {
1609 vendor_id: "V001".to_string(),
1610 is_new: true, is_dormant_reactivation: true, ..Default::default()
1613 },
1614 );
1615
1616 let mut employees = HashMap::new();
1617 employees.insert(
1618 "EMP001".to_string(),
1619 EmployeeContext {
1620 employee_id: "EMP001".to_string(),
1621 is_new: true, ..Default::default()
1623 },
1624 );
1625
1626 let mut accounts = HashMap::new();
1627 accounts.insert(
1628 "8100".to_string(),
1629 AccountContext {
1630 account_code: "8100".to_string(),
1631 is_high_risk: true, ..Default::default()
1633 },
1634 );
1635
1636 injector_ctx.set_entity_contexts(vendors, employees, accounts);
1637
1638 let mut entries_ctx: Vec<_> = (0..500)
1639 .map(|i| {
1640 create_test_entry_with_context(
1641 &format!("JE{:04}", i),
1642 Some("V001"),
1643 "EMP001",
1644 "8100",
1645 )
1646 })
1647 .collect();
1648
1649 let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1650
1651 assert!(
1653 result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1654 "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1655 result_ctx.anomalies_injected,
1656 result_no_ctx.anomalies_injected,
1657 );
1658 }
1659
1660 #[test]
1661 fn test_risk_score_multiplication() {
1662 let config = AnomalyInjectorConfig::default();
1664 let mut injector = AnomalyInjector::new(config);
1665
1666 let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1668 assert!(
1669 (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1670 );
1671
1672 let mut vendors = HashMap::new();
1674 vendors.insert(
1675 "V_RISKY".to_string(),
1676 VendorContext {
1677 vendor_id: "V_RISKY".to_string(),
1678 is_new: true,
1679 ..Default::default()
1680 },
1681 );
1682
1683 let mut accounts = HashMap::new();
1684 accounts.insert(
1685 "9000".to_string(),
1686 AccountContext {
1687 account_code: "9000".to_string(),
1688 is_high_risk: true,
1689 ..Default::default()
1690 },
1691 );
1692
1693 injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1694
1695 let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1696 let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1697 assert!(
1699 (multiplier - 4.0).abs() < f64::EPSILON,
1700 "Expected 4.0x multiplier, got {}",
1701 multiplier,
1702 );
1703
1704 let entry_vendor_only =
1706 create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1707 let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1708 assert!(
1709 (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1710 "Expected 2.0x multiplier (vendor only), got {}",
1711 multiplier_vendor,
1712 );
1713
1714 let entry_no_match =
1716 create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1717 let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1718 assert!(
1719 (multiplier_none - 1.0).abs() < f64::EPSILON,
1720 "Expected 1.0x multiplier (no match), got {}",
1721 multiplier_none,
1722 );
1723 }
1724
1725 #[test]
1726 fn test_employee_context_multiplier() {
1727 let config = AnomalyInjectorConfig::default();
1728 let mut injector = AnomalyInjector::new(config);
1729
1730 let mut employees = HashMap::new();
1731 employees.insert(
1732 "EMP_NEW".to_string(),
1733 EmployeeContext {
1734 employee_id: "EMP_NEW".to_string(),
1735 is_new: true, is_volume_fatigued: true, is_overtime: true, ..Default::default()
1739 },
1740 );
1741
1742 injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1743
1744 let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1745 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1746
1747 let expected = 1.5 * 1.3 * 1.2;
1749 assert!(
1750 (multiplier - expected).abs() < 0.01,
1751 "Expected {:.3}x multiplier, got {:.3}",
1752 expected,
1753 multiplier,
1754 );
1755 }
1756
1757 #[test]
1758 fn test_entity_contexts_persist_across_reset() {
1759 let config = AnomalyInjectorConfig::default();
1760 let mut injector = AnomalyInjector::new(config);
1761
1762 let mut vendors = HashMap::new();
1763 vendors.insert(
1764 "V001".to_string(),
1765 VendorContext {
1766 vendor_id: "V001".to_string(),
1767 is_new: true,
1768 ..Default::default()
1769 },
1770 );
1771
1772 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1773 assert_eq!(injector.vendor_contexts().len(), 1);
1774
1775 injector.reset();
1777 assert_eq!(injector.vendor_contexts().len(), 1);
1778 }
1779
1780 #[test]
1781 fn test_set_empty_contexts_clears() {
1782 let config = AnomalyInjectorConfig::default();
1783 let mut injector = AnomalyInjector::new(config);
1784
1785 let mut vendors = HashMap::new();
1786 vendors.insert(
1787 "V001".to_string(),
1788 VendorContext {
1789 vendor_id: "V001".to_string(),
1790 ..Default::default()
1791 },
1792 );
1793
1794 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1795 assert_eq!(injector.vendor_contexts().len(), 1);
1796
1797 injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1799 assert!(injector.vendor_contexts().is_empty());
1800 }
1801
1802 #[test]
1803 fn test_dormant_vendor_multiplier() {
1804 let config = AnomalyInjectorConfig::default();
1805 let mut injector = AnomalyInjector::new(config);
1806
1807 let mut vendors = HashMap::new();
1808 vendors.insert(
1809 "V_DORMANT".to_string(),
1810 VendorContext {
1811 vendor_id: "V_DORMANT".to_string(),
1812 is_dormant_reactivation: true, ..Default::default()
1814 },
1815 );
1816
1817 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1818
1819 let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1820 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1821 assert!(
1822 (multiplier - 1.5).abs() < f64::EPSILON,
1823 "Expected 1.5x multiplier for dormant vendor, got {}",
1824 multiplier,
1825 );
1826 }
1827
1828 #[test]
1837 fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
1838 use chrono::{Datelike, Timelike, Weekday};
1839 use datasynth_core::models::FraudType;
1840
1841 let mut config = AnomalyInjectorConfig::default();
1842 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1843 enabled: true,
1844 weekend_bias: 1.0,
1845 round_dollar_bias: 1.0,
1846 off_hours_bias: 1.0,
1847 post_close_bias: 1.0,
1848 };
1849 let mut injector = AnomalyInjector::new(config);
1850
1851 let mut entry = JournalEntry::new_simple(
1853 "JE001".to_string(),
1854 "1000".to_string(),
1855 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), "Test Entry".to_string(),
1857 );
1858 entry.add_line(JournalEntryLine {
1859 line_number: 1,
1860 gl_account: "5000".to_string(),
1861 debit_amount: dec!(1237),
1862 ..Default::default()
1863 });
1864 entry.add_line(JournalEntryLine {
1865 line_number: 2,
1866 gl_account: "1000".to_string(),
1867 credit_amount: dec!(1237),
1868 ..Default::default()
1869 });
1870
1871 let _ =
1872 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1873
1874 assert!(
1876 matches!(
1877 entry.header.posting_date.weekday(),
1878 Weekday::Sat | Weekday::Sun
1879 ),
1880 "expected weekend posting date, got {:?}",
1881 entry.header.posting_date.weekday()
1882 );
1883 let debit_total: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
1885 let credit_total: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
1886 assert_eq!(debit_total, credit_total, "entry must remain balanced");
1887 assert!(
1888 [
1889 dec!(1_000),
1890 dec!(5_000),
1891 dec!(10_000),
1892 dec!(25_000),
1893 dec!(50_000),
1894 dec!(100_000)
1895 ]
1896 .contains(&debit_total),
1897 "expected round-dollar total, got {}",
1898 debit_total
1899 );
1900 let hour = entry.header.created_at.hour();
1902 assert!(
1903 !(6..22).contains(&hour),
1904 "expected off-hours timestamp, got hour {}",
1905 hour
1906 );
1907 assert!(entry.header.is_post_close);
1909
1910 let stats = injector.get_stats();
1912 assert_eq!(stats.fraud_weekend_bias_applied, 1);
1913 assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
1914 assert_eq!(stats.fraud_off_hours_bias_applied, 1);
1915 assert_eq!(stats.fraud_post_close_bias_applied, 1);
1916 }
1917
1918 #[test]
1921 fn fraud_behavioral_bias_rate_zero_applies_nothing() {
1922 use datasynth_core::models::FraudType;
1923
1924 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1926 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1927 enabled: true,
1928 weekend_bias: 0.0,
1929 round_dollar_bias: 0.0,
1930 off_hours_bias: 0.0,
1931 post_close_bias: 0.0,
1932 };
1933 let mut injector = AnomalyInjector::new(config);
1934 let mut entry = create_test_entry("JE001");
1935 entry.header.posting_date = original_date;
1936
1937 let _ =
1938 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1939
1940 assert_eq!(entry.header.posting_date, original_date);
1941 assert!(!entry.header.is_post_close);
1942 let stats = injector.get_stats();
1943 assert_eq!(stats.fraud_weekend_bias_applied, 0);
1944 assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
1945 assert_eq!(stats.fraud_off_hours_bias_applied, 0);
1946 assert_eq!(stats.fraud_post_close_bias_applied, 0);
1947 }
1948
1949 #[test]
1952 fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
1953 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1955 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1956 enabled: true,
1957 weekend_bias: 1.0,
1958 round_dollar_bias: 1.0,
1959 off_hours_bias: 1.0,
1960 post_close_bias: 1.0,
1961 };
1962 let mut injector = AnomalyInjector::new(config);
1963 let mut entry = create_test_entry("JE001");
1964 entry.header.posting_date = original_date;
1965
1966 let _ = injector.inject_specific(
1967 &mut entry,
1968 AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
1969 );
1970
1971 assert_eq!(entry.header.posting_date, original_date);
1972 let stats = injector.get_stats();
1973 assert_eq!(stats.fraud_weekend_bias_applied, 0);
1974 }
1975
1976 #[test]
1980 fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
1981 use datasynth_core::models::{FraudType, ProcessIssueType};
1982
1983 let mut config = AnomalyInjectorConfig::default();
1984 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1985 enabled: true,
1986 weekend_bias: 1.0,
1987 round_dollar_bias: 0.0, off_hours_bias: 1.0,
1989 post_close_bias: 1.0,
1990 };
1991 let mut injector = AnomalyInjector::new(config);
1992 let mut entry = JournalEntry::new_simple(
1993 "JE001".into(),
1994 "1000".into(),
1995 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(),
1996 "Test".into(),
1997 );
1998 entry.add_line(JournalEntryLine {
1999 line_number: 1,
2000 gl_account: "5000".into(),
2001 debit_amount: dec!(1000),
2002 ..Default::default()
2003 });
2004 entry.add_line(JournalEntryLine {
2005 line_number: 2,
2006 gl_account: "1000".into(),
2007 credit_amount: dec!(1000),
2008 ..Default::default()
2009 });
2010
2011 let primary = injector
2012 .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
2013 .expect("fraud label should be produced");
2014
2015 let labels = injector.get_labels();
2017 assert_eq!(
2018 labels.len(),
2019 3,
2020 "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
2021 );
2022 let types: Vec<AnomalyType> = labels.iter().map(|l| l.anomaly_type.clone()).collect();
2023 assert!(types.contains(&AnomalyType::ProcessIssue(ProcessIssueType::WeekendPosting)));
2024 assert!(types.contains(&AnomalyType::ProcessIssue(
2025 ProcessIssueType::AfterHoursPosting
2026 )));
2027 assert!(types.contains(&AnomalyType::ProcessIssue(
2028 ProcessIssueType::PostClosePosting
2029 )));
2030 assert_eq!(
2031 primary.anomaly_type,
2032 AnomalyType::Fraud(FraudType::FictitiousEntry)
2033 );
2034 }
2035}