1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
23use datasynth_core::models::{
24 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
25 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
26 RelationalAnomalyType,
27};
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29
30use super::context::{
31 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
32 EntityAwareInjector, VendorContext,
33};
34use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
35use super::difficulty::DifficultyCalculator;
36use super::near_miss::{NearMissConfig, NearMissGenerator};
37use super::patterns::{
38 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
39 TemporalPattern,
40};
41use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
42use super::schemes::{SchemeAction, SchemeContext};
43use super::strategies::{DuplicationStrategy, StrategyCollection};
44use super::types::AnomalyTypeSelector;
45
/// Top-level configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Injection rates. `total_rate` is the base per-entry injection rate; the
    /// per-category rates (`fraud_rate`, `error_rate`, ...) drive category selection.
    pub rates: AnomalyRateConfig,
    /// Clustering, entity-targeting and temporal-pattern settings.
    pub patterns: AnomalyPatternConfig,
    /// Seed used for the injector's RNG and its deterministic UUID factory.
    pub seed: u64,
    /// When `false`, injections still mutate entries and update statistics, but no
    /// label is built and the injection routine returns `None`.
    pub generate_labels: bool,
    /// Whether duplicate-type anomalies may produce a duplicated entry.
    pub allow_duplicates: bool,
    /// Entries whose document already carries this many injected anomalies are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes eligible for injection; an empty list means every company.
    pub target_companies: Vec<String>,
    /// Optional inclusive `(start, end)` posting-date window; entries outside it are skipped.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Switches and parameters for the optional "enhanced" injection features.
    pub enhanced: EnhancedInjectionConfig,
}
68
/// Optional injection features. The derived `Default` leaves every switch off
/// and every numeric parameter at zero.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Creates a scheme advancer for multi-stage fraud schemes.
    pub multi_stage_schemes_enabled: bool,
    /// Used as the embezzlement probability; revenue-manipulation and kickback
    /// schemes use half of this value.
    pub scheme_probability: f64,
    /// Queues correlated follow-up anomalies after each successful injection.
    pub correlated_injection_enabled: bool,
    /// Scales the effective rate by the largest multiplier among active temporal clusters.
    pub temporal_clustering_enabled: bool,
    /// Set by the builder alongside temporal clustering.
    /// NOTE(review): not read anywhere in this part of the file — confirm where it is consumed.
    pub period_end_multiplier: f64,
    /// Creates a near-miss generator; an entry flagged as a near miss is labelled
    /// as such instead of receiving an anomaly.
    pub near_miss_enabled: bool,
    /// Forwarded to the near-miss generator as its `proportion`.
    pub near_miss_proportion: f64,
    /// Approval thresholds handed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Attaches a detection difficulty and score to each label and tallies the distribution.
    pub difficulty_classification_enabled: bool,
    /// Uses vendor/employee/account contexts to derive a per-entry rate multiplier.
    pub context_aware_enabled: bool,
    /// Behavioral baseline settings; the baseline is only built when both
    /// `context_aware_enabled` and this config's `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
    /// Bias configuration applied to entries that receive a fraud anomaly.
    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
}
100
101impl Default for AnomalyInjectorConfig {
102 fn default() -> Self {
103 Self {
104 rates: AnomalyRateConfig::default(),
105 patterns: AnomalyPatternConfig::default(),
106 seed: 42,
107 generate_labels: true,
108 allow_duplicates: true,
109 max_anomalies_per_document: 2,
110 target_companies: Vec::new(),
111 date_range: None,
112 enhanced: EnhancedInjectionConfig::default(),
113 }
114 }
115}
116
/// Outcome of one `process_entries` call.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// The injector's running count of processed entries (taken from its statistics,
    /// so it accumulates across batches until `reset`).
    pub entries_processed: usize,
    /// The injector's running count of injected anomalies (also from its statistics).
    pub anomalies_injected: usize,
    /// Number of duplicate entries created during this batch.
    pub duplicates_created: usize,
    /// Copy of every label the injector currently holds.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed from the injector's labels.
    pub summary: AnomalySummary,
    /// Document numbers of entries modified during this batch.
    pub modified_documents: Vec<String>,
    /// Copy of the near-miss labels the injector currently holds.
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Copy of the scheme actions the injector currently holds.
    pub scheme_actions: Vec<SchemeAction>,
    /// Copy of the injector's tally of labels per detection difficulty.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
139
/// Injects anomalies into journal entries and records a label for each one.
pub struct AnomalyInjector {
    /// Configuration the injector was built from.
    config: AnomalyInjectorConfig,
    /// Main random source, seeded from `config.seed`.
    rng: ChaCha8Rng,
    /// Deterministic UUID source handed to the duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a chosen category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that check applicability and apply an anomaly to an entry.
    strategies: StrategyCollection,
    /// Assigns injected anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Chooses the entity an anomaly is attributed to.
    entity_targeting: EntityTargetingManager,
    /// Number of anomalies injected so far, keyed by document number.
    document_anomaly_counts: HashMap<String, usize>,
    /// All labels produced so far; its length also drives `ANO########` id numbering.
    labels: Vec<LabeledAnomaly>,
    /// Running statistics.
    stats: InjectorStats,
    /// Present when multi-stage schemes are enabled.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Present when near-miss generation is enabled.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels produced so far.
    near_miss_labels: Vec<NearMissLabel>,
    /// Present when correlated injection is enabled.
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Correlated anomalies waiting for an entry dated on or after their earliest date.
    queued_co_occurrences: Vec<QueuedAnomaly>,
    /// Present when temporal clustering is enabled.
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Present when difficulty classification is enabled.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Present when context-aware injection is enabled.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Present when context-aware injection and the baseline config are both enabled.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions accumulated by `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Count of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Vendor contexts, looked up by the entry header's `reference`.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts, looked up by the entry header's `created_by`.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts, looked up by the GL account of the entry's first line.
    account_contexts: HashMap<String, AccountContext>,
}
187
/// Counters maintained by [`AnomalyInjector`].
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`, including skipped ones.
    pub total_processed: usize,
    /// Anomalies injected and labelled by `process_entries`.
    pub total_injected: usize,
    /// Successful strategy applications per anomaly category.
    pub by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    pub by_company: HashMap<String, usize>,
    /// NOTE(review): never incremented in the visible code — confirm whether it is still used.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date fell outside the configured range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not a target company.
    pub skipped_company: usize,
    /// Entries skipped because their document already reached the per-document cap.
    pub skipped_max_per_doc: usize,
    /// Times the fraud bias reported a weekend posting.
    pub fraud_weekend_bias_applied: usize,
    /// Times a biased fraud entry's largest line amount matched a round-dollar target.
    pub fraud_round_dollar_bias_applied: usize,
    /// Times the fraud bias reported an after-hours posting.
    pub fraud_off_hours_bias_applied: usize,
    /// Times the fraud bias reported a post-close posting.
    pub fraud_post_close_bias_applied: usize,
}
218
/// A correlated anomaly waiting to be injected into a later entry.
struct QueuedAnomaly {
    /// Anomaly type to inject when the item becomes due.
    anomaly_type: AnomalyType,
    /// Entity of the triggering anomaly, kept only when the correlation is same-entity.
    target_entity: Option<String>,
    /// First posting date (trigger date plus lag) on which the item may be injected.
    earliest_date: NaiveDate,
    /// Description of the correlation, copied into the label metadata.
    description: String,
}
230
231impl AnomalyInjector {
232 pub fn new(config: AnomalyInjectorConfig) -> Self {
234 let mut rng = seeded_rng(config.seed, 0);
235 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
236 let entity_targeting =
237 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
238
239 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
241 let scheme_config = SchemeAdvancerConfig {
242 embezzlement_probability: config.enhanced.scheme_probability,
243 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
244 kickback_probability: config.enhanced.scheme_probability * 0.5,
245 seed: rng.random(),
246 ..Default::default()
247 };
248 Some(SchemeAdvancer::new(scheme_config))
249 } else {
250 None
251 };
252
253 let near_miss_generator = if config.enhanced.near_miss_enabled {
254 let near_miss_config = NearMissConfig {
255 proportion: config.enhanced.near_miss_proportion,
256 seed: rng.random(),
257 ..Default::default()
258 };
259 Some(NearMissGenerator::new(near_miss_config))
260 } else {
261 None
262 };
263
264 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
265 Some(AnomalyCoOccurrence::new())
266 } else {
267 None
268 };
269
270 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
271 Some(TemporalClusterGenerator::new())
272 } else {
273 None
274 };
275
276 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
277 Some(DifficultyCalculator::new())
278 } else {
279 None
280 };
281
282 let entity_aware_injector = if config.enhanced.context_aware_enabled {
283 Some(EntityAwareInjector::default())
284 } else {
285 None
286 };
287
288 let behavioral_baseline = if config.enhanced.context_aware_enabled
289 && config.enhanced.behavioral_baseline_config.enabled
290 {
291 Some(BehavioralBaseline::new(
292 config.enhanced.behavioral_baseline_config.clone(),
293 ))
294 } else {
295 None
296 };
297
298 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
299
300 Self {
301 config,
302 rng,
303 uuid_factory,
304 type_selector: AnomalyTypeSelector::new(),
305 strategies: StrategyCollection::default(),
306 cluster_manager,
307 entity_targeting,
308 document_anomaly_counts: HashMap::new(),
309 labels: Vec::new(),
310 stats: InjectorStats::default(),
311 scheme_advancer,
312 near_miss_generator,
313 near_miss_labels: Vec::new(),
314 co_occurrence_handler,
315 queued_co_occurrences: Vec::new(),
316 temporal_cluster_generator,
317 difficulty_calculator,
318 entity_aware_injector,
319 behavioral_baseline,
320 scheme_actions: Vec::new(),
321 difficulty_distribution: HashMap::new(),
322 vendor_contexts: HashMap::new(),
323 employee_contexts: HashMap::new(),
324 account_contexts: HashMap::new(),
325 }
326 }
327
328 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
330 debug!(
331 entry_count = entries.len(),
332 total_rate = self.config.rates.total_rate,
333 seed = self.config.seed,
334 "Injecting anomalies into journal entries"
335 );
336
337 let mut modified_documents = Vec::new();
338 let mut duplicates = Vec::new();
339
340 for entry in entries.iter_mut() {
341 self.stats.total_processed += 1;
342
343 if let Some(ref mut baseline) = self.behavioral_baseline {
345 use super::context::Observation;
346 let entity_id = entry.header.created_by.clone();
348 let observation =
349 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
350 baseline.record_observation(&entity_id, observation);
351 }
352
353 if !self.should_process(entry) {
355 continue;
356 }
357
358 let entry_date = entry.posting_date();
360 let ready_indices: Vec<usize> = self
361 .queued_co_occurrences
362 .iter()
363 .enumerate()
364 .filter(|(_, q)| entry_date >= q.earliest_date)
365 .map(|(i, _)| i)
366 .collect();
367
368 if let Some(&idx) = ready_indices.first() {
369 let queued = self.queued_co_occurrences.remove(idx);
370 if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
371 label = label.with_metadata("co_occurrence", "true");
372 label = label.with_metadata("co_occurrence_description", &queued.description);
373 if let Some(ref target) = queued.target_entity {
374 label = label.with_related_entity(target);
375 label = label.with_metadata("co_occurrence_target", target);
376 }
377 modified_documents.push(entry.document_number().clone());
378 self.labels.push(label);
379 self.stats.total_injected += 1;
380 }
381 continue; }
383
384 let base_rate = self.config.rates.total_rate;
386
387 let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
389 let employee_id = &entry.header.created_by;
390 let first_account = entry
391 .lines
392 .first()
393 .map(|l| l.gl_account.as_str())
394 .unwrap_or("");
395 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
397
398 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
399 let employee_ctx = self.employee_contexts.get(employee_id);
400 let account_ctx = self.account_contexts.get(first_account);
401
402 let multiplier =
403 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
404 (base_rate * multiplier).min(1.0)
405 } else {
406 self.calculate_context_rate_multiplier(entry) * base_rate
408 };
409
410 if let Some(ref tcg) = self.temporal_cluster_generator {
412 let temporal_multiplier = tcg
413 .get_active_clusters(entry_date)
414 .iter()
415 .map(|c| c.rate_multiplier)
416 .fold(1.0_f64, f64::max);
417 effective_rate = (effective_rate * temporal_multiplier).min(1.0);
418 }
419
420 if should_inject_anomaly(
422 effective_rate,
423 entry_date,
424 &self.config.patterns.temporal_pattern,
425 &mut self.rng,
426 ) {
427 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
429 let account = entry
431 .lines
432 .first()
433 .map(|l| l.gl_account.clone())
434 .unwrap_or_default();
435 near_miss_gen.record_transaction(
436 entry.document_number().clone(),
437 entry_date,
438 entry.total_debit(),
439 &account,
440 None,
441 );
442
443 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
445 entry.document_number().clone(),
446 entry_date,
447 entry.total_debit(),
448 &account,
449 None,
450 &self.config.enhanced.approval_thresholds,
451 ) {
452 self.near_miss_labels.push(near_miss_label);
453 continue; }
455 }
456
457 let anomaly_type = self.select_anomaly_category();
459
460 let target_entity = {
462 let mut candidates: Vec<String> =
463 self.vendor_contexts.keys().cloned().collect();
464 candidates.extend(self.employee_contexts.keys().cloned());
465 if candidates.is_empty() {
466 if let Some(ref r) = entry.header.reference {
468 candidates.push(r.clone());
469 }
470 }
471 self.entity_targeting
472 .select_entity(&candidates, &mut self.rng)
473 };
474
475 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
477 if let Some(ref entity_id) = target_entity {
479 label = label.with_metadata("entity_target", entity_id);
480 label = label.with_related_entity(entity_id);
481 label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
482 target_type: "Entity".to_string(),
483 target_id: entity_id.clone(),
484 });
485 }
486
487 if let Some(ref calculator) = self.difficulty_calculator {
489 let difficulty = calculator.calculate(&label);
490
491 label =
493 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
494 label = label.with_metadata(
495 "difficulty_score",
496 &difficulty.difficulty_score().to_string(),
497 );
498
499 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
501 }
502
503 modified_documents.push(entry.document_number().clone());
504 self.labels.push(label);
505 self.stats.total_injected += 1;
506
507 if let Some(ref co_occ) = self.co_occurrence_handler {
509 let correlated =
510 co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
511 for result in correlated {
512 self.queued_co_occurrences.push(QueuedAnomaly {
513 anomaly_type: result.anomaly_type,
514 target_entity: if result.same_entity {
515 target_entity.clone()
516 } else {
517 None
518 },
519 earliest_date: entry_date
520 + chrono::Duration::days(i64::from(result.lag_days)),
521 description: result.description,
522 });
523 }
524 }
525 }
526
527 if self.config.allow_duplicates
529 && matches!(
530 self.labels.last().map(|l| &l.anomaly_type),
531 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
532 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
533 )
534 {
535 let dup_strategy = DuplicationStrategy::default();
536 let duplicate =
537 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
538 duplicates.push(duplicate);
539 }
540 }
541 }
542
543 let duplicates_created = duplicates.len();
545
546 let summary = AnomalySummary::from_anomalies(&self.labels);
548
549 InjectionBatchResult {
550 entries_processed: self.stats.total_processed,
551 anomalies_injected: self.stats.total_injected,
552 duplicates_created,
553 labels: self.labels.clone(),
554 summary,
555 modified_documents,
556 near_miss_labels: self.near_miss_labels.clone(),
557 scheme_actions: self.scheme_actions.clone(),
558 difficulty_distribution: self.difficulty_distribution.clone(),
559 }
560 }
561
562 fn should_process(&mut self, entry: &JournalEntry) -> bool {
564 if !self.config.target_companies.is_empty()
566 && !self
567 .config
568 .target_companies
569 .iter()
570 .any(|c| c == entry.company_code())
571 {
572 self.stats.skipped_company += 1;
573 return false;
574 }
575
576 if let Some((start, end)) = self.config.date_range {
578 if entry.posting_date() < start || entry.posting_date() > end {
579 self.stats.skipped_date += 1;
580 return false;
581 }
582 }
583
584 let current_count = self
586 .document_anomaly_counts
587 .get(&entry.document_number())
588 .copied()
589 .unwrap_or(0);
590 if current_count >= self.config.max_anomalies_per_document {
591 self.stats.skipped_max_per_doc += 1;
592 return false;
593 }
594
595 true
596 }
597
598 fn select_anomaly_category(&mut self) -> AnomalyType {
600 let r = self.rng.random::<f64>();
601 let rates = &self.config.rates;
602
603 let mut cumulative = 0.0;
604
605 cumulative += rates.fraud_rate;
606 if r < cumulative {
607 return self.type_selector.select_fraud(&mut self.rng);
608 }
609
610 cumulative += rates.error_rate;
611 if r < cumulative {
612 return self.type_selector.select_error(&mut self.rng);
613 }
614
615 cumulative += rates.process_issue_rate;
616 if r < cumulative {
617 return self.type_selector.select_process_issue(&mut self.rng);
618 }
619
620 cumulative += rates.statistical_rate;
621 if r < cumulative {
622 return self.type_selector.select_statistical(&mut self.rng);
623 }
624
625 self.type_selector.select_relational(&mut self.rng)
626 }
627
628 fn inject_anomaly(
630 &mut self,
631 entry: &mut JournalEntry,
632 anomaly_type: AnomalyType,
633 ) -> Option<LabeledAnomaly> {
634 if !self.strategies.can_apply(entry, &anomaly_type) {
636 return None;
637 }
638
639 let result = self
641 .strategies
642 .apply_strategy(entry, &anomaly_type, &mut self.rng);
643
644 if !result.success {
645 return None;
646 }
647
648 *self
650 .document_anomaly_counts
651 .entry(entry.document_number().clone())
652 .or_insert(0) += 1;
653
654 let category = anomaly_type.category().to_string();
656 let type_name = anomaly_type.type_name();
657
658 *self.stats.by_category.entry(category).or_insert(0) += 1;
659 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
660 *self
661 .stats
662 .by_company
663 .entry(entry.company_code().to_string())
664 .or_insert(0) += 1;
665
666 if self.config.generate_labels {
668 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
669
670 entry.header.is_anomaly = true;
672 entry.header.anomaly_id = Some(anomaly_id.clone());
673 entry.header.anomaly_type = Some(type_name.clone());
674
675 let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
677 Vec::new();
678 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
679 entry.header.is_fraud = true;
680 if let AnomalyType::Fraud(ref ft) = anomaly_type {
681 entry.header.fraud_type = Some(*ft);
682 }
683 secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
689 }
690
691 let mut label = LabeledAnomaly::new(
692 anomaly_id,
693 anomaly_type.clone(),
694 entry.document_number().clone(),
695 "JE".to_string(),
696 entry.company_code().to_string(),
697 entry.posting_date(),
698 )
699 .with_description(&result.description)
700 .with_injection_strategy(&type_name);
701
702 let causal_reason = AnomalyCausalReason::RandomRate {
704 base_rate: self.config.rates.total_rate,
705 };
706 label = label.with_causal_reason(causal_reason);
707
708 let context_multiplier = self.calculate_context_rate_multiplier(entry);
710 if (context_multiplier - 1.0).abs() > f64::EPSILON {
711 label = label.with_metadata(
712 "entity_context_multiplier",
713 &format!("{context_multiplier:.3}"),
714 );
715 label = label.with_metadata(
716 "effective_rate",
717 &format!(
718 "{:.6}",
719 (self.config.rates.total_rate * context_multiplier).min(1.0)
720 ),
721 );
722 }
723
724 if let Some(impact) = result.monetary_impact {
726 label = label.with_monetary_impact(impact);
727 }
728
729 for entity in &result.related_entities {
731 label = label.with_related_entity(entity);
732 }
733
734 for (key, value) in &result.metadata {
736 label = label.with_metadata(key, value);
737 }
738
739 if let Some(cluster_id) =
741 self.cluster_manager
742 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
743 {
744 label = label.with_cluster(&cluster_id);
745 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
747 cluster_id: cluster_id.clone(),
748 });
749 }
750
751 for issue_type in &secondary_process_issues {
758 let child_id = format!("ANO{:08}", self.labels.len() + 1);
759 let child = LabeledAnomaly::new(
760 child_id,
761 AnomalyType::ProcessIssue(*issue_type),
762 entry.document_number().clone(),
763 "JE".to_string(),
764 entry.company_code().to_string(),
765 entry.posting_date(),
766 )
767 .with_description("Forensic pattern from fraud behavioral bias")
768 .with_injection_strategy("behavioral_bias")
769 .with_parent_anomaly(&label.anomaly_id);
770 self.labels.push(child);
771 }
772
773 return Some(label);
774 }
775
776 None
777 }
778
    /// Injects a caller-chosen `anomaly_type` into `entry`.
    ///
    /// Thin public wrapper around `inject_anomaly`: returns the label on success and
    /// `None` when the anomaly could not be applied or label generation is disabled.
    /// The returned label is not added to the injector's label list by this call.
    pub fn inject_specific(
        &mut self,
        entry: &mut JournalEntry,
        anomaly_type: AnomalyType,
    ) -> Option<LabeledAnomaly> {
        self.inject_anomaly(entry, anomaly_type)
    }
787
788 pub fn create_self_approval(
790 &mut self,
791 entry: &mut JournalEntry,
792 user_id: &str,
793 ) -> Option<LabeledAnomaly> {
794 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
795
796 let label = LabeledAnomaly::new(
797 format!("ANO{:08}", self.labels.len() + 1),
798 anomaly_type,
799 entry.document_number().clone(),
800 "JE".to_string(),
801 entry.company_code().to_string(),
802 entry.posting_date(),
803 )
804 .with_description(&format!("User {user_id} approved their own transaction"))
805 .with_related_entity(user_id)
806 .with_injection_strategy("ManualSelfApproval")
807 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
808 target_type: "User".to_string(),
809 target_id: user_id.to_string(),
810 });
811
812 entry.header.is_anomaly = true;
814 entry.header.is_fraud = true;
815 entry.header.anomaly_id = Some(label.anomaly_id.clone());
816 entry.header.anomaly_type = Some("SelfApproval".to_string());
817 entry.header.fraud_type = Some(FraudType::SelfApproval);
818
819 entry.header.created_by = user_id.to_string();
821
822 self.apply_fraud_behavioral_bias(entry);
825
826 self.labels.push(label.clone());
827 Some(label)
828 }
829
830 pub fn create_sod_violation(
832 &mut self,
833 entry: &mut JournalEntry,
834 user_id: &str,
835 conflicting_duties: (&str, &str),
836 ) -> Option<LabeledAnomaly> {
837 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
838
839 let label = LabeledAnomaly::new(
840 format!("ANO{:08}", self.labels.len() + 1),
841 anomaly_type,
842 entry.document_number().clone(),
843 "JE".to_string(),
844 entry.company_code().to_string(),
845 entry.posting_date(),
846 )
847 .with_description(&format!(
848 "User {} performed conflicting duties: {} and {}",
849 user_id, conflicting_duties.0, conflicting_duties.1
850 ))
851 .with_related_entity(user_id)
852 .with_metadata("duty1", conflicting_duties.0)
853 .with_metadata("duty2", conflicting_duties.1)
854 .with_injection_strategy("ManualSoDViolation")
855 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
856 target_type: "User".to_string(),
857 target_id: user_id.to_string(),
858 });
859
860 entry.header.is_anomaly = true;
862 entry.header.is_fraud = true;
863 entry.header.anomaly_id = Some(label.anomaly_id.clone());
864 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
865 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
866
867 self.apply_fraud_behavioral_bias(entry);
869
870 self.labels.push(label.clone());
871 Some(label)
872 }
873
874 pub fn create_ic_mismatch(
876 &mut self,
877 entry: &mut JournalEntry,
878 matching_company: &str,
879 expected_amount: Decimal,
880 actual_amount: Decimal,
881 ) -> Option<LabeledAnomaly> {
882 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
883
884 let label = LabeledAnomaly::new(
885 format!("ANO{:08}", self.labels.len() + 1),
886 anomaly_type,
887 entry.document_number().clone(),
888 "JE".to_string(),
889 entry.company_code().to_string(),
890 entry.posting_date(),
891 )
892 .with_description(&format!(
893 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
894 ))
895 .with_related_entity(matching_company)
896 .with_monetary_impact(actual_amount - expected_amount)
897 .with_metadata("expected_amount", &expected_amount.to_string())
898 .with_metadata("actual_amount", &actual_amount.to_string())
899 .with_injection_strategy("ManualICMismatch")
900 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
901 target_type: "Intercompany".to_string(),
902 target_id: matching_company.to_string(),
903 });
904
905 entry.header.is_anomaly = true;
907 entry.header.anomaly_id = Some(label.anomaly_id.clone());
908 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
909
910 self.labels.push(label.clone());
911 Some(label)
912 }
913
914 pub fn get_labels(&self) -> &[LabeledAnomaly] {
916 &self.labels
917 }
918
    /// Builds a fresh summary from the labels stored so far.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
923
    /// Borrows the injector's running statistics.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
928
929 pub fn reset(&mut self) {
931 self.labels.clear();
932 self.document_anomaly_counts.clear();
933 self.stats = InjectorStats::default();
934 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
935
936 self.near_miss_labels.clear();
938 self.scheme_actions.clear();
939 self.difficulty_distribution.clear();
940
941 if let Some(ref mut baseline) = self.behavioral_baseline {
942 *baseline =
943 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
944 }
945 }
946
    /// Number of clusters reported by the cluster manager.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
951
952 pub fn set_entity_contexts(
965 &mut self,
966 vendors: HashMap<String, VendorContext>,
967 employees: HashMap<String, EmployeeContext>,
968 accounts: HashMap<String, AccountContext>,
969 ) {
970 self.vendor_contexts = vendors;
971 self.employee_contexts = employees;
972 self.account_contexts = accounts;
973 }
974
    /// Borrows the registered vendor contexts.
    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
        &self.vendor_contexts
    }
979
    /// Borrows the registered employee contexts.
    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
        &self.employee_contexts
    }
984
    /// Borrows the registered account contexts.
    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
        &self.account_contexts
    }
989
990 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
999 if self.vendor_contexts.is_empty()
1000 && self.employee_contexts.is_empty()
1001 && self.account_contexts.is_empty()
1002 {
1003 return 1.0;
1004 }
1005
1006 let mut multiplier = 1.0;
1007
1008 if let Some(ref vendor_ref) = entry.header.reference {
1010 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1011 if ctx.is_new {
1013 multiplier *= 2.0;
1014 }
1015 if ctx.is_dormant_reactivation {
1016 multiplier *= 1.5;
1017 }
1018 }
1019 }
1020
1021 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1023 if ctx.is_new {
1024 multiplier *= 1.5;
1025 }
1026 if ctx.is_volume_fatigued {
1027 multiplier *= 1.3;
1028 }
1029 if ctx.is_overtime {
1030 multiplier *= 1.2;
1031 }
1032 }
1033
1034 if let Some(first_line) = entry.lines.first() {
1036 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1037 if ctx.is_high_risk {
1038 multiplier *= 2.0;
1039 }
1040 }
1041 }
1042
1043 multiplier
1044 }
1045
1046 fn apply_fraud_behavioral_bias(
1055 &mut self,
1056 entry: &mut JournalEntry,
1057 ) -> Vec<datasynth_core::models::ProcessIssueType> {
1058 use datasynth_core::models::ProcessIssueType;
1059
1060 let cfg = self.config.enhanced.fraud_behavioral_bias;
1061 let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1062 for issue in &fired {
1063 match issue {
1064 ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1065 ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1066 ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1067 _ => {}
1068 }
1069 }
1070 if cfg.round_dollar_bias > 0.0 {
1075 const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1076 let max_amt: Decimal = entry
1077 .lines
1078 .iter()
1079 .map(|l| l.debit_amount.max(l.credit_amount))
1080 .max()
1081 .unwrap_or(Decimal::ZERO);
1082 if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1083 self.stats.fraud_round_dollar_bias_applied += 1;
1084 }
1085 }
1086 fired
1087 }
1088
1089 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1098 if let Some(ref mut advancer) = self.scheme_advancer {
1099 let context = SchemeContext::new(date, company_code);
1100 let actions = advancer.advance_all(&context);
1101 self.scheme_actions.extend(actions.clone());
1102 actions
1103 } else {
1104 Vec::new()
1105 }
1106 }
1107
1108 pub fn maybe_start_scheme(
1114 &mut self,
1115 date: NaiveDate,
1116 company_code: &str,
1117 available_users: Vec<String>,
1118 available_accounts: Vec<String>,
1119 available_counterparties: Vec<String>,
1120 ) -> Option<uuid::Uuid> {
1121 if let Some(ref mut advancer) = self.scheme_advancer {
1122 let mut context = SchemeContext::new(date, company_code);
1123 context.available_users = available_users;
1124 context.available_accounts = available_accounts;
1125 context.available_counterparties = available_counterparties;
1126
1127 advancer.maybe_start_scheme(&context)
1128 } else {
1129 None
1130 }
1131 }
1132
1133 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1135 &self.near_miss_labels
1136 }
1137
1138 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1140 &self.scheme_actions
1141 }
1142
    /// Borrows the tally of labels per detection difficulty.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
1147
1148 pub fn check_behavioral_deviations(
1150 &self,
1151 entity_id: &str,
1152 observation: &super::context::Observation,
1153 ) -> Vec<super::context::BehavioralDeviation> {
1154 if let Some(ref baseline) = self.behavioral_baseline {
1155 baseline.check_deviation(entity_id, observation)
1156 } else {
1157 Vec::new()
1158 }
1159 }
1160
1161 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1163 if let Some(ref baseline) = self.behavioral_baseline {
1164 baseline.get_baseline(entity_id)
1165 } else {
1166 None
1167 }
1168 }
1169
1170 pub fn active_scheme_count(&self) -> usize {
1172 if let Some(ref advancer) = self.scheme_advancer {
1173 advancer.active_scheme_count()
1174 } else {
1175 0
1176 }
1177 }
1178
1179 pub fn has_enhanced_features(&self) -> bool {
1181 self.scheme_advancer.is_some()
1182 || self.near_miss_generator.is_some()
1183 || self.difficulty_calculator.is_some()
1184 || self.entity_aware_injector.is_some()
1185 }
1186}
1187
/// Fluent builder for [`AnomalyInjectorConfig`], starting from the default configuration.
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration being assembled; returned as-is by `build`.
    config: AnomalyInjectorConfig,
}
1192
1193impl AnomalyInjectorConfigBuilder {
1194 pub fn new() -> Self {
1196 Self {
1197 config: AnomalyInjectorConfig::default(),
1198 }
1199 }
1200
1201 pub fn with_total_rate(mut self, rate: f64) -> Self {
1203 self.config.rates.total_rate = rate;
1204 self
1205 }
1206
1207 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1209 self.config.rates.fraud_rate = rate;
1210 self
1211 }
1212
1213 pub fn with_error_rate(mut self, rate: f64) -> Self {
1215 self.config.rates.error_rate = rate;
1216 self
1217 }
1218
1219 pub fn with_seed(mut self, seed: u64) -> Self {
1221 self.config.seed = seed;
1222 self
1223 }
1224
1225 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1227 self.config.patterns.temporal_pattern = pattern;
1228 self
1229 }
1230
1231 pub fn with_labels(mut self, generate: bool) -> Self {
1233 self.config.generate_labels = generate;
1234 self
1235 }
1236
1237 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1239 self.config.target_companies = companies;
1240 self
1241 }
1242
1243 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1245 self.config.date_range = Some((start, end));
1246 self
1247 }
1248
1249 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1255 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1256 self.config.enhanced.scheme_probability = probability;
1257 self
1258 }
1259
1260 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1262 self.config.enhanced.near_miss_enabled = enabled;
1263 self.config.enhanced.near_miss_proportion = proportion;
1264 self
1265 }
1266
1267 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1269 self.config.enhanced.approval_thresholds = thresholds;
1270 self
1271 }
1272
1273 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1275 self.config.enhanced.correlated_injection_enabled = enabled;
1276 self
1277 }
1278
1279 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1281 self.config.enhanced.temporal_clustering_enabled = enabled;
1282 self.config.enhanced.period_end_multiplier = multiplier;
1283 self
1284 }
1285
1286 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1288 self.config.enhanced.difficulty_classification_enabled = enabled;
1289 self
1290 }
1291
1292 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1294 self.config.enhanced.context_aware_enabled = enabled;
1295 self
1296 }
1297
1298 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1300 self.config.enhanced.behavioral_baseline_config = config;
1301 self
1302 }
1303
1304 pub fn with_all_enhanced_features(mut self) -> Self {
1306 self.config.enhanced.multi_stage_schemes_enabled = true;
1307 self.config.enhanced.scheme_probability = 0.02;
1308 self.config.enhanced.correlated_injection_enabled = true;
1309 self.config.enhanced.temporal_clustering_enabled = true;
1310 self.config.enhanced.period_end_multiplier = 2.5;
1311 self.config.enhanced.near_miss_enabled = true;
1312 self.config.enhanced.near_miss_proportion = 0.30;
1313 self.config.enhanced.difficulty_classification_enabled = true;
1314 self.config.enhanced.context_aware_enabled = true;
1315 self.config.enhanced.behavioral_baseline_config.enabled = true;
1316 self
1317 }
1318
1319 pub fn build(self) -> AnomalyInjectorConfig {
1321 self.config
1322 }
1323}
1324
1325impl Default for AnomalyInjectorConfigBuilder {
1326 fn default() -> Self {
1327 Self::new()
1328 }
1329}
1330
1331#[cfg(test)]
1332#[allow(clippy::unwrap_used)]
1333mod tests {
1334 use super::*;
1335 use chrono::NaiveDate;
1336 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1337 use rust_decimal_macros::dec;
1338
1339 fn create_test_entry(doc_num: &str) -> JournalEntry {
1340 let mut entry = JournalEntry::new_simple(
1341 doc_num.to_string(),
1342 "1000".to_string(),
1343 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1344 "Test Entry".to_string(),
1345 );
1346
1347 entry.add_line(JournalEntryLine {
1348 line_number: 1,
1349 gl_account: "5000".to_string(),
1350 debit_amount: dec!(1000),
1351 ..Default::default()
1352 });
1353
1354 entry.add_line(JournalEntryLine {
1355 line_number: 2,
1356 gl_account: "1000".to_string(),
1357 credit_amount: dec!(1000),
1358 ..Default::default()
1359 });
1360
1361 entry
1362 }
1363
1364 #[test]
1365 fn test_anomaly_injector_basic() {
1366 let config = AnomalyInjectorConfigBuilder::new()
1367 .with_total_rate(0.5) .with_seed(42)
1369 .build();
1370
1371 let mut injector = AnomalyInjector::new(config);
1372
1373 let mut entries: Vec<_> = (0..100)
1374 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1375 .collect();
1376
1377 let result = injector.process_entries(&mut entries);
1378
1379 assert!(result.anomalies_injected > 0);
1381 assert!(!result.labels.is_empty());
1382 assert!(result.labels.len() >= result.anomalies_injected);
1387 }
1388
1389 #[test]
1390 fn test_specific_injection() {
1391 let config = AnomalyInjectorConfig::default();
1392 let mut injector = AnomalyInjector::new(config);
1393
1394 let mut entry = create_test_entry("JE001");
1395 let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1396
1397 let label = injector.inject_specific(&mut entry, anomaly_type);
1398
1399 assert!(label.is_some());
1400 let label = label.unwrap();
1401 assert!(!label.document_id.is_empty());
1403 assert_eq!(label.document_id, entry.document_number());
1404 }
1405
1406 #[test]
1407 fn test_self_approval_injection() {
1408 let config = AnomalyInjectorConfig::default();
1409 let mut injector = AnomalyInjector::new(config);
1410
1411 let mut entry = create_test_entry("JE001");
1412 let label = injector.create_self_approval(&mut entry, "USER001");
1413
1414 assert!(label.is_some());
1415 let label = label.unwrap();
1416 assert!(matches!(
1417 label.anomaly_type,
1418 AnomalyType::Fraud(FraudType::SelfApproval)
1419 ));
1420 assert!(label.related_entities.contains(&"USER001".to_string()));
1421 }
1422
1423 #[test]
1424 fn test_company_filtering() {
1425 let config = AnomalyInjectorConfigBuilder::new()
1426 .with_total_rate(1.0) .with_target_companies(vec!["2000".to_string()])
1428 .build();
1429
1430 let mut injector = AnomalyInjector::new(config);
1431
1432 let mut entries = vec![
1433 create_test_entry("JE001"), create_test_entry("JE002"), ];
1436
1437 let result = injector.process_entries(&mut entries);
1438
1439 assert_eq!(result.anomalies_injected, 0);
1441 }
1442
1443 fn create_test_entry_with_context(
1449 doc_num: &str,
1450 vendor_ref: Option<&str>,
1451 employee_id: &str,
1452 gl_account: &str,
1453 ) -> JournalEntry {
1454 let mut entry = JournalEntry::new_simple(
1455 doc_num.to_string(),
1456 "1000".to_string(),
1457 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1458 "Test Entry".to_string(),
1459 );
1460
1461 entry.header.reference = vendor_ref.map(|v| v.to_string());
1462 entry.header.created_by = employee_id.to_string();
1463
1464 entry.add_line(JournalEntryLine {
1465 line_number: 1,
1466 gl_account: gl_account.to_string(),
1467 debit_amount: dec!(1000),
1468 ..Default::default()
1469 });
1470
1471 entry.add_line(JournalEntryLine {
1472 line_number: 2,
1473 gl_account: "1000".to_string(),
1474 credit_amount: dec!(1000),
1475 ..Default::default()
1476 });
1477
1478 entry
1479 }
1480
1481 #[test]
1482 fn test_set_entity_contexts() {
1483 let config = AnomalyInjectorConfig::default();
1484 let mut injector = AnomalyInjector::new(config);
1485
1486 assert!(injector.vendor_contexts().is_empty());
1488 assert!(injector.employee_contexts().is_empty());
1489 assert!(injector.account_contexts().is_empty());
1490
1491 let mut vendors = HashMap::new();
1493 vendors.insert(
1494 "V001".to_string(),
1495 VendorContext {
1496 vendor_id: "V001".to_string(),
1497 is_new: true,
1498 ..Default::default()
1499 },
1500 );
1501
1502 let mut employees = HashMap::new();
1503 employees.insert(
1504 "EMP001".to_string(),
1505 EmployeeContext {
1506 employee_id: "EMP001".to_string(),
1507 is_new: true,
1508 ..Default::default()
1509 },
1510 );
1511
1512 let mut accounts = HashMap::new();
1513 accounts.insert(
1514 "8100".to_string(),
1515 AccountContext {
1516 account_code: "8100".to_string(),
1517 is_high_risk: true,
1518 ..Default::default()
1519 },
1520 );
1521
1522 injector.set_entity_contexts(vendors, employees, accounts);
1523
1524 assert_eq!(injector.vendor_contexts().len(), 1);
1525 assert_eq!(injector.employee_contexts().len(), 1);
1526 assert_eq!(injector.account_contexts().len(), 1);
1527 assert!(injector.vendor_contexts().contains_key("V001"));
1528 assert!(injector.employee_contexts().contains_key("EMP001"));
1529 assert!(injector.account_contexts().contains_key("8100"));
1530 }
1531
1532 #[test]
1533 fn test_default_behavior_no_contexts() {
1534 let config = AnomalyInjectorConfigBuilder::new()
1536 .with_total_rate(0.5)
1537 .with_seed(42)
1538 .build();
1539
1540 let mut injector = AnomalyInjector::new(config);
1541
1542 let mut entries: Vec<_> = (0..200)
1543 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1544 .collect();
1545
1546 let result = injector.process_entries(&mut entries);
1547
1548 assert!(result.anomalies_injected > 0);
1551 let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1552 assert!(
1553 rate > 0.2 && rate < 0.8,
1554 "Expected ~50% rate, got {:.2}%",
1555 rate * 100.0
1556 );
1557 }
1558
1559 #[test]
1560 fn test_entity_context_increases_injection_rate() {
1561 let base_rate = 0.10; let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1567 .with_total_rate(base_rate)
1568 .with_seed(123)
1569 .build();
1570
1571 let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1572
1573 let mut entries_no_ctx: Vec<_> = (0..500)
1574 .map(|i| {
1575 create_test_entry_with_context(
1576 &format!("JE{:04}", i),
1577 Some("V001"),
1578 "EMP001",
1579 "8100",
1580 )
1581 })
1582 .collect();
1583
1584 let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1585
1586 let config_ctx = AnomalyInjectorConfigBuilder::new()
1588 .with_total_rate(base_rate)
1589 .with_seed(123)
1590 .build();
1591
1592 let mut injector_ctx = AnomalyInjector::new(config_ctx);
1593
1594 let mut vendors = HashMap::new();
1596 vendors.insert(
1597 "V001".to_string(),
1598 VendorContext {
1599 vendor_id: "V001".to_string(),
1600 is_new: true, is_dormant_reactivation: true, ..Default::default()
1603 },
1604 );
1605
1606 let mut employees = HashMap::new();
1607 employees.insert(
1608 "EMP001".to_string(),
1609 EmployeeContext {
1610 employee_id: "EMP001".to_string(),
1611 is_new: true, ..Default::default()
1613 },
1614 );
1615
1616 let mut accounts = HashMap::new();
1617 accounts.insert(
1618 "8100".to_string(),
1619 AccountContext {
1620 account_code: "8100".to_string(),
1621 is_high_risk: true, ..Default::default()
1623 },
1624 );
1625
1626 injector_ctx.set_entity_contexts(vendors, employees, accounts);
1627
1628 let mut entries_ctx: Vec<_> = (0..500)
1629 .map(|i| {
1630 create_test_entry_with_context(
1631 &format!("JE{:04}", i),
1632 Some("V001"),
1633 "EMP001",
1634 "8100",
1635 )
1636 })
1637 .collect();
1638
1639 let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1640
1641 assert!(
1643 result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1644 "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1645 result_ctx.anomalies_injected,
1646 result_no_ctx.anomalies_injected,
1647 );
1648 }
1649
1650 #[test]
1651 fn test_risk_score_multiplication() {
1652 let config = AnomalyInjectorConfig::default();
1654 let mut injector = AnomalyInjector::new(config);
1655
1656 let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1658 assert!(
1659 (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1660 );
1661
1662 let mut vendors = HashMap::new();
1664 vendors.insert(
1665 "V_RISKY".to_string(),
1666 VendorContext {
1667 vendor_id: "V_RISKY".to_string(),
1668 is_new: true,
1669 ..Default::default()
1670 },
1671 );
1672
1673 let mut accounts = HashMap::new();
1674 accounts.insert(
1675 "9000".to_string(),
1676 AccountContext {
1677 account_code: "9000".to_string(),
1678 is_high_risk: true,
1679 ..Default::default()
1680 },
1681 );
1682
1683 injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1684
1685 let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1686 let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1687 assert!(
1689 (multiplier - 4.0).abs() < f64::EPSILON,
1690 "Expected 4.0x multiplier, got {}",
1691 multiplier,
1692 );
1693
1694 let entry_vendor_only =
1696 create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1697 let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1698 assert!(
1699 (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1700 "Expected 2.0x multiplier (vendor only), got {}",
1701 multiplier_vendor,
1702 );
1703
1704 let entry_no_match =
1706 create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1707 let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1708 assert!(
1709 (multiplier_none - 1.0).abs() < f64::EPSILON,
1710 "Expected 1.0x multiplier (no match), got {}",
1711 multiplier_none,
1712 );
1713 }
1714
1715 #[test]
1716 fn test_employee_context_multiplier() {
1717 let config = AnomalyInjectorConfig::default();
1718 let mut injector = AnomalyInjector::new(config);
1719
1720 let mut employees = HashMap::new();
1721 employees.insert(
1722 "EMP_NEW".to_string(),
1723 EmployeeContext {
1724 employee_id: "EMP_NEW".to_string(),
1725 is_new: true, is_volume_fatigued: true, is_overtime: true, ..Default::default()
1729 },
1730 );
1731
1732 injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1733
1734 let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1735 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1736
1737 let expected = 1.5 * 1.3 * 1.2;
1739 assert!(
1740 (multiplier - expected).abs() < 0.01,
1741 "Expected {:.3}x multiplier, got {:.3}",
1742 expected,
1743 multiplier,
1744 );
1745 }
1746
1747 #[test]
1748 fn test_entity_contexts_persist_across_reset() {
1749 let config = AnomalyInjectorConfig::default();
1750 let mut injector = AnomalyInjector::new(config);
1751
1752 let mut vendors = HashMap::new();
1753 vendors.insert(
1754 "V001".to_string(),
1755 VendorContext {
1756 vendor_id: "V001".to_string(),
1757 is_new: true,
1758 ..Default::default()
1759 },
1760 );
1761
1762 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1763 assert_eq!(injector.vendor_contexts().len(), 1);
1764
1765 injector.reset();
1767 assert_eq!(injector.vendor_contexts().len(), 1);
1768 }
1769
1770 #[test]
1771 fn test_set_empty_contexts_clears() {
1772 let config = AnomalyInjectorConfig::default();
1773 let mut injector = AnomalyInjector::new(config);
1774
1775 let mut vendors = HashMap::new();
1776 vendors.insert(
1777 "V001".to_string(),
1778 VendorContext {
1779 vendor_id: "V001".to_string(),
1780 ..Default::default()
1781 },
1782 );
1783
1784 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1785 assert_eq!(injector.vendor_contexts().len(), 1);
1786
1787 injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1789 assert!(injector.vendor_contexts().is_empty());
1790 }
1791
1792 #[test]
1793 fn test_dormant_vendor_multiplier() {
1794 let config = AnomalyInjectorConfig::default();
1795 let mut injector = AnomalyInjector::new(config);
1796
1797 let mut vendors = HashMap::new();
1798 vendors.insert(
1799 "V_DORMANT".to_string(),
1800 VendorContext {
1801 vendor_id: "V_DORMANT".to_string(),
1802 is_dormant_reactivation: true, ..Default::default()
1804 },
1805 );
1806
1807 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1808
1809 let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1810 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1811 assert!(
1812 (multiplier - 1.5).abs() < f64::EPSILON,
1813 "Expected 1.5x multiplier for dormant vendor, got {}",
1814 multiplier,
1815 );
1816 }
1817
1818 #[test]
1827 fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
1828 use chrono::{Datelike, Timelike, Weekday};
1829 use datasynth_core::models::FraudType;
1830
1831 let mut config = AnomalyInjectorConfig::default();
1832 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1833 enabled: true,
1834 weekend_bias: 1.0,
1835 round_dollar_bias: 1.0,
1836 off_hours_bias: 1.0,
1837 post_close_bias: 1.0,
1838 };
1839 let mut injector = AnomalyInjector::new(config);
1840
1841 let mut entry = JournalEntry::new_simple(
1843 "JE001".to_string(),
1844 "1000".to_string(),
1845 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), "Test Entry".to_string(),
1847 );
1848 entry.add_line(JournalEntryLine {
1849 line_number: 1,
1850 gl_account: "5000".to_string(),
1851 debit_amount: dec!(1237),
1852 ..Default::default()
1853 });
1854 entry.add_line(JournalEntryLine {
1855 line_number: 2,
1856 gl_account: "1000".to_string(),
1857 credit_amount: dec!(1237),
1858 ..Default::default()
1859 });
1860
1861 let _ =
1862 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1863
1864 assert!(
1866 matches!(
1867 entry.header.posting_date.weekday(),
1868 Weekday::Sat | Weekday::Sun
1869 ),
1870 "expected weekend posting date, got {:?}",
1871 entry.header.posting_date.weekday()
1872 );
1873 let debit_total: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
1875 let credit_total: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
1876 assert_eq!(debit_total, credit_total, "entry must remain balanced");
1877 assert!(
1878 [
1879 dec!(1_000),
1880 dec!(5_000),
1881 dec!(10_000),
1882 dec!(25_000),
1883 dec!(50_000),
1884 dec!(100_000)
1885 ]
1886 .contains(&debit_total),
1887 "expected round-dollar total, got {}",
1888 debit_total
1889 );
1890 let hour = entry.header.created_at.hour();
1892 assert!(
1893 !(6..22).contains(&hour),
1894 "expected off-hours timestamp, got hour {}",
1895 hour
1896 );
1897 assert!(entry.header.is_post_close);
1899
1900 let stats = injector.get_stats();
1902 assert_eq!(stats.fraud_weekend_bias_applied, 1);
1903 assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
1904 assert_eq!(stats.fraud_off_hours_bias_applied, 1);
1905 assert_eq!(stats.fraud_post_close_bias_applied, 1);
1906 }
1907
1908 #[test]
1911 fn fraud_behavioral_bias_rate_zero_applies_nothing() {
1912 use datasynth_core::models::FraudType;
1913
1914 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1916 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1917 enabled: true,
1918 weekend_bias: 0.0,
1919 round_dollar_bias: 0.0,
1920 off_hours_bias: 0.0,
1921 post_close_bias: 0.0,
1922 };
1923 let mut injector = AnomalyInjector::new(config);
1924 let mut entry = create_test_entry("JE001");
1925 entry.header.posting_date = original_date;
1926
1927 let _ =
1928 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1929
1930 assert_eq!(entry.header.posting_date, original_date);
1931 assert!(!entry.header.is_post_close);
1932 let stats = injector.get_stats();
1933 assert_eq!(stats.fraud_weekend_bias_applied, 0);
1934 assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
1935 assert_eq!(stats.fraud_off_hours_bias_applied, 0);
1936 assert_eq!(stats.fraud_post_close_bias_applied, 0);
1937 }
1938
1939 #[test]
1942 fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
1943 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1945 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1946 enabled: true,
1947 weekend_bias: 1.0,
1948 round_dollar_bias: 1.0,
1949 off_hours_bias: 1.0,
1950 post_close_bias: 1.0,
1951 };
1952 let mut injector = AnomalyInjector::new(config);
1953 let mut entry = create_test_entry("JE001");
1954 entry.header.posting_date = original_date;
1955
1956 let _ = injector.inject_specific(
1957 &mut entry,
1958 AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
1959 );
1960
1961 assert_eq!(entry.header.posting_date, original_date);
1962 let stats = injector.get_stats();
1963 assert_eq!(stats.fraud_weekend_bias_applied, 0);
1964 }
1965
1966 #[test]
1970 fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
1971 use datasynth_core::models::{FraudType, ProcessIssueType};
1972
1973 let mut config = AnomalyInjectorConfig::default();
1974 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1975 enabled: true,
1976 weekend_bias: 1.0,
1977 round_dollar_bias: 0.0, off_hours_bias: 1.0,
1979 post_close_bias: 1.0,
1980 };
1981 let mut injector = AnomalyInjector::new(config);
1982 let mut entry = JournalEntry::new_simple(
1983 "JE001".into(),
1984 "1000".into(),
1985 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(),
1986 "Test".into(),
1987 );
1988 entry.add_line(JournalEntryLine {
1989 line_number: 1,
1990 gl_account: "5000".into(),
1991 debit_amount: dec!(1000),
1992 ..Default::default()
1993 });
1994 entry.add_line(JournalEntryLine {
1995 line_number: 2,
1996 gl_account: "1000".into(),
1997 credit_amount: dec!(1000),
1998 ..Default::default()
1999 });
2000
2001 let primary = injector
2002 .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
2003 .expect("fraud label should be produced");
2004
2005 let labels = injector.get_labels();
2007 assert_eq!(
2008 labels.len(),
2009 3,
2010 "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
2011 );
2012 let types: Vec<AnomalyType> = labels.iter().map(|l| l.anomaly_type.clone()).collect();
2013 assert!(types.contains(&AnomalyType::ProcessIssue(ProcessIssueType::WeekendPosting)));
2014 assert!(types.contains(&AnomalyType::ProcessIssue(
2015 ProcessIssueType::AfterHoursPosting
2016 )));
2017 assert!(types.contains(&AnomalyType::ProcessIssue(
2018 ProcessIssueType::PostClosePosting
2019 )));
2020 assert_eq!(
2021 primary.anomaly_type,
2022 AnomalyType::Fraud(FraudType::FictitiousEntry)
2023 );
2024 }
2025}