1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
23use datasynth_core::models::{
24 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
25 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
26 RelationalAnomalyType,
27};
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29
30use super::context::{
31 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
32 EntityAwareInjector, VendorContext,
33};
34use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
35use super::difficulty::DifficultyCalculator;
36use super::near_miss::{NearMissConfig, NearMissGenerator};
37use super::patterns::{
38 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
39 TemporalPattern,
40};
41use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
42use super::schemes::{SchemeAction, SchemeContext};
43use super::strategies::{DuplicationStrategy, StrategyCollection};
44use super::types::AnomalyTypeSelector;
45
/// Top-level configuration consumed by `AnomalyInjector::new`.
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Overall injection rate (`total_rate`) plus the per-category rates used
    /// when choosing between fraud, error, process-issue, statistical and
    /// relational anomalies.
    pub rates: AnomalyRateConfig,
    /// Temporal-pattern, clustering and entity-targeting settings.
    pub patterns: AnomalyPatternConfig,
    /// Seed for the injector's RNG and its deterministic UUID factory.
    pub seed: u64,
    /// When `false`, strategies are still applied to entries but no
    /// `LabeledAnomaly` is produced and the entry header is not flagged.
    pub generate_labels: bool,
    /// Whether a duplicate entry is generated after a `DuplicateEntry` /
    /// `DuplicatePayment` anomaly is labeled.
    pub allow_duplicates: bool,
    /// Entries whose document already carries this many injected anomalies are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes eligible for injection; an empty list means every company.
    pub target_companies: Vec<String>,
    /// Optional inclusive `(start, end)` posting-date window; entries outside it are skipped.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Switches and parameters for the optional enhanced components.
    pub enhanced: EnhancedInjectionConfig,
}
68
/// Opt-in settings for the enhanced injection components.
///
/// The derived `Default` leaves every boolean switch `false` and every numeric
/// field at zero, so nothing here is active unless explicitly enabled.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Creates a `SchemeAdvancer` when `true`.
    pub multi_stage_schemes_enabled: bool,
    /// Used as the embezzlement probability; revenue-manipulation and kickback
    /// schemes each get half of this value.
    pub scheme_probability: f64,
    /// Creates an `AnomalyCoOccurrence` handler that queues correlated follow-up anomalies.
    pub correlated_injection_enabled: bool,
    /// Creates a `TemporalClusterGenerator` whose active clusters scale the injection rate.
    pub temporal_clustering_enabled: bool,
    /// Period-end rate multiplier.
    /// NOTE(review): not read anywhere in this part of the file — confirm where it is consumed.
    pub period_end_multiplier: f64,
    /// Creates a `NearMissGenerator` when `true`.
    pub near_miss_enabled: bool,
    /// Passed to the near-miss generator as its `proportion`.
    pub near_miss_proportion: f64,
    /// Approval thresholds handed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Creates a `DifficultyCalculator` that annotates labels with detection difficulty.
    pub difficulty_classification_enabled: bool,
    /// Creates an `EntityAwareInjector`; also a prerequisite for the behavioral baseline.
    pub context_aware_enabled: bool,
    /// Baseline settings; the baseline is only built when both this config's
    /// `enabled` flag and `context_aware_enabled` are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
    /// Behavioral-bias settings applied to entries labeled as fraud.
    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
}
100
101impl Default for AnomalyInjectorConfig {
102 fn default() -> Self {
103 Self {
104 rates: AnomalyRateConfig::default(),
105 patterns: AnomalyPatternConfig::default(),
106 seed: 42,
107 generate_labels: true,
108 allow_duplicates: true,
109 max_anomalies_per_document: 2,
110 target_companies: Vec::new(),
111 date_range: None,
112 enhanced: EnhancedInjectionConfig::default(),
113 }
114 }
115}
116
/// Outcome of one `AnomalyInjector::process_entries` call.
///
/// Several fields are snapshots of the injector's accumulated state, so they
/// include results from earlier calls made since construction or the last `reset`.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Entries seen by the injector so far (cumulative counter).
    pub entries_processed: usize,
    /// Anomalies injected so far (cumulative counter).
    pub anomalies_injected: usize,
    /// Duplicate entries generated during this call.
    pub duplicates_created: usize,
    /// Copy of all labels accumulated by the injector.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed from the accumulated labels.
    pub summary: AnomalySummary,
    /// Document numbers of entries modified during this call.
    pub modified_documents: Vec<String>,
    /// Copy of all accumulated near-miss labels.
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Copy of all accumulated scheme actions.
    pub scheme_actions: Vec<SchemeAction>,
    /// Copy of the accumulated count of labels per detection difficulty.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
139
/// Stateful engine that injects anomalies into journal entries and records labels.
///
/// The `Option` fields hold enhanced components that exist only when the
/// matching switch in `EnhancedInjectionConfig` is enabled.
pub struct AnomalyInjector {
    /// Configuration the injector was built from.
    config: AnomalyInjectorConfig,
    /// RNG seeded from `config.seed`; shared by all random decisions.
    rng: ChaCha8Rng,
    /// Deterministic UUID source handed to the duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that check applicability and mutate entries.
    strategies: StrategyCollection,
    /// Assigns anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Chooses the entity an anomaly is attributed to.
    entity_targeting: EntityTargetingManager,
    /// Number of anomalies applied per document number (enforces the per-document cap).
    document_anomaly_counts: HashMap<String, usize>,
    /// All labels produced so far.
    labels: Vec<LabeledAnomaly>,
    /// Running counters.
    stats: InjectorStats,
    /// Multi-stage scheme driver (enhanced).
    scheme_advancer: Option<SchemeAdvancer>,
    /// Near-miss detector (enhanced).
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels produced so far.
    near_miss_labels: Vec<NearMissLabel>,
    /// Source of correlated follow-up anomalies (enhanced).
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Follow-up anomalies waiting for an entry dated on or after their `earliest_date`.
    queued_co_occurrences: Vec<QueuedAnomaly>,
    /// Supplies rate multipliers for active temporal clusters (enhanced).
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Classifies detection difficulty of labels (enhanced).
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Computes context-based rate multipliers (enhanced).
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Per-entity behavioral baseline fed with every processed entry (enhanced).
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions collected from `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Count of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Vendor contexts, looked up by the entry header's `reference`.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts, looked up by the entry header's `created_by`.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts, looked up by the first line's `gl_account`.
    account_contexts: HashMap<String, AccountContext>,
}
187
/// Running counters maintained by `AnomalyInjector`.
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`, including skipped ones.
    pub total_processed: usize,
    /// Anomalies for which `process_entries` recorded a label.
    pub total_injected: usize,
    /// Successful strategy applications per anomaly category.
    pub by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped by the rate check.
    /// NOTE(review): never incremented in this part of the file — confirm.
    pub skipped_rate: usize,
    /// Entries skipped for falling outside the configured date range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    pub skipped_company: usize,
    /// Entries skipped because their document already reached the anomaly cap.
    pub skipped_max_per_doc: usize,
    /// Times the fraud bias reported a weekend posting.
    pub fraud_weekend_bias_applied: usize,
    /// Times a biased fraud entry's largest line amount matched a round-dollar target.
    pub fraud_round_dollar_bias_applied: usize,
    /// Times the fraud bias reported an after-hours posting.
    pub fraud_off_hours_bias_applied: usize,
    /// Times the fraud bias reported a post-close posting.
    pub fraud_post_close_bias_applied: usize,
}
218
/// A correlated follow-up anomaly waiting to be injected into a later entry.
struct QueuedAnomaly {
    /// Anomaly type to inject.
    anomaly_type: AnomalyType,
    /// Entity to attach to the resulting label, when the correlation targets
    /// the same entity as the triggering anomaly.
    target_entity: Option<String>,
    /// First posting date on which this item may be injected (trigger date plus lag).
    earliest_date: NaiveDate,
    /// Description copied into the label's `co_occurrence_description` metadata.
    description: String,
}
230
231impl AnomalyInjector {
232 pub fn new(config: AnomalyInjectorConfig) -> Self {
234 let mut rng = seeded_rng(config.seed, 0);
235 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
236 let entity_targeting =
237 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
238
239 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
241 let scheme_config = SchemeAdvancerConfig {
242 embezzlement_probability: config.enhanced.scheme_probability,
243 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
244 kickback_probability: config.enhanced.scheme_probability * 0.5,
245 seed: rng.random(),
246 ..Default::default()
247 };
248 Some(SchemeAdvancer::new(scheme_config))
249 } else {
250 None
251 };
252
253 let near_miss_generator = if config.enhanced.near_miss_enabled {
254 let near_miss_config = NearMissConfig {
255 proportion: config.enhanced.near_miss_proportion,
256 seed: rng.random(),
257 ..Default::default()
258 };
259 Some(NearMissGenerator::new(near_miss_config))
260 } else {
261 None
262 };
263
264 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
265 Some(AnomalyCoOccurrence::new())
266 } else {
267 None
268 };
269
270 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
271 Some(TemporalClusterGenerator::new())
272 } else {
273 None
274 };
275
276 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
277 Some(DifficultyCalculator::new())
278 } else {
279 None
280 };
281
282 let entity_aware_injector = if config.enhanced.context_aware_enabled {
283 Some(EntityAwareInjector::default())
284 } else {
285 None
286 };
287
288 let behavioral_baseline = if config.enhanced.context_aware_enabled
289 && config.enhanced.behavioral_baseline_config.enabled
290 {
291 Some(BehavioralBaseline::new(
292 config.enhanced.behavioral_baseline_config.clone(),
293 ))
294 } else {
295 None
296 };
297
298 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
299
300 Self {
301 config,
302 rng,
303 uuid_factory,
304 type_selector: AnomalyTypeSelector::new(),
305 strategies: StrategyCollection::default(),
306 cluster_manager,
307 entity_targeting,
308 document_anomaly_counts: HashMap::new(),
309 labels: Vec::new(),
310 stats: InjectorStats::default(),
311 scheme_advancer,
312 near_miss_generator,
313 near_miss_labels: Vec::new(),
314 co_occurrence_handler,
315 queued_co_occurrences: Vec::new(),
316 temporal_cluster_generator,
317 difficulty_calculator,
318 entity_aware_injector,
319 behavioral_baseline,
320 scheme_actions: Vec::new(),
321 difficulty_distribution: HashMap::new(),
322 vendor_contexts: HashMap::new(),
323 employee_contexts: HashMap::new(),
324 account_contexts: HashMap::new(),
325 }
326 }
327
328 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
330 debug!(
331 entry_count = entries.len(),
332 total_rate = self.config.rates.total_rate,
333 seed = self.config.seed,
334 "Injecting anomalies into journal entries"
335 );
336
337 let mut modified_documents = Vec::new();
338 let mut duplicates = Vec::new();
339
340 for entry in entries.iter_mut() {
341 self.stats.total_processed += 1;
342
343 if let Some(ref mut baseline) = self.behavioral_baseline {
345 use super::context::Observation;
346 let entity_id = entry.header.created_by.clone();
348 let observation =
349 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
350 baseline.record_observation(&entity_id, observation);
351 }
352
353 if !self.should_process(entry) {
355 continue;
356 }
357
358 let entry_date = entry.posting_date();
360 let ready_indices: Vec<usize> = self
361 .queued_co_occurrences
362 .iter()
363 .enumerate()
364 .filter(|(_, q)| entry_date >= q.earliest_date)
365 .map(|(i, _)| i)
366 .collect();
367
368 if let Some(&idx) = ready_indices.first() {
369 let queued = self.queued_co_occurrences.remove(idx);
370 if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
371 label = label.with_metadata("co_occurrence", "true");
372 label = label.with_metadata("co_occurrence_description", &queued.description);
373 if let Some(ref target) = queued.target_entity {
374 label = label.with_related_entity(target);
375 label = label.with_metadata("co_occurrence_target", target);
376 }
377 modified_documents.push(entry.document_number().clone());
378 self.labels.push(label);
379 self.stats.total_injected += 1;
380 }
381 continue; }
383
384 let base_rate = self.config.rates.total_rate;
386
387 let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
389 let employee_id = &entry.header.created_by;
390 let first_account = entry
391 .lines
392 .first()
393 .map(|l| l.gl_account.as_str())
394 .unwrap_or("");
395 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
397
398 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
399 let employee_ctx = self.employee_contexts.get(employee_id);
400 let account_ctx = self.account_contexts.get(first_account);
401
402 let multiplier =
403 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
404 (base_rate * multiplier).min(1.0)
405 } else {
406 self.calculate_context_rate_multiplier(entry) * base_rate
408 };
409
410 if let Some(ref tcg) = self.temporal_cluster_generator {
412 let temporal_multiplier = tcg
413 .get_active_clusters(entry_date)
414 .iter()
415 .map(|c| c.rate_multiplier)
416 .fold(1.0_f64, f64::max);
417 effective_rate = (effective_rate * temporal_multiplier).min(1.0);
418 }
419
420 if should_inject_anomaly(
422 effective_rate,
423 entry_date,
424 &self.config.patterns.temporal_pattern,
425 &mut self.rng,
426 ) {
427 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
429 let account = entry
431 .lines
432 .first()
433 .map(|l| l.gl_account.clone())
434 .unwrap_or_default();
435 near_miss_gen.record_transaction(
436 entry.document_number().clone(),
437 entry_date,
438 entry.total_debit(),
439 &account,
440 None,
441 );
442
443 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
445 entry.document_number().clone(),
446 entry_date,
447 entry.total_debit(),
448 &account,
449 None,
450 &self.config.enhanced.approval_thresholds,
451 ) {
452 self.near_miss_labels.push(near_miss_label);
453 continue; }
455 }
456
457 let anomaly_type = self.select_anomaly_category();
459
460 let target_entity = {
462 let mut candidates: Vec<String> =
463 self.vendor_contexts.keys().cloned().collect();
464 candidates.extend(self.employee_contexts.keys().cloned());
465 if candidates.is_empty() {
466 if let Some(ref r) = entry.header.reference {
468 candidates.push(r.clone());
469 }
470 }
471 self.entity_targeting
472 .select_entity(&candidates, &mut self.rng)
473 };
474
475 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
477 if let Some(ref entity_id) = target_entity {
479 label = label.with_metadata("entity_target", entity_id);
480 label = label.with_related_entity(entity_id);
481 label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
482 target_type: "Entity".to_string(),
483 target_id: entity_id.clone(),
484 });
485 }
486
487 if let Some(ref calculator) = self.difficulty_calculator {
489 let difficulty = calculator.calculate(&label);
490
491 label =
493 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
494 label = label.with_metadata(
495 "difficulty_score",
496 &difficulty.difficulty_score().to_string(),
497 );
498
499 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
501 }
502
503 modified_documents.push(entry.document_number().clone());
504 self.labels.push(label);
505 self.stats.total_injected += 1;
506
507 if let Some(ref co_occ) = self.co_occurrence_handler {
509 let correlated =
510 co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
511 for result in correlated {
512 self.queued_co_occurrences.push(QueuedAnomaly {
513 anomaly_type: result.anomaly_type,
514 target_entity: if result.same_entity {
515 target_entity.clone()
516 } else {
517 None
518 },
519 earliest_date: entry_date
520 + chrono::Duration::days(i64::from(result.lag_days)),
521 description: result.description,
522 });
523 }
524 }
525 }
526
527 if self.config.allow_duplicates
529 && matches!(
530 self.labels.last().map(|l| &l.anomaly_type),
531 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
532 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
533 )
534 {
535 let dup_strategy = DuplicationStrategy::default();
536 let duplicate =
537 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
538 duplicates.push(duplicate);
539 }
540 }
541 }
542
543 let duplicates_created = duplicates.len();
545
546 let summary = AnomalySummary::from_anomalies(&self.labels);
548
549 InjectionBatchResult {
550 entries_processed: self.stats.total_processed,
551 anomalies_injected: self.stats.total_injected,
552 duplicates_created,
553 labels: self.labels.clone(),
554 summary,
555 modified_documents,
556 near_miss_labels: self.near_miss_labels.clone(),
557 scheme_actions: self.scheme_actions.clone(),
558 difficulty_distribution: self.difficulty_distribution.clone(),
559 }
560 }
561
562 fn should_process(&mut self, entry: &JournalEntry) -> bool {
564 if !self.config.target_companies.is_empty()
566 && !self
567 .config
568 .target_companies
569 .iter()
570 .any(|c| c == entry.company_code())
571 {
572 self.stats.skipped_company += 1;
573 return false;
574 }
575
576 if let Some((start, end)) = self.config.date_range {
578 if entry.posting_date() < start || entry.posting_date() > end {
579 self.stats.skipped_date += 1;
580 return false;
581 }
582 }
583
584 let current_count = self
586 .document_anomaly_counts
587 .get(&entry.document_number())
588 .copied()
589 .unwrap_or(0);
590 if current_count >= self.config.max_anomalies_per_document {
591 self.stats.skipped_max_per_doc += 1;
592 return false;
593 }
594
595 true
596 }
597
598 fn select_anomaly_category(&mut self) -> AnomalyType {
600 let r = self.rng.random::<f64>();
601 let rates = &self.config.rates;
602
603 let mut cumulative = 0.0;
604
605 cumulative += rates.fraud_rate;
606 if r < cumulative {
607 return self.type_selector.select_fraud(&mut self.rng);
608 }
609
610 cumulative += rates.error_rate;
611 if r < cumulative {
612 return self.type_selector.select_error(&mut self.rng);
613 }
614
615 cumulative += rates.process_issue_rate;
616 if r < cumulative {
617 return self.type_selector.select_process_issue(&mut self.rng);
618 }
619
620 cumulative += rates.statistical_rate;
621 if r < cumulative {
622 return self.type_selector.select_statistical(&mut self.rng);
623 }
624
625 self.type_selector.select_relational(&mut self.rng)
626 }
627
628 fn inject_anomaly(
630 &mut self,
631 entry: &mut JournalEntry,
632 anomaly_type: AnomalyType,
633 ) -> Option<LabeledAnomaly> {
634 if !self.strategies.can_apply(entry, &anomaly_type) {
636 return None;
637 }
638
639 let result = self
641 .strategies
642 .apply_strategy(entry, &anomaly_type, &mut self.rng);
643
644 if !result.success {
645 return None;
646 }
647
648 *self
650 .document_anomaly_counts
651 .entry(entry.document_number().clone())
652 .or_insert(0) += 1;
653
654 let category = anomaly_type.category().to_string();
656 let type_name = anomaly_type.type_name();
657
658 *self.stats.by_category.entry(category).or_insert(0) += 1;
659 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
660 *self
661 .stats
662 .by_company
663 .entry(entry.company_code().to_string())
664 .or_insert(0) += 1;
665
666 if self.config.generate_labels {
668 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
669
670 entry.header.is_anomaly = true;
672 entry.header.anomaly_id = Some(anomaly_id.clone());
673 entry.header.anomaly_type = Some(type_name.clone());
674
675 let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
677 Vec::new();
678 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
679 entry.header.is_fraud = true;
680 if let AnomalyType::Fraud(ref ft) = anomaly_type {
681 entry.header.fraud_type = Some(*ft);
682 }
683 secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
689 }
690
691 let mut label = LabeledAnomaly::new(
692 anomaly_id,
693 anomaly_type.clone(),
694 entry.document_number().clone(),
695 "JE".to_string(),
696 entry.company_code().to_string(),
697 entry.posting_date(),
698 )
699 .with_description(&result.description)
700 .with_injection_strategy(&type_name);
701
702 let causal_reason = AnomalyCausalReason::RandomRate {
704 base_rate: self.config.rates.total_rate,
705 };
706 label = label.with_causal_reason(causal_reason);
707
708 let context_multiplier = self.calculate_context_rate_multiplier(entry);
710 if (context_multiplier - 1.0).abs() > f64::EPSILON {
711 label = label.with_metadata(
712 "entity_context_multiplier",
713 &format!("{context_multiplier:.3}"),
714 );
715 label = label.with_metadata(
716 "effective_rate",
717 &format!(
718 "{:.6}",
719 (self.config.rates.total_rate * context_multiplier).min(1.0)
720 ),
721 );
722 }
723
724 if let Some(impact) = result.monetary_impact {
726 label = label.with_monetary_impact(impact);
727 }
728
729 for entity in &result.related_entities {
731 label = label.with_related_entity(entity);
732 }
733
734 for (key, value) in &result.metadata {
736 label = label.with_metadata(key, value);
737 }
738
739 if let Some(cluster_id) =
741 self.cluster_manager
742 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
743 {
744 label = label.with_cluster(&cluster_id);
745 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
747 cluster_id: cluster_id.clone(),
748 });
749 }
750
751 for issue_type in &secondary_process_issues {
758 let child_id = format!("ANO{:08}", self.labels.len() + 1);
759 let child = LabeledAnomaly::new(
760 child_id,
761 AnomalyType::ProcessIssue(*issue_type),
762 entry.document_number().clone(),
763 "JE".to_string(),
764 entry.company_code().to_string(),
765 entry.posting_date(),
766 )
767 .with_description("Forensic pattern from fraud behavioral bias")
768 .with_injection_strategy("behavioral_bias")
769 .with_parent_anomaly(&label.anomaly_id);
770 self.labels.push(child);
771 }
772
773 return Some(label);
774 }
775
776 None
777 }
778
/// Injects the given `anomaly_type` into `entry`, bypassing the rate check,
/// filters and category selection of `process_entries`.
///
/// Thin public wrapper over `inject_anomaly`. Returns `None` when the
/// strategy cannot be applied, fails, or label generation is disabled.
/// The returned label is handed to the caller and is not appended to the
/// injector's own label list by this call.
pub fn inject_specific(
    &mut self,
    entry: &mut JournalEntry,
    anomaly_type: AnomalyType,
) -> Option<LabeledAnomaly> {
    self.inject_anomaly(entry, anomaly_type)
}
787
788 pub fn create_self_approval(
790 &mut self,
791 entry: &mut JournalEntry,
792 user_id: &str,
793 ) -> Option<LabeledAnomaly> {
794 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
795
796 let label = LabeledAnomaly::new(
797 format!("ANO{:08}", self.labels.len() + 1),
798 anomaly_type,
799 entry.document_number().clone(),
800 "JE".to_string(),
801 entry.company_code().to_string(),
802 entry.posting_date(),
803 )
804 .with_description(&format!("User {user_id} approved their own transaction"))
805 .with_related_entity(user_id)
806 .with_injection_strategy("ManualSelfApproval")
807 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
808 target_type: "User".to_string(),
809 target_id: user_id.to_string(),
810 });
811
812 entry.header.is_anomaly = true;
814 entry.header.is_fraud = true;
815 entry.header.anomaly_id = Some(label.anomaly_id.clone());
816 entry.header.anomaly_type = Some("SelfApproval".to_string());
817 entry.header.fraud_type = Some(FraudType::SelfApproval);
818
819 entry.header.created_by = user_id.to_string();
821
822 self.apply_fraud_behavioral_bias(entry);
825
826 self.labels.push(label.clone());
827 Some(label)
828 }
829
830 pub fn create_sod_violation(
832 &mut self,
833 entry: &mut JournalEntry,
834 user_id: &str,
835 conflicting_duties: (&str, &str),
836 ) -> Option<LabeledAnomaly> {
837 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
838
839 let label = LabeledAnomaly::new(
840 format!("ANO{:08}", self.labels.len() + 1),
841 anomaly_type,
842 entry.document_number().clone(),
843 "JE".to_string(),
844 entry.company_code().to_string(),
845 entry.posting_date(),
846 )
847 .with_description(&format!(
848 "User {} performed conflicting duties: {} and {}",
849 user_id, conflicting_duties.0, conflicting_duties.1
850 ))
851 .with_related_entity(user_id)
852 .with_metadata("duty1", conflicting_duties.0)
853 .with_metadata("duty2", conflicting_duties.1)
854 .with_injection_strategy("ManualSoDViolation")
855 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
856 target_type: "User".to_string(),
857 target_id: user_id.to_string(),
858 });
859
860 entry.header.is_anomaly = true;
862 entry.header.is_fraud = true;
863 entry.header.anomaly_id = Some(label.anomaly_id.clone());
864 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
865 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
866
867 self.apply_fraud_behavioral_bias(entry);
869
870 self.labels.push(label.clone());
871 Some(label)
872 }
873
874 pub fn create_ic_mismatch(
876 &mut self,
877 entry: &mut JournalEntry,
878 matching_company: &str,
879 expected_amount: Decimal,
880 actual_amount: Decimal,
881 ) -> Option<LabeledAnomaly> {
882 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
883
884 let label = LabeledAnomaly::new(
885 format!("ANO{:08}", self.labels.len() + 1),
886 anomaly_type,
887 entry.document_number().clone(),
888 "JE".to_string(),
889 entry.company_code().to_string(),
890 entry.posting_date(),
891 )
892 .with_description(&format!(
893 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
894 ))
895 .with_related_entity(matching_company)
896 .with_monetary_impact(actual_amount - expected_amount)
897 .with_metadata("expected_amount", &expected_amount.to_string())
898 .with_metadata("actual_amount", &actual_amount.to_string())
899 .with_injection_strategy("ManualICMismatch")
900 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
901 target_type: "Intercompany".to_string(),
902 target_id: matching_company.to_string(),
903 });
904
905 entry.header.is_anomaly = true;
907 entry.header.anomaly_id = Some(label.anomaly_id.clone());
908 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
909
910 self.labels.push(label.clone());
911 Some(label)
912 }
913
/// Returns all labels recorded since construction or the last `reset`.
pub fn get_labels(&self) -> &[LabeledAnomaly] {
    &self.labels
}
918
/// Builds a fresh summary from the labels recorded so far.
pub fn get_summary(&self) -> AnomalySummary {
    AnomalySummary::from_anomalies(&self.labels)
}
923
/// Returns the running injection counters.
pub fn get_stats(&self) -> &InjectorStats {
    &self.stats
}
928
929 pub fn reset(&mut self) {
931 self.labels.clear();
932 self.document_anomaly_counts.clear();
933 self.stats = InjectorStats::default();
934 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
935
936 self.near_miss_labels.clear();
938 self.scheme_actions.clear();
939 self.difficulty_distribution.clear();
940
941 if let Some(ref mut baseline) = self.behavioral_baseline {
942 *baseline =
943 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
944 }
945 }
946
/// Returns the number of clusters reported by the cluster manager.
pub fn cluster_count(&self) -> usize {
    self.cluster_manager.cluster_count()
}
951
952 pub fn set_entity_contexts(
965 &mut self,
966 vendors: HashMap<String, VendorContext>,
967 employees: HashMap<String, EmployeeContext>,
968 accounts: HashMap<String, AccountContext>,
969 ) {
970 self.vendor_contexts = vendors;
971 self.employee_contexts = employees;
972 self.account_contexts = accounts;
973 }
974
/// Returns the registered vendor contexts.
pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
    &self.vendor_contexts
}
979
/// Returns the registered employee contexts.
pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
    &self.employee_contexts
}
984
/// Returns the registered account contexts.
pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
    &self.account_contexts
}
989
990 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
999 if self.vendor_contexts.is_empty()
1000 && self.employee_contexts.is_empty()
1001 && self.account_contexts.is_empty()
1002 {
1003 return 1.0;
1004 }
1005
1006 let mut multiplier = 1.0;
1007
1008 if let Some(ref vendor_ref) = entry.header.reference {
1010 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1011 if ctx.is_new {
1013 multiplier *= 2.0;
1014 }
1015 if ctx.is_dormant_reactivation {
1016 multiplier *= 1.5;
1017 }
1018 }
1019 }
1020
1021 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1023 if ctx.is_new {
1024 multiplier *= 1.5;
1025 }
1026 if ctx.is_volume_fatigued {
1027 multiplier *= 1.3;
1028 }
1029 if ctx.is_overtime {
1030 multiplier *= 1.2;
1031 }
1032 }
1033
1034 if let Some(first_line) = entry.lines.first() {
1036 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1037 if ctx.is_high_risk {
1038 multiplier *= 2.0;
1039 }
1040 }
1041 }
1042
1043 multiplier
1044 }
1045
1046 fn apply_fraud_behavioral_bias(
1055 &mut self,
1056 entry: &mut JournalEntry,
1057 ) -> Vec<datasynth_core::models::ProcessIssueType> {
1058 use datasynth_core::models::ProcessIssueType;
1059
1060 let cfg = self.config.enhanced.fraud_behavioral_bias;
1061 let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1062 for issue in &fired {
1063 match issue {
1064 ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1065 ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1066 ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1067 _ => {}
1068 }
1069 }
1070 if cfg.round_dollar_bias > 0.0 {
1075 const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1076 let max_amt: Decimal = entry
1077 .lines
1078 .iter()
1079 .map(|l| l.debit_amount.max(l.credit_amount))
1080 .max()
1081 .unwrap_or(Decimal::ZERO);
1082 if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1083 self.stats.fraud_round_dollar_bias_applied += 1;
1084 }
1085 }
1086 fired
1087 }
1088
1089 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1098 if let Some(ref mut advancer) = self.scheme_advancer {
1099 let context = SchemeContext::new(date, company_code);
1100 let actions = advancer.advance_all(&context);
1101 self.scheme_actions.extend(actions.clone());
1102 actions
1103 } else {
1104 Vec::new()
1105 }
1106 }
1107
1108 pub fn maybe_start_scheme(
1114 &mut self,
1115 date: NaiveDate,
1116 company_code: &str,
1117 available_users: Vec<String>,
1118 available_accounts: Vec<String>,
1119 available_counterparties: Vec<String>,
1120 ) -> Option<uuid::Uuid> {
1121 if let Some(ref mut advancer) = self.scheme_advancer {
1122 let mut context = SchemeContext::new(date, company_code);
1123 context.available_users = available_users;
1124 context.available_accounts = available_accounts;
1125 context.available_counterparties = available_counterparties;
1126
1127 advancer.maybe_start_scheme(&context)
1128 } else {
1129 None
1130 }
1131 }
1132
/// Returns the near-miss labels recorded since construction or the last `reset`.
pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
    &self.near_miss_labels
}
1137
/// Returns the scheme actions collected by `advance_schemes` since
/// construction or the last `reset`.
pub fn get_scheme_actions(&self) -> &[SchemeAction] {
    &self.scheme_actions
}
1142
/// Returns the number of labels recorded per detection difficulty.
///
/// Only populated when difficulty classification is enabled.
pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
    &self.difficulty_distribution
}
1147
1148 pub fn check_behavioral_deviations(
1150 &self,
1151 entity_id: &str,
1152 observation: &super::context::Observation,
1153 ) -> Vec<super::context::BehavioralDeviation> {
1154 if let Some(ref baseline) = self.behavioral_baseline {
1155 baseline.check_deviation(entity_id, observation)
1156 } else {
1157 Vec::new()
1158 }
1159 }
1160
1161 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1163 if let Some(ref baseline) = self.behavioral_baseline {
1164 baseline.get_baseline(entity_id)
1165 } else {
1166 None
1167 }
1168 }
1169
1170 pub fn active_scheme_count(&self) -> usize {
1172 if let Some(ref advancer) = self.scheme_advancer {
1173 advancer.active_scheme_count()
1174 } else {
1175 0
1176 }
1177 }
1178
1179 pub fn has_enhanced_features(&self) -> bool {
1181 self.scheme_advancer.is_some()
1182 || self.near_miss_generator.is_some()
1183 || self.difficulty_calculator.is_some()
1184 || self.entity_aware_injector.is_some()
1185 }
1186}
1187
/// Fluent builder for `AnomalyInjectorConfig`, starting from its defaults.
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration being assembled; returned by `build`.
    config: AnomalyInjectorConfig,
}
1192
impl AnomalyInjectorConfigBuilder {
    /// Starts from `AnomalyInjectorConfig::default()`.
    pub fn new() -> Self {
        Self {
            config: AnomalyInjectorConfig::default(),
        }
    }

    /// Sets the overall injection rate (`rates.total_rate`).
    pub fn with_total_rate(mut self, rate: f64) -> Self {
        self.config.rates.total_rate = rate;
        self
    }

    /// Sets the fraud category rate (`rates.fraud_rate`).
    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
        self.config.rates.fraud_rate = rate;
        self
    }

    /// Sets the error category rate (`rates.error_rate`).
    pub fn with_error_rate(mut self, rate: f64) -> Self {
        self.config.rates.error_rate = rate;
        self
    }

    /// Sets the seed used for the injector's RNG and UUID factory.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.config.seed = seed;
        self
    }

    /// Sets the temporal pattern consulted by the injection check.
    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
        self.config.patterns.temporal_pattern = pattern;
        self
    }

    /// Enables or disables label generation.
    pub fn with_labels(mut self, generate: bool) -> Self {
        self.config.generate_labels = generate;
        self
    }

    /// Restricts injection to the given company codes (empty means all).
    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
        self.config.target_companies = companies;
        self
    }

    /// Restricts injection to posting dates within `start..=end`.
    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
        self.config.date_range = Some((start, end));
        self
    }

    /// Sets the multi-stage scheme switch and the base scheme probability.
    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
        self.config.enhanced.multi_stage_schemes_enabled = enabled;
        self.config.enhanced.scheme_probability = probability;
        self
    }

    /// Sets the near-miss switch and the near-miss proportion.
    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
        self.config.enhanced.near_miss_enabled = enabled;
        self.config.enhanced.near_miss_proportion = proportion;
        self
    }

    /// Sets the approval thresholds used by the near-miss check.
    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
        self.config.enhanced.approval_thresholds = thresholds;
        self
    }

    /// Enables or disables correlated (co-occurrence) injection.
    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
        self.config.enhanced.correlated_injection_enabled = enabled;
        self
    }

    /// Sets the temporal clustering switch and the period-end multiplier.
    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
        self.config.enhanced.temporal_clustering_enabled = enabled;
        self.config.enhanced.period_end_multiplier = multiplier;
        self
    }

    /// Enables or disables detection-difficulty classification.
    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
        self.config.enhanced.difficulty_classification_enabled = enabled;
        self
    }

    /// Enables or disables context-aware injection.
    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
        self.config.enhanced.context_aware_enabled = enabled;
        self
    }

    /// Replaces the behavioral baseline configuration.
    ///
    /// The baseline is only built when context-aware injection is enabled too.
    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
        self.config.enhanced.behavioral_baseline_config = config;
        self
    }

    /// Turns on every enhanced feature with preset values: scheme probability
    /// 0.02, period-end multiplier 2.5 and near-miss proportion 0.30.
    ///
    /// Approval thresholds and the fraud behavioral bias are left unchanged.
    pub fn with_all_enhanced_features(mut self) -> Self {
        self.config.enhanced.multi_stage_schemes_enabled = true;
        self.config.enhanced.scheme_probability = 0.02;
        self.config.enhanced.correlated_injection_enabled = true;
        self.config.enhanced.temporal_clustering_enabled = true;
        self.config.enhanced.period_end_multiplier = 2.5;
        self.config.enhanced.near_miss_enabled = true;
        self.config.enhanced.near_miss_proportion = 0.30;
        self.config.enhanced.difficulty_classification_enabled = true;
        self.config.enhanced.context_aware_enabled = true;
        self.config.enhanced.behavioral_baseline_config.enabled = true;
        self
    }

    /// Consumes the builder and returns the assembled configuration.
    pub fn build(self) -> AnomalyInjectorConfig {
        self.config
    }
}
1324
1325impl Default for AnomalyInjectorConfigBuilder {
1326 fn default() -> Self {
1327 Self::new()
1328 }
1329}
1330
1331#[cfg(test)]
1332mod tests {
1333 use super::*;
1334 use chrono::NaiveDate;
1335 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1336 use rust_decimal_macros::dec;
1337
1338 fn create_test_entry(doc_num: &str) -> JournalEntry {
1339 let mut entry = JournalEntry::new_simple(
1340 doc_num.to_string(),
1341 "1000".to_string(),
1342 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1343 "Test Entry".to_string(),
1344 );
1345
1346 entry.add_line(JournalEntryLine {
1347 line_number: 1,
1348 gl_account: "5000".to_string(),
1349 debit_amount: dec!(1000),
1350 ..Default::default()
1351 });
1352
1353 entry.add_line(JournalEntryLine {
1354 line_number: 2,
1355 gl_account: "1000".to_string(),
1356 credit_amount: dec!(1000),
1357 ..Default::default()
1358 });
1359
1360 entry
1361 }
1362
/// With a total rate of 0.5 and seed 42, processing 100 entries must inject
/// at least one anomaly and produce at least one label per injected anomaly.
#[test]
fn test_anomaly_injector_basic() {
    let builder = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42);
    let mut injector = AnomalyInjector::new(builder.build());

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(100);
    for i in 0..100 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let outcome = injector.process_entries(&mut entries);

    assert!(outcome.anomalies_injected > 0);
    assert!(!outcome.labels.is_empty());
    assert!(outcome.labels.len() >= outcome.anomalies_injected);
}
1387
/// `inject_specific` with a statistical anomaly type must return a label
/// whose `document_id` is non-empty and equals the entry's document number.
#[test]
fn test_specific_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let requested = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let maybe_label = injector.inject_specific(&mut entry, requested);
    assert!(maybe_label.is_some());

    let label = maybe_label.unwrap();
    assert!(!label.document_id.is_empty());
    assert_eq!(label.document_id, entry.document_number());
}
1404
/// `create_self_approval` must yield a `Fraud(SelfApproval)` label that lists
/// the supplied user id among its related entities.
#[test]
fn test_self_approval_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let maybe_label = injector.create_self_approval(&mut entry, "USER001");
    assert!(maybe_label.is_some());

    let label = maybe_label.unwrap();
    assert!(matches!(
        label.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    ));

    let mentions_user = label
        .related_entities
        .iter()
        .any(|entity| entity.as_str() == "USER001");
    assert!(mentions_user);
}
1421
/// With `target_companies` restricted to `"2000"` and a total rate of 1.0,
/// entries produced by `create_test_entry` must receive no anomalies.
#[test]
fn test_company_filtering() {
    let only_company_2000 = vec!["2000".to_string()];
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0)
        .with_target_companies(only_company_2000)
        .build();
    let mut injector = AnomalyInjector::new(config);

    let mut entries: Vec<JournalEntry> = ["JE001", "JE002"]
        .iter()
        .map(|doc| create_test_entry(doc))
        .collect();

    let outcome = injector.process_entries(&mut entries);

    assert_eq!(outcome.anomalies_injected, 0);
}
1441
1442 fn create_test_entry_with_context(
1448 doc_num: &str,
1449 vendor_ref: Option<&str>,
1450 employee_id: &str,
1451 gl_account: &str,
1452 ) -> JournalEntry {
1453 let mut entry = JournalEntry::new_simple(
1454 doc_num.to_string(),
1455 "1000".to_string(),
1456 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1457 "Test Entry".to_string(),
1458 );
1459
1460 entry.header.reference = vendor_ref.map(|v| v.to_string());
1461 entry.header.created_by = employee_id.to_string();
1462
1463 entry.add_line(JournalEntryLine {
1464 line_number: 1,
1465 gl_account: gl_account.to_string(),
1466 debit_amount: dec!(1000),
1467 ..Default::default()
1468 });
1469
1470 entry.add_line(JournalEntryLine {
1471 line_number: 2,
1472 gl_account: "1000".to_string(),
1473 credit_amount: dec!(1000),
1474 ..Default::default()
1475 });
1476
1477 entry
1478 }
1479
/// A fresh injector has no entity contexts; after `set_entity_contexts` each
/// of the three maps holds exactly the single entry that was supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing registered yet.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    // Sizes first, then the specific keys.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1530
/// Without any entity contexts, a configured total rate of 0.5 over 200
/// entries must give an observed injection rate strictly between 20% and 80%.
#[test]
fn test_default_behavior_no_contexts() {
    let builder = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42);
    let mut injector = AnomalyInjector::new(builder.build());

    let mut entries: Vec<JournalEntry> = Vec::with_capacity(200);
    for i in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let outcome = injector.process_entries(&mut entries);
    assert!(outcome.anomalies_injected > 0);

    let injected = outcome.anomalies_injected as f64;
    let processed = outcome.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        rate > 0.2 && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1557
/// Runs the same 500 entries through two injectors configured identically
/// (rate 0.10, seed 123). The injector that was given flagged vendor,
/// employee and account contexts must inject strictly more anomalies than
/// the one without contexts.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share the exact same configuration and input shape.
    let build_config = || {
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(base_rate)
            .with_seed(123)
            .build()
    };
    let build_entries = || -> Vec<JournalEntry> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no entity contexts registered.
    let mut plain_injector = AnomalyInjector::new(build_config());
    let mut plain_entries = build_entries();
    let plain_result = plain_injector.process_entries(&mut plain_entries);

    // Run 2: every entity referenced by the entries carries risk flags.
    let mut ctx_injector = AnomalyInjector::new(build_config());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    ctx_injector.set_entity_contexts(vendors, employees, accounts);

    let mut ctx_entries = build_entries();
    let ctx_result = ctx_injector.process_entries(&mut ctx_entries);

    assert!(
        ctx_result.anomalies_injected > plain_result.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        ctx_result.anomalies_injected,
        plain_result.anomalies_injected,
    );
}
1648
/// Checks `calculate_context_rate_multiplier` for four situations:
/// no contexts registered (1.0), new vendor plus high-risk account (4.0),
/// new vendor only (2.0), and a vendor reference with no registered
/// context (1.0).
#[test]
fn test_risk_score_multiplication() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any contexts are registered the multiplier is neutral.
    let plain_entry = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let baseline = injector.calculate_context_rate_multiplier(&plain_entry);
    assert!((baseline - 1.0).abs() < f64::EPSILON);

    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Flagged vendor and flagged account together.
    let risky_entry = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky_entry);
    assert!(
        (multiplier - 4.0).abs() < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Flagged vendor, ordinary account.
    let vendor_only_entry =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only_entry);
    assert!(
        (multiplier_vendor - 2.0).abs() < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Vendor reference that has no registered context.
    let unmatched_entry =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched_entry);
    assert!(
        (multiplier_none - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1713
/// An employee flagged as new, volume-fatigued and on overtime must yield a
/// context multiplier within 0.01 of `1.5 * 1.3 * 1.2`.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employees = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    // Product of the three per-flag factors this test expects.
    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1745
/// Registered vendor contexts must still be present after `reset()`.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    injector.reset();

    // The count is unchanged by the reset.
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1768
/// Calling `set_entity_contexts` with empty maps must remove previously
/// registered vendor contexts.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // Second call supplies nothing at all.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1790
/// A vendor flagged only as a dormant reactivation must yield a context
/// multiplier of 1.5.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let deviation = (multiplier - 1.5).abs();
    assert!(
        deviation < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1816
/// With every fraud behavioral bias set to 1.0, injecting a fraud anomaly
/// into a Monday (2024-06-10) entry of 1237 must:
/// move the posting date to a weekend, keep the entry balanced at one of the
/// listed round totals, put `created_at` outside hours 6..22, set
/// `is_post_close`, and bump each of the four bias counters to 1.
#[test]
fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
    use chrono::{Datelike, Timelike, Weekday};
    use datasynth_core::models::FraudType;

    let always_on = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = always_on;
    let mut injector = AnomalyInjector::new(config);

    // 2024-06-10 is a Monday and 1237 is not a round amount, so every bias
    // has something to change.
    let weekday_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        "JE001".to_string(),
        "1000".to_string(),
        weekday_date,
        "Test Entry".to_string(),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".to_string(),
        debit_amount: dec!(1237),
        ..Default::default()
    };
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".to_string(),
        credit_amount: dec!(1237),
        ..Default::default()
    };
    for line in [debit_line, credit_line] {
        entry.add_line(line);
    }

    let _ =
        injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    // Weekend bias.
    let posted_on = entry.header.posting_date.weekday();
    assert!(
        matches!(posted_on, Weekday::Sat | Weekday::Sun),
        "expected weekend posting date, got {:?}",
        entry.header.posting_date.weekday()
    );

    // Round-dollar bias, with debits and credits still matching.
    let debit_total: Decimal = entry.lines.iter().map(|line| line.debit_amount).sum();
    let credit_total: Decimal = entry.lines.iter().map(|line| line.credit_amount).sum();
    assert_eq!(debit_total, credit_total, "entry must remain balanced");

    let round_targets = [
        dec!(1_000),
        dec!(5_000),
        dec!(10_000),
        dec!(25_000),
        dec!(50_000),
        dec!(100_000),
    ];
    assert!(
        round_targets.iter().any(|target| *target == debit_total),
        "expected round-dollar total, got {}",
        debit_total
    );

    // Off-hours bias: the hour must fall outside 6..22.
    let hour = entry.header.created_at.hour();
    assert!(
        hour < 6 || hour >= 22,
        "expected off-hours timestamp, got hour {}",
        hour
    );

    // Post-close bias.
    assert!(entry.header.is_post_close);

    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 1);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
    assert_eq!(stats.fraud_off_hours_bias_applied, 1);
    assert_eq!(stats.fraud_post_close_bias_applied, 1);
}
1906
/// With the bias feature enabled but every bias rate at 0.0, a fraud
/// injection must leave the posting date and `is_post_close` untouched and
/// all four bias counters at zero.
#[test]
fn fraud_behavioral_bias_rate_zero_applies_nothing() {
    use datasynth_core::models::FraudType;

    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let never_fires = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 0.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 0.0,
        post_close_bias: 0.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = never_fires;
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let _ =
        injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    assert_eq!(entry.header.posting_date, original_date);
    assert!(!entry.header.is_post_close);

    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
    assert_eq!(stats.fraud_off_hours_bias_applied, 0);
    assert_eq!(stats.fraud_post_close_bias_applied, 0);
}
1937
/// Even with every bias rate at 1.0, injecting a statistical (non-fraud)
/// anomaly must not move the posting date or count a weekend bias.
#[test]
fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let always_on = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = always_on;
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let non_fraud = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let _ = injector.inject_specific(&mut entry, non_fraud);

    assert_eq!(entry.header.posting_date, original_date);
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
}
1964
/// With weekend, off-hours and post-close biases at 1.0 (round-dollar at
/// 0.0), a fraud injection must leave exactly three labels in
/// `get_labels()`, one each for `WeekendPosting`, `AfterHoursPosting` and
/// `PostClosePosting`, while the returned primary label keeps the fraud type.
#[test]
fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
    use datasynth_core::models::{FraudType, ProcessIssueType};

    let bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = bias;
    let mut injector = AnomalyInjector::new(config);

    let posting_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        String::from("JE001"),
        String::from("1000"),
        posting_date,
        String::from("Test"),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: String::from("5000"),
        debit_amount: dec!(1000),
        ..Default::default()
    };
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: String::from("1000"),
        credit_amount: dec!(1000),
        ..Default::default()
    };
    for line in [debit_line, credit_line] {
        entry.add_line(line);
    }

    let primary = injector
        .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
        .expect("fraud label should be produced");

    let labels = injector.get_labels();
    assert_eq!(
        labels.len(),
        3,
        "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
    );

    // Each expected secondary type must appear among the stored labels.
    let has_label = |wanted: AnomalyType| labels.iter().any(|l| l.anomaly_type == wanted);
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::WeekendPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::AfterHoursPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::PostClosePosting
    )));

    assert_eq!(
        primary.anomaly_type,
        AnomalyType::Fraud(FraudType::FictitiousEntry)
    );
}
2024}