1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25 RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31 EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38 TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48 pub rates: AnomalyRateConfig,
50 pub patterns: AnomalyPatternConfig,
52 pub seed: u64,
54 pub generate_labels: bool,
56 pub allow_duplicates: bool,
58 pub max_anomalies_per_document: usize,
60 pub target_companies: Vec<String>,
62 pub date_range: Option<(NaiveDate, NaiveDate)>,
64 pub enhanced: EnhancedInjectionConfig,
66}
67
68#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71 pub multi_stage_schemes_enabled: bool,
73 pub scheme_probability: f64,
75 pub correlated_injection_enabled: bool,
77 pub temporal_clustering_enabled: bool,
79 pub period_end_multiplier: f64,
81 pub near_miss_enabled: bool,
83 pub near_miss_proportion: f64,
85 pub approval_thresholds: Vec<Decimal>,
87 pub difficulty_classification_enabled: bool,
89 pub context_aware_enabled: bool,
91 pub behavioral_baseline_config: BehavioralBaselineConfig,
93 pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
98}
99
/// Probabilities with which a fraud-labelled entry is nudged towards typical
/// forensic patterns. Each `*_bias` is compared against a uniform draw in
/// `[0, 1)`; a value of `0.0` (or less) disables that bias.
#[derive(Debug, Clone)]
pub struct FraudBehavioralBiasConfig {
    /// Master switch; when `false` no bias is applied at all.
    pub enabled: bool,
    /// Chance of moving the posting date onto a weekend.
    pub weekend_bias: f64,
    /// Chance of snapping a two-line entry to a round amount.
    pub round_dollar_bias: f64,
    /// Chance of rewriting the creation timestamp to an off-hours time.
    pub off_hours_bias: f64,
    /// Chance of flagging the entry as a post-close posting.
    pub post_close_bias: f64,
}
120
121impl Default for FraudBehavioralBiasConfig {
122 fn default() -> Self {
123 Self {
124 enabled: true,
125 weekend_bias: 0.30,
126 round_dollar_bias: 0.40,
127 off_hours_bias: 0.35,
128 post_close_bias: 0.25,
129 }
130 }
131}
132
133impl Default for AnomalyInjectorConfig {
134 fn default() -> Self {
135 Self {
136 rates: AnomalyRateConfig::default(),
137 patterns: AnomalyPatternConfig::default(),
138 seed: 42,
139 generate_labels: true,
140 allow_duplicates: true,
141 max_anomalies_per_document: 2,
142 target_companies: Vec::new(),
143 date_range: None,
144 enhanced: EnhancedInjectionConfig::default(),
145 }
146 }
147}
148
149#[derive(Debug, Clone)]
151pub struct InjectionBatchResult {
152 pub entries_processed: usize,
154 pub anomalies_injected: usize,
156 pub duplicates_created: usize,
158 pub labels: Vec<LabeledAnomaly>,
160 pub summary: AnomalySummary,
162 pub modified_documents: Vec<String>,
164 pub near_miss_labels: Vec<NearMissLabel>,
166 pub scheme_actions: Vec<SchemeAction>,
168 pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
170}
171
172pub struct AnomalyInjector {
174 config: AnomalyInjectorConfig,
175 rng: ChaCha8Rng,
176 uuid_factory: DeterministicUuidFactory,
177 type_selector: AnomalyTypeSelector,
178 strategies: StrategyCollection,
179 cluster_manager: ClusterManager,
180 entity_targeting: EntityTargetingManager,
182 document_anomaly_counts: HashMap<String, usize>,
184 labels: Vec<LabeledAnomaly>,
186 stats: InjectorStats,
188 scheme_advancer: Option<SchemeAdvancer>,
191 near_miss_generator: Option<NearMissGenerator>,
193 near_miss_labels: Vec<NearMissLabel>,
195 co_occurrence_handler: Option<AnomalyCoOccurrence>,
197 queued_co_occurrences: Vec<QueuedAnomaly>,
199 temporal_cluster_generator: Option<TemporalClusterGenerator>,
201 difficulty_calculator: Option<DifficultyCalculator>,
203 entity_aware_injector: Option<EntityAwareInjector>,
205 behavioral_baseline: Option<BehavioralBaseline>,
207 scheme_actions: Vec<SchemeAction>,
209 difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
211 vendor_contexts: HashMap<String, VendorContext>,
214 employee_contexts: HashMap<String, EmployeeContext>,
216 account_contexts: HashMap<String, AccountContext>,
218}
219
/// Running counters maintained by `AnomalyInjector`.
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries handed to `process_entries`.
    pub total_processed: usize,
    /// Anomalies injected by `process_entries`.
    pub total_injected: usize,
    /// Successful strategy applications per anomaly category.
    pub by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    pub by_company: HashMap<String, usize>,
    /// NOTE(review): never incremented in the code visible here.
    pub skipped_rate: usize,
    /// Entries skipped because they fall outside the configured date range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    pub skipped_company: usize,
    /// Entries skipped because the per-document anomaly cap was reached.
    pub skipped_max_per_doc: usize,
    /// Times the weekend bias moved a posting date.
    pub fraud_weekend_bias_applied: usize,
    /// Times the round-dollar bias rewrote amounts.
    pub fraud_round_dollar_bias_applied: usize,
    /// Times the off-hours bias rewrote the creation timestamp.
    pub fraud_off_hours_bias_applied: usize,
    /// Times the post-close bias flagged an entry.
    pub fraud_post_close_bias_applied: usize,
}
250
251struct QueuedAnomaly {
253 anomaly_type: AnomalyType,
255 target_entity: Option<String>,
257 earliest_date: NaiveDate,
259 description: String,
261}
262
263impl AnomalyInjector {
264 pub fn new(config: AnomalyInjectorConfig) -> Self {
266 let mut rng = seeded_rng(config.seed, 0);
267 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
268 let entity_targeting =
269 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
270
271 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
273 let scheme_config = SchemeAdvancerConfig {
274 embezzlement_probability: config.enhanced.scheme_probability,
275 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
276 kickback_probability: config.enhanced.scheme_probability * 0.5,
277 seed: rng.random(),
278 ..Default::default()
279 };
280 Some(SchemeAdvancer::new(scheme_config))
281 } else {
282 None
283 };
284
285 let near_miss_generator = if config.enhanced.near_miss_enabled {
286 let near_miss_config = NearMissConfig {
287 proportion: config.enhanced.near_miss_proportion,
288 seed: rng.random(),
289 ..Default::default()
290 };
291 Some(NearMissGenerator::new(near_miss_config))
292 } else {
293 None
294 };
295
296 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
297 Some(AnomalyCoOccurrence::new())
298 } else {
299 None
300 };
301
302 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
303 Some(TemporalClusterGenerator::new())
304 } else {
305 None
306 };
307
308 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
309 Some(DifficultyCalculator::new())
310 } else {
311 None
312 };
313
314 let entity_aware_injector = if config.enhanced.context_aware_enabled {
315 Some(EntityAwareInjector::default())
316 } else {
317 None
318 };
319
320 let behavioral_baseline = if config.enhanced.context_aware_enabled
321 && config.enhanced.behavioral_baseline_config.enabled
322 {
323 Some(BehavioralBaseline::new(
324 config.enhanced.behavioral_baseline_config.clone(),
325 ))
326 } else {
327 None
328 };
329
330 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
331
332 Self {
333 config,
334 rng,
335 uuid_factory,
336 type_selector: AnomalyTypeSelector::new(),
337 strategies: StrategyCollection::default(),
338 cluster_manager,
339 entity_targeting,
340 document_anomaly_counts: HashMap::new(),
341 labels: Vec::new(),
342 stats: InjectorStats::default(),
343 scheme_advancer,
344 near_miss_generator,
345 near_miss_labels: Vec::new(),
346 co_occurrence_handler,
347 queued_co_occurrences: Vec::new(),
348 temporal_cluster_generator,
349 difficulty_calculator,
350 entity_aware_injector,
351 behavioral_baseline,
352 scheme_actions: Vec::new(),
353 difficulty_distribution: HashMap::new(),
354 vendor_contexts: HashMap::new(),
355 employee_contexts: HashMap::new(),
356 account_contexts: HashMap::new(),
357 }
358 }
359
360 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
362 debug!(
363 entry_count = entries.len(),
364 total_rate = self.config.rates.total_rate,
365 seed = self.config.seed,
366 "Injecting anomalies into journal entries"
367 );
368
369 let mut modified_documents = Vec::new();
370 let mut duplicates = Vec::new();
371
372 for entry in entries.iter_mut() {
373 self.stats.total_processed += 1;
374
375 if let Some(ref mut baseline) = self.behavioral_baseline {
377 use super::context::Observation;
378 let entity_id = entry.header.created_by.clone();
380 let observation =
381 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
382 baseline.record_observation(&entity_id, observation);
383 }
384
385 if !self.should_process(entry) {
387 continue;
388 }
389
390 let entry_date = entry.posting_date();
392 let ready_indices: Vec<usize> = self
393 .queued_co_occurrences
394 .iter()
395 .enumerate()
396 .filter(|(_, q)| entry_date >= q.earliest_date)
397 .map(|(i, _)| i)
398 .collect();
399
400 if let Some(&idx) = ready_indices.first() {
401 let queued = self.queued_co_occurrences.remove(idx);
402 if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
403 label = label.with_metadata("co_occurrence", "true");
404 label = label.with_metadata("co_occurrence_description", &queued.description);
405 if let Some(ref target) = queued.target_entity {
406 label = label.with_related_entity(target);
407 label = label.with_metadata("co_occurrence_target", target);
408 }
409 modified_documents.push(entry.document_number().clone());
410 self.labels.push(label);
411 self.stats.total_injected += 1;
412 }
413 continue; }
415
416 let base_rate = self.config.rates.total_rate;
418
419 let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
421 let employee_id = &entry.header.created_by;
422 let first_account = entry
423 .lines
424 .first()
425 .map(|l| l.gl_account.as_str())
426 .unwrap_or("");
427 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
429
430 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
431 let employee_ctx = self.employee_contexts.get(employee_id);
432 let account_ctx = self.account_contexts.get(first_account);
433
434 let multiplier =
435 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
436 (base_rate * multiplier).min(1.0)
437 } else {
438 self.calculate_context_rate_multiplier(entry) * base_rate
440 };
441
442 if let Some(ref tcg) = self.temporal_cluster_generator {
444 let temporal_multiplier = tcg
445 .get_active_clusters(entry_date)
446 .iter()
447 .map(|c| c.rate_multiplier)
448 .fold(1.0_f64, f64::max);
449 effective_rate = (effective_rate * temporal_multiplier).min(1.0);
450 }
451
452 if should_inject_anomaly(
454 effective_rate,
455 entry_date,
456 &self.config.patterns.temporal_pattern,
457 &mut self.rng,
458 ) {
459 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
461 let account = entry
463 .lines
464 .first()
465 .map(|l| l.gl_account.clone())
466 .unwrap_or_default();
467 near_miss_gen.record_transaction(
468 entry.document_number().clone(),
469 entry_date,
470 entry.total_debit(),
471 &account,
472 None,
473 );
474
475 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
477 entry.document_number().clone(),
478 entry_date,
479 entry.total_debit(),
480 &account,
481 None,
482 &self.config.enhanced.approval_thresholds,
483 ) {
484 self.near_miss_labels.push(near_miss_label);
485 continue; }
487 }
488
489 let anomaly_type = self.select_anomaly_category();
491
492 let target_entity = {
494 let mut candidates: Vec<String> =
495 self.vendor_contexts.keys().cloned().collect();
496 candidates.extend(self.employee_contexts.keys().cloned());
497 if candidates.is_empty() {
498 if let Some(ref r) = entry.header.reference {
500 candidates.push(r.clone());
501 }
502 }
503 self.entity_targeting
504 .select_entity(&candidates, &mut self.rng)
505 };
506
507 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
509 if let Some(ref entity_id) = target_entity {
511 label = label.with_metadata("entity_target", entity_id);
512 label = label.with_related_entity(entity_id);
513 label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
514 target_type: "Entity".to_string(),
515 target_id: entity_id.clone(),
516 });
517 }
518
519 if let Some(ref calculator) = self.difficulty_calculator {
521 let difficulty = calculator.calculate(&label);
522
523 label =
525 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
526 label = label.with_metadata(
527 "difficulty_score",
528 &difficulty.difficulty_score().to_string(),
529 );
530
531 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
533 }
534
535 modified_documents.push(entry.document_number().clone());
536 self.labels.push(label);
537 self.stats.total_injected += 1;
538
539 if let Some(ref co_occ) = self.co_occurrence_handler {
541 let correlated =
542 co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
543 for result in correlated {
544 self.queued_co_occurrences.push(QueuedAnomaly {
545 anomaly_type: result.anomaly_type,
546 target_entity: if result.same_entity {
547 target_entity.clone()
548 } else {
549 None
550 },
551 earliest_date: entry_date
552 + chrono::Duration::days(i64::from(result.lag_days)),
553 description: result.description,
554 });
555 }
556 }
557 }
558
559 if self.config.allow_duplicates
561 && matches!(
562 self.labels.last().map(|l| &l.anomaly_type),
563 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
564 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
565 )
566 {
567 let dup_strategy = DuplicationStrategy::default();
568 let duplicate =
569 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
570 duplicates.push(duplicate);
571 }
572 }
573 }
574
575 let duplicates_created = duplicates.len();
577
578 let summary = AnomalySummary::from_anomalies(&self.labels);
580
581 InjectionBatchResult {
582 entries_processed: self.stats.total_processed,
583 anomalies_injected: self.stats.total_injected,
584 duplicates_created,
585 labels: self.labels.clone(),
586 summary,
587 modified_documents,
588 near_miss_labels: self.near_miss_labels.clone(),
589 scheme_actions: self.scheme_actions.clone(),
590 difficulty_distribution: self.difficulty_distribution.clone(),
591 }
592 }
593
594 fn should_process(&mut self, entry: &JournalEntry) -> bool {
596 if !self.config.target_companies.is_empty()
598 && !self
599 .config
600 .target_companies
601 .iter()
602 .any(|c| c == entry.company_code())
603 {
604 self.stats.skipped_company += 1;
605 return false;
606 }
607
608 if let Some((start, end)) = self.config.date_range {
610 if entry.posting_date() < start || entry.posting_date() > end {
611 self.stats.skipped_date += 1;
612 return false;
613 }
614 }
615
616 let current_count = self
618 .document_anomaly_counts
619 .get(&entry.document_number())
620 .copied()
621 .unwrap_or(0);
622 if current_count >= self.config.max_anomalies_per_document {
623 self.stats.skipped_max_per_doc += 1;
624 return false;
625 }
626
627 true
628 }
629
630 fn select_anomaly_category(&mut self) -> AnomalyType {
632 let r = self.rng.random::<f64>();
633 let rates = &self.config.rates;
634
635 let mut cumulative = 0.0;
636
637 cumulative += rates.fraud_rate;
638 if r < cumulative {
639 return self.type_selector.select_fraud(&mut self.rng);
640 }
641
642 cumulative += rates.error_rate;
643 if r < cumulative {
644 return self.type_selector.select_error(&mut self.rng);
645 }
646
647 cumulative += rates.process_issue_rate;
648 if r < cumulative {
649 return self.type_selector.select_process_issue(&mut self.rng);
650 }
651
652 cumulative += rates.statistical_rate;
653 if r < cumulative {
654 return self.type_selector.select_statistical(&mut self.rng);
655 }
656
657 self.type_selector.select_relational(&mut self.rng)
658 }
659
660 fn inject_anomaly(
662 &mut self,
663 entry: &mut JournalEntry,
664 anomaly_type: AnomalyType,
665 ) -> Option<LabeledAnomaly> {
666 if !self.strategies.can_apply(entry, &anomaly_type) {
668 return None;
669 }
670
671 let result = self
673 .strategies
674 .apply_strategy(entry, &anomaly_type, &mut self.rng);
675
676 if !result.success {
677 return None;
678 }
679
680 *self
682 .document_anomaly_counts
683 .entry(entry.document_number().clone())
684 .or_insert(0) += 1;
685
686 let category = anomaly_type.category().to_string();
688 let type_name = anomaly_type.type_name();
689
690 *self.stats.by_category.entry(category).or_insert(0) += 1;
691 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
692 *self
693 .stats
694 .by_company
695 .entry(entry.company_code().to_string())
696 .or_insert(0) += 1;
697
698 if self.config.generate_labels {
700 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
701
702 entry.header.is_anomaly = true;
704 entry.header.anomaly_id = Some(anomaly_id.clone());
705 entry.header.anomaly_type = Some(type_name.clone());
706
707 let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
709 Vec::new();
710 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
711 entry.header.is_fraud = true;
712 if let AnomalyType::Fraud(ref ft) = anomaly_type {
713 entry.header.fraud_type = Some(*ft);
714 }
715 secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
721 }
722
723 let mut label = LabeledAnomaly::new(
724 anomaly_id,
725 anomaly_type.clone(),
726 entry.document_number().clone(),
727 "JE".to_string(),
728 entry.company_code().to_string(),
729 entry.posting_date(),
730 )
731 .with_description(&result.description)
732 .with_injection_strategy(&type_name);
733
734 let causal_reason = AnomalyCausalReason::RandomRate {
736 base_rate: self.config.rates.total_rate,
737 };
738 label = label.with_causal_reason(causal_reason);
739
740 let context_multiplier = self.calculate_context_rate_multiplier(entry);
742 if (context_multiplier - 1.0).abs() > f64::EPSILON {
743 label = label.with_metadata(
744 "entity_context_multiplier",
745 &format!("{context_multiplier:.3}"),
746 );
747 label = label.with_metadata(
748 "effective_rate",
749 &format!(
750 "{:.6}",
751 (self.config.rates.total_rate * context_multiplier).min(1.0)
752 ),
753 );
754 }
755
756 if let Some(impact) = result.monetary_impact {
758 label = label.with_monetary_impact(impact);
759 }
760
761 for entity in &result.related_entities {
763 label = label.with_related_entity(entity);
764 }
765
766 for (key, value) in &result.metadata {
768 label = label.with_metadata(key, value);
769 }
770
771 if let Some(cluster_id) =
773 self.cluster_manager
774 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
775 {
776 label = label.with_cluster(&cluster_id);
777 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
779 cluster_id: cluster_id.clone(),
780 });
781 }
782
783 for issue_type in &secondary_process_issues {
790 let child_id = format!("ANO{:08}", self.labels.len() + 1);
791 let child = LabeledAnomaly::new(
792 child_id,
793 AnomalyType::ProcessIssue(*issue_type),
794 entry.document_number().clone(),
795 "JE".to_string(),
796 entry.company_code().to_string(),
797 entry.posting_date(),
798 )
799 .with_description("Forensic pattern from fraud behavioral bias")
800 .with_injection_strategy("behavioral_bias")
801 .with_parent_anomaly(&label.anomaly_id);
802 self.labels.push(child);
803 }
804
805 return Some(label);
806 }
807
808 None
809 }
810
811 pub fn inject_specific(
813 &mut self,
814 entry: &mut JournalEntry,
815 anomaly_type: AnomalyType,
816 ) -> Option<LabeledAnomaly> {
817 self.inject_anomaly(entry, anomaly_type)
818 }
819
820 pub fn create_self_approval(
822 &mut self,
823 entry: &mut JournalEntry,
824 user_id: &str,
825 ) -> Option<LabeledAnomaly> {
826 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
827
828 let label = LabeledAnomaly::new(
829 format!("ANO{:08}", self.labels.len() + 1),
830 anomaly_type,
831 entry.document_number().clone(),
832 "JE".to_string(),
833 entry.company_code().to_string(),
834 entry.posting_date(),
835 )
836 .with_description(&format!("User {user_id} approved their own transaction"))
837 .with_related_entity(user_id)
838 .with_injection_strategy("ManualSelfApproval")
839 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
840 target_type: "User".to_string(),
841 target_id: user_id.to_string(),
842 });
843
844 entry.header.is_anomaly = true;
846 entry.header.is_fraud = true;
847 entry.header.anomaly_id = Some(label.anomaly_id.clone());
848 entry.header.anomaly_type = Some("SelfApproval".to_string());
849 entry.header.fraud_type = Some(FraudType::SelfApproval);
850
851 entry.header.created_by = user_id.to_string();
853
854 self.labels.push(label.clone());
855 Some(label)
856 }
857
858 pub fn create_sod_violation(
860 &mut self,
861 entry: &mut JournalEntry,
862 user_id: &str,
863 conflicting_duties: (&str, &str),
864 ) -> Option<LabeledAnomaly> {
865 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
866
867 let label = LabeledAnomaly::new(
868 format!("ANO{:08}", self.labels.len() + 1),
869 anomaly_type,
870 entry.document_number().clone(),
871 "JE".to_string(),
872 entry.company_code().to_string(),
873 entry.posting_date(),
874 )
875 .with_description(&format!(
876 "User {} performed conflicting duties: {} and {}",
877 user_id, conflicting_duties.0, conflicting_duties.1
878 ))
879 .with_related_entity(user_id)
880 .with_metadata("duty1", conflicting_duties.0)
881 .with_metadata("duty2", conflicting_duties.1)
882 .with_injection_strategy("ManualSoDViolation")
883 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
884 target_type: "User".to_string(),
885 target_id: user_id.to_string(),
886 });
887
888 entry.header.is_anomaly = true;
890 entry.header.is_fraud = true;
891 entry.header.anomaly_id = Some(label.anomaly_id.clone());
892 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
893 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
894
895 self.labels.push(label.clone());
896 Some(label)
897 }
898
899 pub fn create_ic_mismatch(
901 &mut self,
902 entry: &mut JournalEntry,
903 matching_company: &str,
904 expected_amount: Decimal,
905 actual_amount: Decimal,
906 ) -> Option<LabeledAnomaly> {
907 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
908
909 let label = LabeledAnomaly::new(
910 format!("ANO{:08}", self.labels.len() + 1),
911 anomaly_type,
912 entry.document_number().clone(),
913 "JE".to_string(),
914 entry.company_code().to_string(),
915 entry.posting_date(),
916 )
917 .with_description(&format!(
918 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
919 ))
920 .with_related_entity(matching_company)
921 .with_monetary_impact(actual_amount - expected_amount)
922 .with_metadata("expected_amount", &expected_amount.to_string())
923 .with_metadata("actual_amount", &actual_amount.to_string())
924 .with_injection_strategy("ManualICMismatch")
925 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
926 target_type: "Intercompany".to_string(),
927 target_id: matching_company.to_string(),
928 });
929
930 entry.header.is_anomaly = true;
932 entry.header.anomaly_id = Some(label.anomaly_id.clone());
933 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
934
935 self.labels.push(label.clone());
936 Some(label)
937 }
938
939 pub fn get_labels(&self) -> &[LabeledAnomaly] {
941 &self.labels
942 }
943
944 pub fn get_summary(&self) -> AnomalySummary {
946 AnomalySummary::from_anomalies(&self.labels)
947 }
948
949 pub fn get_stats(&self) -> &InjectorStats {
951 &self.stats
952 }
953
954 pub fn reset(&mut self) {
956 self.labels.clear();
957 self.document_anomaly_counts.clear();
958 self.stats = InjectorStats::default();
959 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
960
961 self.near_miss_labels.clear();
963 self.scheme_actions.clear();
964 self.difficulty_distribution.clear();
965
966 if let Some(ref mut baseline) = self.behavioral_baseline {
967 *baseline =
968 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
969 }
970 }
971
972 pub fn cluster_count(&self) -> usize {
974 self.cluster_manager.cluster_count()
975 }
976
977 pub fn set_entity_contexts(
990 &mut self,
991 vendors: HashMap<String, VendorContext>,
992 employees: HashMap<String, EmployeeContext>,
993 accounts: HashMap<String, AccountContext>,
994 ) {
995 self.vendor_contexts = vendors;
996 self.employee_contexts = employees;
997 self.account_contexts = accounts;
998 }
999
1000 pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
1002 &self.vendor_contexts
1003 }
1004
1005 pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
1007 &self.employee_contexts
1008 }
1009
1010 pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
1012 &self.account_contexts
1013 }
1014
1015 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1024 if self.vendor_contexts.is_empty()
1025 && self.employee_contexts.is_empty()
1026 && self.account_contexts.is_empty()
1027 {
1028 return 1.0;
1029 }
1030
1031 let mut multiplier = 1.0;
1032
1033 if let Some(ref vendor_ref) = entry.header.reference {
1035 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1036 if ctx.is_new {
1038 multiplier *= 2.0;
1039 }
1040 if ctx.is_dormant_reactivation {
1041 multiplier *= 1.5;
1042 }
1043 }
1044 }
1045
1046 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1048 if ctx.is_new {
1049 multiplier *= 1.5;
1050 }
1051 if ctx.is_volume_fatigued {
1052 multiplier *= 1.3;
1053 }
1054 if ctx.is_overtime {
1055 multiplier *= 1.2;
1056 }
1057 }
1058
1059 if let Some(first_line) = entry.lines.first() {
1061 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1062 if ctx.is_high_risk {
1063 multiplier *= 2.0;
1064 }
1065 }
1066 }
1067
1068 multiplier
1069 }
1070
1071 fn apply_fraud_behavioral_bias(
1084 &mut self,
1085 entry: &mut JournalEntry,
1086 ) -> Vec<datasynth_core::models::ProcessIssueType> {
1087 use chrono::{Datelike, Duration, TimeZone, Utc, Weekday};
1088 use datasynth_core::models::ProcessIssueType;
1089
1090 let mut fired: Vec<ProcessIssueType> = Vec::new();
1091
1092 let cfg = &self.config.enhanced.fraud_behavioral_bias;
1093 if !cfg.enabled {
1094 return fired;
1095 }
1096
1097 if cfg.weekend_bias > 0.0 && self.rng.random::<f64>() < cfg.weekend_bias {
1099 let original = entry.header.posting_date;
1100 let days_to_weekend = match original.weekday() {
1101 Weekday::Mon => 5,
1102 Weekday::Tue => 4,
1103 Weekday::Wed => 3,
1104 Weekday::Thu => 2,
1105 Weekday::Fri => 1,
1106 Weekday::Sat | Weekday::Sun => 0,
1107 };
1108 let extra = if self.rng.random_bool(0.5) { 0 } else { 1 };
1109 entry.header.posting_date = original + Duration::days(days_to_weekend + extra);
1110 self.stats.fraud_weekend_bias_applied += 1;
1111 fired.push(ProcessIssueType::WeekendPosting);
1112 }
1113
1114 if cfg.round_dollar_bias > 0.0 && self.rng.random::<f64>() < cfg.round_dollar_bias {
1116 const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1117 if entry.lines.len() == 2 {
1118 let (debit_idx, credit_idx) = if entry.lines[0].is_debit() {
1119 (0, 1)
1120 } else {
1121 (1, 0)
1122 };
1123 let current = entry.lines[debit_idx]
1124 .debit_amount
1125 .max(entry.lines[credit_idx].credit_amount);
1126 if current > Decimal::ZERO {
1127 let current_f64: f64 = current.try_into().unwrap_or(0.0);
1129 let target = ROUND_TARGETS
1130 .iter()
1131 .min_by(|a, b| {
1132 let da = (**a as f64 - current_f64).abs();
1133 let db = (**b as f64 - current_f64).abs();
1134 da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
1135 })
1136 .copied()
1137 .unwrap_or(1_000);
1138 let rounded = Decimal::from(target);
1139 entry.lines[debit_idx].debit_amount = rounded;
1140 entry.lines[debit_idx].credit_amount = Decimal::ZERO;
1141 entry.lines[credit_idx].debit_amount = Decimal::ZERO;
1142 entry.lines[credit_idx].credit_amount = rounded;
1143 self.stats.fraud_round_dollar_bias_applied += 1;
1144 }
1145 }
1146 }
1147
1148 if cfg.off_hours_bias > 0.0 && self.rng.random::<f64>() < cfg.off_hours_bias {
1150 let hour: u32 = if self.rng.random_bool(0.5) {
1152 self.rng.random_range(22..24)
1153 } else {
1154 self.rng.random_range(0..6)
1155 };
1156 let minute: u32 = self.rng.random_range(0..60);
1157 let second: u32 = self.rng.random_range(0..60);
1158 if let chrono::LocalResult::Single(new_ts) = Utc.with_ymd_and_hms(
1159 entry.header.posting_date.year(),
1160 entry.header.posting_date.month(),
1161 entry.header.posting_date.day(),
1162 hour,
1163 minute,
1164 second,
1165 ) {
1166 entry.header.created_at = new_ts;
1167 self.stats.fraud_off_hours_bias_applied += 1;
1168 fired.push(ProcessIssueType::AfterHoursPosting);
1169 }
1170 }
1171
1172 if cfg.post_close_bias > 0.0
1174 && self.rng.random::<f64>() < cfg.post_close_bias
1175 && !entry.header.is_post_close
1176 {
1177 entry.header.is_post_close = true;
1178 self.stats.fraud_post_close_bias_applied += 1;
1179 fired.push(ProcessIssueType::PostClosePosting);
1180 }
1181
1182 fired
1183 }
1184
1185 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1194 if let Some(ref mut advancer) = self.scheme_advancer {
1195 let context = SchemeContext::new(date, company_code);
1196 let actions = advancer.advance_all(&context);
1197 self.scheme_actions.extend(actions.clone());
1198 actions
1199 } else {
1200 Vec::new()
1201 }
1202 }
1203
1204 pub fn maybe_start_scheme(
1210 &mut self,
1211 date: NaiveDate,
1212 company_code: &str,
1213 available_users: Vec<String>,
1214 available_accounts: Vec<String>,
1215 available_counterparties: Vec<String>,
1216 ) -> Option<uuid::Uuid> {
1217 if let Some(ref mut advancer) = self.scheme_advancer {
1218 let mut context = SchemeContext::new(date, company_code);
1219 context.available_users = available_users;
1220 context.available_accounts = available_accounts;
1221 context.available_counterparties = available_counterparties;
1222
1223 advancer.maybe_start_scheme(&context)
1224 } else {
1225 None
1226 }
1227 }
1228
1229 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1231 &self.near_miss_labels
1232 }
1233
1234 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1236 &self.scheme_actions
1237 }
1238
1239 pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
1241 &self.difficulty_distribution
1242 }
1243
1244 pub fn check_behavioral_deviations(
1246 &self,
1247 entity_id: &str,
1248 observation: &super::context::Observation,
1249 ) -> Vec<super::context::BehavioralDeviation> {
1250 if let Some(ref baseline) = self.behavioral_baseline {
1251 baseline.check_deviation(entity_id, observation)
1252 } else {
1253 Vec::new()
1254 }
1255 }
1256
1257 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1259 if let Some(ref baseline) = self.behavioral_baseline {
1260 baseline.get_baseline(entity_id)
1261 } else {
1262 None
1263 }
1264 }
1265
1266 pub fn active_scheme_count(&self) -> usize {
1268 if let Some(ref advancer) = self.scheme_advancer {
1269 advancer.active_scheme_count()
1270 } else {
1271 0
1272 }
1273 }
1274
1275 pub fn has_enhanced_features(&self) -> bool {
1277 self.scheme_advancer.is_some()
1278 || self.near_miss_generator.is_some()
1279 || self.difficulty_calculator.is_some()
1280 || self.entity_aware_injector.is_some()
1281 }
1282}
1283
1284pub struct AnomalyInjectorConfigBuilder {
1286 config: AnomalyInjectorConfig,
1287}
1288
1289impl AnomalyInjectorConfigBuilder {
1290 pub fn new() -> Self {
1292 Self {
1293 config: AnomalyInjectorConfig::default(),
1294 }
1295 }
1296
1297 pub fn with_total_rate(mut self, rate: f64) -> Self {
1299 self.config.rates.total_rate = rate;
1300 self
1301 }
1302
1303 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1305 self.config.rates.fraud_rate = rate;
1306 self
1307 }
1308
1309 pub fn with_error_rate(mut self, rate: f64) -> Self {
1311 self.config.rates.error_rate = rate;
1312 self
1313 }
1314
1315 pub fn with_seed(mut self, seed: u64) -> Self {
1317 self.config.seed = seed;
1318 self
1319 }
1320
1321 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1323 self.config.patterns.temporal_pattern = pattern;
1324 self
1325 }
1326
1327 pub fn with_labels(mut self, generate: bool) -> Self {
1329 self.config.generate_labels = generate;
1330 self
1331 }
1332
1333 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1335 self.config.target_companies = companies;
1336 self
1337 }
1338
1339 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1341 self.config.date_range = Some((start, end));
1342 self
1343 }
1344
1345 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1351 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1352 self.config.enhanced.scheme_probability = probability;
1353 self
1354 }
1355
1356 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1358 self.config.enhanced.near_miss_enabled = enabled;
1359 self.config.enhanced.near_miss_proportion = proportion;
1360 self
1361 }
1362
1363 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1365 self.config.enhanced.approval_thresholds = thresholds;
1366 self
1367 }
1368
1369 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1371 self.config.enhanced.correlated_injection_enabled = enabled;
1372 self
1373 }
1374
1375 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1377 self.config.enhanced.temporal_clustering_enabled = enabled;
1378 self.config.enhanced.period_end_multiplier = multiplier;
1379 self
1380 }
1381
1382 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1384 self.config.enhanced.difficulty_classification_enabled = enabled;
1385 self
1386 }
1387
1388 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1390 self.config.enhanced.context_aware_enabled = enabled;
1391 self
1392 }
1393
1394 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1396 self.config.enhanced.behavioral_baseline_config = config;
1397 self
1398 }
1399
1400 pub fn with_all_enhanced_features(mut self) -> Self {
1402 self.config.enhanced.multi_stage_schemes_enabled = true;
1403 self.config.enhanced.scheme_probability = 0.02;
1404 self.config.enhanced.correlated_injection_enabled = true;
1405 self.config.enhanced.temporal_clustering_enabled = true;
1406 self.config.enhanced.period_end_multiplier = 2.5;
1407 self.config.enhanced.near_miss_enabled = true;
1408 self.config.enhanced.near_miss_proportion = 0.30;
1409 self.config.enhanced.difficulty_classification_enabled = true;
1410 self.config.enhanced.context_aware_enabled = true;
1411 self.config.enhanced.behavioral_baseline_config.enabled = true;
1412 self
1413 }
1414
1415 pub fn build(self) -> AnomalyInjectorConfig {
1417 self.config
1418 }
1419}
1420
1421impl Default for AnomalyInjectorConfigBuilder {
1422 fn default() -> Self {
1423 Self::new()
1424 }
1425}
1426
1427#[cfg(test)]
1428#[allow(clippy::unwrap_used)]
1429mod tests {
1430 use super::*;
1431 use chrono::NaiveDate;
1432 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1433 use rust_decimal_macros::dec;
1434
1435 fn create_test_entry(doc_num: &str) -> JournalEntry {
1436 let mut entry = JournalEntry::new_simple(
1437 doc_num.to_string(),
1438 "1000".to_string(),
1439 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1440 "Test Entry".to_string(),
1441 );
1442
1443 entry.add_line(JournalEntryLine {
1444 line_number: 1,
1445 gl_account: "5000".to_string(),
1446 debit_amount: dec!(1000),
1447 ..Default::default()
1448 });
1449
1450 entry.add_line(JournalEntryLine {
1451 line_number: 2,
1452 gl_account: "1000".to_string(),
1453 credit_amount: dec!(1000),
1454 ..Default::default()
1455 });
1456
1457 entry
1458 }
1459
/// With a 0.5 total rate and a fixed seed, processing 100 entries must
/// inject at least one anomaly and produce at least as many labels as
/// injected anomalies.
#[test]
fn test_anomaly_injector_basic() {
    let builder = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42);
    let mut injector = AnomalyInjector::new(builder.build());

    let mut entries = Vec::with_capacity(100);
    for i in 0..100 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let outcome = injector.process_entries(&mut entries);

    assert!(outcome.anomalies_injected > 0);
    assert!(!outcome.labels.is_empty());
    assert!(outcome.labels.len() >= outcome.anomalies_injected);
}
1484
/// Injecting a specific anomaly type must yield a label whose document id
/// is non-empty and equals the entry's document number.
#[test]
fn test_specific_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let maybe_label = injector.inject_specific(
        &mut entry,
        AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
    );

    assert!(maybe_label.is_some());
    let produced = maybe_label.unwrap();
    assert!(!produced.document_id.is_empty());
    assert_eq!(produced.document_id, entry.document_number());
}
1501
/// `create_self_approval` must yield a `SelfApproval` fraud label that
/// lists the given user among its related entities.
#[test]
fn test_self_approval_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let maybe_label = injector.create_self_approval(&mut entry, "USER001");
    assert!(maybe_label.is_some());

    let produced = maybe_label.unwrap();
    let is_self_approval = matches!(
        produced.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    );
    assert!(is_self_approval);

    let user = "USER001".to_string();
    assert!(produced.related_entities.contains(&user));
}
1518
/// With injection targeted only at company "2000", no anomaly may be
/// injected into entries built by `create_test_entry`.
///
/// The helper passes "1000" as the second `new_simple` argument
/// (presumably the company code), so none of them match the target.
#[test]
fn test_company_filtering() {
    let targeted = vec!["2000".to_string()];
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0)
        .with_target_companies(targeted)
        .build();
    let mut injector = AnomalyInjector::new(config);

    let mut entries: Vec<JournalEntry> = ["JE001", "JE002"]
        .iter()
        .map(|doc_num| create_test_entry(doc_num))
        .collect();

    let outcome = injector.process_entries(&mut entries);

    assert_eq!(outcome.anomalies_injected, 0);
}
1538
1539 fn create_test_entry_with_context(
1545 doc_num: &str,
1546 vendor_ref: Option<&str>,
1547 employee_id: &str,
1548 gl_account: &str,
1549 ) -> JournalEntry {
1550 let mut entry = JournalEntry::new_simple(
1551 doc_num.to_string(),
1552 "1000".to_string(),
1553 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1554 "Test Entry".to_string(),
1555 );
1556
1557 entry.header.reference = vendor_ref.map(|v| v.to_string());
1558 entry.header.created_by = employee_id.to_string();
1559
1560 entry.add_line(JournalEntryLine {
1561 line_number: 1,
1562 gl_account: gl_account.to_string(),
1563 debit_amount: dec!(1000),
1564 ..Default::default()
1565 });
1566
1567 entry.add_line(JournalEntryLine {
1568 line_number: 2,
1569 gl_account: "1000".to_string(),
1570 credit_amount: dec!(1000),
1571 ..Default::default()
1572 });
1573
1574 entry
1575 }
1576
/// A fresh injector has no entity contexts; after `set_entity_contexts`
/// each of the three maps holds exactly the entry that was supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing is registered before the setter is called.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1627
/// Without entity contexts, a 0.5 total rate over 200 entries must land
/// strictly between 20% and 80% injected.
#[test]
fn test_default_behavior_no_contexts() {
    let builder = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42);
    let mut injector = AnomalyInjector::new(builder.build());

    let mut entries = Vec::with_capacity(200);
    for i in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let outcome = injector.process_entries(&mut entries);
    assert!(outcome.anomalies_injected > 0);

    let injected = outcome.anomalies_injected as f64;
    let processed = outcome.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        rate > 0.2 && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1654
/// Two injectors with the same rate, seed and input are compared; the one
/// given flagged vendor, employee and account contexts must inject
/// strictly more anomalies.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share configuration and input; only the contexts differ.
    let build_config = || {
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(base_rate)
            .with_seed(123)
            .build()
    };
    let build_entries = || -> Vec<JournalEntry> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no entity contexts registered.
    let mut injector_no_ctx = AnomalyInjector::new(build_config());
    let mut entries_no_ctx = build_entries();
    let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);

    // Run 2: every entity referenced by the entries is flagged.
    let mut injector_ctx = AnomalyInjector::new(build_config());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector_ctx.set_entity_contexts(vendors, employees, accounts);

    let mut entries_ctx = build_entries();
    let result_ctx = injector_ctx.process_entries(&mut entries_ctx);

    assert!(
        result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        result_ctx.anomalies_injected,
        result_no_ctx.anomalies_injected,
    );
}
1745
/// Checks `calculate_context_rate_multiplier` for four cases: no contexts
/// registered (1.0), flagged vendor plus flagged account (4.0), flagged
/// vendor only (2.0), and an unknown vendor (1.0).
#[test]
fn test_risk_score_multiplication() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any context is registered the multiplier is neutral.
    let plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let neutral = injector.calculate_context_rate_multiplier(&plain);
    assert!((neutral - 1.0).abs() < f64::EPSILON);

    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Flagged vendor and flagged account on the same entry.
    let risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky);
    assert!(
        (multiplier - 4.0).abs() < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Flagged vendor, account without a registered context.
    let vendor_only = create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only);
    assert!(
        (multiplier_vendor - 2.0).abs() < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Vendor reference that matches no registered context.
    let unmatched = create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched);
    assert!(
        (multiplier_none - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1810
/// An employee flagged as new, volume-fatigued and on overtime must give
/// a multiplier within 0.01 of `1.5 * 1.3 * 1.2`.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employees = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let expected = 1.5 * 1.3 * 1.2;
    let gap = (multiplier - expected).abs();
    assert!(
        gap < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1842
/// Vendor contexts registered on the injector must still be present after
/// `reset()` is called.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    injector.reset();

    assert_eq!(injector.vendor_contexts().len(), 1);
}
1865
/// Calling `set_entity_contexts` with empty maps must replace, and so
/// clear, previously registered vendor contexts.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());

    assert!(injector.vendor_contexts().is_empty());
}
1887
/// A vendor flagged only as a dormant reactivation must give a 1.5x
/// context multiplier.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let gap = (multiplier - 1.5).abs();
    assert!(
        gap < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1913
/// With every fraud behavioral bias set to 1.0, a fraud injection must
/// apply all four biases to the entry and count each exactly once.
///
/// The four biases are weekend posting date, round-dollar total,
/// off-hours creation time and the post-close flag.
#[test]
fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
    use chrono::{Datelike, Timelike, Weekday};
    use datasynth_core::models::FraudType;

    let always = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = always;
    let mut injector = AnomalyInjector::new(config);

    // 2024-06-10 is a Monday and 1237 is not a round amount, so both
    // the date and the total must change for the asserts below to hold.
    let weekday_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        "JE001".to_string(),
        "1000".to_string(),
        weekday_date,
        "Test Entry".to_string(),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".to_string(),
        debit_amount: dec!(1237),
        ..Default::default()
    };
    entry.add_line(debit_line);
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".to_string(),
        credit_amount: dec!(1237),
        ..Default::default()
    };
    entry.add_line(credit_line);

    let _ = injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    // Weekend bias: the posting date falls on Saturday or Sunday.
    let posted_on = entry.header.posting_date.weekday();
    assert!(
        matches!(posted_on, Weekday::Sat | Weekday::Sun),
        "expected weekend posting date, got {:?}",
        entry.header.posting_date.weekday()
    );

    // Round-dollar bias: totals stay balanced and hit one of the round targets.
    let debit_total: Decimal = entry.lines.iter().map(|line| line.debit_amount).sum();
    let credit_total: Decimal = entry.lines.iter().map(|line| line.credit_amount).sum();
    assert_eq!(debit_total, credit_total, "entry must remain balanced");
    let round_targets = [
        dec!(1_000),
        dec!(5_000),
        dec!(10_000),
        dec!(25_000),
        dec!(50_000),
        dec!(100_000),
    ];
    assert!(
        round_targets.contains(&debit_total),
        "expected round-dollar total, got {}",
        debit_total
    );

    // Off-hours bias: the creation hour is outside 06:00..22:00.
    let hour = entry.header.created_at.hour();
    assert!(
        hour < 6 || hour >= 22,
        "expected off-hours timestamp, got hour {}",
        hour
    );

    // Post-close bias: the header flag is set.
    assert!(entry.header.is_post_close);

    // Each bias was counted exactly once.
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 1);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
    assert_eq!(stats.fraud_off_hours_bias_applied, 1);
    assert_eq!(stats.fraud_post_close_bias_applied, 1);
}
2003
/// With the bias feature enabled but every rate at 0.0, a fraud injection
/// must leave the posting date and post-close flag untouched, and all four
/// bias counters must stay at zero.
#[test]
fn fraud_behavioral_bias_rate_zero_applies_nothing() {
    use datasynth_core::models::FraudType;

    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let never = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 0.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 0.0,
        post_close_bias: 0.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = never;
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let _ = injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    assert_eq!(entry.header.posting_date, original_date);
    assert!(!entry.header.is_post_close);

    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
    assert_eq!(stats.fraud_off_hours_bias_applied, 0);
    assert_eq!(stats.fraud_post_close_bias_applied, 0);
}
2034
/// Even with every bias rate at 1.0, injecting a non-fraud (statistical)
/// anomaly must not move the posting date or count a weekend bias.
#[test]
fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();

    let always = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = always;
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let non_fraud = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let _ = injector.inject_specific(&mut entry, non_fraud);

    assert_eq!(entry.header.posting_date, original_date);
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
}
2061
/// With the weekend, off-hours and post-close biases at 1.0 and the
/// round-dollar bias at 0.0, a fraud injection must leave exactly three
/// `ProcessIssue` labels in the injector, one per applied bias.
///
/// The primary fraud label is the return value of `inject_specific`.
#[test]
fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
    use datasynth_core::models::{FraudType, ProcessIssueType};

    let bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = bias;
    let mut injector = AnomalyInjector::new(config);

    let entry_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        "JE001".into(),
        "1000".into(),
        entry_date,
        "Test".into(),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".into(),
        debit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(debit_line);
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".into(),
        credit_amount: dec!(1000),
        ..Default::default()
    };
    entry.add_line(credit_line);

    let fraud_type = AnomalyType::Fraud(FraudType::FictitiousEntry);
    let primary = injector
        .inject_specific(&mut entry, fraud_type)
        .expect("fraud label should be produced");

    let labels = injector.get_labels();
    assert_eq!(
        labels.len(),
        3,
        "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
    );

    // One secondary label per bias that was applied.
    let has_label = |wanted: AnomalyType| labels.iter().any(|label| label.anomaly_type == wanted);
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::WeekendPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::AfterHoursPosting
    )));
    assert!(has_label(AnomalyType::ProcessIssue(
        ProcessIssueType::PostClosePosting
    )));

    assert_eq!(
        primary.anomaly_type,
        AnomalyType::Fraud(FraudType::FictitiousEntry)
    );
}
2121}