1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::Rng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25 RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31 EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38 TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
/// Top-level configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Injection rates. `total_rate` is the base per-entry rate; the per-category
    /// rates (fraud, error, process issue, statistical) drive category selection.
    pub rates: AnomalyRateConfig,
    /// Pattern settings (clustering, entity targeting, temporal pattern).
    pub patterns: AnomalyPatternConfig,
    /// Seed for the injector's RNG and its deterministic UUID factory.
    pub seed: u64,
    /// When `false`, `inject_anomaly` still applies the strategy but neither marks
    /// the entry header nor returns a label.
    pub generate_labels: bool,
    /// When `true`, a duplicate entry is built after a duplicate-type anomaly
    /// (`DuplicateEntry` / `DuplicatePayment`) is injected.
    pub allow_duplicates: bool,
    /// Entries whose document already carries this many anomalies are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes eligible for injection; an empty list means every company.
    pub target_companies: Vec<String>,
    /// Optional inclusive posting-date window; entries outside it are skipped.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Switches and parameters for the optional enhanced features.
    pub enhanced: EnhancedInjectionConfig,
}
67
/// Opt-in features layered on top of basic rate-based injection.
///
/// `Default` is derived, so every flag starts `false`, every numeric parameter
/// starts at `0.0`, and `approval_thresholds` starts empty.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Creates a `SchemeAdvancer` when set.
    pub multi_stage_schemes_enabled: bool,
    /// Used as the embezzlement probability; revenue-manipulation and kickback
    /// schemes each get half of this value.
    pub scheme_probability: f64,
    /// Creates an `AnomalyCoOccurrence` handler when set.
    pub correlated_injection_enabled: bool,
    /// Creates a `TemporalClusterGenerator` when set.
    pub temporal_clustering_enabled: bool,
    /// Stored by the builder; not read by the injector code visible in this file.
    pub period_end_multiplier: f64,
    /// Creates a `NearMissGenerator` when set.
    pub near_miss_enabled: bool,
    /// Forwarded to `NearMissConfig::proportion`.
    pub near_miss_proportion: f64,
    /// Approval thresholds handed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Creates a `DifficultyCalculator` and annotates labels with difficulty metadata.
    pub difficulty_classification_enabled: bool,
    /// Creates an `EntityAwareInjector`; also a precondition for the behavioral baseline.
    pub context_aware_enabled: bool,
    /// Baseline settings; the baseline is only built when both `context_aware_enabled`
    /// and this config's `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
}
94
impl Default for AnomalyInjectorConfig {
    /// Default configuration: seed 42, labels on, duplicates allowed, at most two
    /// anomalies per document, no company or date filtering, and all enhanced
    /// features at their derived defaults.
    fn default() -> Self {
        Self {
            rates: AnomalyRateConfig::default(),
            patterns: AnomalyPatternConfig::default(),
            seed: 42,
            generate_labels: true,
            allow_duplicates: true,
            max_anomalies_per_document: 2,
            // Empty list = no company filter.
            target_companies: Vec::new(),
            // `None` = no date filter.
            date_range: None,
            enhanced: EnhancedInjectionConfig::default(),
        }
    }
}
110
/// Outcome of one `AnomalyInjector::process_entries` call.
///
/// Several fields are snapshots of injector-wide state, so they accumulate across
/// calls until `reset` is invoked; this is noted per field below.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Entries seen by the injector (cumulative `stats.total_processed`).
    pub entries_processed: usize,
    /// Anomalies injected (cumulative `stats.total_injected`).
    pub anomalies_injected: usize,
    /// Duplicate entries built during this call only.
    pub duplicates_created: usize,
    /// Copy of all labels held by the injector (cumulative).
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed from all labels held by the injector.
    pub summary: AnomalySummary,
    /// Document numbers of entries modified during this call only.
    pub modified_documents: Vec<String>,
    /// Copy of all near-miss labels held by the injector (cumulative).
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Copy of all scheme actions held by the injector (cumulative).
    pub scheme_actions: Vec<SchemeAction>,
    /// Count of labels per detection difficulty (cumulative).
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
133
/// Stateful engine that injects labeled anomalies into journal entries.
pub struct AnomalyInjector {
    /// Configuration supplied at construction.
    config: AnomalyInjectorConfig,
    /// RNG created from `config.seed`; used for sub-seeds, injection decisions,
    /// category selection, strategies, cluster assignment and duplication.
    rng: ChaCha8Rng,
    /// Deterministic UUID source handed to the duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that check applicability and apply an anomaly to an entry.
    strategies: StrategyCollection,
    /// Assigns injected anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Built from `patterns.entity_targeting`; not consulted by the visible code.
    #[allow(dead_code)]
    entity_targeting: EntityTargetingManager,
    /// Number of anomalies injected so far, keyed by document number.
    document_anomaly_counts: HashMap<String, usize>,
    /// Every label recorded since construction or the last `reset`.
    labels: Vec<LabeledAnomaly>,
    /// Running counters.
    stats: InjectorStats,
    /// Present only when multi-stage schemes are enabled.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Present only when near-miss generation is enabled.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels collected by `process_entries`.
    near_miss_labels: Vec<NearMissLabel>,
    /// Present only when correlated injection is enabled; not consulted by the visible code.
    #[allow(dead_code)]
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Present only when temporal clustering is enabled; not consulted by the visible code.
    #[allow(dead_code)]
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Present only when difficulty classification is enabled.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Present only when context-aware injection is enabled.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Present only when context-aware injection and the baseline config are both enabled.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions accumulated by `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Count of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Vendor contexts; looked up by the entry header's `reference`.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts; looked up by the entry header's `created_by`.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts; looked up by the first line's `gl_account`.
    account_contexts: HashMap<String, AccountContext>,
}
185
/// Running counters maintained by [`AnomalyInjector`].
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries passed through `process_entries`.
    pub total_processed: usize,
    /// Anomalies for which `process_entries` recorded a label.
    pub total_injected: usize,
    /// Successful strategy applications per anomaly category.
    pub by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    pub by_company: HashMap<String, usize>,
    /// NOTE(review): never incremented in the visible code; presumably meant to
    /// count entries that lost the rate roll. TODO confirm.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date fell outside `date_range`.
    pub skipped_date: usize,
    /// Entries skipped because their company is not in `target_companies`.
    pub skipped_company: usize,
    /// Entries skipped because the document reached `max_anomalies_per_document`.
    pub skipped_max_per_doc: usize,
}
208
209impl AnomalyInjector {
210 pub fn new(config: AnomalyInjectorConfig) -> Self {
212 let mut rng = seeded_rng(config.seed, 0);
213 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
214 let entity_targeting =
215 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
216
217 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
219 let scheme_config = SchemeAdvancerConfig {
220 embezzlement_probability: config.enhanced.scheme_probability,
221 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
222 kickback_probability: config.enhanced.scheme_probability * 0.5,
223 seed: rng.random(),
224 ..Default::default()
225 };
226 Some(SchemeAdvancer::new(scheme_config))
227 } else {
228 None
229 };
230
231 let near_miss_generator = if config.enhanced.near_miss_enabled {
232 let near_miss_config = NearMissConfig {
233 proportion: config.enhanced.near_miss_proportion,
234 seed: rng.random(),
235 ..Default::default()
236 };
237 Some(NearMissGenerator::new(near_miss_config))
238 } else {
239 None
240 };
241
242 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
243 Some(AnomalyCoOccurrence::new())
244 } else {
245 None
246 };
247
248 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
249 Some(TemporalClusterGenerator::new())
250 } else {
251 None
252 };
253
254 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
255 Some(DifficultyCalculator::new())
256 } else {
257 None
258 };
259
260 let entity_aware_injector = if config.enhanced.context_aware_enabled {
261 Some(EntityAwareInjector::default())
262 } else {
263 None
264 };
265
266 let behavioral_baseline = if config.enhanced.context_aware_enabled
267 && config.enhanced.behavioral_baseline_config.enabled
268 {
269 Some(BehavioralBaseline::new(
270 config.enhanced.behavioral_baseline_config.clone(),
271 ))
272 } else {
273 None
274 };
275
276 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
277
278 Self {
279 config,
280 rng,
281 uuid_factory,
282 type_selector: AnomalyTypeSelector::new(),
283 strategies: StrategyCollection::default(),
284 cluster_manager,
285 entity_targeting,
286 document_anomaly_counts: HashMap::new(),
287 labels: Vec::new(),
288 stats: InjectorStats::default(),
289 scheme_advancer,
290 near_miss_generator,
291 near_miss_labels: Vec::new(),
292 co_occurrence_handler,
293 temporal_cluster_generator,
294 difficulty_calculator,
295 entity_aware_injector,
296 behavioral_baseline,
297 scheme_actions: Vec::new(),
298 difficulty_distribution: HashMap::new(),
299 vendor_contexts: HashMap::new(),
300 employee_contexts: HashMap::new(),
301 account_contexts: HashMap::new(),
302 }
303 }
304
305 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
307 debug!(
308 entry_count = entries.len(),
309 total_rate = self.config.rates.total_rate,
310 seed = self.config.seed,
311 "Injecting anomalies into journal entries"
312 );
313
314 let mut modified_documents = Vec::new();
315 let mut duplicates = Vec::new();
316
317 for entry in entries.iter_mut() {
318 self.stats.total_processed += 1;
319
320 if let Some(ref mut baseline) = self.behavioral_baseline {
322 use super::context::Observation;
323 let entity_id = entry.header.created_by.clone();
325 let observation =
326 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
327 baseline.record_observation(&entity_id, observation);
328 }
329
330 if !self.should_process(entry) {
332 continue;
333 }
334
335 let base_rate = self.config.rates.total_rate;
337
338 let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
340 let employee_id = &entry.header.created_by;
341 let first_account = entry
342 .lines
343 .first()
344 .map(|l| l.gl_account.as_str())
345 .unwrap_or("");
346 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
348
349 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
350 let employee_ctx = self.employee_contexts.get(employee_id);
351 let account_ctx = self.account_contexts.get(first_account);
352
353 let multiplier =
354 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
355 (base_rate * multiplier).min(1.0)
356 } else {
357 self.calculate_context_rate_multiplier(entry) * base_rate
359 };
360
361 if should_inject_anomaly(
363 effective_rate,
364 entry.posting_date(),
365 &self.config.patterns.temporal_pattern,
366 &mut self.rng,
367 ) {
368 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
370 let account = entry
372 .lines
373 .first()
374 .map(|l| l.gl_account.clone())
375 .unwrap_or_default();
376 near_miss_gen.record_transaction(
377 entry.document_number().clone(),
378 entry.posting_date(),
379 entry.total_debit(),
380 &account,
381 None,
382 );
383
384 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
386 entry.document_number().clone(),
387 entry.posting_date(),
388 entry.total_debit(),
389 &account,
390 None,
391 &self.config.enhanced.approval_thresholds,
392 ) {
393 self.near_miss_labels.push(near_miss_label);
394 continue; }
396 }
397
398 let anomaly_type = self.select_anomaly_category();
400
401 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
403 if let Some(ref calculator) = self.difficulty_calculator {
405 let difficulty = calculator.calculate(&label);
406
407 label =
409 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
410 label = label.with_metadata(
411 "difficulty_score",
412 &difficulty.difficulty_score().to_string(),
413 );
414
415 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
417 }
418
419 modified_documents.push(entry.document_number().clone());
420 self.labels.push(label);
421 self.stats.total_injected += 1;
422 }
423
424 if self.config.allow_duplicates
426 && matches!(
427 self.labels.last().map(|l| &l.anomaly_type),
428 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
429 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
430 )
431 {
432 let dup_strategy = DuplicationStrategy::default();
433 let duplicate =
434 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
435 duplicates.push(duplicate);
436 }
437 }
438 }
439
440 let duplicates_created = duplicates.len();
442
443 let summary = AnomalySummary::from_anomalies(&self.labels);
445
446 InjectionBatchResult {
447 entries_processed: self.stats.total_processed,
448 anomalies_injected: self.stats.total_injected,
449 duplicates_created,
450 labels: self.labels.clone(),
451 summary,
452 modified_documents,
453 near_miss_labels: self.near_miss_labels.clone(),
454 scheme_actions: self.scheme_actions.clone(),
455 difficulty_distribution: self.difficulty_distribution.clone(),
456 }
457 }
458
459 fn should_process(&mut self, entry: &JournalEntry) -> bool {
461 if !self.config.target_companies.is_empty()
463 && !self
464 .config
465 .target_companies
466 .iter()
467 .any(|c| c == entry.company_code())
468 {
469 self.stats.skipped_company += 1;
470 return false;
471 }
472
473 if let Some((start, end)) = self.config.date_range {
475 if entry.posting_date() < start || entry.posting_date() > end {
476 self.stats.skipped_date += 1;
477 return false;
478 }
479 }
480
481 let current_count = self
483 .document_anomaly_counts
484 .get(&entry.document_number())
485 .copied()
486 .unwrap_or(0);
487 if current_count >= self.config.max_anomalies_per_document {
488 self.stats.skipped_max_per_doc += 1;
489 return false;
490 }
491
492 true
493 }
494
495 fn select_anomaly_category(&mut self) -> AnomalyType {
497 let r = self.rng.random::<f64>();
498 let rates = &self.config.rates;
499
500 let mut cumulative = 0.0;
501
502 cumulative += rates.fraud_rate;
503 if r < cumulative {
504 return self.type_selector.select_fraud(&mut self.rng);
505 }
506
507 cumulative += rates.error_rate;
508 if r < cumulative {
509 return self.type_selector.select_error(&mut self.rng);
510 }
511
512 cumulative += rates.process_issue_rate;
513 if r < cumulative {
514 return self.type_selector.select_process_issue(&mut self.rng);
515 }
516
517 cumulative += rates.statistical_rate;
518 if r < cumulative {
519 return self.type_selector.select_statistical(&mut self.rng);
520 }
521
522 self.type_selector.select_relational(&mut self.rng)
523 }
524
525 fn inject_anomaly(
527 &mut self,
528 entry: &mut JournalEntry,
529 anomaly_type: AnomalyType,
530 ) -> Option<LabeledAnomaly> {
531 if !self.strategies.can_apply(entry, &anomaly_type) {
533 return None;
534 }
535
536 let result = self
538 .strategies
539 .apply_strategy(entry, &anomaly_type, &mut self.rng);
540
541 if !result.success {
542 return None;
543 }
544
545 *self
547 .document_anomaly_counts
548 .entry(entry.document_number().clone())
549 .or_insert(0) += 1;
550
551 let category = anomaly_type.category().to_string();
553 let type_name = anomaly_type.type_name();
554
555 *self.stats.by_category.entry(category).or_insert(0) += 1;
556 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
557 *self
558 .stats
559 .by_company
560 .entry(entry.company_code().to_string())
561 .or_insert(0) += 1;
562
563 if self.config.generate_labels {
565 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
566
567 entry.header.is_anomaly = true;
569 entry.header.anomaly_id = Some(anomaly_id.clone());
570 entry.header.anomaly_type = Some(type_name.clone());
571
572 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
574 entry.header.is_fraud = true;
575 if let AnomalyType::Fraud(ref ft) = anomaly_type {
576 entry.header.fraud_type = Some(*ft);
577 }
578 }
579
580 let mut label = LabeledAnomaly::new(
581 anomaly_id,
582 anomaly_type.clone(),
583 entry.document_number().clone(),
584 "JE".to_string(),
585 entry.company_code().to_string(),
586 entry.posting_date(),
587 )
588 .with_description(&result.description)
589 .with_injection_strategy(&type_name);
590
591 let causal_reason = AnomalyCausalReason::RandomRate {
593 base_rate: self.config.rates.total_rate,
594 };
595 label = label.with_causal_reason(causal_reason);
596
597 let context_multiplier = self.calculate_context_rate_multiplier(entry);
599 if (context_multiplier - 1.0).abs() > f64::EPSILON {
600 label = label.with_metadata(
601 "entity_context_multiplier",
602 &format!("{context_multiplier:.3}"),
603 );
604 label = label.with_metadata(
605 "effective_rate",
606 &format!(
607 "{:.6}",
608 (self.config.rates.total_rate * context_multiplier).min(1.0)
609 ),
610 );
611 }
612
613 if let Some(impact) = result.monetary_impact {
615 label = label.with_monetary_impact(impact);
616 }
617
618 for entity in &result.related_entities {
620 label = label.with_related_entity(entity);
621 }
622
623 for (key, value) in &result.metadata {
625 label = label.with_metadata(key, value);
626 }
627
628 if let Some(cluster_id) =
630 self.cluster_manager
631 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
632 {
633 label = label.with_cluster(&cluster_id);
634 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
636 cluster_id: cluster_id.clone(),
637 });
638 }
639
640 return Some(label);
641 }
642
643 None
644 }
645
/// Injects a caller-chosen anomaly type into `entry`.
///
/// Thin public wrapper over `inject_anomaly`: returns `None` when the type
/// cannot be applied, when the strategy fails, or when label generation is
/// disabled. The returned label is not stored in the injector's label list.
pub fn inject_specific(
    &mut self,
    entry: &mut JournalEntry,
    anomaly_type: AnomalyType,
) -> Option<LabeledAnomaly> {
    self.inject_anomaly(entry, anomaly_type)
}
654
655 pub fn create_self_approval(
657 &mut self,
658 entry: &mut JournalEntry,
659 user_id: &str,
660 ) -> Option<LabeledAnomaly> {
661 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
662
663 let label = LabeledAnomaly::new(
664 format!("ANO{:08}", self.labels.len() + 1),
665 anomaly_type,
666 entry.document_number().clone(),
667 "JE".to_string(),
668 entry.company_code().to_string(),
669 entry.posting_date(),
670 )
671 .with_description(&format!("User {user_id} approved their own transaction"))
672 .with_related_entity(user_id)
673 .with_injection_strategy("ManualSelfApproval")
674 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
675 target_type: "User".to_string(),
676 target_id: user_id.to_string(),
677 });
678
679 entry.header.is_anomaly = true;
681 entry.header.is_fraud = true;
682 entry.header.anomaly_id = Some(label.anomaly_id.clone());
683 entry.header.anomaly_type = Some("SelfApproval".to_string());
684 entry.header.fraud_type = Some(FraudType::SelfApproval);
685
686 entry.header.created_by = user_id.to_string();
688
689 self.labels.push(label.clone());
690 Some(label)
691 }
692
693 pub fn create_sod_violation(
695 &mut self,
696 entry: &mut JournalEntry,
697 user_id: &str,
698 conflicting_duties: (&str, &str),
699 ) -> Option<LabeledAnomaly> {
700 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
701
702 let label = LabeledAnomaly::new(
703 format!("ANO{:08}", self.labels.len() + 1),
704 anomaly_type,
705 entry.document_number().clone(),
706 "JE".to_string(),
707 entry.company_code().to_string(),
708 entry.posting_date(),
709 )
710 .with_description(&format!(
711 "User {} performed conflicting duties: {} and {}",
712 user_id, conflicting_duties.0, conflicting_duties.1
713 ))
714 .with_related_entity(user_id)
715 .with_metadata("duty1", conflicting_duties.0)
716 .with_metadata("duty2", conflicting_duties.1)
717 .with_injection_strategy("ManualSoDViolation")
718 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
719 target_type: "User".to_string(),
720 target_id: user_id.to_string(),
721 });
722
723 entry.header.is_anomaly = true;
725 entry.header.is_fraud = true;
726 entry.header.anomaly_id = Some(label.anomaly_id.clone());
727 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
728 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
729
730 self.labels.push(label.clone());
731 Some(label)
732 }
733
734 pub fn create_ic_mismatch(
736 &mut self,
737 entry: &mut JournalEntry,
738 matching_company: &str,
739 expected_amount: Decimal,
740 actual_amount: Decimal,
741 ) -> Option<LabeledAnomaly> {
742 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
743
744 let label = LabeledAnomaly::new(
745 format!("ANO{:08}", self.labels.len() + 1),
746 anomaly_type,
747 entry.document_number().clone(),
748 "JE".to_string(),
749 entry.company_code().to_string(),
750 entry.posting_date(),
751 )
752 .with_description(&format!(
753 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
754 ))
755 .with_related_entity(matching_company)
756 .with_monetary_impact(actual_amount - expected_amount)
757 .with_metadata("expected_amount", &expected_amount.to_string())
758 .with_metadata("actual_amount", &actual_amount.to_string())
759 .with_injection_strategy("ManualICMismatch")
760 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
761 target_type: "Intercompany".to_string(),
762 target_id: matching_company.to_string(),
763 });
764
765 entry.header.is_anomaly = true;
767 entry.header.anomaly_id = Some(label.anomaly_id.clone());
768 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
769
770 self.labels.push(label.clone());
771 Some(label)
772 }
773
/// Returns every anomaly label recorded since construction or the last `reset`.
pub fn get_labels(&self) -> &[LabeledAnomaly] {
    &self.labels
}
778
/// Builds a fresh summary from all labels currently held by the injector.
pub fn get_summary(&self) -> AnomalySummary {
    AnomalySummary::from_anomalies(&self.labels)
}
783
/// Returns the running injection counters.
pub fn get_stats(&self) -> &InjectorStats {
    &self.stats
}
788
789 pub fn reset(&mut self) {
791 self.labels.clear();
792 self.document_anomaly_counts.clear();
793 self.stats = InjectorStats::default();
794 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
795
796 self.near_miss_labels.clear();
798 self.scheme_actions.clear();
799 self.difficulty_distribution.clear();
800
801 if let Some(ref mut baseline) = self.behavioral_baseline {
802 *baseline =
803 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
804 }
805 }
806
/// Returns the number of clusters reported by the cluster manager.
pub fn cluster_count(&self) -> usize {
    self.cluster_manager.cluster_count()
}
811
/// Replaces all three entity-context maps used to scale the injection rate.
///
/// Lookups elsewhere in this type key `vendors` by the entry header's
/// `reference`, `employees` by `created_by`, and `accounts` by the first line's
/// `gl_account`. Any previously stored contexts are dropped.
pub fn set_entity_contexts(
    &mut self,
    vendors: HashMap<String, VendorContext>,
    employees: HashMap<String, EmployeeContext>,
    accounts: HashMap<String, AccountContext>,
) {
    self.vendor_contexts = vendors;
    self.employee_contexts = employees;
    self.account_contexts = accounts;
}
834
/// Returns the vendor contexts currently in use.
pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
    &self.vendor_contexts
}
839
/// Returns the employee contexts currently in use.
pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
    &self.employee_contexts
}
844
/// Returns the account contexts currently in use.
pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
    &self.account_contexts
}
849
850 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
859 if self.vendor_contexts.is_empty()
860 && self.employee_contexts.is_empty()
861 && self.account_contexts.is_empty()
862 {
863 return 1.0;
864 }
865
866 let mut multiplier = 1.0;
867
868 if let Some(ref vendor_ref) = entry.header.reference {
870 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
871 if ctx.is_new {
873 multiplier *= 2.0;
874 }
875 if ctx.is_dormant_reactivation {
876 multiplier *= 1.5;
877 }
878 }
879 }
880
881 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
883 if ctx.is_new {
884 multiplier *= 1.5;
885 }
886 if ctx.is_volume_fatigued {
887 multiplier *= 1.3;
888 }
889 if ctx.is_overtime {
890 multiplier *= 1.2;
891 }
892 }
893
894 if let Some(first_line) = entry.lines.first() {
896 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
897 if ctx.is_high_risk {
898 multiplier *= 2.0;
899 }
900 }
901 }
902
903 multiplier
904 }
905
906 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
915 if let Some(ref mut advancer) = self.scheme_advancer {
916 let context = SchemeContext::new(date, company_code);
917 let actions = advancer.advance_all(&context);
918 self.scheme_actions.extend(actions.clone());
919 actions
920 } else {
921 Vec::new()
922 }
923 }
924
925 pub fn maybe_start_scheme(
931 &mut self,
932 date: NaiveDate,
933 company_code: &str,
934 available_users: Vec<String>,
935 available_accounts: Vec<String>,
936 available_counterparties: Vec<String>,
937 ) -> Option<uuid::Uuid> {
938 if let Some(ref mut advancer) = self.scheme_advancer {
939 let mut context = SchemeContext::new(date, company_code);
940 context.available_users = available_users;
941 context.available_accounts = available_accounts;
942 context.available_counterparties = available_counterparties;
943
944 advancer.maybe_start_scheme(&context)
945 } else {
946 None
947 }
948 }
949
/// Returns the near-miss labels collected since construction or the last `reset`.
pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
    &self.near_miss_labels
}
954
/// Returns the scheme actions accumulated by `advance_schemes`.
pub fn get_scheme_actions(&self) -> &[SchemeAction] {
    &self.scheme_actions
}
959
/// Returns how many recorded labels fall into each detection-difficulty level.
///
/// Only populated when difficulty classification is enabled.
pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
    &self.difficulty_distribution
}
964
965 pub fn check_behavioral_deviations(
967 &self,
968 entity_id: &str,
969 observation: &super::context::Observation,
970 ) -> Vec<super::context::BehavioralDeviation> {
971 if let Some(ref baseline) = self.behavioral_baseline {
972 baseline.check_deviation(entity_id, observation)
973 } else {
974 Vec::new()
975 }
976 }
977
978 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
980 if let Some(ref baseline) = self.behavioral_baseline {
981 baseline.get_baseline(entity_id)
982 } else {
983 None
984 }
985 }
986
987 pub fn active_scheme_count(&self) -> usize {
989 if let Some(ref advancer) = self.scheme_advancer {
990 advancer.active_scheme_count()
991 } else {
992 0
993 }
994 }
995
996 pub fn has_enhanced_features(&self) -> bool {
998 self.scheme_advancer.is_some()
999 || self.near_miss_generator.is_some()
1000 || self.difficulty_calculator.is_some()
1001 || self.entity_aware_injector.is_some()
1002 }
1003}
1004
/// Fluent builder for [`AnomalyInjectorConfig`].
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration under construction; starts from `AnomalyInjectorConfig::default()`.
    config: AnomalyInjectorConfig,
}
1009
1010impl AnomalyInjectorConfigBuilder {
1011 pub fn new() -> Self {
1013 Self {
1014 config: AnomalyInjectorConfig::default(),
1015 }
1016 }
1017
1018 pub fn with_total_rate(mut self, rate: f64) -> Self {
1020 self.config.rates.total_rate = rate;
1021 self
1022 }
1023
1024 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1026 self.config.rates.fraud_rate = rate;
1027 self
1028 }
1029
1030 pub fn with_error_rate(mut self, rate: f64) -> Self {
1032 self.config.rates.error_rate = rate;
1033 self
1034 }
1035
1036 pub fn with_seed(mut self, seed: u64) -> Self {
1038 self.config.seed = seed;
1039 self
1040 }
1041
1042 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1044 self.config.patterns.temporal_pattern = pattern;
1045 self
1046 }
1047
1048 pub fn with_labels(mut self, generate: bool) -> Self {
1050 self.config.generate_labels = generate;
1051 self
1052 }
1053
1054 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1056 self.config.target_companies = companies;
1057 self
1058 }
1059
1060 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1062 self.config.date_range = Some((start, end));
1063 self
1064 }
1065
1066 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1072 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1073 self.config.enhanced.scheme_probability = probability;
1074 self
1075 }
1076
1077 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1079 self.config.enhanced.near_miss_enabled = enabled;
1080 self.config.enhanced.near_miss_proportion = proportion;
1081 self
1082 }
1083
1084 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1086 self.config.enhanced.approval_thresholds = thresholds;
1087 self
1088 }
1089
1090 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1092 self.config.enhanced.correlated_injection_enabled = enabled;
1093 self
1094 }
1095
1096 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1098 self.config.enhanced.temporal_clustering_enabled = enabled;
1099 self.config.enhanced.period_end_multiplier = multiplier;
1100 self
1101 }
1102
1103 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1105 self.config.enhanced.difficulty_classification_enabled = enabled;
1106 self
1107 }
1108
1109 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1111 self.config.enhanced.context_aware_enabled = enabled;
1112 self
1113 }
1114
1115 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1117 self.config.enhanced.behavioral_baseline_config = config;
1118 self
1119 }
1120
1121 pub fn with_all_enhanced_features(mut self) -> Self {
1123 self.config.enhanced.multi_stage_schemes_enabled = true;
1124 self.config.enhanced.scheme_probability = 0.02;
1125 self.config.enhanced.correlated_injection_enabled = true;
1126 self.config.enhanced.temporal_clustering_enabled = true;
1127 self.config.enhanced.period_end_multiplier = 2.5;
1128 self.config.enhanced.near_miss_enabled = true;
1129 self.config.enhanced.near_miss_proportion = 0.30;
1130 self.config.enhanced.difficulty_classification_enabled = true;
1131 self.config.enhanced.context_aware_enabled = true;
1132 self.config.enhanced.behavioral_baseline_config.enabled = true;
1133 self
1134 }
1135
1136 pub fn build(self) -> AnomalyInjectorConfig {
1138 self.config
1139 }
1140}
1141
impl Default for AnomalyInjectorConfigBuilder {
    /// Equivalent to [`AnomalyInjectorConfigBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
1147
1148#[cfg(test)]
1149#[allow(clippy::unwrap_used)]
1150mod tests {
1151 use super::*;
1152 use chrono::NaiveDate;
1153 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1154 use rust_decimal_macros::dec;
1155
1156 fn create_test_entry(doc_num: &str) -> JournalEntry {
1157 let mut entry = JournalEntry::new_simple(
1158 doc_num.to_string(),
1159 "1000".to_string(),
1160 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1161 "Test Entry".to_string(),
1162 );
1163
1164 entry.add_line(JournalEntryLine {
1165 line_number: 1,
1166 gl_account: "5000".to_string(),
1167 debit_amount: dec!(1000),
1168 ..Default::default()
1169 });
1170
1171 entry.add_line(JournalEntryLine {
1172 line_number: 2,
1173 gl_account: "1000".to_string(),
1174 credit_amount: dec!(1000),
1175 ..Default::default()
1176 });
1177
1178 entry
1179 }
1180
/// With a 50% total rate over 100 entries, at least one anomaly is injected and
/// the label count matches the reported injection count.
#[test]
fn test_anomaly_injector_basic() {
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42)
        .build();

    let mut injector = AnomalyInjector::new(config);

    let mut entries: Vec<_> = (0..100)
        .map(|i| create_test_entry(&format!("JE{:04}", i)))
        .collect();

    let result = injector.process_entries(&mut entries);

    assert!(result.anomalies_injected > 0);
    assert!(!result.labels.is_empty());
    // Every counted injection must have produced exactly one label.
    assert_eq!(result.labels.len(), result.anomalies_injected);
}
1201
/// Injecting a specific statistical anomaly yields a label tied to the entry's document.
#[test]
fn test_specific_injection() {
    let config = AnomalyInjectorConfig::default();
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);

    let label = injector.inject_specific(&mut entry, anomaly_type);

    assert!(label.is_some());
    let label = label.unwrap();
    assert!(!label.document_id.is_empty());
    // The label must reference the document it was injected into.
    assert_eq!(label.document_id, entry.document_number());
}
1218
/// A manual self-approval produces a `SelfApproval` fraud label naming the user.
#[test]
fn test_self_approval_injection() {
    let config = AnomalyInjectorConfig::default();
    let mut injector = AnomalyInjector::new(config);

    let mut entry = create_test_entry("JE001");
    let label = injector.create_self_approval(&mut entry, "USER001");

    assert!(label.is_some());
    let label = label.unwrap();
    assert!(matches!(
        label.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    ));
    assert!(label.related_entities.contains(&"USER001".to_string()));
}
1235
/// Entries from a company outside `target_companies` are never injected, even at rate 1.0.
#[test]
fn test_company_filtering() {
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0)
        .with_target_companies(vec!["2000".to_string()])
        .build();

    let mut injector = AnomalyInjector::new(config);

    // Both entries belong to company "1000" (see `create_test_entry`), not "2000".
    let mut entries = vec![
        create_test_entry("JE001"),
        create_test_entry("JE002"),
    ];

    let result = injector.process_entries(&mut entries);

    assert_eq!(result.anomalies_injected, 0);
}
1255
1256 fn create_test_entry_with_context(
1262 doc_num: &str,
1263 vendor_ref: Option<&str>,
1264 employee_id: &str,
1265 gl_account: &str,
1266 ) -> JournalEntry {
1267 let mut entry = JournalEntry::new_simple(
1268 doc_num.to_string(),
1269 "1000".to_string(),
1270 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1271 "Test Entry".to_string(),
1272 );
1273
1274 entry.header.reference = vendor_ref.map(|v| v.to_string());
1275 entry.header.created_by = employee_id.to_string();
1276
1277 entry.add_line(JournalEntryLine {
1278 line_number: 1,
1279 gl_account: gl_account.to_string(),
1280 debit_amount: dec!(1000),
1281 ..Default::default()
1282 });
1283
1284 entry.add_line(JournalEntryLine {
1285 line_number: 2,
1286 gl_account: "1000".to_string(),
1287 credit_amount: dec!(1000),
1288 ..Default::default()
1289 });
1290
1291 entry
1292 }
1293
/// Checks that a freshly built injector exposes no entity contexts and that
/// `set_entity_contexts` makes the supplied vendor, employee and account maps
/// visible through the matching accessors.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing has been registered yet.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    // One entry per context kind, keyed by its own identifier.
    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    // Each accessor now reports exactly the single entry that was supplied.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1344
/// Without any entity contexts, a 0.5 total rate with seed 42 over 200
/// entries must inject at least one anomaly, and the observed rate must lie
/// strictly between 20% and 80% of the processed entries.
#[test]
fn test_default_behavior_no_contexts() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut batch = Vec::with_capacity(200);
    for idx in 0..200 {
        batch.push(create_test_entry(&format!("JE{:04}", idx)));
    }

    let outcome = injector.process_entries(&mut batch);

    assert!(outcome.anomalies_injected > 0);

    // Wide tolerance band around the configured 50% rate.
    let injected = outcome.anomalies_injected as f64;
    let processed = outcome.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        0.2 < rate && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1371
/// Runs the same 500 entries through two identically configured injectors
/// (rate 0.10, seed 123) and asserts that the one given flagged vendor,
/// employee and account contexts injects strictly more anomalies than the one
/// without contexts.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share the exact same configuration and input shape.
    let new_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };
    let new_batch = || -> Vec<_> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Baseline run: no entity contexts registered.
    let mut plain_injector = new_injector();
    let mut plain_batch = new_batch();
    let plain_outcome = plain_injector.process_entries(&mut plain_batch);

    // Second run: every entity referenced by the entries carries risk flags.
    let mut flagged_injector = new_injector();

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    flagged_injector.set_entity_contexts(vendors, employees, accounts);

    let mut flagged_batch = new_batch();
    let flagged_outcome = flagged_injector.process_entries(&mut flagged_batch);

    assert!(
        flagged_outcome.anomalies_injected > plain_outcome.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        flagged_outcome.anomalies_injected,
        plain_outcome.anomalies_injected,
    );
}
1462
/// Exercises `calculate_context_rate_multiplier` for four cases: no contexts
/// registered (1.0), flagged vendor plus high-risk account (4.0), flagged
/// vendor only (2.0), and an entry whose vendor and account match no
/// registered context (1.0).
#[test]
fn test_risk_score_multiplication() {
    // Equality within one `f64::EPSILON`, as used by every check below.
    fn approx_eq(actual: f64, wanted: f64) -> bool {
        (actual - wanted).abs() < f64::EPSILON
    }

    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any contexts are set the multiplier is neutral.
    let plain_entry = create_test_entry_with_context("JE001", None, "USER1", "5000");
    assert!(approx_eq(
        injector.calculate_context_rate_multiplier(&plain_entry),
        1.0
    ));

    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Vendor and account both match a flagged context.
    let risky_entry = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky_entry);
    assert!(
        approx_eq(multiplier, 4.0),
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Only the vendor matches; account "5000" has no registered context.
    let vendor_only_entry =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only_entry);
    assert!(
        approx_eq(multiplier_vendor, 2.0),
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor nor the account is registered.
    let unmatched_entry =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched_entry);
    assert!(
        approx_eq(multiplier_none, 1.0),
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1527
/// Registers one employee with `is_new`, `is_volume_fatigued` and
/// `is_overtime` all set, and checks that the multiplier for an entry created
/// by that employee is within 0.01 of `1.5 * 1.3 * 1.2`.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employees = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    // NOTE(review): presumably one factor per flag set above; which factor
    // belongs to which flag is not visible from this test — confirm.
    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1559
/// Checks that a vendor context registered via `set_entity_contexts` is still
/// present after `reset()` is called on the injector.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // The vendor count must be unchanged by the reset.
    injector.reset();
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1582
/// Checks that calling `set_entity_contexts` with empty maps leaves no vendor
/// contexts behind from an earlier call.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // A second call with empty maps must leave the vendor map empty.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1604
/// Registers a vendor with only `is_dormant_reactivation` set and checks that
/// an entry referencing it gets a 1.5x context rate multiplier.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let deviation = (multiplier - 1.5).abs();
    assert!(
        deviation < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1630}