1use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::Rng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25 RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31 EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38 TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48 pub rates: AnomalyRateConfig,
50 pub patterns: AnomalyPatternConfig,
52 pub seed: u64,
54 pub generate_labels: bool,
56 pub allow_duplicates: bool,
58 pub max_anomalies_per_document: usize,
60 pub target_companies: Vec<String>,
62 pub date_range: Option<(NaiveDate, NaiveDate)>,
64 pub enhanced: EnhancedInjectionConfig,
66}
67
68#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71 pub multi_stage_schemes_enabled: bool,
73 pub scheme_probability: f64,
75 pub correlated_injection_enabled: bool,
77 pub temporal_clustering_enabled: bool,
79 pub period_end_multiplier: f64,
81 pub near_miss_enabled: bool,
83 pub near_miss_proportion: f64,
85 pub approval_thresholds: Vec<Decimal>,
87 pub difficulty_classification_enabled: bool,
89 pub context_aware_enabled: bool,
91 pub behavioral_baseline_config: BehavioralBaselineConfig,
93}
94
95impl Default for AnomalyInjectorConfig {
96 fn default() -> Self {
97 Self {
98 rates: AnomalyRateConfig::default(),
99 patterns: AnomalyPatternConfig::default(),
100 seed: 42,
101 generate_labels: true,
102 allow_duplicates: true,
103 max_anomalies_per_document: 2,
104 target_companies: Vec::new(),
105 date_range: None,
106 enhanced: EnhancedInjectionConfig::default(),
107 }
108 }
109}
110
111#[derive(Debug, Clone)]
113pub struct InjectionBatchResult {
114 pub entries_processed: usize,
116 pub anomalies_injected: usize,
118 pub duplicates_created: usize,
120 pub labels: Vec<LabeledAnomaly>,
122 pub summary: AnomalySummary,
124 pub modified_documents: Vec<String>,
126 pub near_miss_labels: Vec<NearMissLabel>,
128 pub scheme_actions: Vec<SchemeAction>,
130 pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
132}
133
134pub struct AnomalyInjector {
136 config: AnomalyInjectorConfig,
137 rng: ChaCha8Rng,
138 uuid_factory: DeterministicUuidFactory,
139 type_selector: AnomalyTypeSelector,
140 strategies: StrategyCollection,
141 cluster_manager: ClusterManager,
142 #[allow(dead_code)]
145 entity_targeting: EntityTargetingManager,
146 document_anomaly_counts: HashMap<String, usize>,
148 labels: Vec<LabeledAnomaly>,
150 stats: InjectorStats,
152 scheme_advancer: Option<SchemeAdvancer>,
155 near_miss_generator: Option<NearMissGenerator>,
157 near_miss_labels: Vec<NearMissLabel>,
159 #[allow(dead_code)]
161 co_occurrence_handler: Option<AnomalyCoOccurrence>,
162 #[allow(dead_code)]
164 temporal_cluster_generator: Option<TemporalClusterGenerator>,
165 difficulty_calculator: Option<DifficultyCalculator>,
167 entity_aware_injector: Option<EntityAwareInjector>,
169 behavioral_baseline: Option<BehavioralBaseline>,
171 scheme_actions: Vec<SchemeAction>,
173 difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
175 vendor_contexts: HashMap<String, VendorContext>,
178 employee_contexts: HashMap<String, EmployeeContext>,
180 account_contexts: HashMap<String, AccountContext>,
182}
183
/// Running counters kept by an [`AnomalyInjector`].
#[derive(Clone, Debug, Default)]
pub struct InjectorStats {
    /// Entries handed to the injector.
    pub total_processed: usize,
    /// Anomalies injected and labeled during batch processing.
    pub total_injected: usize,
    /// Applied anomalies per category name.
    pub by_category: HashMap<String, usize>,
    /// Applied anomalies per type name.
    pub by_type: HashMap<String, usize>,
    /// Applied anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Intended count of entries passed over by the injection-rate check.
    /// NOTE(review): confirm where this counter is maintained.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date is outside the configured range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not among the target companies.
    pub skipped_company: usize,
    /// Entries skipped because the document already holds the maximum number of anomalies.
    pub skipped_max_per_doc: usize,
}
206
207impl AnomalyInjector {
208 pub fn new(config: AnomalyInjectorConfig) -> Self {
210 let mut rng = seeded_rng(config.seed, 0);
211 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
212 let entity_targeting =
213 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
214
215 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
217 let scheme_config = SchemeAdvancerConfig {
218 embezzlement_probability: config.enhanced.scheme_probability,
219 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
220 kickback_probability: config.enhanced.scheme_probability * 0.5,
221 seed: rng.random(),
222 ..Default::default()
223 };
224 Some(SchemeAdvancer::new(scheme_config))
225 } else {
226 None
227 };
228
229 let near_miss_generator = if config.enhanced.near_miss_enabled {
230 let near_miss_config = NearMissConfig {
231 proportion: config.enhanced.near_miss_proportion,
232 seed: rng.random(),
233 ..Default::default()
234 };
235 Some(NearMissGenerator::new(near_miss_config))
236 } else {
237 None
238 };
239
240 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
241 Some(AnomalyCoOccurrence::new())
242 } else {
243 None
244 };
245
246 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
247 Some(TemporalClusterGenerator::new())
248 } else {
249 None
250 };
251
252 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
253 Some(DifficultyCalculator::new())
254 } else {
255 None
256 };
257
258 let entity_aware_injector = if config.enhanced.context_aware_enabled {
259 Some(EntityAwareInjector::default())
260 } else {
261 None
262 };
263
264 let behavioral_baseline = if config.enhanced.context_aware_enabled
265 && config.enhanced.behavioral_baseline_config.enabled
266 {
267 Some(BehavioralBaseline::new(
268 config.enhanced.behavioral_baseline_config.clone(),
269 ))
270 } else {
271 None
272 };
273
274 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
275
276 Self {
277 config,
278 rng,
279 uuid_factory,
280 type_selector: AnomalyTypeSelector::new(),
281 strategies: StrategyCollection::default(),
282 cluster_manager,
283 entity_targeting,
284 document_anomaly_counts: HashMap::new(),
285 labels: Vec::new(),
286 stats: InjectorStats::default(),
287 scheme_advancer,
288 near_miss_generator,
289 near_miss_labels: Vec::new(),
290 co_occurrence_handler,
291 temporal_cluster_generator,
292 difficulty_calculator,
293 entity_aware_injector,
294 behavioral_baseline,
295 scheme_actions: Vec::new(),
296 difficulty_distribution: HashMap::new(),
297 vendor_contexts: HashMap::new(),
298 employee_contexts: HashMap::new(),
299 account_contexts: HashMap::new(),
300 }
301 }
302
303 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
305 debug!(
306 entry_count = entries.len(),
307 total_rate = self.config.rates.total_rate,
308 seed = self.config.seed,
309 "Injecting anomalies into journal entries"
310 );
311
312 let mut modified_documents = Vec::new();
313 let mut duplicates = Vec::new();
314
315 for entry in entries.iter_mut() {
316 self.stats.total_processed += 1;
317
318 if let Some(ref mut baseline) = self.behavioral_baseline {
320 use super::context::Observation;
321 let entity_id = entry.header.created_by.clone();
323 let observation =
324 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
325 baseline.record_observation(&entity_id, observation);
326 }
327
328 if !self.should_process(entry) {
330 continue;
331 }
332
333 let base_rate = self.config.rates.total_rate;
335
336 let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
338 let employee_id = &entry.header.created_by;
339 let first_account = entry
340 .lines
341 .first()
342 .map(|l| l.gl_account.as_str())
343 .unwrap_or("");
344 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
346
347 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
348 let employee_ctx = self.employee_contexts.get(employee_id);
349 let account_ctx = self.account_contexts.get(first_account);
350
351 let multiplier =
352 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
353 (base_rate * multiplier).min(1.0)
354 } else {
355 self.calculate_context_rate_multiplier(entry) * base_rate
357 };
358
359 if should_inject_anomaly(
361 effective_rate,
362 entry.posting_date(),
363 &self.config.patterns.temporal_pattern,
364 &mut self.rng,
365 ) {
366 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
368 let account = entry
370 .lines
371 .first()
372 .map(|l| l.gl_account.clone())
373 .unwrap_or_default();
374 near_miss_gen.record_transaction(
375 entry.document_number().clone(),
376 entry.posting_date(),
377 entry.total_debit(),
378 &account,
379 None,
380 );
381
382 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
384 entry.document_number().clone(),
385 entry.posting_date(),
386 entry.total_debit(),
387 &account,
388 None,
389 &self.config.enhanced.approval_thresholds,
390 ) {
391 self.near_miss_labels.push(near_miss_label);
392 continue; }
394 }
395
396 let anomaly_type = self.select_anomaly_category();
398
399 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
401 if let Some(ref calculator) = self.difficulty_calculator {
403 let difficulty = calculator.calculate(&label);
404
405 label = label
407 .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
408 label = label.with_metadata(
409 "difficulty_score",
410 &difficulty.difficulty_score().to_string(),
411 );
412
413 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
415 }
416
417 modified_documents.push(entry.document_number().clone());
418 self.labels.push(label);
419 self.stats.total_injected += 1;
420 }
421
422 if self.config.allow_duplicates
424 && matches!(
425 self.labels.last().map(|l| &l.anomaly_type),
426 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
427 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
428 )
429 {
430 let dup_strategy = DuplicationStrategy::default();
431 let duplicate =
432 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
433 duplicates.push(duplicate);
434 }
435 }
436 }
437
438 let duplicates_created = duplicates.len();
440
441 let summary = AnomalySummary::from_anomalies(&self.labels);
443
444 InjectionBatchResult {
445 entries_processed: self.stats.total_processed,
446 anomalies_injected: self.stats.total_injected,
447 duplicates_created,
448 labels: self.labels.clone(),
449 summary,
450 modified_documents,
451 near_miss_labels: self.near_miss_labels.clone(),
452 scheme_actions: self.scheme_actions.clone(),
453 difficulty_distribution: self.difficulty_distribution.clone(),
454 }
455 }
456
457 fn should_process(&mut self, entry: &JournalEntry) -> bool {
459 if !self.config.target_companies.is_empty()
461 && !self
462 .config
463 .target_companies
464 .iter()
465 .any(|c| c == entry.company_code())
466 {
467 self.stats.skipped_company += 1;
468 return false;
469 }
470
471 if let Some((start, end)) = self.config.date_range {
473 if entry.posting_date() < start || entry.posting_date() > end {
474 self.stats.skipped_date += 1;
475 return false;
476 }
477 }
478
479 let current_count = self
481 .document_anomaly_counts
482 .get(&entry.document_number())
483 .copied()
484 .unwrap_or(0);
485 if current_count >= self.config.max_anomalies_per_document {
486 self.stats.skipped_max_per_doc += 1;
487 return false;
488 }
489
490 true
491 }
492
493 fn select_anomaly_category(&mut self) -> AnomalyType {
495 let r = self.rng.random::<f64>();
496 let rates = &self.config.rates;
497
498 let mut cumulative = 0.0;
499
500 cumulative += rates.fraud_rate;
501 if r < cumulative {
502 return self.type_selector.select_fraud(&mut self.rng);
503 }
504
505 cumulative += rates.error_rate;
506 if r < cumulative {
507 return self.type_selector.select_error(&mut self.rng);
508 }
509
510 cumulative += rates.process_issue_rate;
511 if r < cumulative {
512 return self.type_selector.select_process_issue(&mut self.rng);
513 }
514
515 cumulative += rates.statistical_rate;
516 if r < cumulative {
517 return self.type_selector.select_statistical(&mut self.rng);
518 }
519
520 self.type_selector.select_relational(&mut self.rng)
521 }
522
523 fn inject_anomaly(
525 &mut self,
526 entry: &mut JournalEntry,
527 anomaly_type: AnomalyType,
528 ) -> Option<LabeledAnomaly> {
529 if !self.strategies.can_apply(entry, &anomaly_type) {
531 return None;
532 }
533
534 let result = self
536 .strategies
537 .apply_strategy(entry, &anomaly_type, &mut self.rng);
538
539 if !result.success {
540 return None;
541 }
542
543 *self
545 .document_anomaly_counts
546 .entry(entry.document_number().clone())
547 .or_insert(0) += 1;
548
549 let category = anomaly_type.category().to_string();
551 let type_name = anomaly_type.type_name();
552
553 *self.stats.by_category.entry(category).or_insert(0) += 1;
554 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
555 *self
556 .stats
557 .by_company
558 .entry(entry.company_code().to_string())
559 .or_insert(0) += 1;
560
561 if self.config.generate_labels {
563 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
564
565 entry.header.is_anomaly = true;
567 entry.header.anomaly_id = Some(anomaly_id.clone());
568 entry.header.anomaly_type = Some(type_name.clone());
569
570 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
572 entry.header.is_fraud = true;
573 if let AnomalyType::Fraud(ref ft) = anomaly_type {
574 entry.header.fraud_type = Some(*ft);
575 }
576 }
577
578 let mut label = LabeledAnomaly::new(
579 anomaly_id,
580 anomaly_type.clone(),
581 entry.document_number().clone(),
582 "JE".to_string(),
583 entry.company_code().to_string(),
584 entry.posting_date(),
585 )
586 .with_description(&result.description)
587 .with_injection_strategy(&type_name);
588
589 let causal_reason = AnomalyCausalReason::RandomRate {
591 base_rate: self.config.rates.total_rate,
592 };
593 label = label.with_causal_reason(causal_reason);
594
595 let context_multiplier = self.calculate_context_rate_multiplier(entry);
597 if (context_multiplier - 1.0).abs() > f64::EPSILON {
598 label = label.with_metadata(
599 "entity_context_multiplier",
600 &format!("{:.3}", context_multiplier),
601 );
602 label = label.with_metadata(
603 "effective_rate",
604 &format!(
605 "{:.6}",
606 (self.config.rates.total_rate * context_multiplier).min(1.0)
607 ),
608 );
609 }
610
611 if let Some(impact) = result.monetary_impact {
613 label = label.with_monetary_impact(impact);
614 }
615
616 for entity in &result.related_entities {
618 label = label.with_related_entity(entity);
619 }
620
621 for (key, value) in &result.metadata {
623 label = label.with_metadata(key, value);
624 }
625
626 if let Some(cluster_id) =
628 self.cluster_manager
629 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
630 {
631 label = label.with_cluster(&cluster_id);
632 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
634 cluster_id: cluster_id.clone(),
635 });
636 }
637
638 return Some(label);
639 }
640
641 None
642 }
643
644 pub fn inject_specific(
646 &mut self,
647 entry: &mut JournalEntry,
648 anomaly_type: AnomalyType,
649 ) -> Option<LabeledAnomaly> {
650 self.inject_anomaly(entry, anomaly_type)
651 }
652
653 pub fn create_self_approval(
655 &mut self,
656 entry: &mut JournalEntry,
657 user_id: &str,
658 ) -> Option<LabeledAnomaly> {
659 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
660
661 let label = LabeledAnomaly::new(
662 format!("ANO{:08}", self.labels.len() + 1),
663 anomaly_type,
664 entry.document_number().clone(),
665 "JE".to_string(),
666 entry.company_code().to_string(),
667 entry.posting_date(),
668 )
669 .with_description(&format!("User {} approved their own transaction", user_id))
670 .with_related_entity(user_id)
671 .with_injection_strategy("ManualSelfApproval")
672 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
673 target_type: "User".to_string(),
674 target_id: user_id.to_string(),
675 });
676
677 entry.header.is_anomaly = true;
679 entry.header.is_fraud = true;
680 entry.header.anomaly_id = Some(label.anomaly_id.clone());
681 entry.header.anomaly_type = Some("SelfApproval".to_string());
682 entry.header.fraud_type = Some(FraudType::SelfApproval);
683
684 entry.header.created_by = user_id.to_string();
686
687 self.labels.push(label.clone());
688 Some(label)
689 }
690
691 pub fn create_sod_violation(
693 &mut self,
694 entry: &mut JournalEntry,
695 user_id: &str,
696 conflicting_duties: (&str, &str),
697 ) -> Option<LabeledAnomaly> {
698 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
699
700 let label = LabeledAnomaly::new(
701 format!("ANO{:08}", self.labels.len() + 1),
702 anomaly_type,
703 entry.document_number().clone(),
704 "JE".to_string(),
705 entry.company_code().to_string(),
706 entry.posting_date(),
707 )
708 .with_description(&format!(
709 "User {} performed conflicting duties: {} and {}",
710 user_id, conflicting_duties.0, conflicting_duties.1
711 ))
712 .with_related_entity(user_id)
713 .with_metadata("duty1", conflicting_duties.0)
714 .with_metadata("duty2", conflicting_duties.1)
715 .with_injection_strategy("ManualSoDViolation")
716 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
717 target_type: "User".to_string(),
718 target_id: user_id.to_string(),
719 });
720
721 entry.header.is_anomaly = true;
723 entry.header.is_fraud = true;
724 entry.header.anomaly_id = Some(label.anomaly_id.clone());
725 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
726 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
727
728 self.labels.push(label.clone());
729 Some(label)
730 }
731
732 pub fn create_ic_mismatch(
734 &mut self,
735 entry: &mut JournalEntry,
736 matching_company: &str,
737 expected_amount: Decimal,
738 actual_amount: Decimal,
739 ) -> Option<LabeledAnomaly> {
740 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
741
742 let label = LabeledAnomaly::new(
743 format!("ANO{:08}", self.labels.len() + 1),
744 anomaly_type,
745 entry.document_number().clone(),
746 "JE".to_string(),
747 entry.company_code().to_string(),
748 entry.posting_date(),
749 )
750 .with_description(&format!(
751 "Intercompany mismatch with {}: expected {} but got {}",
752 matching_company, expected_amount, actual_amount
753 ))
754 .with_related_entity(matching_company)
755 .with_monetary_impact(actual_amount - expected_amount)
756 .with_metadata("expected_amount", &expected_amount.to_string())
757 .with_metadata("actual_amount", &actual_amount.to_string())
758 .with_injection_strategy("ManualICMismatch")
759 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
760 target_type: "Intercompany".to_string(),
761 target_id: matching_company.to_string(),
762 });
763
764 entry.header.is_anomaly = true;
766 entry.header.anomaly_id = Some(label.anomaly_id.clone());
767 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
768
769 self.labels.push(label.clone());
770 Some(label)
771 }
772
773 pub fn get_labels(&self) -> &[LabeledAnomaly] {
775 &self.labels
776 }
777
778 pub fn get_summary(&self) -> AnomalySummary {
780 AnomalySummary::from_anomalies(&self.labels)
781 }
782
783 pub fn get_stats(&self) -> &InjectorStats {
785 &self.stats
786 }
787
788 pub fn reset(&mut self) {
790 self.labels.clear();
791 self.document_anomaly_counts.clear();
792 self.stats = InjectorStats::default();
793 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
794
795 self.near_miss_labels.clear();
797 self.scheme_actions.clear();
798 self.difficulty_distribution.clear();
799
800 if let Some(ref mut baseline) = self.behavioral_baseline {
801 *baseline =
802 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
803 }
804 }
805
806 pub fn cluster_count(&self) -> usize {
808 self.cluster_manager.cluster_count()
809 }
810
811 pub fn set_entity_contexts(
824 &mut self,
825 vendors: HashMap<String, VendorContext>,
826 employees: HashMap<String, EmployeeContext>,
827 accounts: HashMap<String, AccountContext>,
828 ) {
829 self.vendor_contexts = vendors;
830 self.employee_contexts = employees;
831 self.account_contexts = accounts;
832 }
833
834 pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
836 &self.vendor_contexts
837 }
838
839 pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
841 &self.employee_contexts
842 }
843
844 pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
846 &self.account_contexts
847 }
848
849 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
858 if self.vendor_contexts.is_empty()
859 && self.employee_contexts.is_empty()
860 && self.account_contexts.is_empty()
861 {
862 return 1.0;
863 }
864
865 let mut multiplier = 1.0;
866
867 if let Some(ref vendor_ref) = entry.header.reference {
869 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
870 if ctx.is_new {
872 multiplier *= 2.0;
873 }
874 if ctx.is_dormant_reactivation {
875 multiplier *= 1.5;
876 }
877 }
878 }
879
880 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
882 if ctx.is_new {
883 multiplier *= 1.5;
884 }
885 if ctx.is_volume_fatigued {
886 multiplier *= 1.3;
887 }
888 if ctx.is_overtime {
889 multiplier *= 1.2;
890 }
891 }
892
893 if let Some(first_line) = entry.lines.first() {
895 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
896 if ctx.is_high_risk {
897 multiplier *= 2.0;
898 }
899 }
900 }
901
902 multiplier
903 }
904
905 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
914 if let Some(ref mut advancer) = self.scheme_advancer {
915 let context = SchemeContext::new(date, company_code);
916 let actions = advancer.advance_all(&context);
917 self.scheme_actions.extend(actions.clone());
918 actions
919 } else {
920 Vec::new()
921 }
922 }
923
924 pub fn maybe_start_scheme(
930 &mut self,
931 date: NaiveDate,
932 company_code: &str,
933 available_users: Vec<String>,
934 available_accounts: Vec<String>,
935 available_counterparties: Vec<String>,
936 ) -> Option<uuid::Uuid> {
937 if let Some(ref mut advancer) = self.scheme_advancer {
938 let mut context = SchemeContext::new(date, company_code);
939 context.available_users = available_users;
940 context.available_accounts = available_accounts;
941 context.available_counterparties = available_counterparties;
942
943 advancer.maybe_start_scheme(&context)
944 } else {
945 None
946 }
947 }
948
949 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
951 &self.near_miss_labels
952 }
953
954 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
956 &self.scheme_actions
957 }
958
959 pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
961 &self.difficulty_distribution
962 }
963
964 pub fn check_behavioral_deviations(
966 &self,
967 entity_id: &str,
968 observation: &super::context::Observation,
969 ) -> Vec<super::context::BehavioralDeviation> {
970 if let Some(ref baseline) = self.behavioral_baseline {
971 baseline.check_deviation(entity_id, observation)
972 } else {
973 Vec::new()
974 }
975 }
976
977 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
979 if let Some(ref baseline) = self.behavioral_baseline {
980 baseline.get_baseline(entity_id)
981 } else {
982 None
983 }
984 }
985
986 pub fn active_scheme_count(&self) -> usize {
988 if let Some(ref advancer) = self.scheme_advancer {
989 advancer.active_scheme_count()
990 } else {
991 0
992 }
993 }
994
995 pub fn has_enhanced_features(&self) -> bool {
997 self.scheme_advancer.is_some()
998 || self.near_miss_generator.is_some()
999 || self.difficulty_calculator.is_some()
1000 || self.entity_aware_injector.is_some()
1001 }
1002}
1003
1004pub struct AnomalyInjectorConfigBuilder {
1006 config: AnomalyInjectorConfig,
1007}
1008
1009impl AnomalyInjectorConfigBuilder {
1010 pub fn new() -> Self {
1012 Self {
1013 config: AnomalyInjectorConfig::default(),
1014 }
1015 }
1016
1017 pub fn with_total_rate(mut self, rate: f64) -> Self {
1019 self.config.rates.total_rate = rate;
1020 self
1021 }
1022
1023 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1025 self.config.rates.fraud_rate = rate;
1026 self
1027 }
1028
1029 pub fn with_error_rate(mut self, rate: f64) -> Self {
1031 self.config.rates.error_rate = rate;
1032 self
1033 }
1034
1035 pub fn with_seed(mut self, seed: u64) -> Self {
1037 self.config.seed = seed;
1038 self
1039 }
1040
1041 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1043 self.config.patterns.temporal_pattern = pattern;
1044 self
1045 }
1046
1047 pub fn with_labels(mut self, generate: bool) -> Self {
1049 self.config.generate_labels = generate;
1050 self
1051 }
1052
1053 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1055 self.config.target_companies = companies;
1056 self
1057 }
1058
1059 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1061 self.config.date_range = Some((start, end));
1062 self
1063 }
1064
1065 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1071 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1072 self.config.enhanced.scheme_probability = probability;
1073 self
1074 }
1075
1076 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1078 self.config.enhanced.near_miss_enabled = enabled;
1079 self.config.enhanced.near_miss_proportion = proportion;
1080 self
1081 }
1082
1083 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1085 self.config.enhanced.approval_thresholds = thresholds;
1086 self
1087 }
1088
1089 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1091 self.config.enhanced.correlated_injection_enabled = enabled;
1092 self
1093 }
1094
1095 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1097 self.config.enhanced.temporal_clustering_enabled = enabled;
1098 self.config.enhanced.period_end_multiplier = multiplier;
1099 self
1100 }
1101
1102 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1104 self.config.enhanced.difficulty_classification_enabled = enabled;
1105 self
1106 }
1107
1108 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1110 self.config.enhanced.context_aware_enabled = enabled;
1111 self
1112 }
1113
1114 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1116 self.config.enhanced.behavioral_baseline_config = config;
1117 self
1118 }
1119
1120 pub fn with_all_enhanced_features(mut self) -> Self {
1122 self.config.enhanced.multi_stage_schemes_enabled = true;
1123 self.config.enhanced.scheme_probability = 0.02;
1124 self.config.enhanced.correlated_injection_enabled = true;
1125 self.config.enhanced.temporal_clustering_enabled = true;
1126 self.config.enhanced.period_end_multiplier = 2.5;
1127 self.config.enhanced.near_miss_enabled = true;
1128 self.config.enhanced.near_miss_proportion = 0.30;
1129 self.config.enhanced.difficulty_classification_enabled = true;
1130 self.config.enhanced.context_aware_enabled = true;
1131 self.config.enhanced.behavioral_baseline_config.enabled = true;
1132 self
1133 }
1134
1135 pub fn build(self) -> AnomalyInjectorConfig {
1137 self.config
1138 }
1139}
1140
1141impl Default for AnomalyInjectorConfigBuilder {
1142 fn default() -> Self {
1143 Self::new()
1144 }
1145}
1146
1147#[cfg(test)]
1148#[allow(clippy::unwrap_used)]
1149mod tests {
1150 use super::*;
1151 use chrono::NaiveDate;
1152 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1153 use rust_decimal_macros::dec;
1154
1155 fn create_test_entry(doc_num: &str) -> JournalEntry {
1156 let mut entry = JournalEntry::new_simple(
1157 doc_num.to_string(),
1158 "1000".to_string(),
1159 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1160 "Test Entry".to_string(),
1161 );
1162
1163 entry.add_line(JournalEntryLine {
1164 line_number: 1,
1165 gl_account: "5000".to_string(),
1166 debit_amount: dec!(1000),
1167 ..Default::default()
1168 });
1169
1170 entry.add_line(JournalEntryLine {
1171 line_number: 2,
1172 gl_account: "1000".to_string(),
1173 credit_amount: dec!(1000),
1174 ..Default::default()
1175 });
1176
1177 entry
1178 }
1179
/// With a 50% rate over 100 entries, at least one anomaly is injected and every injected
/// anomaly has a label.
#[test]
fn test_anomaly_injector_basic() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries = Vec::with_capacity(100);
    for i in 0..100 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let result = injector.process_entries(&mut entries);

    assert!(result.anomalies_injected > 0);
    assert!(!result.labels.is_empty());
    assert_eq!(result.labels.len(), result.anomalies_injected);
}
1200
/// Injecting a specific statistical anomaly yields a label that points at the entry.
#[test]
fn test_specific_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let requested = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let outcome = injector.inject_specific(&mut entry, requested);

    assert!(outcome.is_some());
    let label = outcome.unwrap();
    assert!(!label.document_id.is_empty());
    assert_eq!(label.document_id, entry.document_number());
}
1217
/// A self-approval label carries the self-approval fraud type and names the user.
#[test]
fn test_self_approval_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let outcome = injector.create_self_approval(&mut entry, "USER001");

    assert!(outcome.is_some());
    let label = outcome.unwrap();
    assert!(matches!(
        label.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    ));
    assert!(label.related_entities.contains(&"USER001".to_string()));
}
1234
/// Entries of company "1000" receive no anomalies when only company "2000" is targeted,
/// even at a 100% rate.
#[test]
fn test_company_filtering() {
    let only_company_2000 = vec!["2000".to_string()];
    let config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0)
        .with_target_companies(only_company_2000)
        .build();
    let mut injector = AnomalyInjector::new(config);

    // Both test entries belong to company "1000".
    let mut entries = vec![create_test_entry("JE001"), create_test_entry("JE002")];

    let result = injector.process_entries(&mut entries);

    assert_eq!(result.anomalies_injected, 0);
}
1254
1255 fn create_test_entry_with_context(
1261 doc_num: &str,
1262 vendor_ref: Option<&str>,
1263 employee_id: &str,
1264 gl_account: &str,
1265 ) -> JournalEntry {
1266 let mut entry = JournalEntry::new_simple(
1267 doc_num.to_string(),
1268 "1000".to_string(),
1269 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1270 "Test Entry".to_string(),
1271 );
1272
1273 entry.header.reference = vendor_ref.map(|v| v.to_string());
1274 entry.header.created_by = employee_id.to_string();
1275
1276 entry.add_line(JournalEntryLine {
1277 line_number: 1,
1278 gl_account: gl_account.to_string(),
1279 debit_amount: dec!(1000),
1280 ..Default::default()
1281 });
1282
1283 entry.add_line(JournalEntryLine {
1284 line_number: 2,
1285 gl_account: "1000".to_string(),
1286 credit_amount: dec!(1000),
1287 ..Default::default()
1288 });
1289
1290 entry
1291 }
1292
/// Context maps start out empty and, after `set_entity_contexts`, each
/// accessor exposes exactly the single entry that was supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing has been registered on a freshly built injector.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    // One entry per map, stored under the key it was inserted with.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1343
/// Without any entity contexts, a 0.5 total rate over 200 entries (seed 42)
/// must inject at least one anomaly and land strictly between 20% and 80%.
#[test]
fn test_default_behavior_no_contexts() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries = Vec::with_capacity(200);
    for idx in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", idx)));
    }

    let outcome = injector.process_entries(&mut entries);

    assert!(outcome.anomalies_injected > 0);

    // Observed injection rate; the bounds are deliberately loose around 50%.
    let injected = outcome.anomalies_injected as f64;
    let processed = outcome.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        rate > 0.2 && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1370
/// Runs the same 500 entries through two injectors built with an identical
/// rate and seed; the one that has entity contexts set for the referenced
/// vendor, employee and account must inject strictly more anomalies.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share configuration and input shape; only the contexts differ.
    let build_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };
    let build_entries = || {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect::<Vec<_>>()
    };

    // Baseline run: no entity contexts registered.
    let mut plain_injector = build_injector();
    let mut plain_entries = build_entries();
    let plain_result = plain_injector.process_entries(&mut plain_entries);

    // Second run: every entity referenced by the entries carries risk flags.
    let mut ctx_injector = build_injector();

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);

    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    ctx_injector.set_entity_contexts(vendors, employees, accounts);

    let mut ctx_entries = build_entries();
    let ctx_result = ctx_injector.process_entries(&mut ctx_entries);

    assert!(
        ctx_result.anomalies_injected > plain_result.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        ctx_result.anomalies_injected,
        plain_result.anomalies_injected,
    );
}
1461
/// Checks `calculate_context_rate_multiplier` for several vendor/account
/// combinations. The test expects 1.0 when no context matches, 2.0 for the
/// new vendor alone, and 4.0 for the new vendor plus the high-risk account.
#[test]
fn test_risk_score_multiplication() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any contexts are set the multiplier is neutral.
    let plain_entry = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let baseline = injector.calculate_context_rate_multiplier(&plain_entry);
    assert!((baseline - 1.0).abs() < f64::EPSILON);

    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Risky vendor and risky account on the same entry.
    let risky_entry = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky_entry);
    let combined_delta = (multiplier - 4.0).abs();
    assert!(
        combined_delta < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Risky vendor, but an account with no registered context.
    let vendor_only_entry =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only_entry);
    let vendor_delta = (multiplier_vendor - 2.0).abs();
    assert!(
        vendor_delta < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor reference nor the account has a registered context.
    let unmatched_entry =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched_entry);
    let unmatched_delta = (multiplier_none - 1.0).abs();
    assert!(
        unmatched_delta < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1526
/// An employee flagged as new, volume-fatigued and on overtime is expected to
/// yield a multiplier of `1.5 * 1.3 * 1.2` (within 0.01).
// NOTE(review): presumably one factor per flag — confirm against the implementation.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employees = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    let probe = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&probe);

    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1558
/// Vendor contexts supplied through `set_entity_contexts` must still be
/// present after `reset()` is called on the injector.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // The same count is expected on both sides of the reset.
    injector.reset();
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1581
/// Calling `set_entity_contexts` with empty maps must leave no vendor
/// contexts behind from an earlier call.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // A second call with empty maps is expected to replace the first.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1603
/// A vendor flagged only as a dormant reactivation is expected to produce a
/// context rate multiplier of 1.5 for an entry that references it.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    let probe = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&probe);
    let deviation = (multiplier - 1.5).abs();
    assert!(
        deviation < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1629}