1use chrono::NaiveDate;
15use rand::Rng;
16use rand::SeedableRng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20
21use datasynth_core::models::{
22 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
23 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
24 RelationalAnomalyType,
25};
26
27use super::context::{
28 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
29 EntityAwareInjector, VendorContext,
30};
31use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
32use super::difficulty::DifficultyCalculator;
33use super::near_miss::{NearMissConfig, NearMissGenerator};
34use super::patterns::{
35 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
36 TemporalPattern,
37};
38use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
39use super::schemes::{SchemeAction, SchemeContext};
40use super::strategies::{DuplicationStrategy, StrategyCollection};
41use super::types::AnomalyTypeSelector;
42
43#[derive(Debug, Clone)]
45pub struct AnomalyInjectorConfig {
46 pub rates: AnomalyRateConfig,
48 pub patterns: AnomalyPatternConfig,
50 pub seed: u64,
52 pub generate_labels: bool,
54 pub allow_duplicates: bool,
56 pub max_anomalies_per_document: usize,
58 pub target_companies: Vec<String>,
60 pub date_range: Option<(NaiveDate, NaiveDate)>,
62 pub enhanced: EnhancedInjectionConfig,
64}
65
66#[derive(Debug, Clone, Default)]
68pub struct EnhancedInjectionConfig {
69 pub multi_stage_schemes_enabled: bool,
71 pub scheme_probability: f64,
73 pub correlated_injection_enabled: bool,
75 pub temporal_clustering_enabled: bool,
77 pub period_end_multiplier: f64,
79 pub near_miss_enabled: bool,
81 pub near_miss_proportion: f64,
83 pub approval_thresholds: Vec<Decimal>,
85 pub difficulty_classification_enabled: bool,
87 pub context_aware_enabled: bool,
89 pub behavioral_baseline_config: BehavioralBaselineConfig,
91}
92
93impl Default for AnomalyInjectorConfig {
94 fn default() -> Self {
95 Self {
96 rates: AnomalyRateConfig::default(),
97 patterns: AnomalyPatternConfig::default(),
98 seed: 42,
99 generate_labels: true,
100 allow_duplicates: true,
101 max_anomalies_per_document: 2,
102 target_companies: Vec::new(),
103 date_range: None,
104 enhanced: EnhancedInjectionConfig::default(),
105 }
106 }
107}
108
109#[derive(Debug, Clone)]
111pub struct InjectionBatchResult {
112 pub entries_processed: usize,
114 pub anomalies_injected: usize,
116 pub duplicates_created: usize,
118 pub labels: Vec<LabeledAnomaly>,
120 pub summary: AnomalySummary,
122 pub modified_documents: Vec<String>,
124 pub near_miss_labels: Vec<NearMissLabel>,
126 pub scheme_actions: Vec<SchemeAction>,
128 pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
130}
131
132#[allow(dead_code)]
134pub struct AnomalyInjector {
135 config: AnomalyInjectorConfig,
136 rng: ChaCha8Rng,
137 type_selector: AnomalyTypeSelector,
138 strategies: StrategyCollection,
139 cluster_manager: ClusterManager,
140 entity_targeting: EntityTargetingManager,
141 document_anomaly_counts: HashMap<String, usize>,
143 labels: Vec<LabeledAnomaly>,
145 stats: InjectorStats,
147 scheme_advancer: Option<SchemeAdvancer>,
150 near_miss_generator: Option<NearMissGenerator>,
152 near_miss_labels: Vec<NearMissLabel>,
154 co_occurrence_handler: Option<AnomalyCoOccurrence>,
156 temporal_cluster_generator: Option<TemporalClusterGenerator>,
158 difficulty_calculator: Option<DifficultyCalculator>,
160 entity_aware_injector: Option<EntityAwareInjector>,
162 behavioral_baseline: Option<BehavioralBaseline>,
164 scheme_actions: Vec<SchemeAction>,
166 difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
168 vendor_contexts: HashMap<String, VendorContext>,
171 employee_contexts: HashMap<String, EmployeeContext>,
173 account_contexts: HashMap<String, AccountContext>,
175}
176
/// Counters describing what the injector has processed, injected and skipped.
///
/// The fields are public so that callers of `AnomalyInjector::get_stats` can
/// actually read the numbers; with private fields the returned reference could
/// only be `Debug`-printed.
#[derive(Debug, Clone, Default)]
// NOTE(review): retained from the original definition; with public fields this
// allowance is probably no longer required — confirm and remove.
#[allow(dead_code)]
pub struct InjectorStats {
    /// Entries passed through `process_entries`, including skipped ones.
    pub total_processed: usize,
    /// Labeled anomalies injected by `process_entries`.
    pub total_injected: usize,
    /// Applied injections keyed by anomaly category name.
    pub by_category: HashMap<String, usize>,
    /// Applied injections keyed by anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Applied injections keyed by company code.
    pub by_company: HashMap<String, usize>,
    /// NOTE(review): never incremented in the visible code; presumably meant
    /// for entries not selected by the random rate check.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date is outside `date_range`.
    pub skipped_date: usize,
    /// Entries skipped because their company is not in `target_companies`.
    pub skipped_company: usize,
    /// Entries skipped because the document reached `max_anomalies_per_document`.
    pub skipped_max_per_doc: usize,
}
191
192impl AnomalyInjector {
193 pub fn new(config: AnomalyInjectorConfig) -> Self {
195 let mut rng = ChaCha8Rng::seed_from_u64(config.seed);
196 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
197 let entity_targeting =
198 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
199
200 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
202 let scheme_config = SchemeAdvancerConfig {
203 embezzlement_probability: config.enhanced.scheme_probability,
204 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
205 kickback_probability: config.enhanced.scheme_probability * 0.5,
206 seed: rng.gen(),
207 ..Default::default()
208 };
209 Some(SchemeAdvancer::new(scheme_config))
210 } else {
211 None
212 };
213
214 let near_miss_generator = if config.enhanced.near_miss_enabled {
215 let near_miss_config = NearMissConfig {
216 proportion: config.enhanced.near_miss_proportion,
217 seed: rng.gen(),
218 ..Default::default()
219 };
220 Some(NearMissGenerator::new(near_miss_config))
221 } else {
222 None
223 };
224
225 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
226 Some(AnomalyCoOccurrence::new())
227 } else {
228 None
229 };
230
231 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
232 Some(TemporalClusterGenerator::new())
233 } else {
234 None
235 };
236
237 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
238 Some(DifficultyCalculator::new())
239 } else {
240 None
241 };
242
243 let entity_aware_injector = if config.enhanced.context_aware_enabled {
244 Some(EntityAwareInjector::default())
245 } else {
246 None
247 };
248
249 let behavioral_baseline = if config.enhanced.context_aware_enabled
250 && config.enhanced.behavioral_baseline_config.enabled
251 {
252 Some(BehavioralBaseline::new(
253 config.enhanced.behavioral_baseline_config.clone(),
254 ))
255 } else {
256 None
257 };
258
259 Self {
260 config,
261 rng,
262 type_selector: AnomalyTypeSelector::new(),
263 strategies: StrategyCollection::default(),
264 cluster_manager,
265 entity_targeting,
266 document_anomaly_counts: HashMap::new(),
267 labels: Vec::new(),
268 stats: InjectorStats::default(),
269 scheme_advancer,
270 near_miss_generator,
271 near_miss_labels: Vec::new(),
272 co_occurrence_handler,
273 temporal_cluster_generator,
274 difficulty_calculator,
275 entity_aware_injector,
276 behavioral_baseline,
277 scheme_actions: Vec::new(),
278 difficulty_distribution: HashMap::new(),
279 vendor_contexts: HashMap::new(),
280 employee_contexts: HashMap::new(),
281 account_contexts: HashMap::new(),
282 }
283 }
284
285 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
287 let mut modified_documents = Vec::new();
288 let mut duplicates = Vec::new();
289
290 for entry in entries.iter_mut() {
291 self.stats.total_processed += 1;
292
293 if let Some(ref mut baseline) = self.behavioral_baseline {
295 use super::context::Observation;
296 let entity_id = entry.header.created_by.clone();
298 let observation =
299 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
300 baseline.record_observation(&entity_id, observation);
301 }
302
303 if !self.should_process(entry) {
305 continue;
306 }
307
308 let base_rate = self.config.rates.total_rate;
310
311 let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
313 let employee_id = &entry.header.created_by;
314 let first_account = entry
315 .lines
316 .first()
317 .map(|l| l.gl_account.as_str())
318 .unwrap_or("");
319 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
321
322 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
323 let employee_ctx = self.employee_contexts.get(employee_id);
324 let account_ctx = self.account_contexts.get(first_account);
325
326 let multiplier =
327 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
328 (base_rate * multiplier).min(1.0)
329 } else {
330 self.calculate_context_rate_multiplier(entry) * base_rate
332 };
333
334 if should_inject_anomaly(
336 effective_rate,
337 entry.posting_date(),
338 &self.config.patterns.temporal_pattern,
339 &mut self.rng,
340 ) {
341 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
343 let account = entry
345 .lines
346 .first()
347 .map(|l| l.gl_account.clone())
348 .unwrap_or_default();
349 near_miss_gen.record_transaction(
350 entry.document_number().clone(),
351 entry.posting_date(),
352 entry.total_debit(),
353 &account,
354 None,
355 );
356
357 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
359 entry.document_number().clone(),
360 entry.posting_date(),
361 entry.total_debit(),
362 &account,
363 None,
364 &self.config.enhanced.approval_thresholds,
365 ) {
366 self.near_miss_labels.push(near_miss_label);
367 continue; }
369 }
370
371 let anomaly_type = self.select_anomaly_category();
373
374 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
376 if let Some(ref calculator) = self.difficulty_calculator {
378 let difficulty = calculator.calculate(&label);
379
380 label = label
382 .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
383 label = label.with_metadata(
384 "difficulty_score",
385 &difficulty.difficulty_score().to_string(),
386 );
387
388 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
390 }
391
392 modified_documents.push(entry.document_number().clone());
393 self.labels.push(label);
394 self.stats.total_injected += 1;
395 }
396
397 if self.config.allow_duplicates
399 && matches!(
400 self.labels.last().map(|l| &l.anomaly_type),
401 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
402 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
403 )
404 {
405 let dup_strategy = DuplicationStrategy::default();
406 let duplicate = dup_strategy.duplicate(entry, &mut self.rng);
407 duplicates.push(duplicate);
408 }
409 }
410 }
411
412 let duplicates_created = duplicates.len();
414
415 let summary = AnomalySummary::from_anomalies(&self.labels);
417
418 InjectionBatchResult {
419 entries_processed: self.stats.total_processed,
420 anomalies_injected: self.stats.total_injected,
421 duplicates_created,
422 labels: self.labels.clone(),
423 summary,
424 modified_documents,
425 near_miss_labels: self.near_miss_labels.clone(),
426 scheme_actions: self.scheme_actions.clone(),
427 difficulty_distribution: self.difficulty_distribution.clone(),
428 }
429 }
430
431 fn should_process(&mut self, entry: &JournalEntry) -> bool {
433 if !self.config.target_companies.is_empty()
435 && !self
436 .config
437 .target_companies
438 .iter()
439 .any(|c| c == entry.company_code())
440 {
441 self.stats.skipped_company += 1;
442 return false;
443 }
444
445 if let Some((start, end)) = self.config.date_range {
447 if entry.posting_date() < start || entry.posting_date() > end {
448 self.stats.skipped_date += 1;
449 return false;
450 }
451 }
452
453 let current_count = self
455 .document_anomaly_counts
456 .get(&entry.document_number())
457 .copied()
458 .unwrap_or(0);
459 if current_count >= self.config.max_anomalies_per_document {
460 self.stats.skipped_max_per_doc += 1;
461 return false;
462 }
463
464 true
465 }
466
467 fn select_anomaly_category(&mut self) -> AnomalyType {
469 let r = self.rng.gen::<f64>();
470 let rates = &self.config.rates;
471
472 let mut cumulative = 0.0;
473
474 cumulative += rates.fraud_rate;
475 if r < cumulative {
476 return self.type_selector.select_fraud(&mut self.rng);
477 }
478
479 cumulative += rates.error_rate;
480 if r < cumulative {
481 return self.type_selector.select_error(&mut self.rng);
482 }
483
484 cumulative += rates.process_issue_rate;
485 if r < cumulative {
486 return self.type_selector.select_process_issue(&mut self.rng);
487 }
488
489 cumulative += rates.statistical_rate;
490 if r < cumulative {
491 return self.type_selector.select_statistical(&mut self.rng);
492 }
493
494 self.type_selector.select_relational(&mut self.rng)
495 }
496
497 fn inject_anomaly(
499 &mut self,
500 entry: &mut JournalEntry,
501 anomaly_type: AnomalyType,
502 ) -> Option<LabeledAnomaly> {
503 if !self.strategies.can_apply(entry, &anomaly_type) {
505 return None;
506 }
507
508 let result = self
510 .strategies
511 .apply_strategy(entry, &anomaly_type, &mut self.rng);
512
513 if !result.success {
514 return None;
515 }
516
517 *self
519 .document_anomaly_counts
520 .entry(entry.document_number().clone())
521 .or_insert(0) += 1;
522
523 let category = anomaly_type.category().to_string();
525 let type_name = anomaly_type.type_name();
526
527 *self.stats.by_category.entry(category).or_insert(0) += 1;
528 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
529 *self
530 .stats
531 .by_company
532 .entry(entry.company_code().to_string())
533 .or_insert(0) += 1;
534
535 if self.config.generate_labels {
537 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
538
539 entry.header.is_anomaly = true;
541 entry.header.anomaly_id = Some(anomaly_id.clone());
542 entry.header.anomaly_type = Some(type_name.clone());
543
544 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
546 entry.header.is_fraud = true;
547 if let AnomalyType::Fraud(ref ft) = anomaly_type {
548 entry.header.fraud_type = Some(*ft);
549 }
550 }
551
552 let mut label = LabeledAnomaly::new(
553 anomaly_id,
554 anomaly_type.clone(),
555 entry.document_number().clone(),
556 "JE".to_string(),
557 entry.company_code().to_string(),
558 entry.posting_date(),
559 )
560 .with_description(&result.description)
561 .with_injection_strategy(&type_name);
562
563 let causal_reason = AnomalyCausalReason::RandomRate {
565 base_rate: self.config.rates.total_rate,
566 };
567 label = label.with_causal_reason(causal_reason);
568
569 let context_multiplier = self.calculate_context_rate_multiplier(entry);
571 if (context_multiplier - 1.0).abs() > f64::EPSILON {
572 label = label.with_metadata(
573 "entity_context_multiplier",
574 &format!("{:.3}", context_multiplier),
575 );
576 label = label.with_metadata(
577 "effective_rate",
578 &format!(
579 "{:.6}",
580 (self.config.rates.total_rate * context_multiplier).min(1.0)
581 ),
582 );
583 }
584
585 if let Some(impact) = result.monetary_impact {
587 label = label.with_monetary_impact(impact);
588 }
589
590 for entity in &result.related_entities {
592 label = label.with_related_entity(entity);
593 }
594
595 for (key, value) in &result.metadata {
597 label = label.with_metadata(key, value);
598 }
599
600 if let Some(cluster_id) =
602 self.cluster_manager
603 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
604 {
605 label = label.with_cluster(&cluster_id);
606 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
608 cluster_id: cluster_id.clone(),
609 });
610 }
611
612 return Some(label);
613 }
614
615 None
616 }
617
618 pub fn inject_specific(
620 &mut self,
621 entry: &mut JournalEntry,
622 anomaly_type: AnomalyType,
623 ) -> Option<LabeledAnomaly> {
624 self.inject_anomaly(entry, anomaly_type)
625 }
626
627 pub fn create_self_approval(
629 &mut self,
630 entry: &mut JournalEntry,
631 user_id: &str,
632 ) -> Option<LabeledAnomaly> {
633 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
634
635 let label = LabeledAnomaly::new(
636 format!("ANO{:08}", self.labels.len() + 1),
637 anomaly_type,
638 entry.document_number().clone(),
639 "JE".to_string(),
640 entry.company_code().to_string(),
641 entry.posting_date(),
642 )
643 .with_description(&format!("User {} approved their own transaction", user_id))
644 .with_related_entity(user_id)
645 .with_injection_strategy("ManualSelfApproval")
646 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
647 target_type: "User".to_string(),
648 target_id: user_id.to_string(),
649 });
650
651 entry.header.is_anomaly = true;
653 entry.header.is_fraud = true;
654 entry.header.anomaly_id = Some(label.anomaly_id.clone());
655 entry.header.anomaly_type = Some("SelfApproval".to_string());
656 entry.header.fraud_type = Some(FraudType::SelfApproval);
657
658 entry.header.created_by = user_id.to_string();
660
661 self.labels.push(label.clone());
662 Some(label)
663 }
664
665 pub fn create_sod_violation(
667 &mut self,
668 entry: &mut JournalEntry,
669 user_id: &str,
670 conflicting_duties: (&str, &str),
671 ) -> Option<LabeledAnomaly> {
672 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
673
674 let label = LabeledAnomaly::new(
675 format!("ANO{:08}", self.labels.len() + 1),
676 anomaly_type,
677 entry.document_number().clone(),
678 "JE".to_string(),
679 entry.company_code().to_string(),
680 entry.posting_date(),
681 )
682 .with_description(&format!(
683 "User {} performed conflicting duties: {} and {}",
684 user_id, conflicting_duties.0, conflicting_duties.1
685 ))
686 .with_related_entity(user_id)
687 .with_metadata("duty1", conflicting_duties.0)
688 .with_metadata("duty2", conflicting_duties.1)
689 .with_injection_strategy("ManualSoDViolation")
690 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
691 target_type: "User".to_string(),
692 target_id: user_id.to_string(),
693 });
694
695 entry.header.is_anomaly = true;
697 entry.header.is_fraud = true;
698 entry.header.anomaly_id = Some(label.anomaly_id.clone());
699 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
700 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
701
702 self.labels.push(label.clone());
703 Some(label)
704 }
705
706 pub fn create_ic_mismatch(
708 &mut self,
709 entry: &mut JournalEntry,
710 matching_company: &str,
711 expected_amount: Decimal,
712 actual_amount: Decimal,
713 ) -> Option<LabeledAnomaly> {
714 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
715
716 let label = LabeledAnomaly::new(
717 format!("ANO{:08}", self.labels.len() + 1),
718 anomaly_type,
719 entry.document_number().clone(),
720 "JE".to_string(),
721 entry.company_code().to_string(),
722 entry.posting_date(),
723 )
724 .with_description(&format!(
725 "Intercompany mismatch with {}: expected {} but got {}",
726 matching_company, expected_amount, actual_amount
727 ))
728 .with_related_entity(matching_company)
729 .with_monetary_impact(actual_amount - expected_amount)
730 .with_metadata("expected_amount", &expected_amount.to_string())
731 .with_metadata("actual_amount", &actual_amount.to_string())
732 .with_injection_strategy("ManualICMismatch")
733 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
734 target_type: "Intercompany".to_string(),
735 target_id: matching_company.to_string(),
736 });
737
738 entry.header.is_anomaly = true;
740 entry.header.anomaly_id = Some(label.anomaly_id.clone());
741 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
742
743 self.labels.push(label.clone());
744 Some(label)
745 }
746
747 pub fn get_labels(&self) -> &[LabeledAnomaly] {
749 &self.labels
750 }
751
752 pub fn get_summary(&self) -> AnomalySummary {
754 AnomalySummary::from_anomalies(&self.labels)
755 }
756
757 pub fn get_stats(&self) -> &InjectorStats {
759 &self.stats
760 }
761
762 pub fn reset(&mut self) {
764 self.labels.clear();
765 self.document_anomaly_counts.clear();
766 self.stats = InjectorStats::default();
767 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
768
769 self.near_miss_labels.clear();
771 self.scheme_actions.clear();
772 self.difficulty_distribution.clear();
773
774 if let Some(ref mut baseline) = self.behavioral_baseline {
775 *baseline =
776 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
777 }
778 }
779
780 pub fn cluster_count(&self) -> usize {
782 self.cluster_manager.cluster_count()
783 }
784
785 pub fn set_entity_contexts(
798 &mut self,
799 vendors: HashMap<String, VendorContext>,
800 employees: HashMap<String, EmployeeContext>,
801 accounts: HashMap<String, AccountContext>,
802 ) {
803 self.vendor_contexts = vendors;
804 self.employee_contexts = employees;
805 self.account_contexts = accounts;
806 }
807
808 pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
810 &self.vendor_contexts
811 }
812
813 pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
815 &self.employee_contexts
816 }
817
818 pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
820 &self.account_contexts
821 }
822
823 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
832 if self.vendor_contexts.is_empty()
833 && self.employee_contexts.is_empty()
834 && self.account_contexts.is_empty()
835 {
836 return 1.0;
837 }
838
839 let mut multiplier = 1.0;
840
841 if let Some(ref vendor_ref) = entry.header.reference {
843 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
844 if ctx.is_new {
846 multiplier *= 2.0;
847 }
848 if ctx.is_dormant_reactivation {
849 multiplier *= 1.5;
850 }
851 }
852 }
853
854 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
856 if ctx.is_new {
857 multiplier *= 1.5;
858 }
859 if ctx.is_volume_fatigued {
860 multiplier *= 1.3;
861 }
862 if ctx.is_overtime {
863 multiplier *= 1.2;
864 }
865 }
866
867 if let Some(first_line) = entry.lines.first() {
869 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
870 if ctx.is_high_risk {
871 multiplier *= 2.0;
872 }
873 }
874 }
875
876 multiplier
877 }
878
879 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
888 if let Some(ref mut advancer) = self.scheme_advancer {
889 let context = SchemeContext::new(date, company_code);
890 let actions = advancer.advance_all(&context);
891 self.scheme_actions.extend(actions.clone());
892 actions
893 } else {
894 Vec::new()
895 }
896 }
897
898 pub fn maybe_start_scheme(
904 &mut self,
905 date: NaiveDate,
906 company_code: &str,
907 available_users: Vec<String>,
908 available_accounts: Vec<String>,
909 available_counterparties: Vec<String>,
910 ) -> Option<uuid::Uuid> {
911 if let Some(ref mut advancer) = self.scheme_advancer {
912 let mut context = SchemeContext::new(date, company_code);
913 context.available_users = available_users;
914 context.available_accounts = available_accounts;
915 context.available_counterparties = available_counterparties;
916
917 advancer.maybe_start_scheme(&context)
918 } else {
919 None
920 }
921 }
922
923 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
925 &self.near_miss_labels
926 }
927
928 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
930 &self.scheme_actions
931 }
932
933 pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
935 &self.difficulty_distribution
936 }
937
938 pub fn check_behavioral_deviations(
940 &self,
941 entity_id: &str,
942 observation: &super::context::Observation,
943 ) -> Vec<super::context::BehavioralDeviation> {
944 if let Some(ref baseline) = self.behavioral_baseline {
945 baseline.check_deviation(entity_id, observation)
946 } else {
947 Vec::new()
948 }
949 }
950
951 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
953 if let Some(ref baseline) = self.behavioral_baseline {
954 baseline.get_baseline(entity_id)
955 } else {
956 None
957 }
958 }
959
960 pub fn active_scheme_count(&self) -> usize {
962 if let Some(ref advancer) = self.scheme_advancer {
963 advancer.active_scheme_count()
964 } else {
965 0
966 }
967 }
968
969 pub fn has_enhanced_features(&self) -> bool {
971 self.scheme_advancer.is_some()
972 || self.near_miss_generator.is_some()
973 || self.difficulty_calculator.is_some()
974 || self.entity_aware_injector.is_some()
975 }
976}
977
978pub struct AnomalyInjectorConfigBuilder {
980 config: AnomalyInjectorConfig,
981}
982
983impl AnomalyInjectorConfigBuilder {
984 pub fn new() -> Self {
986 Self {
987 config: AnomalyInjectorConfig::default(),
988 }
989 }
990
991 pub fn with_total_rate(mut self, rate: f64) -> Self {
993 self.config.rates.total_rate = rate;
994 self
995 }
996
997 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
999 self.config.rates.fraud_rate = rate;
1000 self
1001 }
1002
1003 pub fn with_error_rate(mut self, rate: f64) -> Self {
1005 self.config.rates.error_rate = rate;
1006 self
1007 }
1008
1009 pub fn with_seed(mut self, seed: u64) -> Self {
1011 self.config.seed = seed;
1012 self
1013 }
1014
1015 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1017 self.config.patterns.temporal_pattern = pattern;
1018 self
1019 }
1020
1021 pub fn with_labels(mut self, generate: bool) -> Self {
1023 self.config.generate_labels = generate;
1024 self
1025 }
1026
1027 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1029 self.config.target_companies = companies;
1030 self
1031 }
1032
1033 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1035 self.config.date_range = Some((start, end));
1036 self
1037 }
1038
1039 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1045 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1046 self.config.enhanced.scheme_probability = probability;
1047 self
1048 }
1049
1050 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1052 self.config.enhanced.near_miss_enabled = enabled;
1053 self.config.enhanced.near_miss_proportion = proportion;
1054 self
1055 }
1056
1057 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1059 self.config.enhanced.approval_thresholds = thresholds;
1060 self
1061 }
1062
1063 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1065 self.config.enhanced.correlated_injection_enabled = enabled;
1066 self
1067 }
1068
1069 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1071 self.config.enhanced.temporal_clustering_enabled = enabled;
1072 self.config.enhanced.period_end_multiplier = multiplier;
1073 self
1074 }
1075
1076 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1078 self.config.enhanced.difficulty_classification_enabled = enabled;
1079 self
1080 }
1081
1082 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1084 self.config.enhanced.context_aware_enabled = enabled;
1085 self
1086 }
1087
1088 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1090 self.config.enhanced.behavioral_baseline_config = config;
1091 self
1092 }
1093
1094 pub fn with_all_enhanced_features(mut self) -> Self {
1096 self.config.enhanced.multi_stage_schemes_enabled = true;
1097 self.config.enhanced.scheme_probability = 0.02;
1098 self.config.enhanced.correlated_injection_enabled = true;
1099 self.config.enhanced.temporal_clustering_enabled = true;
1100 self.config.enhanced.period_end_multiplier = 2.5;
1101 self.config.enhanced.near_miss_enabled = true;
1102 self.config.enhanced.near_miss_proportion = 0.30;
1103 self.config.enhanced.difficulty_classification_enabled = true;
1104 self.config.enhanced.context_aware_enabled = true;
1105 self.config.enhanced.behavioral_baseline_config.enabled = true;
1106 self
1107 }
1108
1109 pub fn build(self) -> AnomalyInjectorConfig {
1111 self.config
1112 }
1113}
1114
1115impl Default for AnomalyInjectorConfigBuilder {
1116 fn default() -> Self {
1117 Self::new()
1118 }
1119}
1120
1121#[cfg(test)]
1122#[allow(clippy::unwrap_used)]
1123mod tests {
1124 use super::*;
1125 use chrono::NaiveDate;
1126 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1127 use rust_decimal_macros::dec;
1128
1129 fn create_test_entry(doc_num: &str) -> JournalEntry {
1130 let mut entry = JournalEntry::new_simple(
1131 doc_num.to_string(),
1132 "1000".to_string(),
1133 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1134 "Test Entry".to_string(),
1135 );
1136
1137 entry.add_line(JournalEntryLine {
1138 line_number: 1,
1139 gl_account: "5000".to_string(),
1140 debit_amount: dec!(1000),
1141 ..Default::default()
1142 });
1143
1144 entry.add_line(JournalEntryLine {
1145 line_number: 2,
1146 gl_account: "1000".to_string(),
1147 credit_amount: dec!(1000),
1148 ..Default::default()
1149 });
1150
1151 entry
1152 }
1153
1154 #[test]
1155 fn test_anomaly_injector_basic() {
1156 let config = AnomalyInjectorConfigBuilder::new()
1157 .with_total_rate(0.5) .with_seed(42)
1159 .build();
1160
1161 let mut injector = AnomalyInjector::new(config);
1162
1163 let mut entries: Vec<_> = (0..100)
1164 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1165 .collect();
1166
1167 let result = injector.process_entries(&mut entries);
1168
1169 assert!(result.anomalies_injected > 0);
1171 assert!(!result.labels.is_empty());
1172 assert_eq!(result.labels.len(), result.anomalies_injected);
1173 }
1174
1175 #[test]
1176 fn test_specific_injection() {
1177 let config = AnomalyInjectorConfig::default();
1178 let mut injector = AnomalyInjector::new(config);
1179
1180 let mut entry = create_test_entry("JE001");
1181 let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1182
1183 let label = injector.inject_specific(&mut entry, anomaly_type);
1184
1185 assert!(label.is_some());
1186 let label = label.unwrap();
1187 assert!(!label.document_id.is_empty());
1189 assert_eq!(label.document_id, entry.document_number());
1190 }
1191
1192 #[test]
1193 fn test_self_approval_injection() {
1194 let config = AnomalyInjectorConfig::default();
1195 let mut injector = AnomalyInjector::new(config);
1196
1197 let mut entry = create_test_entry("JE001");
1198 let label = injector.create_self_approval(&mut entry, "USER001");
1199
1200 assert!(label.is_some());
1201 let label = label.unwrap();
1202 assert!(matches!(
1203 label.anomaly_type,
1204 AnomalyType::Fraud(FraudType::SelfApproval)
1205 ));
1206 assert!(label.related_entities.contains(&"USER001".to_string()));
1207 }
1208
1209 #[test]
1210 fn test_company_filtering() {
1211 let config = AnomalyInjectorConfigBuilder::new()
1212 .with_total_rate(1.0) .with_target_companies(vec!["2000".to_string()])
1214 .build();
1215
1216 let mut injector = AnomalyInjector::new(config);
1217
1218 let mut entries = vec![
1219 create_test_entry("JE001"), create_test_entry("JE002"), ];
1222
1223 let result = injector.process_entries(&mut entries);
1224
1225 assert_eq!(result.anomalies_injected, 0);
1227 }
1228
1229 fn create_test_entry_with_context(
1235 doc_num: &str,
1236 vendor_ref: Option<&str>,
1237 employee_id: &str,
1238 gl_account: &str,
1239 ) -> JournalEntry {
1240 let mut entry = JournalEntry::new_simple(
1241 doc_num.to_string(),
1242 "1000".to_string(),
1243 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1244 "Test Entry".to_string(),
1245 );
1246
1247 entry.header.reference = vendor_ref.map(|v| v.to_string());
1248 entry.header.created_by = employee_id.to_string();
1249
1250 entry.add_line(JournalEntryLine {
1251 line_number: 1,
1252 gl_account: gl_account.to_string(),
1253 debit_amount: dec!(1000),
1254 ..Default::default()
1255 });
1256
1257 entry.add_line(JournalEntryLine {
1258 line_number: 2,
1259 gl_account: "1000".to_string(),
1260 credit_amount: dec!(1000),
1261 ..Default::default()
1262 });
1263
1264 entry
1265 }
1266
/// A fresh injector exposes empty vendor/employee/account contexts; after
/// `set_entity_contexts` each accessor holds exactly the single entry supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing is registered on a newly constructed injector.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employee_map = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let account_map = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, employee_map, account_map);

    // Each map now contains one entry, reachable under the key it was inserted with.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1317
/// Without any entity contexts, a 0.5 total rate over 200 entries (seed 42) must
/// inject at least one anomaly and land strictly between 20% and 80% overall.
#[test]
fn test_default_behavior_no_contexts() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries = Vec::with_capacity(200);
    for idx in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", idx)));
    }

    let result = injector.process_entries(&mut entries);
    assert!(result.anomalies_injected > 0);

    // Wide tolerance band around the configured rate; both bounds are exclusive.
    let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
    let within_band = 0.2 < rate && rate < 0.8;
    assert!(
        within_band,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1344
/// Runs the same 500 entries through two identically configured injectors
/// (rate 0.10, seed 123) and expects the one that has flagged vendor, employee and
/// account contexts registered to inject strictly more anomalies.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Both runs share configuration and input shape, so the registered contexts are
    // the only difference between them.
    let build_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };
    let build_entries = || -> Vec<_> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no contexts registered.
    let mut plain_injector = build_injector();
    let mut plain_entries = build_entries();
    let plain_count = plain_injector
        .process_entries(&mut plain_entries)
        .anomalies_injected;

    // Run 2: every entity referenced by the entries carries at least one flag.
    let mut ctx_injector = build_injector();

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    let employee_map = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let account_map = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    ctx_injector.set_entity_contexts(vendor_map, employee_map, account_map);

    let mut ctx_entries = build_entries();
    let ctx_count = ctx_injector
        .process_entries(&mut ctx_entries)
        .anomalies_injected;

    assert!(
        ctx_count > plain_count,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        ctx_count,
        plain_count,
    );
}
1435
/// Pins the values returned by `calculate_context_rate_multiplier`:
///
/// * no contexts registered → 1.0
/// * new vendor and high-risk account on the same entry → 4.0
/// * new vendor only → 2.0
/// * vendor reference and account that are not registered → 1.0
#[test]
fn test_risk_score_multiplication() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any context is registered the multiplier must be neutral.
    let plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let neutral_gap = (injector.calculate_context_rate_multiplier(&plain) - 1.0).abs();
    assert!(neutral_gap < f64::EPSILON);

    let vendor_map = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let account_map = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendor_map, HashMap::new(), account_map);

    // Flagged vendor and flagged account together.
    let risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky);
    let combined_gap = (multiplier - 4.0).abs();
    assert!(
        combined_gap < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Flagged vendor with an account that has no registered context.
    let vendor_only = create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only);
    let vendor_gap = (multiplier_vendor - 2.0).abs();
    assert!(
        vendor_gap < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor reference nor the account matches a registered context.
    let unmatched = create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched);
    let unmatched_gap = (multiplier_none - 1.0).abs();
    assert!(
        unmatched_gap < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1500
/// An employee flagged as new, volume-fatigued and on overtime must yield a
/// multiplier of `1.5 * 1.3 * 1.2` (within 0.01).
///
/// NOTE(review): which factor belongs to which flag is not visible here — confirm
/// against `calculate_context_rate_multiplier`.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let employee_map = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(HashMap::new(), employee_map, HashMap::new());

    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    // Product of the three per-flag factors the test expects.
    let expected = 1.5 * 1.3 * 1.2;
    let gap = (multiplier - expected).abs();
    assert!(
        gap < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1532
/// Vendor contexts registered through `set_entity_contexts` must still be present
/// after `reset()` is called on the injector.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // The single vendor context must survive the reset.
    injector.reset();
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1555
/// Calling `set_entity_contexts` with empty maps replaces previously registered
/// vendor contexts, leaving the vendor map empty.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // A second call with nothing but empty maps must drop the earlier vendor.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1577
/// A vendor flagged only with `is_dormant_reactivation` must produce a context rate
/// multiplier of exactly 1.5 for an entry that references it.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());

    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let gap = (multiplier - 1.5).abs();
    assert!(
        gap < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1603}