1use chrono::NaiveDate;
15use rand::Rng;
16use rand::SeedableRng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20
21use datasynth_core::models::{
22 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
23 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
24 RelationalAnomalyType,
25};
26
27use super::context::{
28 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
29 EntityAwareInjector, VendorContext,
30};
31use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
32use super::difficulty::DifficultyCalculator;
33use super::near_miss::{NearMissConfig, NearMissGenerator};
34use super::patterns::{
35 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
36 TemporalPattern,
37};
38use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
39use super::schemes::{SchemeAction, SchemeContext};
40use super::strategies::{DuplicationStrategy, StrategyCollection};
41use super::types::AnomalyTypeSelector;
42
43#[derive(Debug, Clone)]
45pub struct AnomalyInjectorConfig {
46 pub rates: AnomalyRateConfig,
48 pub patterns: AnomalyPatternConfig,
50 pub seed: u64,
52 pub generate_labels: bool,
54 pub allow_duplicates: bool,
56 pub max_anomalies_per_document: usize,
58 pub target_companies: Vec<String>,
60 pub date_range: Option<(NaiveDate, NaiveDate)>,
62 pub enhanced: EnhancedInjectionConfig,
64}
65
66#[derive(Debug, Clone, Default)]
68pub struct EnhancedInjectionConfig {
69 pub multi_stage_schemes_enabled: bool,
71 pub scheme_probability: f64,
73 pub correlated_injection_enabled: bool,
75 pub temporal_clustering_enabled: bool,
77 pub period_end_multiplier: f64,
79 pub near_miss_enabled: bool,
81 pub near_miss_proportion: f64,
83 pub approval_thresholds: Vec<Decimal>,
85 pub difficulty_classification_enabled: bool,
87 pub context_aware_enabled: bool,
89 pub behavioral_baseline_config: BehavioralBaselineConfig,
91}
92
93impl Default for AnomalyInjectorConfig {
94 fn default() -> Self {
95 Self {
96 rates: AnomalyRateConfig::default(),
97 patterns: AnomalyPatternConfig::default(),
98 seed: 42,
99 generate_labels: true,
100 allow_duplicates: true,
101 max_anomalies_per_document: 2,
102 target_companies: Vec::new(),
103 date_range: None,
104 enhanced: EnhancedInjectionConfig::default(),
105 }
106 }
107}
108
109#[derive(Debug, Clone)]
111pub struct InjectionBatchResult {
112 pub entries_processed: usize,
114 pub anomalies_injected: usize,
116 pub duplicates_created: usize,
118 pub labels: Vec<LabeledAnomaly>,
120 pub summary: AnomalySummary,
122 pub modified_documents: Vec<String>,
124 pub near_miss_labels: Vec<NearMissLabel>,
126 pub scheme_actions: Vec<SchemeAction>,
128 pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
130}
131
132#[allow(dead_code)]
134pub struct AnomalyInjector {
135 config: AnomalyInjectorConfig,
136 rng: ChaCha8Rng,
137 type_selector: AnomalyTypeSelector,
138 strategies: StrategyCollection,
139 cluster_manager: ClusterManager,
140 entity_targeting: EntityTargetingManager,
141 document_anomaly_counts: HashMap<String, usize>,
143 labels: Vec<LabeledAnomaly>,
145 stats: InjectorStats,
147 scheme_advancer: Option<SchemeAdvancer>,
150 near_miss_generator: Option<NearMissGenerator>,
152 near_miss_labels: Vec<NearMissLabel>,
154 co_occurrence_handler: Option<AnomalyCoOccurrence>,
156 temporal_cluster_generator: Option<TemporalClusterGenerator>,
158 difficulty_calculator: Option<DifficultyCalculator>,
160 entity_aware_injector: Option<EntityAwareInjector>,
162 behavioral_baseline: Option<BehavioralBaseline>,
164 scheme_actions: Vec<SchemeAction>,
166 difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
168 vendor_contexts: HashMap<String, VendorContext>,
171 employee_contexts: HashMap<String, EmployeeContext>,
173 account_contexts: HashMap<String, AccountContext>,
175}
176
/// Running counters kept by [`AnomalyInjector`].
// Not every counter is read (or written) in this file, hence `dead_code`.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct InjectorStats {
    /// Entries handed to `process_entries`.
    total_processed: usize,
    /// Labelled anomalies injected by `process_entries`.
    total_injected: usize,
    /// Successful strategy applications per anomaly category.
    by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    by_company: HashMap<String, usize>,
    /// Entries skipped by the rate check.
    // NOTE(review): never incremented in this file — confirm whether it is still needed.
    skipped_rate: usize,
    /// Entries skipped because they fall outside the configured date range.
    skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    skipped_company: usize,
    /// Entries skipped because the per-document anomaly cap was reached.
    skipped_max_per_doc: usize,
}
191
192impl AnomalyInjector {
193 pub fn new(config: AnomalyInjectorConfig) -> Self {
195 let mut rng = ChaCha8Rng::seed_from_u64(config.seed);
196 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
197 let entity_targeting =
198 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
199
200 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
202 let scheme_config = SchemeAdvancerConfig {
203 embezzlement_probability: config.enhanced.scheme_probability,
204 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
205 kickback_probability: config.enhanced.scheme_probability * 0.5,
206 seed: rng.gen(),
207 ..Default::default()
208 };
209 Some(SchemeAdvancer::new(scheme_config))
210 } else {
211 None
212 };
213
214 let near_miss_generator = if config.enhanced.near_miss_enabled {
215 let near_miss_config = NearMissConfig {
216 proportion: config.enhanced.near_miss_proportion,
217 seed: rng.gen(),
218 ..Default::default()
219 };
220 Some(NearMissGenerator::new(near_miss_config))
221 } else {
222 None
223 };
224
225 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
226 Some(AnomalyCoOccurrence::new())
227 } else {
228 None
229 };
230
231 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
232 Some(TemporalClusterGenerator::new())
233 } else {
234 None
235 };
236
237 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
238 Some(DifficultyCalculator::new())
239 } else {
240 None
241 };
242
243 let entity_aware_injector = if config.enhanced.context_aware_enabled {
244 Some(EntityAwareInjector::default())
245 } else {
246 None
247 };
248
249 let behavioral_baseline = if config.enhanced.context_aware_enabled
250 && config.enhanced.behavioral_baseline_config.enabled
251 {
252 Some(BehavioralBaseline::new(
253 config.enhanced.behavioral_baseline_config.clone(),
254 ))
255 } else {
256 None
257 };
258
259 Self {
260 config,
261 rng,
262 type_selector: AnomalyTypeSelector::new(),
263 strategies: StrategyCollection::default(),
264 cluster_manager,
265 entity_targeting,
266 document_anomaly_counts: HashMap::new(),
267 labels: Vec::new(),
268 stats: InjectorStats::default(),
269 scheme_advancer,
270 near_miss_generator,
271 near_miss_labels: Vec::new(),
272 co_occurrence_handler,
273 temporal_cluster_generator,
274 difficulty_calculator,
275 entity_aware_injector,
276 behavioral_baseline,
277 scheme_actions: Vec::new(),
278 difficulty_distribution: HashMap::new(),
279 vendor_contexts: HashMap::new(),
280 employee_contexts: HashMap::new(),
281 account_contexts: HashMap::new(),
282 }
283 }
284
285 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
287 let mut modified_documents = Vec::new();
288 let mut duplicates = Vec::new();
289
290 for entry in entries.iter_mut() {
291 self.stats.total_processed += 1;
292
293 if let Some(ref mut baseline) = self.behavioral_baseline {
295 use super::context::Observation;
296 let entity_id = entry.header.created_by.clone();
298 let observation =
299 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
300 baseline.record_observation(&entity_id, observation);
301 }
302
303 if !self.should_process(entry) {
305 continue;
306 }
307
308 let base_rate = self.config.rates.total_rate;
310
311 let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
313 let employee_id = &entry.header.created_by;
314 let first_account = entry
315 .lines
316 .first()
317 .map(|l| l.gl_account.as_str())
318 .unwrap_or("");
319 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
321
322 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
323 let employee_ctx = self.employee_contexts.get(employee_id);
324 let account_ctx = self.account_contexts.get(first_account);
325
326 let multiplier =
327 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
328 (base_rate * multiplier).min(1.0)
329 } else {
330 self.calculate_context_rate_multiplier(entry) * base_rate
332 };
333
334 if should_inject_anomaly(
336 effective_rate,
337 entry.posting_date(),
338 &self.config.patterns.temporal_pattern,
339 &mut self.rng,
340 ) {
341 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
343 let account = entry
345 .lines
346 .first()
347 .map(|l| l.gl_account.clone())
348 .unwrap_or_default();
349 near_miss_gen.record_transaction(
350 entry.document_number().clone(),
351 entry.posting_date(),
352 entry.total_debit(),
353 &account,
354 None,
355 );
356
357 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
359 entry.document_number().clone(),
360 entry.posting_date(),
361 entry.total_debit(),
362 &account,
363 None,
364 &self.config.enhanced.approval_thresholds,
365 ) {
366 self.near_miss_labels.push(near_miss_label);
367 continue; }
369 }
370
371 let anomaly_type = self.select_anomaly_category();
373
374 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
376 if let Some(ref calculator) = self.difficulty_calculator {
378 let difficulty = calculator.calculate(&label);
379
380 label = label
382 .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
383 label = label.with_metadata(
384 "difficulty_score",
385 &difficulty.difficulty_score().to_string(),
386 );
387
388 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
390 }
391
392 modified_documents.push(entry.document_number().clone());
393 self.labels.push(label);
394 self.stats.total_injected += 1;
395 }
396
397 if self.config.allow_duplicates
399 && matches!(
400 self.labels.last().map(|l| &l.anomaly_type),
401 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
402 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
403 )
404 {
405 let dup_strategy = DuplicationStrategy::default();
406 let duplicate = dup_strategy.duplicate(entry, &mut self.rng);
407 duplicates.push(duplicate);
408 }
409 }
410 }
411
412 let duplicates_created = duplicates.len();
414
415 let summary = AnomalySummary::from_anomalies(&self.labels);
417
418 InjectionBatchResult {
419 entries_processed: self.stats.total_processed,
420 anomalies_injected: self.stats.total_injected,
421 duplicates_created,
422 labels: self.labels.clone(),
423 summary,
424 modified_documents,
425 near_miss_labels: self.near_miss_labels.clone(),
426 scheme_actions: self.scheme_actions.clone(),
427 difficulty_distribution: self.difficulty_distribution.clone(),
428 }
429 }
430
431 fn should_process(&mut self, entry: &JournalEntry) -> bool {
433 if !self.config.target_companies.is_empty()
435 && !self
436 .config
437 .target_companies
438 .iter()
439 .any(|c| c == entry.company_code())
440 {
441 self.stats.skipped_company += 1;
442 return false;
443 }
444
445 if let Some((start, end)) = self.config.date_range {
447 if entry.posting_date() < start || entry.posting_date() > end {
448 self.stats.skipped_date += 1;
449 return false;
450 }
451 }
452
453 let current_count = self
455 .document_anomaly_counts
456 .get(&entry.document_number())
457 .copied()
458 .unwrap_or(0);
459 if current_count >= self.config.max_anomalies_per_document {
460 self.stats.skipped_max_per_doc += 1;
461 return false;
462 }
463
464 true
465 }
466
467 fn select_anomaly_category(&mut self) -> AnomalyType {
469 let r = self.rng.gen::<f64>();
470 let rates = &self.config.rates;
471
472 let mut cumulative = 0.0;
473
474 cumulative += rates.fraud_rate;
475 if r < cumulative {
476 return self.type_selector.select_fraud(&mut self.rng);
477 }
478
479 cumulative += rates.error_rate;
480 if r < cumulative {
481 return self.type_selector.select_error(&mut self.rng);
482 }
483
484 cumulative += rates.process_issue_rate;
485 if r < cumulative {
486 return self.type_selector.select_process_issue(&mut self.rng);
487 }
488
489 cumulative += rates.statistical_rate;
490 if r < cumulative {
491 return self.type_selector.select_statistical(&mut self.rng);
492 }
493
494 self.type_selector.select_relational(&mut self.rng)
495 }
496
497 fn inject_anomaly(
499 &mut self,
500 entry: &mut JournalEntry,
501 anomaly_type: AnomalyType,
502 ) -> Option<LabeledAnomaly> {
503 if !self.strategies.can_apply(entry, &anomaly_type) {
505 return None;
506 }
507
508 let result = self
510 .strategies
511 .apply_strategy(entry, &anomaly_type, &mut self.rng);
512
513 if !result.success {
514 return None;
515 }
516
517 *self
519 .document_anomaly_counts
520 .entry(entry.document_number().clone())
521 .or_insert(0) += 1;
522
523 let category = anomaly_type.category().to_string();
525 let type_name = anomaly_type.type_name();
526
527 *self.stats.by_category.entry(category).or_insert(0) += 1;
528 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
529 *self
530 .stats
531 .by_company
532 .entry(entry.company_code().to_string())
533 .or_insert(0) += 1;
534
535 if self.config.generate_labels {
537 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
538
539 entry.header.is_anomaly = true;
541 entry.header.anomaly_id = Some(anomaly_id.clone());
542 entry.header.anomaly_type = Some(type_name.clone());
543
544 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
546 entry.header.is_fraud = true;
547 if let AnomalyType::Fraud(ref ft) = anomaly_type {
548 entry.header.fraud_type = Some(*ft);
549 }
550 }
551
552 let mut label = LabeledAnomaly::new(
553 anomaly_id,
554 anomaly_type.clone(),
555 entry.document_number().clone(),
556 "JE".to_string(),
557 entry.company_code().to_string(),
558 entry.posting_date(),
559 )
560 .with_description(&result.description)
561 .with_injection_strategy(&type_name);
562
563 let causal_reason = AnomalyCausalReason::RandomRate {
565 base_rate: self.config.rates.total_rate,
566 };
567 label = label.with_causal_reason(causal_reason);
568
569 let context_multiplier = self.calculate_context_rate_multiplier(entry);
571 if (context_multiplier - 1.0).abs() > f64::EPSILON {
572 label = label.with_metadata(
573 "entity_context_multiplier",
574 &format!("{:.3}", context_multiplier),
575 );
576 label = label.with_metadata(
577 "effective_rate",
578 &format!(
579 "{:.6}",
580 (self.config.rates.total_rate * context_multiplier).min(1.0)
581 ),
582 );
583 }
584
585 if let Some(impact) = result.monetary_impact {
587 label = label.with_monetary_impact(impact);
588 }
589
590 for entity in &result.related_entities {
592 label = label.with_related_entity(entity);
593 }
594
595 for (key, value) in &result.metadata {
597 label = label.with_metadata(key, value);
598 }
599
600 if let Some(cluster_id) =
602 self.cluster_manager
603 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
604 {
605 label = label.with_cluster(&cluster_id);
606 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
608 cluster_id: cluster_id.clone(),
609 });
610 }
611
612 return Some(label);
613 }
614
615 None
616 }
617
618 pub fn inject_specific(
620 &mut self,
621 entry: &mut JournalEntry,
622 anomaly_type: AnomalyType,
623 ) -> Option<LabeledAnomaly> {
624 self.inject_anomaly(entry, anomaly_type)
625 }
626
627 pub fn create_self_approval(
629 &mut self,
630 entry: &mut JournalEntry,
631 user_id: &str,
632 ) -> Option<LabeledAnomaly> {
633 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
634
635 let label = LabeledAnomaly::new(
636 format!("ANO{:08}", self.labels.len() + 1),
637 anomaly_type,
638 entry.document_number().clone(),
639 "JE".to_string(),
640 entry.company_code().to_string(),
641 entry.posting_date(),
642 )
643 .with_description(&format!("User {} approved their own transaction", user_id))
644 .with_related_entity(user_id)
645 .with_injection_strategy("ManualSelfApproval")
646 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
647 target_type: "User".to_string(),
648 target_id: user_id.to_string(),
649 });
650
651 entry.header.is_anomaly = true;
653 entry.header.is_fraud = true;
654 entry.header.anomaly_id = Some(label.anomaly_id.clone());
655 entry.header.anomaly_type = Some("SelfApproval".to_string());
656 entry.header.fraud_type = Some(FraudType::SelfApproval);
657
658 entry.header.created_by = user_id.to_string();
660
661 self.labels.push(label.clone());
662 Some(label)
663 }
664
665 pub fn create_sod_violation(
667 &mut self,
668 entry: &mut JournalEntry,
669 user_id: &str,
670 conflicting_duties: (&str, &str),
671 ) -> Option<LabeledAnomaly> {
672 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
673
674 let label = LabeledAnomaly::new(
675 format!("ANO{:08}", self.labels.len() + 1),
676 anomaly_type,
677 entry.document_number().clone(),
678 "JE".to_string(),
679 entry.company_code().to_string(),
680 entry.posting_date(),
681 )
682 .with_description(&format!(
683 "User {} performed conflicting duties: {} and {}",
684 user_id, conflicting_duties.0, conflicting_duties.1
685 ))
686 .with_related_entity(user_id)
687 .with_metadata("duty1", conflicting_duties.0)
688 .with_metadata("duty2", conflicting_duties.1)
689 .with_injection_strategy("ManualSoDViolation")
690 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
691 target_type: "User".to_string(),
692 target_id: user_id.to_string(),
693 });
694
695 entry.header.is_anomaly = true;
697 entry.header.is_fraud = true;
698 entry.header.anomaly_id = Some(label.anomaly_id.clone());
699 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
700 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
701
702 self.labels.push(label.clone());
703 Some(label)
704 }
705
706 pub fn create_ic_mismatch(
708 &mut self,
709 entry: &mut JournalEntry,
710 matching_company: &str,
711 expected_amount: Decimal,
712 actual_amount: Decimal,
713 ) -> Option<LabeledAnomaly> {
714 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
715
716 let label = LabeledAnomaly::new(
717 format!("ANO{:08}", self.labels.len() + 1),
718 anomaly_type,
719 entry.document_number().clone(),
720 "JE".to_string(),
721 entry.company_code().to_string(),
722 entry.posting_date(),
723 )
724 .with_description(&format!(
725 "Intercompany mismatch with {}: expected {} but got {}",
726 matching_company, expected_amount, actual_amount
727 ))
728 .with_related_entity(matching_company)
729 .with_monetary_impact(actual_amount - expected_amount)
730 .with_metadata("expected_amount", &expected_amount.to_string())
731 .with_metadata("actual_amount", &actual_amount.to_string())
732 .with_injection_strategy("ManualICMismatch")
733 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
734 target_type: "Intercompany".to_string(),
735 target_id: matching_company.to_string(),
736 });
737
738 entry.header.is_anomaly = true;
740 entry.header.anomaly_id = Some(label.anomaly_id.clone());
741 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
742
743 self.labels.push(label.clone());
744 Some(label)
745 }
746
747 pub fn get_labels(&self) -> &[LabeledAnomaly] {
749 &self.labels
750 }
751
752 pub fn get_summary(&self) -> AnomalySummary {
754 AnomalySummary::from_anomalies(&self.labels)
755 }
756
757 pub fn get_stats(&self) -> &InjectorStats {
759 &self.stats
760 }
761
762 pub fn reset(&mut self) {
764 self.labels.clear();
765 self.document_anomaly_counts.clear();
766 self.stats = InjectorStats::default();
767 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
768
769 self.near_miss_labels.clear();
771 self.scheme_actions.clear();
772 self.difficulty_distribution.clear();
773
774 if let Some(ref mut baseline) = self.behavioral_baseline {
775 *baseline =
776 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
777 }
778 }
779
780 pub fn cluster_count(&self) -> usize {
782 self.cluster_manager.cluster_count()
783 }
784
785 pub fn set_entity_contexts(
798 &mut self,
799 vendors: HashMap<String, VendorContext>,
800 employees: HashMap<String, EmployeeContext>,
801 accounts: HashMap<String, AccountContext>,
802 ) {
803 self.vendor_contexts = vendors;
804 self.employee_contexts = employees;
805 self.account_contexts = accounts;
806 }
807
808 pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
810 &self.vendor_contexts
811 }
812
813 pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
815 &self.employee_contexts
816 }
817
818 pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
820 &self.account_contexts
821 }
822
823 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
832 if self.vendor_contexts.is_empty()
833 && self.employee_contexts.is_empty()
834 && self.account_contexts.is_empty()
835 {
836 return 1.0;
837 }
838
839 let mut multiplier = 1.0;
840
841 if let Some(ref vendor_ref) = entry.header.reference {
843 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
844 if ctx.is_new {
846 multiplier *= 2.0;
847 }
848 if ctx.is_dormant_reactivation {
849 multiplier *= 1.5;
850 }
851 }
852 }
853
854 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
856 if ctx.is_new {
857 multiplier *= 1.5;
858 }
859 if ctx.is_volume_fatigued {
860 multiplier *= 1.3;
861 }
862 if ctx.is_overtime {
863 multiplier *= 1.2;
864 }
865 }
866
867 if let Some(first_line) = entry.lines.first() {
869 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
870 if ctx.is_high_risk {
871 multiplier *= 2.0;
872 }
873 }
874 }
875
876 multiplier
877 }
878
879 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
888 if let Some(ref mut advancer) = self.scheme_advancer {
889 let context = SchemeContext::new(date, company_code);
890 let actions = advancer.advance_all(&context);
891 self.scheme_actions.extend(actions.clone());
892 actions
893 } else {
894 Vec::new()
895 }
896 }
897
898 pub fn maybe_start_scheme(
904 &mut self,
905 date: NaiveDate,
906 company_code: &str,
907 available_users: Vec<String>,
908 available_accounts: Vec<String>,
909 available_counterparties: Vec<String>,
910 ) -> Option<uuid::Uuid> {
911 if let Some(ref mut advancer) = self.scheme_advancer {
912 let mut context = SchemeContext::new(date, company_code);
913 context.available_users = available_users;
914 context.available_accounts = available_accounts;
915 context.available_counterparties = available_counterparties;
916
917 advancer.maybe_start_scheme(&context)
918 } else {
919 None
920 }
921 }
922
923 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
925 &self.near_miss_labels
926 }
927
928 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
930 &self.scheme_actions
931 }
932
933 pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
935 &self.difficulty_distribution
936 }
937
938 pub fn check_behavioral_deviations(
940 &self,
941 entity_id: &str,
942 observation: &super::context::Observation,
943 ) -> Vec<super::context::BehavioralDeviation> {
944 if let Some(ref baseline) = self.behavioral_baseline {
945 baseline.check_deviation(entity_id, observation)
946 } else {
947 Vec::new()
948 }
949 }
950
951 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
953 if let Some(ref baseline) = self.behavioral_baseline {
954 baseline.get_baseline(entity_id)
955 } else {
956 None
957 }
958 }
959
960 pub fn active_scheme_count(&self) -> usize {
962 if let Some(ref advancer) = self.scheme_advancer {
963 advancer.active_scheme_count()
964 } else {
965 0
966 }
967 }
968
969 pub fn has_enhanced_features(&self) -> bool {
971 self.scheme_advancer.is_some()
972 || self.near_miss_generator.is_some()
973 || self.difficulty_calculator.is_some()
974 || self.entity_aware_injector.is_some()
975 }
976}
977
978pub struct AnomalyInjectorConfigBuilder {
980 config: AnomalyInjectorConfig,
981}
982
983impl AnomalyInjectorConfigBuilder {
984 pub fn new() -> Self {
986 Self {
987 config: AnomalyInjectorConfig::default(),
988 }
989 }
990
991 pub fn with_total_rate(mut self, rate: f64) -> Self {
993 self.config.rates.total_rate = rate;
994 self
995 }
996
997 pub fn with_fraud_rate(mut self, rate: f64) -> Self {
999 self.config.rates.fraud_rate = rate;
1000 self
1001 }
1002
1003 pub fn with_error_rate(mut self, rate: f64) -> Self {
1005 self.config.rates.error_rate = rate;
1006 self
1007 }
1008
1009 pub fn with_seed(mut self, seed: u64) -> Self {
1011 self.config.seed = seed;
1012 self
1013 }
1014
1015 pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1017 self.config.patterns.temporal_pattern = pattern;
1018 self
1019 }
1020
1021 pub fn with_labels(mut self, generate: bool) -> Self {
1023 self.config.generate_labels = generate;
1024 self
1025 }
1026
1027 pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1029 self.config.target_companies = companies;
1030 self
1031 }
1032
1033 pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1035 self.config.date_range = Some((start, end));
1036 self
1037 }
1038
1039 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1045 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1046 self.config.enhanced.scheme_probability = probability;
1047 self
1048 }
1049
1050 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1052 self.config.enhanced.near_miss_enabled = enabled;
1053 self.config.enhanced.near_miss_proportion = proportion;
1054 self
1055 }
1056
1057 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1059 self.config.enhanced.approval_thresholds = thresholds;
1060 self
1061 }
1062
1063 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1065 self.config.enhanced.correlated_injection_enabled = enabled;
1066 self
1067 }
1068
1069 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1071 self.config.enhanced.temporal_clustering_enabled = enabled;
1072 self.config.enhanced.period_end_multiplier = multiplier;
1073 self
1074 }
1075
1076 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1078 self.config.enhanced.difficulty_classification_enabled = enabled;
1079 self
1080 }
1081
1082 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1084 self.config.enhanced.context_aware_enabled = enabled;
1085 self
1086 }
1087
1088 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1090 self.config.enhanced.behavioral_baseline_config = config;
1091 self
1092 }
1093
1094 pub fn with_all_enhanced_features(mut self) -> Self {
1096 self.config.enhanced.multi_stage_schemes_enabled = true;
1097 self.config.enhanced.scheme_probability = 0.02;
1098 self.config.enhanced.correlated_injection_enabled = true;
1099 self.config.enhanced.temporal_clustering_enabled = true;
1100 self.config.enhanced.period_end_multiplier = 2.5;
1101 self.config.enhanced.near_miss_enabled = true;
1102 self.config.enhanced.near_miss_proportion = 0.30;
1103 self.config.enhanced.difficulty_classification_enabled = true;
1104 self.config.enhanced.context_aware_enabled = true;
1105 self.config.enhanced.behavioral_baseline_config.enabled = true;
1106 self
1107 }
1108
1109 pub fn build(self) -> AnomalyInjectorConfig {
1111 self.config
1112 }
1113}
1114
1115impl Default for AnomalyInjectorConfigBuilder {
1116 fn default() -> Self {
1117 Self::new()
1118 }
1119}
1120
1121#[cfg(test)]
1122mod tests {
1123 use super::*;
1124 use chrono::NaiveDate;
1125 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1126 use rust_decimal_macros::dec;
1127
1128 fn create_test_entry(doc_num: &str) -> JournalEntry {
1129 let mut entry = JournalEntry::new_simple(
1130 doc_num.to_string(),
1131 "1000".to_string(),
1132 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1133 "Test Entry".to_string(),
1134 );
1135
1136 entry.add_line(JournalEntryLine {
1137 line_number: 1,
1138 gl_account: "5000".to_string(),
1139 debit_amount: dec!(1000),
1140 ..Default::default()
1141 });
1142
1143 entry.add_line(JournalEntryLine {
1144 line_number: 2,
1145 gl_account: "1000".to_string(),
1146 credit_amount: dec!(1000),
1147 ..Default::default()
1148 });
1149
1150 entry
1151 }
1152
1153 #[test]
1154 fn test_anomaly_injector_basic() {
1155 let config = AnomalyInjectorConfigBuilder::new()
1156 .with_total_rate(0.5) .with_seed(42)
1158 .build();
1159
1160 let mut injector = AnomalyInjector::new(config);
1161
1162 let mut entries: Vec<_> = (0..100)
1163 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1164 .collect();
1165
1166 let result = injector.process_entries(&mut entries);
1167
1168 assert!(result.anomalies_injected > 0);
1170 assert!(!result.labels.is_empty());
1171 assert_eq!(result.labels.len(), result.anomalies_injected);
1172 }
1173
1174 #[test]
1175 fn test_specific_injection() {
1176 let config = AnomalyInjectorConfig::default();
1177 let mut injector = AnomalyInjector::new(config);
1178
1179 let mut entry = create_test_entry("JE001");
1180 let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1181
1182 let label = injector.inject_specific(&mut entry, anomaly_type);
1183
1184 assert!(label.is_some());
1185 let label = label.unwrap();
1186 assert!(!label.document_id.is_empty());
1188 assert_eq!(label.document_id, entry.document_number());
1189 }
1190
1191 #[test]
1192 fn test_self_approval_injection() {
1193 let config = AnomalyInjectorConfig::default();
1194 let mut injector = AnomalyInjector::new(config);
1195
1196 let mut entry = create_test_entry("JE001");
1197 let label = injector.create_self_approval(&mut entry, "USER001");
1198
1199 assert!(label.is_some());
1200 let label = label.unwrap();
1201 assert!(matches!(
1202 label.anomaly_type,
1203 AnomalyType::Fraud(FraudType::SelfApproval)
1204 ));
1205 assert!(label.related_entities.contains(&"USER001".to_string()));
1206 }
1207
1208 #[test]
1209 fn test_company_filtering() {
1210 let config = AnomalyInjectorConfigBuilder::new()
1211 .with_total_rate(1.0) .with_target_companies(vec!["2000".to_string()])
1213 .build();
1214
1215 let mut injector = AnomalyInjector::new(config);
1216
1217 let mut entries = vec![
1218 create_test_entry("JE001"), create_test_entry("JE002"), ];
1221
1222 let result = injector.process_entries(&mut entries);
1223
1224 assert_eq!(result.anomalies_injected, 0);
1226 }
1227
1228 fn create_test_entry_with_context(
1234 doc_num: &str,
1235 vendor_ref: Option<&str>,
1236 employee_id: &str,
1237 gl_account: &str,
1238 ) -> JournalEntry {
1239 let mut entry = JournalEntry::new_simple(
1240 doc_num.to_string(),
1241 "1000".to_string(),
1242 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1243 "Test Entry".to_string(),
1244 );
1245
1246 entry.header.reference = vendor_ref.map(|v| v.to_string());
1247 entry.header.created_by = employee_id.to_string();
1248
1249 entry.add_line(JournalEntryLine {
1250 line_number: 1,
1251 gl_account: gl_account.to_string(),
1252 debit_amount: dec!(1000),
1253 ..Default::default()
1254 });
1255
1256 entry.add_line(JournalEntryLine {
1257 line_number: 2,
1258 gl_account: "1000".to_string(),
1259 credit_amount: dec!(1000),
1260 ..Default::default()
1261 });
1262
1263 entry
1264 }
1265
/// A default injector exposes empty vendor/employee/account context maps; after
/// `set_entity_contexts` each accessor must report exactly the single entry supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before anything is installed, every context map is empty.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let employee_map = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    let account_map = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, employee_map, account_map);

    // Each map now holds exactly one entry ...
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    // ... stored under the key it was inserted with.
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1316
/// Without any entity contexts, a 0.5 total rate over 200 entries (seed 42) must inject
/// at least one anomaly and land at an observed rate strictly between 20% and 80%.
#[test]
fn test_default_behavior_no_contexts() {
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut journal: Vec<_> = (0..200)
        .map(|idx| create_test_entry(&format!("JE{:04}", idx)))
        .collect();

    let outcome = injector.process_entries(&mut journal);

    assert!(outcome.anomalies_injected > 0);

    // Wide band on purpose: the check is statistical, not exact.
    let rate = outcome.anomalies_injected as f64 / outcome.entries_processed as f64;
    let above_floor = rate > 0.2;
    let below_ceiling = rate < 0.8;
    assert!(
        above_floor && below_ceiling,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1343
/// Runs the same 500-entry batch through two identically configured injectors
/// (rate 0.10, seed 123): one without entity contexts and one where the referenced
/// vendor, employee and account are all flagged as risky. The run with contexts must
/// inject strictly more anomalies.
///
/// The injector construction and the entry batch are produced by local closures so
/// both runs are guaranteed to use the same configuration and inputs.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10;

    // Identical configuration for both runs; only the entity contexts differ.
    let build_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };

    // Every entry references vendor V001, employee EMP001 and account 8100.
    let build_entries = || -> Vec<JournalEntry> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no entity contexts installed.
    let mut injector_no_ctx = build_injector();
    let mut entries_no_ctx = build_entries();
    let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);

    // Run 2: same setup, but every referenced entity carries risk flags.
    let mut injector_ctx = build_injector();

    let mut vendors = HashMap::new();
    vendors.insert(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            is_dormant_reactivation: true,
            ..Default::default()
        },
    );

    let mut employees = HashMap::new();
    employees.insert(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    );

    let mut accounts = HashMap::new();
    accounts.insert(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    );

    injector_ctx.set_entity_contexts(vendors, employees, accounts);

    let mut entries_ctx = build_entries();
    let result_ctx = injector_ctx.process_entries(&mut entries_ctx);

    assert!(
        result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        result_ctx.anomalies_injected,
        result_no_ctx.anomalies_injected,
    );
}
1434
/// Checks `calculate_context_rate_multiplier` for four situations:
///
/// 1. no contexts installed -> 1.0
/// 2. new vendor and high-risk account both matched -> 4.0
/// 3. only the new vendor matched -> 2.0
/// 4. vendor reference and account unknown to the injector -> 1.0
///
/// Cases 2 and 3 together imply the vendor and account factors combine
/// multiplicatively (2.0 x 2.0).
#[test]
fn test_risk_score_multiplication() {
    let config = AnomalyInjectorConfig::default();
    let mut injector = AnomalyInjector::new(config);

    // Case 1: nothing installed yet, so the multiplier must be neutral.
    let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let multiplier_plain = injector.calculate_context_rate_multiplier(&entry_plain);
    assert!(
        (multiplier_plain - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no contexts), got {}",
        multiplier_plain,
    );

    let mut vendors = HashMap::new();
    vendors.insert(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    );

    let mut accounts = HashMap::new();
    accounts.insert(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    );

    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Case 2: entry hits both the risky vendor and the high-risk account.
    let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
    assert!(
        (multiplier - 4.0).abs() < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Case 3: risky vendor, but an account the injector has no context for.
    let entry_vendor_only =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
    assert!(
        (multiplier_vendor - 2.0).abs() < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Case 4: neither the vendor reference nor the account is known.
    let entry_no_match =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
    assert!(
        (multiplier_none - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1499
/// An employee flagged as new, volume-fatigued and on overtime must yield a context
/// multiplier of 1.5 * 1.3 * 1.2 (within 0.01).
///
/// NOTE(review): the one-to-one mapping of the three factors onto the three flags is
/// presumed from their order here — confirm against the multiplier implementation.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let staff = HashMap::from([(
        "EMP_NEW".to_string(),
        EmployeeContext {
            employee_id: "EMP_NEW".to_string(),
            is_new: true,
            is_volume_fatigued: true,
            is_overtime: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(HashMap::new(), staff, HashMap::new());

    // No vendor reference; the entry's creator is the flagged employee.
    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1531
/// Calling `reset()` on the injector must leave previously installed vendor contexts
/// in place: the vendor map holds one entry both before and after the reset.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    injector.reset();

    // The context must still be there after the reset.
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1554
/// Installing empty maps through `set_entity_contexts` must replace what was there
/// before: a vendor context set earlier is gone afterwards.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // Second call with nothing but empty maps.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1576
/// A vendor flagged only as a dormant reactivation must produce a context multiplier
/// of exactly 1.5 for an entry that references it.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendor_map = HashMap::from([(
        "V_DORMANT".to_string(),
        VendorContext {
            vendor_id: "V_DORMANT".to_string(),
            is_dormant_reactivation: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, HashMap::new(), HashMap::new());

    // Employee "USER1" and account "5000" have no context, so only the vendor counts.
    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);
    let deviation = (multiplier - 1.5).abs();
    assert!(
        deviation < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1602}