1use chrono::NaiveDate;
15use rand::Rng;
16use rand::SeedableRng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20
21use datasynth_core::models::{
22 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
23 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
24 RelationalAnomalyType,
25};
26
27use super::context::{BehavioralBaseline, BehavioralBaselineConfig, EntityAwareInjector};
28use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
29use super::difficulty::DifficultyCalculator;
30use super::near_miss::{NearMissConfig, NearMissGenerator};
31use super::patterns::{
32 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
33 TemporalPattern,
34};
35use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
36use super::schemes::{SchemeAction, SchemeContext};
37use super::strategies::{DuplicationStrategy, StrategyCollection};
38use super::types::AnomalyTypeSelector;
39
/// Configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Rate settings. `total_rate` is handed to `should_inject_anomaly` to decide whether an
    /// entry is selected; the per-category rates drive the category draw.
    pub rates: AnomalyRateConfig,
    /// Pattern settings (clustering, entity targeting, temporal pattern).
    pub patterns: AnomalyPatternConfig,
    /// Seed for the injector's `ChaCha8Rng`.
    pub seed: u64,
    /// When `false`, the injector still applies strategies but neither marks the entry header
    /// nor produces a label.
    pub generate_labels: bool,
    /// Whether a duplicate entry is generated after a duplicate-type anomaly is injected.
    pub allow_duplicates: bool,
    /// Entries whose document already carries this many anomalies are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes eligible for injection; an empty list means all companies.
    pub target_companies: Vec<String>,
    /// Optional inclusive `(start, end)` posting-date window; entries outside it are skipped.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Switches and parameters for the optional enhanced components.
    pub enhanced: EnhancedInjectionConfig,
}
62
/// Switches and parameters for the optional ("enhanced") injector components.
///
/// `Default` is derived, so every flag defaults to `false`, every `f64` to `0.0` and the
/// threshold list to empty.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Creates a `SchemeAdvancer` when set.
    pub multi_stage_schemes_enabled: bool,
    /// Used as the embezzlement probability of the scheme advancer; revenue-manipulation and
    /// kickback probabilities are set to half of this value.
    pub scheme_probability: f64,
    /// Creates an `AnomalyCoOccurrence` handler when set.
    pub correlated_injection_enabled: bool,
    /// Creates a `TemporalClusterGenerator` when set.
    pub temporal_clustering_enabled: bool,
    /// NOTE(review): not read anywhere in this file; presumably a rate multiplier applied
    /// around period end — confirm against the consumer.
    pub period_end_multiplier: f64,
    /// Creates a `NearMissGenerator` when set.
    pub near_miss_enabled: bool,
    /// Forwarded to `NearMissConfig::proportion`.
    pub near_miss_proportion: f64,
    /// Thresholds passed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Creates a `DifficultyCalculator` when set; labels then get difficulty metadata.
    pub difficulty_classification_enabled: bool,
    /// Creates an `EntityAwareInjector` when set; also a precondition for the behavioral baseline.
    pub context_aware_enabled: bool,
    /// Baseline settings; a baseline is only built when both `context_aware_enabled` and this
    /// config's `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
}
89
90impl Default for AnomalyInjectorConfig {
91 fn default() -> Self {
92 Self {
93 rates: AnomalyRateConfig::default(),
94 patterns: AnomalyPatternConfig::default(),
95 seed: 42,
96 generate_labels: true,
97 allow_duplicates: true,
98 max_anomalies_per_document: 2,
99 target_companies: Vec::new(),
100 date_range: None,
101 enhanced: EnhancedInjectionConfig::default(),
102 }
103 }
104}
105
/// Result returned by [`AnomalyInjector::process_entries`].
///
/// Several fields are copies of state the injector accumulates across calls (until `reset`),
/// not per-batch values; see the individual fields.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Total entries seen by the injector (taken from its running statistics).
    pub entries_processed: usize,
    /// Total anomalies injected by `process_entries` (taken from its running statistics).
    pub anomalies_injected: usize,
    /// Number of duplicate entries generated during this call.
    pub duplicates_created: usize,
    /// Copy of all labels the injector currently holds.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed from the injector's labels.
    pub summary: AnomalySummary,
    /// Document numbers of entries modified during this call.
    pub modified_documents: Vec<String>,
    /// Copy of all near-miss labels the injector currently holds.
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Copy of all scheme actions the injector currently holds.
    pub scheme_actions: Vec<SchemeAction>,
    /// Copy of the injector's label count per detection difficulty.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
128
/// Injects anomalies into journal entries and keeps the resulting labels and statistics.
///
/// The optional components are `Some` only when the matching flag in
/// [`EnhancedInjectionConfig`] was set at construction time.
// `dead_code` is allowed because some components are stored but not yet read in this file.
#[allow(dead_code)]
pub struct AnomalyInjector {
    /// Configuration the injector was built with.
    config: AnomalyInjectorConfig,
    /// Random source, seeded from `config.seed`.
    rng: ChaCha8Rng,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that apply an anomaly type to an entry.
    strategies: StrategyCollection,
    /// Assigns injected anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Entity targeting state built from `config.patterns.entity_targeting`.
    entity_targeting: EntityTargetingManager,
    /// Number of anomalies injected so far, keyed by document number.
    document_anomaly_counts: HashMap<String, usize>,
    /// All labels recorded so far.
    labels: Vec<LabeledAnomaly>,
    /// Running counters.
    stats: InjectorStats,
    /// Multi-stage scheme driver (optional).
    scheme_advancer: Option<SchemeAdvancer>,
    /// Near-miss generator (optional).
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels recorded so far.
    near_miss_labels: Vec<NearMissLabel>,
    /// Co-occurrence handler (optional).
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Temporal cluster generator (optional).
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Detection-difficulty calculator (optional).
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Entity-aware injector (optional).
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Behavioral baseline fed with one observation per processed entry (optional).
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions collected by `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Number of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
166
/// Running counters kept by [`AnomalyInjector`].
///
/// NOTE(review): all fields are private and no accessors are visible in this file, so callers
/// of `get_stats` can only use the `Debug`/`Clone` impls — confirm whether that is intended.
#[derive(Debug, Clone, Default)]
// Fields are written by the injector but never read in this file.
#[allow(dead_code)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`.
    total_processed: usize,
    /// Anomalies injected (and labeled) by `process_entries`.
    total_injected: usize,
    /// Injection count per anomaly category.
    by_category: HashMap<String, usize>,
    /// Injection count per anomaly type name.
    by_type: HashMap<String, usize>,
    /// Injection count per company code.
    by_company: HashMap<String, usize>,
    /// Presumably entries not selected by the rate draw — confirm against the writer.
    skipped_rate: usize,
    /// Entries skipped because their posting date is outside the configured range.
    skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    skipped_company: usize,
    /// Entries skipped because the per-document anomaly cap was reached.
    skipped_max_per_doc: usize,
}
181
182impl AnomalyInjector {
183 pub fn new(config: AnomalyInjectorConfig) -> Self {
185 let mut rng = ChaCha8Rng::seed_from_u64(config.seed);
186 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
187 let entity_targeting =
188 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
189
190 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
192 let scheme_config = SchemeAdvancerConfig {
193 embezzlement_probability: config.enhanced.scheme_probability,
194 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
195 kickback_probability: config.enhanced.scheme_probability * 0.5,
196 seed: rng.gen(),
197 ..Default::default()
198 };
199 Some(SchemeAdvancer::new(scheme_config))
200 } else {
201 None
202 };
203
204 let near_miss_generator = if config.enhanced.near_miss_enabled {
205 let near_miss_config = NearMissConfig {
206 proportion: config.enhanced.near_miss_proportion,
207 seed: rng.gen(),
208 ..Default::default()
209 };
210 Some(NearMissGenerator::new(near_miss_config))
211 } else {
212 None
213 };
214
215 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
216 Some(AnomalyCoOccurrence::new())
217 } else {
218 None
219 };
220
221 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
222 Some(TemporalClusterGenerator::new())
223 } else {
224 None
225 };
226
227 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
228 Some(DifficultyCalculator::new())
229 } else {
230 None
231 };
232
233 let entity_aware_injector = if config.enhanced.context_aware_enabled {
234 Some(EntityAwareInjector::default())
235 } else {
236 None
237 };
238
239 let behavioral_baseline = if config.enhanced.context_aware_enabled
240 && config.enhanced.behavioral_baseline_config.enabled
241 {
242 Some(BehavioralBaseline::new(
243 config.enhanced.behavioral_baseline_config.clone(),
244 ))
245 } else {
246 None
247 };
248
249 Self {
250 config,
251 rng,
252 type_selector: AnomalyTypeSelector::new(),
253 strategies: StrategyCollection::default(),
254 cluster_manager,
255 entity_targeting,
256 document_anomaly_counts: HashMap::new(),
257 labels: Vec::new(),
258 stats: InjectorStats::default(),
259 scheme_advancer,
260 near_miss_generator,
261 near_miss_labels: Vec::new(),
262 co_occurrence_handler,
263 temporal_cluster_generator,
264 difficulty_calculator,
265 entity_aware_injector,
266 behavioral_baseline,
267 scheme_actions: Vec::new(),
268 difficulty_distribution: HashMap::new(),
269 }
270 }
271
272 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
274 let mut modified_documents = Vec::new();
275 let mut duplicates = Vec::new();
276
277 for entry in entries.iter_mut() {
278 self.stats.total_processed += 1;
279
280 if let Some(ref mut baseline) = self.behavioral_baseline {
282 use super::context::Observation;
283 let entity_id = entry.header.created_by.clone();
285 let observation =
286 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
287 baseline.record_observation(&entity_id, observation);
288 }
289
290 if !self.should_process(entry) {
292 continue;
293 }
294
295 let effective_rate = self.config.rates.total_rate;
297
298 if let Some(ref injector) = self.entity_aware_injector {
300 let _ = injector;
303 }
304
305 if should_inject_anomaly(
307 effective_rate,
308 entry.posting_date(),
309 &self.config.patterns.temporal_pattern,
310 &mut self.rng,
311 ) {
312 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
314 let account = entry
316 .lines
317 .first()
318 .map(|l| l.gl_account.clone())
319 .unwrap_or_default();
320 near_miss_gen.record_transaction(
321 entry.document_number().clone(),
322 entry.posting_date(),
323 entry.total_debit(),
324 &account,
325 None,
326 );
327
328 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
330 entry.document_number().clone(),
331 entry.posting_date(),
332 entry.total_debit(),
333 &account,
334 None,
335 &self.config.enhanced.approval_thresholds,
336 ) {
337 self.near_miss_labels.push(near_miss_label);
338 continue; }
340 }
341
342 let anomaly_type = self.select_anomaly_category();
344
345 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
347 if let Some(ref calculator) = self.difficulty_calculator {
349 let difficulty = calculator.calculate(&label);
350
351 label = label
353 .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
354 label = label.with_metadata(
355 "difficulty_score",
356 &difficulty.difficulty_score().to_string(),
357 );
358
359 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
361 }
362
363 modified_documents.push(entry.document_number().clone());
364 self.labels.push(label);
365 self.stats.total_injected += 1;
366 }
367
368 if self.config.allow_duplicates
370 && matches!(
371 self.labels.last().map(|l| &l.anomaly_type),
372 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
373 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
374 )
375 {
376 let dup_strategy = DuplicationStrategy::default();
377 let duplicate = dup_strategy.duplicate(entry, &mut self.rng);
378 duplicates.push(duplicate);
379 }
380 }
381 }
382
383 let duplicates_created = duplicates.len();
385
386 let summary = AnomalySummary::from_anomalies(&self.labels);
388
389 InjectionBatchResult {
390 entries_processed: self.stats.total_processed,
391 anomalies_injected: self.stats.total_injected,
392 duplicates_created,
393 labels: self.labels.clone(),
394 summary,
395 modified_documents,
396 near_miss_labels: self.near_miss_labels.clone(),
397 scheme_actions: self.scheme_actions.clone(),
398 difficulty_distribution: self.difficulty_distribution.clone(),
399 }
400 }
401
402 fn should_process(&mut self, entry: &JournalEntry) -> bool {
404 if !self.config.target_companies.is_empty()
406 && !self
407 .config
408 .target_companies
409 .iter()
410 .any(|c| c == entry.company_code())
411 {
412 self.stats.skipped_company += 1;
413 return false;
414 }
415
416 if let Some((start, end)) = self.config.date_range {
418 if entry.posting_date() < start || entry.posting_date() > end {
419 self.stats.skipped_date += 1;
420 return false;
421 }
422 }
423
424 let current_count = self
426 .document_anomaly_counts
427 .get(&entry.document_number())
428 .copied()
429 .unwrap_or(0);
430 if current_count >= self.config.max_anomalies_per_document {
431 self.stats.skipped_max_per_doc += 1;
432 return false;
433 }
434
435 true
436 }
437
438 fn select_anomaly_category(&mut self) -> AnomalyType {
440 let r = self.rng.gen::<f64>();
441 let rates = &self.config.rates;
442
443 let mut cumulative = 0.0;
444
445 cumulative += rates.fraud_rate;
446 if r < cumulative {
447 return self.type_selector.select_fraud(&mut self.rng);
448 }
449
450 cumulative += rates.error_rate;
451 if r < cumulative {
452 return self.type_selector.select_error(&mut self.rng);
453 }
454
455 cumulative += rates.process_issue_rate;
456 if r < cumulative {
457 return self.type_selector.select_process_issue(&mut self.rng);
458 }
459
460 cumulative += rates.statistical_rate;
461 if r < cumulative {
462 return self.type_selector.select_statistical(&mut self.rng);
463 }
464
465 self.type_selector.select_relational(&mut self.rng)
466 }
467
468 fn inject_anomaly(
470 &mut self,
471 entry: &mut JournalEntry,
472 anomaly_type: AnomalyType,
473 ) -> Option<LabeledAnomaly> {
474 if !self.strategies.can_apply(entry, &anomaly_type) {
476 return None;
477 }
478
479 let result = self
481 .strategies
482 .apply_strategy(entry, &anomaly_type, &mut self.rng);
483
484 if !result.success {
485 return None;
486 }
487
488 *self
490 .document_anomaly_counts
491 .entry(entry.document_number().clone())
492 .or_insert(0) += 1;
493
494 let category = anomaly_type.category().to_string();
496 let type_name = anomaly_type.type_name();
497
498 *self.stats.by_category.entry(category).or_insert(0) += 1;
499 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
500 *self
501 .stats
502 .by_company
503 .entry(entry.company_code().to_string())
504 .or_insert(0) += 1;
505
506 if self.config.generate_labels {
508 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
509
510 entry.header.is_anomaly = true;
512 entry.header.anomaly_id = Some(anomaly_id.clone());
513 entry.header.anomaly_type = Some(type_name.clone());
514
515 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
517 entry.header.is_fraud = true;
518 if let AnomalyType::Fraud(ref ft) = anomaly_type {
519 entry.header.fraud_type = Some(*ft);
520 }
521 }
522
523 let mut label = LabeledAnomaly::new(
524 anomaly_id,
525 anomaly_type.clone(),
526 entry.document_number().clone(),
527 "JE".to_string(),
528 entry.company_code().to_string(),
529 entry.posting_date(),
530 )
531 .with_description(&result.description)
532 .with_injection_strategy(&type_name);
533
534 let causal_reason = AnomalyCausalReason::RandomRate {
536 base_rate: self.config.rates.total_rate,
537 };
538 label = label.with_causal_reason(causal_reason);
539
540 if let Some(impact) = result.monetary_impact {
542 label = label.with_monetary_impact(impact);
543 }
544
545 for entity in &result.related_entities {
547 label = label.with_related_entity(entity);
548 }
549
550 for (key, value) in &result.metadata {
552 label = label.with_metadata(key, value);
553 }
554
555 if let Some(cluster_id) =
557 self.cluster_manager
558 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
559 {
560 label = label.with_cluster(&cluster_id);
561 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
563 cluster_id: cluster_id.clone(),
564 });
565 }
566
567 return Some(label);
568 }
569
570 None
571 }
572
    /// Injects the given `anomaly_type` into `entry`, bypassing the rate draw and the
    /// eligibility filters used by `process_entries`.
    ///
    /// Returns the label produced by `inject_anomaly`, or `None` when the type cannot be
    /// applied, the strategy fails, or label generation is disabled.
    ///
    /// NOTE(review): the returned label is not stored in the injector's label list, and label
    /// ids are derived from that list's length — repeated calls therefore appear to reuse the
    /// same id. Confirm whether callers are expected to track these labels themselves.
    pub fn inject_specific(
        &mut self,
        entry: &mut JournalEntry,
        anomaly_type: AnomalyType,
    ) -> Option<LabeledAnomaly> {
        self.inject_anomaly(entry, anomaly_type)
    }
581
582 pub fn create_self_approval(
584 &mut self,
585 entry: &mut JournalEntry,
586 user_id: &str,
587 ) -> Option<LabeledAnomaly> {
588 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
589
590 let label = LabeledAnomaly::new(
591 format!("ANO{:08}", self.labels.len() + 1),
592 anomaly_type,
593 entry.document_number().clone(),
594 "JE".to_string(),
595 entry.company_code().to_string(),
596 entry.posting_date(),
597 )
598 .with_description(&format!("User {} approved their own transaction", user_id))
599 .with_related_entity(user_id)
600 .with_injection_strategy("ManualSelfApproval")
601 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
602 target_type: "User".to_string(),
603 target_id: user_id.to_string(),
604 });
605
606 entry.header.is_anomaly = true;
608 entry.header.is_fraud = true;
609 entry.header.anomaly_id = Some(label.anomaly_id.clone());
610 entry.header.anomaly_type = Some("SelfApproval".to_string());
611 entry.header.fraud_type = Some(FraudType::SelfApproval);
612
613 entry.header.created_by = user_id.to_string();
615
616 self.labels.push(label.clone());
617 Some(label)
618 }
619
620 pub fn create_sod_violation(
622 &mut self,
623 entry: &mut JournalEntry,
624 user_id: &str,
625 conflicting_duties: (&str, &str),
626 ) -> Option<LabeledAnomaly> {
627 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
628
629 let label = LabeledAnomaly::new(
630 format!("ANO{:08}", self.labels.len() + 1),
631 anomaly_type,
632 entry.document_number().clone(),
633 "JE".to_string(),
634 entry.company_code().to_string(),
635 entry.posting_date(),
636 )
637 .with_description(&format!(
638 "User {} performed conflicting duties: {} and {}",
639 user_id, conflicting_duties.0, conflicting_duties.1
640 ))
641 .with_related_entity(user_id)
642 .with_metadata("duty1", conflicting_duties.0)
643 .with_metadata("duty2", conflicting_duties.1)
644 .with_injection_strategy("ManualSoDViolation")
645 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
646 target_type: "User".to_string(),
647 target_id: user_id.to_string(),
648 });
649
650 entry.header.is_anomaly = true;
652 entry.header.is_fraud = true;
653 entry.header.anomaly_id = Some(label.anomaly_id.clone());
654 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
655 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
656
657 self.labels.push(label.clone());
658 Some(label)
659 }
660
661 pub fn create_ic_mismatch(
663 &mut self,
664 entry: &mut JournalEntry,
665 matching_company: &str,
666 expected_amount: Decimal,
667 actual_amount: Decimal,
668 ) -> Option<LabeledAnomaly> {
669 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
670
671 let label = LabeledAnomaly::new(
672 format!("ANO{:08}", self.labels.len() + 1),
673 anomaly_type,
674 entry.document_number().clone(),
675 "JE".to_string(),
676 entry.company_code().to_string(),
677 entry.posting_date(),
678 )
679 .with_description(&format!(
680 "Intercompany mismatch with {}: expected {} but got {}",
681 matching_company, expected_amount, actual_amount
682 ))
683 .with_related_entity(matching_company)
684 .with_monetary_impact(actual_amount - expected_amount)
685 .with_metadata("expected_amount", &expected_amount.to_string())
686 .with_metadata("actual_amount", &actual_amount.to_string())
687 .with_injection_strategy("ManualICMismatch")
688 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
689 target_type: "Intercompany".to_string(),
690 target_id: matching_company.to_string(),
691 });
692
693 entry.header.is_anomaly = true;
695 entry.header.anomaly_id = Some(label.anomaly_id.clone());
696 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
697
698 self.labels.push(label.clone());
699 Some(label)
700 }
701
    /// Returns all labels recorded so far (since construction or the last `reset`).
    pub fn get_labels(&self) -> &[LabeledAnomaly] {
        &self.labels
    }
706
    /// Builds a fresh summary from the labels recorded so far.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
711
    /// Returns the injector's running counters.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
716
717 pub fn reset(&mut self) {
719 self.labels.clear();
720 self.document_anomaly_counts.clear();
721 self.stats = InjectorStats::default();
722 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
723
724 self.near_miss_labels.clear();
726 self.scheme_actions.clear();
727 self.difficulty_distribution.clear();
728
729 if let Some(ref mut baseline) = self.behavioral_baseline {
730 *baseline =
731 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
732 }
733 }
734
    /// Returns the cluster count reported by the cluster manager.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
739
740 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
749 if let Some(ref mut advancer) = self.scheme_advancer {
750 let context = SchemeContext::new(date, company_code);
751 let actions = advancer.advance_all(&context);
752 self.scheme_actions.extend(actions.clone());
753 actions
754 } else {
755 Vec::new()
756 }
757 }
758
759 pub fn maybe_start_scheme(
765 &mut self,
766 date: NaiveDate,
767 company_code: &str,
768 available_users: Vec<String>,
769 available_accounts: Vec<String>,
770 available_counterparties: Vec<String>,
771 ) -> Option<uuid::Uuid> {
772 if let Some(ref mut advancer) = self.scheme_advancer {
773 let mut context = SchemeContext::new(date, company_code);
774 context.available_users = available_users;
775 context.available_accounts = available_accounts;
776 context.available_counterparties = available_counterparties;
777
778 advancer.maybe_start_scheme(&context)
779 } else {
780 None
781 }
782 }
783
    /// Returns the near-miss labels recorded so far.
    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
        &self.near_miss_labels
    }
788
    /// Returns the scheme actions collected by `advance_schemes` so far.
    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
        &self.scheme_actions
    }
793
    /// Returns the number of labels per detection difficulty; stays empty unless difficulty
    /// classification is enabled.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
798
799 pub fn check_behavioral_deviations(
801 &self,
802 entity_id: &str,
803 observation: &super::context::Observation,
804 ) -> Vec<super::context::BehavioralDeviation> {
805 if let Some(ref baseline) = self.behavioral_baseline {
806 baseline.check_deviation(entity_id, observation)
807 } else {
808 Vec::new()
809 }
810 }
811
812 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
814 if let Some(ref baseline) = self.behavioral_baseline {
815 baseline.get_baseline(entity_id)
816 } else {
817 None
818 }
819 }
820
821 pub fn active_scheme_count(&self) -> usize {
823 if let Some(ref advancer) = self.scheme_advancer {
824 advancer.active_scheme_count()
825 } else {
826 0
827 }
828 }
829
830 pub fn has_enhanced_features(&self) -> bool {
832 self.scheme_advancer.is_some()
833 || self.near_miss_generator.is_some()
834 || self.difficulty_calculator.is_some()
835 || self.entity_aware_injector.is_some()
836 }
837}
838
/// Fluent builder for [`AnomalyInjectorConfig`], starting from the default configuration.
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration being assembled; returned by `build`.
    config: AnomalyInjectorConfig,
}
843
impl AnomalyInjectorConfigBuilder {
    /// Starts a builder from `AnomalyInjectorConfig::default()`.
    pub fn new() -> Self {
        Self {
            config: AnomalyInjectorConfig::default(),
        }
    }

    /// Sets the overall rate used to decide whether an entry is selected for injection.
    /// The value is stored as given (no range validation).
    pub fn with_total_rate(mut self, rate: f64) -> Self {
        self.config.rates.total_rate = rate;
        self
    }

    /// Sets the fraud share used by the category draw. Stored as given.
    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
        self.config.rates.fraud_rate = rate;
        self
    }

    /// Sets the error share used by the category draw. Stored as given.
    pub fn with_error_rate(mut self, rate: f64) -> Self {
        self.config.rates.error_rate = rate;
        self
    }

    /// Sets the RNG seed.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.config.seed = seed;
        self
    }

    /// Sets the temporal pattern consulted when deciding whether to inject.
    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
        self.config.patterns.temporal_pattern = pattern;
        self
    }

    /// Enables or disables label generation.
    pub fn with_labels(mut self, generate: bool) -> Self {
        self.config.generate_labels = generate;
        self
    }

    /// Restricts injection to the given company codes (empty = all companies).
    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
        self.config.target_companies = companies;
        self
    }

    /// Restricts injection to entries posted within `start..=end`.
    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
        self.config.date_range = Some((start, end));
        self
    }

    /// Enables or disables multi-stage schemes and sets the base scheme probability.
    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
        self.config.enhanced.multi_stage_schemes_enabled = enabled;
        self.config.enhanced.scheme_probability = probability;
        self
    }

    /// Enables or disables near-miss generation and sets its proportion.
    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
        self.config.enhanced.near_miss_enabled = enabled;
        self.config.enhanced.near_miss_proportion = proportion;
        self
    }

    /// Sets the approval thresholds passed to the near-miss check.
    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
        self.config.enhanced.approval_thresholds = thresholds;
        self
    }

    /// Enables or disables correlated (co-occurrence) injection.
    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
        self.config.enhanced.correlated_injection_enabled = enabled;
        self
    }

    /// Enables or disables temporal clustering and sets the period-end multiplier.
    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
        self.config.enhanced.temporal_clustering_enabled = enabled;
        self.config.enhanced.period_end_multiplier = multiplier;
        self
    }

    /// Enables or disables detection-difficulty classification of labels.
    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
        self.config.enhanced.difficulty_classification_enabled = enabled;
        self
    }

    /// Enables or disables context-aware injection (also a precondition for the baseline).
    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
        self.config.enhanced.context_aware_enabled = enabled;
        self
    }

    /// Replaces the behavioral baseline configuration.
    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
        self.config.enhanced.behavioral_baseline_config = config;
        self
    }

    /// Turns on every enhanced feature with preset parameters: scheme probability `0.02`,
    /// period-end multiplier `2.5`, near-miss proportion `0.30`, and the behavioral baseline
    /// enabled. Approval thresholds are left unchanged.
    pub fn with_all_enhanced_features(mut self) -> Self {
        self.config.enhanced.multi_stage_schemes_enabled = true;
        self.config.enhanced.scheme_probability = 0.02;
        self.config.enhanced.correlated_injection_enabled = true;
        self.config.enhanced.temporal_clustering_enabled = true;
        self.config.enhanced.period_end_multiplier = 2.5;
        self.config.enhanced.near_miss_enabled = true;
        self.config.enhanced.near_miss_proportion = 0.30;
        self.config.enhanced.difficulty_classification_enabled = true;
        self.config.enhanced.context_aware_enabled = true;
        self.config.enhanced.behavioral_baseline_config.enabled = true;
        self
    }

    /// Consumes the builder and returns the assembled configuration.
    pub fn build(self) -> AnomalyInjectorConfig {
        self.config
    }
}
975
impl Default for AnomalyInjectorConfigBuilder {
    /// Equivalent to [`AnomalyInjectorConfigBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}
981
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::NaiveDate;
    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
    use rust_decimal_macros::dec;

    /// Builds a two-line entry (debit 1000 on account 5000, credit 1000 on account 1000)
    /// dated 2024-06-15. The second argument to `new_simple` ("1000") is presumably the
    /// company code — `test_company_filtering` relies on it differing from "2000".
    fn create_test_entry(doc_num: &str) -> JournalEntry {
        let mut entry = JournalEntry::new_simple(
            doc_num.to_string(),
            "1000".to_string(),
            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
            "Test Entry".to_string(),
        );

        entry.add_line(JournalEntryLine {
            line_number: 1,
            gl_account: "5000".to_string(),
            debit_amount: dec!(1000),
            ..Default::default()
        });

        entry.add_line(JournalEntryLine {
            line_number: 2,
            gl_account: "1000".to_string(),
            credit_amount: dec!(1000),
            ..Default::default()
        });

        entry
    }

    /// With a high total rate over 100 entries, at least one anomaly is injected and every
    /// injected anomaly has a label.
    #[test]
    fn test_anomaly_injector_basic() {
        let config = AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build();

        let mut injector = AnomalyInjector::new(config);

        let mut entries: Vec<_> = (0..100)
            .map(|i| create_test_entry(&format!("JE{:04}", i)))
            .collect();

        let result = injector.process_entries(&mut entries);

        assert!(result.anomalies_injected > 0);
        assert!(!result.labels.is_empty());
        assert_eq!(result.labels.len(), result.anomalies_injected);
    }

    /// Injecting a specific statistical anomaly yields a label tied to the entry's document.
    #[test]
    fn test_specific_injection() {
        let config = AnomalyInjectorConfig::default();
        let mut injector = AnomalyInjector::new(config);

        let mut entry = create_test_entry("JE001");
        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);

        let label = injector.inject_specific(&mut entry, anomaly_type);

        assert!(label.is_some());
        let label = label.unwrap();
        assert!(!label.document_id.is_empty());
        assert_eq!(label.document_id, entry.document_number());
    }

    /// A manual self-approval produces a `SelfApproval` fraud label naming the user.
    #[test]
    fn test_self_approval_injection() {
        let config = AnomalyInjectorConfig::default();
        let mut injector = AnomalyInjector::new(config);

        let mut entry = create_test_entry("JE001");
        let label = injector.create_self_approval(&mut entry, "USER001");

        assert!(label.is_some());
        let label = label.unwrap();
        assert!(matches!(
            label.anomaly_type,
            AnomalyType::Fraud(FraudType::SelfApproval)
        ));
        assert!(label.related_entities.contains(&"USER001".to_string()));
    }

    /// Entries whose company is not in `target_companies` are never injected, even at a
    /// total rate of 1.0.
    #[test]
    fn test_company_filtering() {
        let config = AnomalyInjectorConfigBuilder::new()
            .with_total_rate(1.0)
            .with_target_companies(vec!["2000".to_string()])
            .build();

        let mut injector = AnomalyInjector::new(config);

        let mut entries = vec![
            create_test_entry("JE001"),
            create_test_entry("JE002"),
        ];

        let result = injector.process_entries(&mut entries);

        assert_eq!(result.anomalies_injected, 0);
    }
}