1use chrono::NaiveDate;
15use datasynth_config::schema::FraudCampaignConfig;
16use datasynth_core::utils::seeded_rng;
17use rand::RngExt;
18use rand_chacha::ChaCha8Rng;
19use rust_decimal::Decimal;
20use std::collections::HashMap;
21use tracing::debug;
22
23use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
24use datasynth_core::models::{
25 AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
26 AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
27 RelationalAnomalyType,
28};
29use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
30
31use super::context::{
32 AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
33 EntityAwareInjector, VendorContext,
34};
35use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
36use super::difficulty::DifficultyCalculator;
37use super::near_miss::{NearMissConfig, NearMissGenerator};
38use super::patterns::{
39 should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
40 TemporalPattern,
41};
42use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
43use super::schemes::{SchemeAction, SchemeContext};
44use super::strategies::{DuplicationStrategy, StrategyCollection};
45use super::types::AnomalyTypeSelector;
46
/// Top-level configuration for [`AnomalyInjector`].
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Anomaly rates. `total_rate` is the base per-entry injection rate; the
    /// per-category rates (`fraud_rate`, `error_rate`, `process_issue_rate`,
    /// `statistical_rate`) drive the category selection.
    pub rates: AnomalyRateConfig,
    /// Pattern settings; the injector reads `clustering`, `entity_targeting`
    /// and `temporal_pattern` from it.
    pub patterns: AnomalyPatternConfig,
    /// Seed for the injector's RNG and its deterministic UUID factory.
    pub seed: u64,
    /// When `false`, the injection path still applies strategies but returns
    /// no label and does not flag the entry header.
    pub generate_labels: bool,
    /// Whether an entry may be duplicated after a `DuplicateEntry` /
    /// `DuplicatePayment` anomaly.
    pub allow_duplicates: bool,
    /// Entries whose document already carries this many anomalies are skipped.
    pub max_anomalies_per_document: usize,
    /// If non-empty, only entries whose company code is listed are processed.
    pub target_companies: Vec<String>,
    /// Optional inclusive `(start, end)` posting-date window; entries outside
    /// it are skipped. `None` means no date restriction.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Settings for the optional "enhanced" injection features.
    pub enhanced: EnhancedInjectionConfig,
}
69
/// Switches and parameters for the optional injection features.
///
/// The derived `Default` leaves every flag off and every numeric value at zero.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Creates a `SchemeAdvancer` when `true`.
    pub multi_stage_schemes_enabled: bool,
    /// Used as the embezzlement probability; revenue-manipulation and kickback
    /// schemes each get half of this value.
    pub scheme_probability: f64,
    /// Creates an `AnomalyCoOccurrence` handler so that correlated follow-up
    /// anomalies are queued after an injection.
    pub correlated_injection_enabled: bool,
    /// Creates a `TemporalClusterGenerator` whose active clusters scale the
    /// effective injection rate.
    pub temporal_clustering_enabled: bool,
    /// NOTE(review): set by the builder but not read anywhere in this part of
    /// the file — presumably consumed elsewhere; confirm.
    pub period_end_multiplier: f64,
    /// Creates a `NearMissGenerator` when `true`.
    pub near_miss_enabled: bool,
    /// Forwarded to `NearMissConfig::proportion`.
    pub near_miss_proportion: f64,
    /// Thresholds handed to the near-miss check.
    pub approval_thresholds: Vec<Decimal>,
    /// Creates a `DifficultyCalculator` so that labels get difficulty metadata.
    pub difficulty_classification_enabled: bool,
    /// Creates an `EntityAwareInjector`; also a precondition for the
    /// behavioral baseline.
    pub context_aware_enabled: bool,
    /// Baseline settings; a baseline is only built when both
    /// `context_aware_enabled` and this config's `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
    /// Passed to `apply_fraud_behavioral_bias` for entries flagged as fraud.
    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
    /// Campaign settings; when `is_active()` returns true, campaigns are
    /// planned at the end of each `process_entries` call.
    pub fraud_campaign: FraudCampaignConfig,
}
104
105impl Default for AnomalyInjectorConfig {
106 fn default() -> Self {
107 Self {
108 rates: AnomalyRateConfig::default(),
109 patterns: AnomalyPatternConfig::default(),
110 seed: 42,
111 generate_labels: true,
112 allow_duplicates: true,
113 max_anomalies_per_document: 2,
114 target_companies: Vec::new(),
115 date_range: None,
116 enhanced: EnhancedInjectionConfig::default(),
117 }
118 }
119}
120
/// Outcome of one `AnomalyInjector::process_entries` call.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Copy of the injector's running `total_processed` counter.
    pub entries_processed: usize,
    /// Copy of the injector's running `total_injected` counter.
    pub anomalies_injected: usize,
    /// Number of duplicate entries created during this call.
    pub duplicates_created: usize,
    /// Clone of every label the injector currently holds.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary computed from the injector's labels.
    pub summary: AnomalySummary,
    /// Document numbers / ids touched during this call.
    pub modified_documents: Vec<String>,
    /// Clone of the near-miss labels the injector currently holds.
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Clone of the scheme actions the injector currently holds.
    pub scheme_actions: Vec<SchemeAction>,
    /// Clone of the injector's per-difficulty label counts.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Carry-forward register built for fraud campaigns; empty unless
    /// campaigns are active and carry-forward is enabled.
    pub carry_forward: Vec<super::campaign::CarryForwardRecord>,
}
146
/// Stateful injector that applies anomalies to journal entries and records
/// the corresponding labels and statistics.
pub struct AnomalyInjector {
    /// Configuration the injector was built with.
    config: AnomalyInjectorConfig,
    /// RNG seeded from `config.seed`.
    rng: ChaCha8Rng,
    /// UUID factory handed to the duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that actually mutate an entry for a given anomaly type.
    strategies: StrategyCollection,
    /// Assigns injected anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Selects the entity an anomaly is attributed to.
    entity_targeting: EntityTargetingManager,
    /// Number of anomalies injected so far, keyed by document number.
    document_anomaly_counts: HashMap<String, usize>,
    /// All labels recorded so far.
    labels: Vec<LabeledAnomaly>,
    /// Running counters.
    stats: InjectorStats,
    /// Present only when multi-stage schemes are enabled.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Present only when near-miss generation is enabled.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels recorded so far.
    near_miss_labels: Vec<NearMissLabel>,
    /// Present only when correlated injection is enabled.
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Correlated anomalies waiting for an entry dated on or after their
    /// `earliest_date`.
    queued_co_occurrences: Vec<QueuedAnomaly>,
    /// Present only when temporal clustering is enabled.
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Present only when difficulty classification is enabled.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Present only when context-aware injection is enabled.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Present only when context-aware injection and the baseline config are
    /// both enabled.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions collected by `advance_schemes`.
    scheme_actions: Vec<SchemeAction>,
    /// Count of labels per detection difficulty.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Vendor contexts, looked up by the entry header's `reference`.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts, looked up by the entry header's `created_by`.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts, looked up by the GL account of the entry's first line.
    account_contexts: HashMap<String, AccountContext>,
}
194
/// Running counters maintained by `AnomalyInjector`.
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`, including skipped ones.
    pub total_processed: usize,
    /// Labels recorded by `process_entries` (including campaign labels).
    pub total_injected: usize,
    /// Successful strategy applications per anomaly category.
    pub by_category: HashMap<String, usize>,
    /// Successful strategy applications per anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Successful strategy applications per company code.
    pub by_company: HashMap<String, usize>,
    /// NOTE(review): not updated anywhere in this part of the file — confirm
    /// whether it is maintained elsewhere.
    pub skipped_rate: usize,
    /// Entries skipped because they fall outside the configured date range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    pub skipped_company: usize,
    /// Entries skipped because their document hit the per-document maximum.
    pub skipped_max_per_doc: usize,
    /// Times the behavioral bias reported `WeekendPosting`.
    pub fraud_weekend_bias_applied: usize,
    /// Times a fraud entry's largest line amount matched a round-dollar target
    /// while round-dollar bias was configured above zero.
    pub fraud_round_dollar_bias_applied: usize,
    /// Times the behavioral bias reported `AfterHoursPosting`.
    pub fraud_off_hours_bias_applied: usize,
    /// Times the behavioral bias reported `PostClosePosting`.
    pub fraud_post_close_bias_applied: usize,
}
225
/// A correlated anomaly waiting to be injected into a later entry.
struct QueuedAnomaly {
    /// Anomaly type to inject.
    anomaly_type: AnomalyType,
    /// Entity of the triggering anomaly, kept only when the correlation is
    /// marked as same-entity.
    target_entity: Option<String>,
    /// First posting date on which this anomaly may be injected
    /// (trigger date plus the correlation's lag in days).
    earliest_date: NaiveDate,
    /// Description copied from the correlation result.
    description: String,
}
237
238impl AnomalyInjector {
239 pub fn new(config: AnomalyInjectorConfig) -> Self {
241 let mut rng = seeded_rng(config.seed, 0);
242 let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
243 let entity_targeting =
244 EntityTargetingManager::new(config.patterns.entity_targeting.clone());
245
246 let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
248 let scheme_config = SchemeAdvancerConfig {
249 embezzlement_probability: config.enhanced.scheme_probability,
250 revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
251 kickback_probability: config.enhanced.scheme_probability * 0.5,
252 seed: rng.random(),
253 ..Default::default()
254 };
255 Some(SchemeAdvancer::new(scheme_config))
256 } else {
257 None
258 };
259
260 let near_miss_generator = if config.enhanced.near_miss_enabled {
261 let near_miss_config = NearMissConfig {
262 proportion: config.enhanced.near_miss_proportion,
263 seed: rng.random(),
264 ..Default::default()
265 };
266 Some(NearMissGenerator::new(near_miss_config))
267 } else {
268 None
269 };
270
271 let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
272 Some(AnomalyCoOccurrence::new())
273 } else {
274 None
275 };
276
277 let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
278 Some(TemporalClusterGenerator::new())
279 } else {
280 None
281 };
282
283 let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
284 Some(DifficultyCalculator::new())
285 } else {
286 None
287 };
288
289 let entity_aware_injector = if config.enhanced.context_aware_enabled {
290 Some(EntityAwareInjector::default())
291 } else {
292 None
293 };
294
295 let behavioral_baseline = if config.enhanced.context_aware_enabled
296 && config.enhanced.behavioral_baseline_config.enabled
297 {
298 Some(BehavioralBaseline::new(
299 config.enhanced.behavioral_baseline_config.clone(),
300 ))
301 } else {
302 None
303 };
304
305 let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
306
307 Self {
308 config,
309 rng,
310 uuid_factory,
311 type_selector: AnomalyTypeSelector::new(),
312 strategies: StrategyCollection::default(),
313 cluster_manager,
314 entity_targeting,
315 document_anomaly_counts: HashMap::new(),
316 labels: Vec::new(),
317 stats: InjectorStats::default(),
318 scheme_advancer,
319 near_miss_generator,
320 near_miss_labels: Vec::new(),
321 co_occurrence_handler,
322 queued_co_occurrences: Vec::new(),
323 temporal_cluster_generator,
324 difficulty_calculator,
325 entity_aware_injector,
326 behavioral_baseline,
327 scheme_actions: Vec::new(),
328 difficulty_distribution: HashMap::new(),
329 vendor_contexts: HashMap::new(),
330 employee_contexts: HashMap::new(),
331 account_contexts: HashMap::new(),
332 }
333 }
334
335 pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
337 debug!(
338 entry_count = entries.len(),
339 total_rate = self.config.rates.total_rate,
340 seed = self.config.seed,
341 "Injecting anomalies into journal entries"
342 );
343
344 let mut modified_documents = Vec::new();
345 let mut duplicates = Vec::new();
346
347 for entry in entries.iter_mut() {
348 self.stats.total_processed += 1;
349
350 if let Some(ref mut baseline) = self.behavioral_baseline {
352 use super::context::Observation;
353 let entity_id = entry.header.created_by.clone();
355 let observation =
356 Observation::new(entry.posting_date()).with_amount(entry.total_debit());
357 baseline.record_observation(&entity_id, observation);
358 }
359
360 if !self.should_process(entry) {
362 continue;
363 }
364
365 let entry_date = entry.posting_date();
367 let ready_indices: Vec<usize> = self
368 .queued_co_occurrences
369 .iter()
370 .enumerate()
371 .filter(|(_, q)| entry_date >= q.earliest_date)
372 .map(|(i, _)| i)
373 .collect();
374
375 if let Some(&idx) = ready_indices.first() {
376 let queued = self.queued_co_occurrences.remove(idx);
377 if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
378 label = label.with_metadata("co_occurrence", "true");
379 label = label.with_metadata("co_occurrence_description", &queued.description);
380 if let Some(ref target) = queued.target_entity {
381 label = label.with_related_entity(target);
382 label = label.with_metadata("co_occurrence_target", target);
383 }
384 modified_documents.push(entry.document_number().clone());
385 self.labels.push(label);
386 self.stats.total_injected += 1;
387 }
388 continue; }
390
391 let base_rate = self.config.rates.total_rate;
393
394 let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
396 let employee_id = &entry.header.created_by;
397 let first_account = entry
398 .lines
399 .first()
400 .map(|l| l.gl_account.as_str())
401 .unwrap_or("");
402 let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
404
405 let vendor_ctx = self.vendor_contexts.get(vendor_ref);
406 let employee_ctx = self.employee_contexts.get(employee_id);
407 let account_ctx = self.account_contexts.get(first_account);
408
409 let multiplier =
410 injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
411 (base_rate * multiplier).min(1.0)
412 } else {
413 self.calculate_context_rate_multiplier(entry) * base_rate
415 };
416
417 if let Some(ref tcg) = self.temporal_cluster_generator {
419 let temporal_multiplier = tcg
420 .get_active_clusters(entry_date)
421 .iter()
422 .map(|c| c.rate_multiplier)
423 .fold(1.0_f64, f64::max);
424 effective_rate = (effective_rate * temporal_multiplier).min(1.0);
425 }
426
427 if should_inject_anomaly(
429 effective_rate,
430 entry_date,
431 &self.config.patterns.temporal_pattern,
432 &mut self.rng,
433 ) {
434 if let Some(ref mut near_miss_gen) = self.near_miss_generator {
436 let account = entry
438 .lines
439 .first()
440 .map(|l| l.gl_account.clone())
441 .unwrap_or_default();
442 near_miss_gen.record_transaction(
443 entry.document_number().clone(),
444 entry_date,
445 entry.total_debit(),
446 &account,
447 None,
448 );
449
450 if let Some(near_miss_label) = near_miss_gen.check_near_miss(
452 entry.document_number().clone(),
453 entry_date,
454 entry.total_debit(),
455 &account,
456 None,
457 &self.config.enhanced.approval_thresholds,
458 ) {
459 self.near_miss_labels.push(near_miss_label);
460 continue; }
462 }
463
464 let anomaly_type = self.select_anomaly_category();
466
467 let target_entity = {
469 let mut candidates: Vec<String> =
470 self.vendor_contexts.keys().cloned().collect();
471 candidates.extend(self.employee_contexts.keys().cloned());
472 if candidates.is_empty() {
473 if let Some(ref r) = entry.header.reference {
475 candidates.push(r.clone());
476 }
477 }
478 self.entity_targeting
479 .select_entity(&candidates, &mut self.rng)
480 };
481
482 if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
484 if let Some(ref entity_id) = target_entity {
486 label = label.with_metadata("entity_target", entity_id);
487 label = label.with_related_entity(entity_id);
488 label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
489 target_type: "Entity".to_string(),
490 target_id: entity_id.clone(),
491 });
492 }
493
494 if let Some(ref calculator) = self.difficulty_calculator {
496 let difficulty = calculator.calculate(&label);
497
498 label =
500 label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
501 label = label.with_metadata(
502 "difficulty_score",
503 &difficulty.difficulty_score().to_string(),
504 );
505
506 *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
508 }
509
510 modified_documents.push(entry.document_number().clone());
511 self.labels.push(label);
512 self.stats.total_injected += 1;
513
514 if let Some(ref co_occ) = self.co_occurrence_handler {
516 let correlated =
517 co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
518 for result in correlated {
519 self.queued_co_occurrences.push(QueuedAnomaly {
520 anomaly_type: result.anomaly_type,
521 target_entity: if result.same_entity {
522 target_entity.clone()
523 } else {
524 None
525 },
526 earliest_date: entry_date
527 + chrono::Duration::days(i64::from(result.lag_days)),
528 description: result.description,
529 });
530 }
531 }
532 }
533
534 if self.config.allow_duplicates
546 && entry.header.ic_pair_id.is_none()
547 && matches!(
548 self.labels.last().map(|l| &l.anomaly_type),
549 Some(AnomalyType::Error(ErrorType::DuplicateEntry))
550 | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
551 )
552 {
553 let dup_strategy = DuplicationStrategy::default();
554 let duplicate =
555 dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
556 duplicates.push(duplicate);
557 }
558 }
559 }
560
561 let mut carry_forward: Vec<super::campaign::CarryForwardRecord> = Vec::new();
564 if self.config.enhanced.fraud_campaign.is_active() {
565 let campaign_cfg = self.config.enhanced.fraud_campaign.clone();
566 let campaign_labels =
567 super::campaign::plan_campaigns(&campaign_cfg, entries, &mut self.rng);
568 self.stats.total_injected += campaign_labels.len();
569 for label in &campaign_labels {
570 modified_documents.push(label.document_id.clone());
571 }
572 if campaign_cfg.carry_forward.enabled {
575 let accounts: Vec<String> = {
576 let mut set = std::collections::BTreeSet::new();
577 for e in entries.iter() {
578 for l in &e.lines {
579 set.insert(l.gl_account.clone());
580 }
581 }
582 set.into_iter().collect()
583 };
584 carry_forward = super::campaign::build_carry_forward_register(
585 &campaign_labels,
586 &campaign_cfg.carry_forward,
587 &accounts,
588 &mut self.rng,
589 );
590 }
591 self.labels.extend(campaign_labels);
592 }
593
594 let duplicates_created = duplicates.len();
596
597 let summary = AnomalySummary::from_anomalies(&self.labels);
599
600 InjectionBatchResult {
601 entries_processed: self.stats.total_processed,
602 anomalies_injected: self.stats.total_injected,
603 duplicates_created,
604 labels: self.labels.clone(),
605 summary,
606 modified_documents,
607 near_miss_labels: self.near_miss_labels.clone(),
608 scheme_actions: self.scheme_actions.clone(),
609 difficulty_distribution: self.difficulty_distribution.clone(),
610 carry_forward,
611 }
612 }
613
614 fn should_process(&mut self, entry: &JournalEntry) -> bool {
616 if !self.config.target_companies.is_empty()
618 && !self
619 .config
620 .target_companies
621 .iter()
622 .any(|c| c == entry.company_code())
623 {
624 self.stats.skipped_company += 1;
625 return false;
626 }
627
628 if let Some((start, end)) = self.config.date_range {
630 if entry.posting_date() < start || entry.posting_date() > end {
631 self.stats.skipped_date += 1;
632 return false;
633 }
634 }
635
636 let current_count = self
638 .document_anomaly_counts
639 .get(&entry.document_number())
640 .copied()
641 .unwrap_or(0);
642 if current_count >= self.config.max_anomalies_per_document {
643 self.stats.skipped_max_per_doc += 1;
644 return false;
645 }
646
647 true
648 }
649
650 fn select_anomaly_category(&mut self) -> AnomalyType {
652 let r = self.rng.random::<f64>();
653 let rates = &self.config.rates;
654
655 let mut cumulative = 0.0;
656
657 cumulative += rates.fraud_rate;
658 if r < cumulative {
659 return self.type_selector.select_fraud(&mut self.rng);
660 }
661
662 cumulative += rates.error_rate;
663 if r < cumulative {
664 return self.type_selector.select_error(&mut self.rng);
665 }
666
667 cumulative += rates.process_issue_rate;
668 if r < cumulative {
669 return self.type_selector.select_process_issue(&mut self.rng);
670 }
671
672 cumulative += rates.statistical_rate;
673 if r < cumulative {
674 return self.type_selector.select_statistical(&mut self.rng);
675 }
676
677 self.type_selector.select_relational(&mut self.rng)
678 }
679
680 fn inject_anomaly(
682 &mut self,
683 entry: &mut JournalEntry,
684 anomaly_type: AnomalyType,
685 ) -> Option<LabeledAnomaly> {
686 if !self.strategies.can_apply(entry, &anomaly_type) {
688 return None;
689 }
690
691 let result = self
693 .strategies
694 .apply_strategy(entry, &anomaly_type, &mut self.rng);
695
696 if !result.success {
697 return None;
698 }
699
700 *self
702 .document_anomaly_counts
703 .entry(entry.document_number().clone())
704 .or_insert(0) += 1;
705
706 let category = anomaly_type.category().to_string();
708 let type_name = anomaly_type.type_name();
709
710 *self.stats.by_category.entry(category).or_insert(0) += 1;
711 *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
712 *self
713 .stats
714 .by_company
715 .entry(entry.company_code().to_string())
716 .or_insert(0) += 1;
717
718 if self.config.generate_labels {
720 let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
721
722 entry.header.is_anomaly = true;
724 entry.header.anomaly_id = Some(anomaly_id.clone());
725 entry.header.anomaly_type = Some(type_name.clone());
726
727 let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
729 Vec::new();
730 if matches!(anomaly_type, AnomalyType::Fraud(_)) {
731 entry.header.is_fraud = true;
732 if let AnomalyType::Fraud(ref ft) = anomaly_type {
733 entry.header.fraud_type = Some(*ft);
734 }
735 secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
741 }
742
743 let mut label = LabeledAnomaly::new(
744 anomaly_id,
745 anomaly_type.clone(),
746 entry.document_number().clone(),
747 "JE".to_string(),
748 entry.company_code().to_string(),
749 entry.posting_date(),
750 )
751 .with_description(&result.description)
752 .with_injection_strategy(&type_name);
753
754 let causal_reason = AnomalyCausalReason::RandomRate {
756 base_rate: self.config.rates.total_rate,
757 };
758 label = label.with_causal_reason(causal_reason);
759
760 let context_multiplier = self.calculate_context_rate_multiplier(entry);
762 if (context_multiplier - 1.0).abs() > f64::EPSILON {
763 label = label.with_metadata(
764 "entity_context_multiplier",
765 &format!("{context_multiplier:.3}"),
766 );
767 label = label.with_metadata(
768 "effective_rate",
769 &format!(
770 "{:.6}",
771 (self.config.rates.total_rate * context_multiplier).min(1.0)
772 ),
773 );
774 }
775
776 if let Some(impact) = result.monetary_impact {
778 label = label.with_monetary_impact(impact);
779 }
780
781 for entity in &result.related_entities {
783 label = label.with_related_entity(entity);
784 }
785
786 for (key, value) in &result.metadata {
788 label = label.with_metadata(key, value);
789 }
790
791 if let Some(cluster_id) =
793 self.cluster_manager
794 .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
795 {
796 label = label.with_cluster(&cluster_id);
797 label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
799 cluster_id: cluster_id.clone(),
800 });
801 }
802
803 for issue_type in &secondary_process_issues {
810 let child_id = format!("ANO{:08}", self.labels.len() + 1);
811 let child = LabeledAnomaly::new(
812 child_id,
813 AnomalyType::ProcessIssue(*issue_type),
814 entry.document_number().clone(),
815 "JE".to_string(),
816 entry.company_code().to_string(),
817 entry.posting_date(),
818 )
819 .with_description("Forensic pattern from fraud behavioral bias")
820 .with_injection_strategy("behavioral_bias")
821 .with_parent_anomaly(&label.anomaly_id);
822 self.labels.push(child);
823 }
824
825 return Some(label);
826 }
827
828 None
829 }
830
    /// Injects the given `anomaly_type` into `entry`, bypassing rate-based
    /// selection and the `should_process` filters.
    ///
    /// Thin public wrapper around `inject_anomaly`. The returned label is not
    /// appended to the injector's label list by this call.
    pub fn inject_specific(
        &mut self,
        entry: &mut JournalEntry,
        anomaly_type: AnomalyType,
    ) -> Option<LabeledAnomaly> {
        self.inject_anomaly(entry, anomaly_type)
    }
839
840 pub fn create_self_approval(
842 &mut self,
843 entry: &mut JournalEntry,
844 user_id: &str,
845 ) -> Option<LabeledAnomaly> {
846 let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
847
848 let label = LabeledAnomaly::new(
849 format!("ANO{:08}", self.labels.len() + 1),
850 anomaly_type,
851 entry.document_number().clone(),
852 "JE".to_string(),
853 entry.company_code().to_string(),
854 entry.posting_date(),
855 )
856 .with_description(&format!("User {user_id} approved their own transaction"))
857 .with_related_entity(user_id)
858 .with_injection_strategy("ManualSelfApproval")
859 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
860 target_type: "User".to_string(),
861 target_id: user_id.to_string(),
862 });
863
864 entry.header.is_anomaly = true;
866 entry.header.is_fraud = true;
867 entry.header.anomaly_id = Some(label.anomaly_id.clone());
868 entry.header.anomaly_type = Some("SelfApproval".to_string());
869 entry.header.fraud_type = Some(FraudType::SelfApproval);
870
871 entry.header.created_by = user_id.to_string();
873
874 self.apply_fraud_behavioral_bias(entry);
877
878 self.labels.push(label.clone());
879 Some(label)
880 }
881
882 pub fn create_sod_violation(
884 &mut self,
885 entry: &mut JournalEntry,
886 user_id: &str,
887 conflicting_duties: (&str, &str),
888 ) -> Option<LabeledAnomaly> {
889 let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
890
891 let label = LabeledAnomaly::new(
892 format!("ANO{:08}", self.labels.len() + 1),
893 anomaly_type,
894 entry.document_number().clone(),
895 "JE".to_string(),
896 entry.company_code().to_string(),
897 entry.posting_date(),
898 )
899 .with_description(&format!(
900 "User {} performed conflicting duties: {} and {}",
901 user_id, conflicting_duties.0, conflicting_duties.1
902 ))
903 .with_related_entity(user_id)
904 .with_metadata("duty1", conflicting_duties.0)
905 .with_metadata("duty2", conflicting_duties.1)
906 .with_injection_strategy("ManualSoDViolation")
907 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
908 target_type: "User".to_string(),
909 target_id: user_id.to_string(),
910 });
911
912 entry.header.is_anomaly = true;
914 entry.header.is_fraud = true;
915 entry.header.anomaly_id = Some(label.anomaly_id.clone());
916 entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
917 entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
918
919 self.apply_fraud_behavioral_bias(entry);
921
922 self.labels.push(label.clone());
923 Some(label)
924 }
925
926 pub fn create_ic_mismatch(
928 &mut self,
929 entry: &mut JournalEntry,
930 matching_company: &str,
931 expected_amount: Decimal,
932 actual_amount: Decimal,
933 ) -> Option<LabeledAnomaly> {
934 let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
935
936 let label = LabeledAnomaly::new(
937 format!("ANO{:08}", self.labels.len() + 1),
938 anomaly_type,
939 entry.document_number().clone(),
940 "JE".to_string(),
941 entry.company_code().to_string(),
942 entry.posting_date(),
943 )
944 .with_description(&format!(
945 "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
946 ))
947 .with_related_entity(matching_company)
948 .with_monetary_impact(actual_amount - expected_amount)
949 .with_metadata("expected_amount", &expected_amount.to_string())
950 .with_metadata("actual_amount", &actual_amount.to_string())
951 .with_injection_strategy("ManualICMismatch")
952 .with_causal_reason(AnomalyCausalReason::EntityTargeting {
953 target_type: "Intercompany".to_string(),
954 target_id: matching_company.to_string(),
955 });
956
957 entry.header.is_anomaly = true;
959 entry.header.anomaly_id = Some(label.anomaly_id.clone());
960 entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
961
962 self.labels.push(label.clone());
963 Some(label)
964 }
965
966 pub fn get_labels(&self) -> &[LabeledAnomaly] {
968 &self.labels
969 }
970
    /// Builds a fresh summary from the labels recorded so far.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
975
    /// Borrows the running injection counters.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
980
981 pub fn reset(&mut self) {
983 self.labels.clear();
984 self.document_anomaly_counts.clear();
985 self.stats = InjectorStats::default();
986 self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
987
988 self.near_miss_labels.clear();
990 self.scheme_actions.clear();
991 self.difficulty_distribution.clear();
992
993 if let Some(ref mut baseline) = self.behavioral_baseline {
994 *baseline =
995 BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
996 }
997 }
998
    /// Number of clusters reported by the cluster manager.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
1003
    /// Replaces all three entity-context maps used for rate adjustment and
    /// entity targeting.
    ///
    /// * `vendors` – looked up by the entry header's `reference`.
    /// * `employees` – looked up by the entry header's `created_by`.
    /// * `accounts` – looked up by the GL account of the entry's first line.
    pub fn set_entity_contexts(
        &mut self,
        vendors: HashMap<String, VendorContext>,
        employees: HashMap<String, EmployeeContext>,
        accounts: HashMap<String, AccountContext>,
    ) {
        self.vendor_contexts = vendors;
        self.employee_contexts = employees;
        self.account_contexts = accounts;
    }
1026
    /// Borrows the vendor contexts currently installed.
    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
        &self.vendor_contexts
    }
1031
    /// Borrows the employee contexts currently installed.
    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
        &self.employee_contexts
    }
1036
    /// Borrows the account contexts currently installed.
    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
        &self.account_contexts
    }
1041
1042 fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1051 if self.vendor_contexts.is_empty()
1052 && self.employee_contexts.is_empty()
1053 && self.account_contexts.is_empty()
1054 {
1055 return 1.0;
1056 }
1057
1058 let mut multiplier = 1.0;
1059
1060 if let Some(ref vendor_ref) = entry.header.reference {
1062 if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1063 if ctx.is_new {
1065 multiplier *= 2.0;
1066 }
1067 if ctx.is_dormant_reactivation {
1068 multiplier *= 1.5;
1069 }
1070 }
1071 }
1072
1073 if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1075 if ctx.is_new {
1076 multiplier *= 1.5;
1077 }
1078 if ctx.is_volume_fatigued {
1079 multiplier *= 1.3;
1080 }
1081 if ctx.is_overtime {
1082 multiplier *= 1.2;
1083 }
1084 }
1085
1086 if let Some(first_line) = entry.lines.first() {
1088 if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1089 if ctx.is_high_risk {
1090 multiplier *= 2.0;
1091 }
1092 }
1093 }
1094
1095 multiplier
1096 }
1097
1098 fn apply_fraud_behavioral_bias(
1107 &mut self,
1108 entry: &mut JournalEntry,
1109 ) -> Vec<datasynth_core::models::ProcessIssueType> {
1110 use datasynth_core::models::ProcessIssueType;
1111
1112 let cfg = self.config.enhanced.fraud_behavioral_bias;
1113 let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1114 for issue in &fired {
1115 match issue {
1116 ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1117 ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1118 ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1119 _ => {}
1120 }
1121 }
1122 if cfg.round_dollar_bias > 0.0 {
1127 const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1128 let max_amt: Decimal = entry
1129 .lines
1130 .iter()
1131 .map(|l| l.debit_amount.max(l.credit_amount))
1132 .max()
1133 .unwrap_or(Decimal::ZERO);
1134 if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1135 self.stats.fraud_round_dollar_bias_applied += 1;
1136 }
1137 }
1138 fired
1139 }
1140
1141 pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1150 if let Some(ref mut advancer) = self.scheme_advancer {
1151 let context = SchemeContext::new(date, company_code);
1152 let actions = advancer.advance_all(&context);
1153 self.scheme_actions.extend(actions.clone());
1154 actions
1155 } else {
1156 Vec::new()
1157 }
1158 }
1159
1160 pub fn maybe_start_scheme(
1166 &mut self,
1167 date: NaiveDate,
1168 company_code: &str,
1169 available_users: Vec<String>,
1170 available_accounts: Vec<String>,
1171 available_counterparties: Vec<String>,
1172 ) -> Option<uuid::Uuid> {
1173 if let Some(ref mut advancer) = self.scheme_advancer {
1174 let mut context = SchemeContext::new(date, company_code);
1175 context.available_users = available_users;
1176 context.available_accounts = available_accounts;
1177 context.available_counterparties = available_counterparties;
1178
1179 advancer.maybe_start_scheme(&context)
1180 } else {
1181 None
1182 }
1183 }
1184
1185 pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1187 &self.near_miss_labels
1188 }
1189
1190 pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1192 &self.scheme_actions
1193 }
1194
    /// Borrows the per-difficulty label counts.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
1199
1200 pub fn check_behavioral_deviations(
1202 &self,
1203 entity_id: &str,
1204 observation: &super::context::Observation,
1205 ) -> Vec<super::context::BehavioralDeviation> {
1206 if let Some(ref baseline) = self.behavioral_baseline {
1207 baseline.check_deviation(entity_id, observation)
1208 } else {
1209 Vec::new()
1210 }
1211 }
1212
1213 pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1215 if let Some(ref baseline) = self.behavioral_baseline {
1216 baseline.get_baseline(entity_id)
1217 } else {
1218 None
1219 }
1220 }
1221
1222 pub fn active_scheme_count(&self) -> usize {
1224 if let Some(ref advancer) = self.scheme_advancer {
1225 advancer.active_scheme_count()
1226 } else {
1227 0
1228 }
1229 }
1230
1231 pub fn has_enhanced_features(&self) -> bool {
1233 self.scheme_advancer.is_some()
1234 || self.near_miss_generator.is_some()
1235 || self.difficulty_calculator.is_some()
1236 || self.entity_aware_injector.is_some()
1237 }
1238}
1239
/// Fluent builder for [`AnomalyInjectorConfig`].
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration being assembled; starts from the default config.
    config: AnomalyInjectorConfig,
}
1244
1245impl AnomalyInjectorConfigBuilder {
    /// Starts a builder from `AnomalyInjectorConfig::default()`.
    pub fn new() -> Self {
        Self {
            config: AnomalyInjectorConfig::default(),
        }
    }
1252
    /// Sets the base injection rate (`rates.total_rate`). The value is stored
    /// as given; no range check is performed here.
    pub fn with_total_rate(mut self, rate: f64) -> Self {
        self.config.rates.total_rate = rate;
        self
    }
1258
    /// Sets the fraud category rate (`rates.fraud_rate`). The value is stored
    /// as given; no range check is performed here.
    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
        self.config.rates.fraud_rate = rate;
        self
    }
1264
    /// Sets the error category rate (`rates.error_rate`). The value is stored
    /// as given; no range check is performed here.
    pub fn with_error_rate(mut self, rate: f64) -> Self {
        self.config.rates.error_rate = rate;
        self
    }
1270
    /// Sets the seed stored in the configuration.
    pub fn with_seed(mut self, seed: u64) -> Self {
        self.config.seed = seed;
        self
    }
1276
    /// Sets the temporal pattern (`patterns.temporal_pattern`).
    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
        self.config.patterns.temporal_pattern = pattern;
        self
    }
1282
    /// Sets the `generate_labels` flag.
    pub fn with_labels(mut self, generate: bool) -> Self {
        self.config.generate_labels = generate;
        self
    }
1288
    /// Replaces the list of target company codes.
    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
        self.config.target_companies = companies;
        self
    }
1294
    /// Sets the date range to `Some((start, end))`. The pair is stored as
    /// given; this method does not check that `start <= end`.
    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
        self.config.date_range = Some((start, end));
        self
    }
1300
1301 pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1307 self.config.enhanced.multi_stage_schemes_enabled = enabled;
1308 self.config.enhanced.scheme_probability = probability;
1309 self
1310 }
1311
1312 pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1314 self.config.enhanced.near_miss_enabled = enabled;
1315 self.config.enhanced.near_miss_proportion = proportion;
1316 self
1317 }
1318
1319 pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1321 self.config.enhanced.approval_thresholds = thresholds;
1322 self
1323 }
1324
1325 pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1327 self.config.enhanced.correlated_injection_enabled = enabled;
1328 self
1329 }
1330
1331 pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1333 self.config.enhanced.temporal_clustering_enabled = enabled;
1334 self.config.enhanced.period_end_multiplier = multiplier;
1335 self
1336 }
1337
1338 pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1340 self.config.enhanced.difficulty_classification_enabled = enabled;
1341 self
1342 }
1343
1344 pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1346 self.config.enhanced.context_aware_enabled = enabled;
1347 self
1348 }
1349
1350 pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1352 self.config.enhanced.behavioral_baseline_config = config;
1353 self
1354 }
1355
1356 pub fn with_all_enhanced_features(mut self) -> Self {
1358 self.config.enhanced.multi_stage_schemes_enabled = true;
1359 self.config.enhanced.scheme_probability = 0.02;
1360 self.config.enhanced.correlated_injection_enabled = true;
1361 self.config.enhanced.temporal_clustering_enabled = true;
1362 self.config.enhanced.period_end_multiplier = 2.5;
1363 self.config.enhanced.near_miss_enabled = true;
1364 self.config.enhanced.near_miss_proportion = 0.30;
1365 self.config.enhanced.difficulty_classification_enabled = true;
1366 self.config.enhanced.context_aware_enabled = true;
1367 self.config.enhanced.behavioral_baseline_config.enabled = true;
1368 self
1369 }
1370
1371 pub fn build(self) -> AnomalyInjectorConfig {
1373 self.config
1374 }
1375}
1376
1377impl Default for AnomalyInjectorConfigBuilder {
1378 fn default() -> Self {
1379 Self::new()
1380 }
1381}
1382
1383#[cfg(test)]
1384mod tests {
1385 use super::*;
1386 use chrono::NaiveDate;
1387 use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1388 use rust_decimal_macros::dec;
1389
1390 fn create_test_entry(doc_num: &str) -> JournalEntry {
1391 let mut entry = JournalEntry::new_simple(
1392 doc_num.to_string(),
1393 "1000".to_string(),
1394 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1395 "Test Entry".to_string(),
1396 );
1397
1398 entry.add_line(JournalEntryLine {
1399 line_number: 1,
1400 gl_account: "5000".to_string(),
1401 debit_amount: dec!(1000),
1402 ..Default::default()
1403 });
1404
1405 entry.add_line(JournalEntryLine {
1406 line_number: 2,
1407 gl_account: "1000".to_string(),
1408 credit_amount: dec!(1000),
1409 ..Default::default()
1410 });
1411
1412 entry
1413 }
1414
1415 #[test]
1416 fn test_anomaly_injector_basic() {
1417 let config = AnomalyInjectorConfigBuilder::new()
1418 .with_total_rate(0.5) .with_seed(42)
1420 .build();
1421
1422 let mut injector = AnomalyInjector::new(config);
1423
1424 let mut entries: Vec<_> = (0..100)
1425 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1426 .collect();
1427
1428 let result = injector.process_entries(&mut entries);
1429
1430 assert!(result.anomalies_injected > 0);
1432 assert!(!result.labels.is_empty());
1433 assert!(result.labels.len() >= result.anomalies_injected);
1438 }
1439
1440 #[test]
1441 fn test_specific_injection() {
1442 let config = AnomalyInjectorConfig::default();
1443 let mut injector = AnomalyInjector::new(config);
1444
1445 let mut entry = create_test_entry("JE001");
1446 let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1447
1448 let label = injector.inject_specific(&mut entry, anomaly_type);
1449
1450 assert!(label.is_some());
1451 let label = label.unwrap();
1452 assert!(!label.document_id.is_empty());
1454 assert_eq!(label.document_id, entry.document_number());
1455 }
1456
1457 #[test]
1458 fn test_self_approval_injection() {
1459 let config = AnomalyInjectorConfig::default();
1460 let mut injector = AnomalyInjector::new(config);
1461
1462 let mut entry = create_test_entry("JE001");
1463 let label = injector.create_self_approval(&mut entry, "USER001");
1464
1465 assert!(label.is_some());
1466 let label = label.unwrap();
1467 assert!(matches!(
1468 label.anomaly_type,
1469 AnomalyType::Fraud(FraudType::SelfApproval)
1470 ));
1471 assert!(label.related_entities.contains(&"USER001".to_string()));
1472 }
1473
1474 #[test]
1475 fn test_company_filtering() {
1476 let config = AnomalyInjectorConfigBuilder::new()
1477 .with_total_rate(1.0) .with_target_companies(vec!["2000".to_string()])
1479 .build();
1480
1481 let mut injector = AnomalyInjector::new(config);
1482
1483 let mut entries = vec![
1484 create_test_entry("JE001"), create_test_entry("JE002"), ];
1487
1488 let result = injector.process_entries(&mut entries);
1489
1490 assert_eq!(result.anomalies_injected, 0);
1492 }
1493
1494 fn create_test_entry_with_context(
1500 doc_num: &str,
1501 vendor_ref: Option<&str>,
1502 employee_id: &str,
1503 gl_account: &str,
1504 ) -> JournalEntry {
1505 let mut entry = JournalEntry::new_simple(
1506 doc_num.to_string(),
1507 "1000".to_string(),
1508 NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1509 "Test Entry".to_string(),
1510 );
1511
1512 entry.header.reference = vendor_ref.map(|v| v.to_string());
1513 entry.header.created_by = employee_id.to_string();
1514
1515 entry.add_line(JournalEntryLine {
1516 line_number: 1,
1517 gl_account: gl_account.to_string(),
1518 debit_amount: dec!(1000),
1519 ..Default::default()
1520 });
1521
1522 entry.add_line(JournalEntryLine {
1523 line_number: 2,
1524 gl_account: "1000".to_string(),
1525 credit_amount: dec!(1000),
1526 ..Default::default()
1527 });
1528
1529 entry
1530 }
1531
1532 #[test]
1533 fn test_set_entity_contexts() {
1534 let config = AnomalyInjectorConfig::default();
1535 let mut injector = AnomalyInjector::new(config);
1536
1537 assert!(injector.vendor_contexts().is_empty());
1539 assert!(injector.employee_contexts().is_empty());
1540 assert!(injector.account_contexts().is_empty());
1541
1542 let mut vendors = HashMap::new();
1544 vendors.insert(
1545 "V001".to_string(),
1546 VendorContext {
1547 vendor_id: "V001".to_string(),
1548 is_new: true,
1549 ..Default::default()
1550 },
1551 );
1552
1553 let mut employees = HashMap::new();
1554 employees.insert(
1555 "EMP001".to_string(),
1556 EmployeeContext {
1557 employee_id: "EMP001".to_string(),
1558 is_new: true,
1559 ..Default::default()
1560 },
1561 );
1562
1563 let mut accounts = HashMap::new();
1564 accounts.insert(
1565 "8100".to_string(),
1566 AccountContext {
1567 account_code: "8100".to_string(),
1568 is_high_risk: true,
1569 ..Default::default()
1570 },
1571 );
1572
1573 injector.set_entity_contexts(vendors, employees, accounts);
1574
1575 assert_eq!(injector.vendor_contexts().len(), 1);
1576 assert_eq!(injector.employee_contexts().len(), 1);
1577 assert_eq!(injector.account_contexts().len(), 1);
1578 assert!(injector.vendor_contexts().contains_key("V001"));
1579 assert!(injector.employee_contexts().contains_key("EMP001"));
1580 assert!(injector.account_contexts().contains_key("8100"));
1581 }
1582
1583 #[test]
1584 fn test_default_behavior_no_contexts() {
1585 let config = AnomalyInjectorConfigBuilder::new()
1587 .with_total_rate(0.5)
1588 .with_seed(42)
1589 .build();
1590
1591 let mut injector = AnomalyInjector::new(config);
1592
1593 let mut entries: Vec<_> = (0..200)
1594 .map(|i| create_test_entry(&format!("JE{:04}", i)))
1595 .collect();
1596
1597 let result = injector.process_entries(&mut entries);
1598
1599 assert!(result.anomalies_injected > 0);
1602 let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1603 assert!(
1604 rate > 0.2 && rate < 0.8,
1605 "Expected ~50% rate, got {:.2}%",
1606 rate * 100.0
1607 );
1608 }
1609
1610 #[test]
1611 fn test_entity_context_increases_injection_rate() {
1612 let base_rate = 0.10; let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1618 .with_total_rate(base_rate)
1619 .with_seed(123)
1620 .build();
1621
1622 let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1623
1624 let mut entries_no_ctx: Vec<_> = (0..500)
1625 .map(|i| {
1626 create_test_entry_with_context(
1627 &format!("JE{:04}", i),
1628 Some("V001"),
1629 "EMP001",
1630 "8100",
1631 )
1632 })
1633 .collect();
1634
1635 let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1636
1637 let config_ctx = AnomalyInjectorConfigBuilder::new()
1639 .with_total_rate(base_rate)
1640 .with_seed(123)
1641 .build();
1642
1643 let mut injector_ctx = AnomalyInjector::new(config_ctx);
1644
1645 let mut vendors = HashMap::new();
1647 vendors.insert(
1648 "V001".to_string(),
1649 VendorContext {
1650 vendor_id: "V001".to_string(),
1651 is_new: true, is_dormant_reactivation: true, ..Default::default()
1654 },
1655 );
1656
1657 let mut employees = HashMap::new();
1658 employees.insert(
1659 "EMP001".to_string(),
1660 EmployeeContext {
1661 employee_id: "EMP001".to_string(),
1662 is_new: true, ..Default::default()
1664 },
1665 );
1666
1667 let mut accounts = HashMap::new();
1668 accounts.insert(
1669 "8100".to_string(),
1670 AccountContext {
1671 account_code: "8100".to_string(),
1672 is_high_risk: true, ..Default::default()
1674 },
1675 );
1676
1677 injector_ctx.set_entity_contexts(vendors, employees, accounts);
1678
1679 let mut entries_ctx: Vec<_> = (0..500)
1680 .map(|i| {
1681 create_test_entry_with_context(
1682 &format!("JE{:04}", i),
1683 Some("V001"),
1684 "EMP001",
1685 "8100",
1686 )
1687 })
1688 .collect();
1689
1690 let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1691
1692 assert!(
1694 result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1695 "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1696 result_ctx.anomalies_injected,
1697 result_no_ctx.anomalies_injected,
1698 );
1699 }
1700
1701 #[test]
1702 fn test_risk_score_multiplication() {
1703 let config = AnomalyInjectorConfig::default();
1705 let mut injector = AnomalyInjector::new(config);
1706
1707 let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1709 assert!(
1710 (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1711 );
1712
1713 let mut vendors = HashMap::new();
1715 vendors.insert(
1716 "V_RISKY".to_string(),
1717 VendorContext {
1718 vendor_id: "V_RISKY".to_string(),
1719 is_new: true,
1720 ..Default::default()
1721 },
1722 );
1723
1724 let mut accounts = HashMap::new();
1725 accounts.insert(
1726 "9000".to_string(),
1727 AccountContext {
1728 account_code: "9000".to_string(),
1729 is_high_risk: true,
1730 ..Default::default()
1731 },
1732 );
1733
1734 injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1735
1736 let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1737 let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1738 assert!(
1740 (multiplier - 4.0).abs() < f64::EPSILON,
1741 "Expected 4.0x multiplier, got {}",
1742 multiplier,
1743 );
1744
1745 let entry_vendor_only =
1747 create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1748 let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1749 assert!(
1750 (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1751 "Expected 2.0x multiplier (vendor only), got {}",
1752 multiplier_vendor,
1753 );
1754
1755 let entry_no_match =
1757 create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1758 let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1759 assert!(
1760 (multiplier_none - 1.0).abs() < f64::EPSILON,
1761 "Expected 1.0x multiplier (no match), got {}",
1762 multiplier_none,
1763 );
1764 }
1765
1766 #[test]
1767 fn test_employee_context_multiplier() {
1768 let config = AnomalyInjectorConfig::default();
1769 let mut injector = AnomalyInjector::new(config);
1770
1771 let mut employees = HashMap::new();
1772 employees.insert(
1773 "EMP_NEW".to_string(),
1774 EmployeeContext {
1775 employee_id: "EMP_NEW".to_string(),
1776 is_new: true, is_volume_fatigued: true, is_overtime: true, ..Default::default()
1780 },
1781 );
1782
1783 injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1784
1785 let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1786 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1787
1788 let expected = 1.5 * 1.3 * 1.2;
1790 assert!(
1791 (multiplier - expected).abs() < 0.01,
1792 "Expected {:.3}x multiplier, got {:.3}",
1793 expected,
1794 multiplier,
1795 );
1796 }
1797
1798 #[test]
1799 fn test_entity_contexts_persist_across_reset() {
1800 let config = AnomalyInjectorConfig::default();
1801 let mut injector = AnomalyInjector::new(config);
1802
1803 let mut vendors = HashMap::new();
1804 vendors.insert(
1805 "V001".to_string(),
1806 VendorContext {
1807 vendor_id: "V001".to_string(),
1808 is_new: true,
1809 ..Default::default()
1810 },
1811 );
1812
1813 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1814 assert_eq!(injector.vendor_contexts().len(), 1);
1815
1816 injector.reset();
1818 assert_eq!(injector.vendor_contexts().len(), 1);
1819 }
1820
1821 #[test]
1822 fn test_set_empty_contexts_clears() {
1823 let config = AnomalyInjectorConfig::default();
1824 let mut injector = AnomalyInjector::new(config);
1825
1826 let mut vendors = HashMap::new();
1827 vendors.insert(
1828 "V001".to_string(),
1829 VendorContext {
1830 vendor_id: "V001".to_string(),
1831 ..Default::default()
1832 },
1833 );
1834
1835 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1836 assert_eq!(injector.vendor_contexts().len(), 1);
1837
1838 injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1840 assert!(injector.vendor_contexts().is_empty());
1841 }
1842
1843 #[test]
1844 fn test_dormant_vendor_multiplier() {
1845 let config = AnomalyInjectorConfig::default();
1846 let mut injector = AnomalyInjector::new(config);
1847
1848 let mut vendors = HashMap::new();
1849 vendors.insert(
1850 "V_DORMANT".to_string(),
1851 VendorContext {
1852 vendor_id: "V_DORMANT".to_string(),
1853 is_dormant_reactivation: true, ..Default::default()
1855 },
1856 );
1857
1858 injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1859
1860 let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1861 let multiplier = injector.calculate_context_rate_multiplier(&entry);
1862 assert!(
1863 (multiplier - 1.5).abs() < f64::EPSILON,
1864 "Expected 1.5x multiplier for dormant vendor, got {}",
1865 multiplier,
1866 );
1867 }
1868
1869 #[test]
1878 fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
1879 use chrono::{Datelike, Timelike, Weekday};
1880 use datasynth_core::models::FraudType;
1881
1882 let mut config = AnomalyInjectorConfig::default();
1883 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1884 enabled: true,
1885 weekend_bias: 1.0,
1886 round_dollar_bias: 1.0,
1887 off_hours_bias: 1.0,
1888 post_close_bias: 1.0,
1889 };
1890 let mut injector = AnomalyInjector::new(config);
1891
1892 let mut entry = JournalEntry::new_simple(
1894 "JE001".to_string(),
1895 "1000".to_string(),
1896 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), "Test Entry".to_string(),
1898 );
1899 entry.add_line(JournalEntryLine {
1900 line_number: 1,
1901 gl_account: "5000".to_string(),
1902 debit_amount: dec!(1237),
1903 ..Default::default()
1904 });
1905 entry.add_line(JournalEntryLine {
1906 line_number: 2,
1907 gl_account: "1000".to_string(),
1908 credit_amount: dec!(1237),
1909 ..Default::default()
1910 });
1911
1912 let _ =
1913 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1914
1915 assert!(
1917 matches!(
1918 entry.header.posting_date.weekday(),
1919 Weekday::Sat | Weekday::Sun
1920 ),
1921 "expected weekend posting date, got {:?}",
1922 entry.header.posting_date.weekday()
1923 );
1924 let debit_total: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
1926 let credit_total: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
1927 assert_eq!(debit_total, credit_total, "entry must remain balanced");
1928 assert!(
1929 [
1930 dec!(1_000),
1931 dec!(5_000),
1932 dec!(10_000),
1933 dec!(25_000),
1934 dec!(50_000),
1935 dec!(100_000)
1936 ]
1937 .contains(&debit_total),
1938 "expected round-dollar total, got {}",
1939 debit_total
1940 );
1941 let hour = entry.header.created_at.hour();
1943 assert!(
1944 !(6..22).contains(&hour),
1945 "expected off-hours timestamp, got hour {}",
1946 hour
1947 );
1948 assert!(entry.header.is_post_close);
1950
1951 let stats = injector.get_stats();
1953 assert_eq!(stats.fraud_weekend_bias_applied, 1);
1954 assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
1955 assert_eq!(stats.fraud_off_hours_bias_applied, 1);
1956 assert_eq!(stats.fraud_post_close_bias_applied, 1);
1957 }
1958
1959 #[test]
1962 fn fraud_behavioral_bias_rate_zero_applies_nothing() {
1963 use datasynth_core::models::FraudType;
1964
1965 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1967 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1968 enabled: true,
1969 weekend_bias: 0.0,
1970 round_dollar_bias: 0.0,
1971 off_hours_bias: 0.0,
1972 post_close_bias: 0.0,
1973 };
1974 let mut injector = AnomalyInjector::new(config);
1975 let mut entry = create_test_entry("JE001");
1976 entry.header.posting_date = original_date;
1977
1978 let _ =
1979 injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1980
1981 assert_eq!(entry.header.posting_date, original_date);
1982 assert!(!entry.header.is_post_close);
1983 let stats = injector.get_stats();
1984 assert_eq!(stats.fraud_weekend_bias_applied, 0);
1985 assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
1986 assert_eq!(stats.fraud_off_hours_bias_applied, 0);
1987 assert_eq!(stats.fraud_post_close_bias_applied, 0);
1988 }
1989
1990 #[test]
1993 fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
1994 let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); let mut config = AnomalyInjectorConfig::default();
1996 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1997 enabled: true,
1998 weekend_bias: 1.0,
1999 round_dollar_bias: 1.0,
2000 off_hours_bias: 1.0,
2001 post_close_bias: 1.0,
2002 };
2003 let mut injector = AnomalyInjector::new(config);
2004 let mut entry = create_test_entry("JE001");
2005 entry.header.posting_date = original_date;
2006
2007 let _ = injector.inject_specific(
2008 &mut entry,
2009 AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
2010 );
2011
2012 assert_eq!(entry.header.posting_date, original_date);
2013 let stats = injector.get_stats();
2014 assert_eq!(stats.fraud_weekend_bias_applied, 0);
2015 }
2016
2017 #[test]
2021 fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
2022 use datasynth_core::models::{FraudType, ProcessIssueType};
2023
2024 let mut config = AnomalyInjectorConfig::default();
2025 config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
2026 enabled: true,
2027 weekend_bias: 1.0,
2028 round_dollar_bias: 0.0, off_hours_bias: 1.0,
2030 post_close_bias: 1.0,
2031 };
2032 let mut injector = AnomalyInjector::new(config);
2033 let mut entry = JournalEntry::new_simple(
2034 "JE001".into(),
2035 "1000".into(),
2036 NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(),
2037 "Test".into(),
2038 );
2039 entry.add_line(JournalEntryLine {
2040 line_number: 1,
2041 gl_account: "5000".into(),
2042 debit_amount: dec!(1000),
2043 ..Default::default()
2044 });
2045 entry.add_line(JournalEntryLine {
2046 line_number: 2,
2047 gl_account: "1000".into(),
2048 credit_amount: dec!(1000),
2049 ..Default::default()
2050 });
2051
2052 let primary = injector
2053 .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
2054 .expect("fraud label should be produced");
2055
2056 let labels = injector.get_labels();
2058 assert_eq!(
2059 labels.len(),
2060 3,
2061 "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
2062 );
2063 let types: Vec<AnomalyType> = labels.iter().map(|l| l.anomaly_type.clone()).collect();
2064 assert!(types.contains(&AnomalyType::ProcessIssue(ProcessIssueType::WeekendPosting)));
2065 assert!(types.contains(&AnomalyType::ProcessIssue(
2066 ProcessIssueType::AfterHoursPosting
2067 )));
2068 assert!(types.contains(&AnomalyType::ProcessIssue(
2069 ProcessIssueType::PostClosePosting
2070 )));
2071 assert_eq!(
2072 primary.anomaly_type,
2073 AnomalyType::Fraud(FraudType::FictitiousEntry)
2074 );
2075 }
2076}