Skip to main content

datasynth_generators/anomaly/
injector.rs

1//! Main anomaly injection engine.
2//!
3//! The injector coordinates anomaly generation across all data types,
4//! managing rates, patterns, clustering, and label generation.
5//!
6//! ## Enhanced Features (v0.3.0+)
7//!
8//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
9//! - **Correlated injection**: Co-occurrence patterns and error cascades
10//! - **Near-miss generation**: Suspicious but legitimate transactions
11//! - **Detection difficulty classification**: Trivial to expert levels
12//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use rand::Rng;
16use rand::SeedableRng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20
21use datasynth_core::models::{
22    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
23    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
24    RelationalAnomalyType,
25};
26
27use super::context::{
28    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
29    EntityAwareInjector, VendorContext,
30};
31use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
32use super::difficulty::DifficultyCalculator;
33use super::near_miss::{NearMissConfig, NearMissGenerator};
34use super::patterns::{
35    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
36    TemporalPattern,
37};
38use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
39use super::schemes::{SchemeAction, SchemeContext};
40use super::strategies::{DuplicationStrategy, StrategyCollection};
41use super::types::AnomalyTypeSelector;
42
/// Configuration for the anomaly injector.
///
/// Consumed by `AnomalyInjector::new`; the injector keeps the value for its
/// whole lifetime and consults it on every processed entry.
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Rate configuration: `total_rate` is the base per-entry injection rate,
    /// the per-category rates drive category selection.
    pub rates: AnomalyRateConfig,
    /// Pattern configuration (clustering, entity targeting, temporal pattern).
    pub patterns: AnomalyPatternConfig,
    /// Random seed for reproducibility; seeds the injector's ChaCha8 RNG.
    pub seed: u64,
    /// Whether to generate labels.
    pub generate_labels: bool,
    /// Whether a duplicate-type anomaly may also produce a duplicated entry.
    pub allow_duplicates: bool,
    /// Maximum anomalies per document; documents that reached this count are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes to target (empty = all).
    pub target_companies: Vec<String>,
    /// Inclusive `(start, end)` posting-date range for injection; `None` = no restriction.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Enhanced features configuration.
    pub enhanced: EnhancedInjectionConfig,
}
65
/// Enhanced injection configuration for v0.3.0+ features.
///
/// `Default` is derived, so every feature flag defaults to `false`, every
/// numeric setting to `0.0` and the threshold list to empty.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Enable multi-stage fraud scheme generation.
    pub multi_stage_schemes_enabled: bool,
    /// Probability of starting a new scheme per perpetrator per year.
    /// Used as-is for embezzlement and halved for revenue manipulation and kickbacks.
    pub scheme_probability: f64,
    /// Enable correlated anomaly injection.
    pub correlated_injection_enabled: bool,
    /// Enable temporal clustering (period-end spikes).
    pub temporal_clustering_enabled: bool,
    /// Period-end anomaly rate multiplier.
    // NOTE(review): the derived default is 0.0 — confirm consumers never apply it unset.
    pub period_end_multiplier: f64,
    /// Enable near-miss generation.
    pub near_miss_enabled: bool,
    /// Proportion of anomalies that are near-misses.
    pub near_miss_proportion: f64,
    /// Approval thresholds for threshold-proximity near-misses.
    pub approval_thresholds: Vec<Decimal>,
    /// Enable detection difficulty classification.
    pub difficulty_classification_enabled: bool,
    /// Enable context-aware injection.
    pub context_aware_enabled: bool,
    /// Behavioral baseline configuration; only used when context-aware
    /// injection is enabled and the baseline's own `enabled` flag is set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
}
92
93impl Default for AnomalyInjectorConfig {
94    fn default() -> Self {
95        Self {
96            rates: AnomalyRateConfig::default(),
97            patterns: AnomalyPatternConfig::default(),
98            seed: 42,
99            generate_labels: true,
100            allow_duplicates: true,
101            max_anomalies_per_document: 2,
102            target_companies: Vec::new(),
103            date_range: None,
104            enhanced: EnhancedInjectionConfig::default(),
105        }
106    }
107}
108
/// Result of an injection batch.
///
/// Produced by `AnomalyInjector::process_entries`. Several fields are copied
/// from the injector's running state rather than counted per batch; see the
/// individual field notes.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Number of entries processed (taken from the injector's running total).
    pub entries_processed: usize,
    /// Number of anomalies injected (taken from the injector's running total).
    pub anomalies_injected: usize,
    /// Number of duplicates created during this batch.
    pub duplicates_created: usize,
    /// Labels generated (a clone of all labels held by the injector).
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of anomalies, built from the same label set as `labels`.
    pub summary: AnomalySummary,
    /// Entries that were modified (document numbers) during this batch.
    pub modified_documents: Vec<String>,
    /// Near-miss labels (suspicious but legitimate transactions).
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Multi-stage scheme actions generated.
    pub scheme_actions: Vec<SchemeAction>,
    /// Difficulty distribution summary (label count per difficulty level).
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
}
131
/// Main anomaly injection engine.
///
/// Owns the RNG, the injection strategies and all bookkeeping (labels,
/// per-document counters, statistics). Optional "enhanced" components are
/// `Some` only when the matching flag in `EnhancedInjectionConfig` is set.
// NOTE(review): `dead_code` is allowed presumably because some components are
// stored but not yet read everywhere — confirm before removing the attribute.
#[allow(dead_code)]
pub struct AnomalyInjector {
    /// Configuration supplied at construction time.
    config: AnomalyInjectorConfig,
    /// Deterministic RNG seeded from `config.seed`.
    rng: ChaCha8Rng,
    /// Picks a concrete anomaly type within a category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that apply an anomaly type to a journal entry.
    strategies: StrategyCollection,
    /// Assigns injected anomalies to clusters.
    cluster_manager: ClusterManager,
    /// Entity targeting state built from the pattern configuration.
    entity_targeting: EntityTargetingManager,
    /// Tracking which documents already have anomalies.
    document_anomaly_counts: HashMap<String, usize>,
    /// All generated labels.
    labels: Vec<LabeledAnomaly>,
    /// Statistics.
    stats: InjectorStats,
    // Enhanced components (v0.3.0+)
    /// Multi-stage fraud scheme advancer.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Near-miss generator.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels generated.
    near_miss_labels: Vec<NearMissLabel>,
    /// Co-occurrence pattern handler.
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Temporal cluster generator.
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Difficulty calculator.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Entity-aware injector.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Behavioral baseline tracker.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions generated.
    scheme_actions: Vec<SchemeAction>,
    /// Difficulty distribution (label count per difficulty level).
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    // Entity context lookup maps for risk-adjusted injection rates
    /// Vendor contexts keyed by vendor ID.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts keyed by employee ID.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts keyed by account code.
    account_contexts: HashMap<String, AccountContext>,
}
176
/// Internal statistics tracking.
///
/// The type is public so it can be handed out by reference, but all fields
/// are private to this module.
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct InjectorStats {
    /// Entries seen by `process_entries`, including skipped ones.
    total_processed: usize,
    /// Anomalies for which a label was produced by `process_entries`.
    total_injected: usize,
    /// Applied anomalies per category name.
    by_category: HashMap<String, usize>,
    /// Applied anomalies per anomaly type name.
    by_type: HashMap<String, usize>,
    /// Applied anomalies per company code.
    by_company: HashMap<String, usize>,
    // NOTE(review): `skipped_rate` is not updated in the code visible here — confirm usage.
    skipped_rate: usize,
    /// Entries skipped because their posting date is outside the configured range.
    skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    skipped_company: usize,
    /// Entries skipped because the document reached its anomaly limit.
    skipped_max_per_doc: usize,
}
191
192impl AnomalyInjector {
193    /// Creates a new anomaly injector.
194    pub fn new(config: AnomalyInjectorConfig) -> Self {
195        let mut rng = ChaCha8Rng::seed_from_u64(config.seed);
196        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
197        let entity_targeting =
198            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
199
200        // Initialize enhanced components based on configuration
201        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
202            let scheme_config = SchemeAdvancerConfig {
203                embezzlement_probability: config.enhanced.scheme_probability,
204                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
205                kickback_probability: config.enhanced.scheme_probability * 0.5,
206                seed: rng.gen(),
207                ..Default::default()
208            };
209            Some(SchemeAdvancer::new(scheme_config))
210        } else {
211            None
212        };
213
214        let near_miss_generator = if config.enhanced.near_miss_enabled {
215            let near_miss_config = NearMissConfig {
216                proportion: config.enhanced.near_miss_proportion,
217                seed: rng.gen(),
218                ..Default::default()
219            };
220            Some(NearMissGenerator::new(near_miss_config))
221        } else {
222            None
223        };
224
225        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
226            Some(AnomalyCoOccurrence::new())
227        } else {
228            None
229        };
230
231        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
232            Some(TemporalClusterGenerator::new())
233        } else {
234            None
235        };
236
237        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
238            Some(DifficultyCalculator::new())
239        } else {
240            None
241        };
242
243        let entity_aware_injector = if config.enhanced.context_aware_enabled {
244            Some(EntityAwareInjector::default())
245        } else {
246            None
247        };
248
249        let behavioral_baseline = if config.enhanced.context_aware_enabled
250            && config.enhanced.behavioral_baseline_config.enabled
251        {
252            Some(BehavioralBaseline::new(
253                config.enhanced.behavioral_baseline_config.clone(),
254            ))
255        } else {
256            None
257        };
258
259        Self {
260            config,
261            rng,
262            type_selector: AnomalyTypeSelector::new(),
263            strategies: StrategyCollection::default(),
264            cluster_manager,
265            entity_targeting,
266            document_anomaly_counts: HashMap::new(),
267            labels: Vec::new(),
268            stats: InjectorStats::default(),
269            scheme_advancer,
270            near_miss_generator,
271            near_miss_labels: Vec::new(),
272            co_occurrence_handler,
273            temporal_cluster_generator,
274            difficulty_calculator,
275            entity_aware_injector,
276            behavioral_baseline,
277            scheme_actions: Vec::new(),
278            difficulty_distribution: HashMap::new(),
279            vendor_contexts: HashMap::new(),
280            employee_contexts: HashMap::new(),
281            account_contexts: HashMap::new(),
282        }
283    }
284
285    /// Processes a batch of journal entries, potentially injecting anomalies.
286    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
287        let mut modified_documents = Vec::new();
288        let mut duplicates = Vec::new();
289
290        for entry in entries.iter_mut() {
291            self.stats.total_processed += 1;
292
293            // Update behavioral baseline if enabled
294            if let Some(ref mut baseline) = self.behavioral_baseline {
295                use super::context::Observation;
296                // Record the observation for baseline building
297                let entity_id = entry.header.created_by.clone();
298                let observation =
299                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
300                baseline.record_observation(&entity_id, observation);
301            }
302
303            // Check if we should process this entry
304            if !self.should_process(entry) {
305                continue;
306            }
307
308            // Calculate effective rate (temporal clustering is applied later per-type)
309            let base_rate = self.config.rates.total_rate;
310
311            // Calculate entity-aware rate adjustment using context lookup maps
312            let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
313                let employee_id = &entry.header.created_by;
314                let first_account = entry
315                    .lines
316                    .first()
317                    .map(|l| l.gl_account.as_str())
318                    .unwrap_or("");
319                // Look up vendor from the entry's reference field (vendor ID convention)
320                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
321
322                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
323                let employee_ctx = self.employee_contexts.get(employee_id);
324                let account_ctx = self.account_contexts.get(first_account);
325
326                let multiplier =
327                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
328                (base_rate * multiplier).min(1.0)
329            } else {
330                // No entity-aware injector: fall back to context maps alone
331                self.calculate_context_rate_multiplier(entry) * base_rate
332            };
333
334            // Determine if we inject an anomaly
335            if should_inject_anomaly(
336                effective_rate,
337                entry.posting_date(),
338                &self.config.patterns.temporal_pattern,
339                &mut self.rng,
340            ) {
341                // Check if this should be a near-miss instead
342                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
343                    // Record the transaction for near-duplicate detection
344                    let account = entry
345                        .lines
346                        .first()
347                        .map(|l| l.gl_account.clone())
348                        .unwrap_or_default();
349                    near_miss_gen.record_transaction(
350                        entry.document_number().clone(),
351                        entry.posting_date(),
352                        entry.total_debit(),
353                        &account,
354                        None,
355                    );
356
357                    // Check if this could be a near-miss
358                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
359                        entry.document_number().clone(),
360                        entry.posting_date(),
361                        entry.total_debit(),
362                        &account,
363                        None,
364                        &self.config.enhanced.approval_thresholds,
365                    ) {
366                        self.near_miss_labels.push(near_miss_label);
367                        continue; // Skip actual anomaly injection
368                    }
369                }
370
371                // Select anomaly category based on rates
372                let anomaly_type = self.select_anomaly_category();
373
374                // Apply the anomaly
375                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
376                    // Calculate detection difficulty if enabled
377                    if let Some(ref calculator) = self.difficulty_calculator {
378                        let difficulty = calculator.calculate(&label);
379
380                        // Store difficulty in metadata
381                        label = label
382                            .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
383                        label = label.with_metadata(
384                            "difficulty_score",
385                            &difficulty.difficulty_score().to_string(),
386                        );
387
388                        // Update difficulty distribution
389                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
390                    }
391
392                    modified_documents.push(entry.document_number().clone());
393                    self.labels.push(label);
394                    self.stats.total_injected += 1;
395                }
396
397                // Check for duplicate injection
398                if self.config.allow_duplicates
399                    && matches!(
400                        self.labels.last().map(|l| &l.anomaly_type),
401                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
402                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
403                    )
404                {
405                    let dup_strategy = DuplicationStrategy::default();
406                    let duplicate = dup_strategy.duplicate(entry, &mut self.rng);
407                    duplicates.push(duplicate);
408                }
409            }
410        }
411
412        // Count duplicates
413        let duplicates_created = duplicates.len();
414
415        // Build summary
416        let summary = AnomalySummary::from_anomalies(&self.labels);
417
418        InjectionBatchResult {
419            entries_processed: self.stats.total_processed,
420            anomalies_injected: self.stats.total_injected,
421            duplicates_created,
422            labels: self.labels.clone(),
423            summary,
424            modified_documents,
425            near_miss_labels: self.near_miss_labels.clone(),
426            scheme_actions: self.scheme_actions.clone(),
427            difficulty_distribution: self.difficulty_distribution.clone(),
428        }
429    }
430
431    /// Checks if an entry should be processed.
432    fn should_process(&mut self, entry: &JournalEntry) -> bool {
433        // Check company filter
434        if !self.config.target_companies.is_empty()
435            && !self
436                .config
437                .target_companies
438                .iter()
439                .any(|c| c == entry.company_code())
440        {
441            self.stats.skipped_company += 1;
442            return false;
443        }
444
445        // Check date range
446        if let Some((start, end)) = self.config.date_range {
447            if entry.posting_date() < start || entry.posting_date() > end {
448                self.stats.skipped_date += 1;
449                return false;
450            }
451        }
452
453        // Check max anomalies per document
454        let current_count = self
455            .document_anomaly_counts
456            .get(&entry.document_number())
457            .copied()
458            .unwrap_or(0);
459        if current_count >= self.config.max_anomalies_per_document {
460            self.stats.skipped_max_per_doc += 1;
461            return false;
462        }
463
464        true
465    }
466
467    /// Selects an anomaly category based on configured rates.
468    fn select_anomaly_category(&mut self) -> AnomalyType {
469        let r = self.rng.gen::<f64>();
470        let rates = &self.config.rates;
471
472        let mut cumulative = 0.0;
473
474        cumulative += rates.fraud_rate;
475        if r < cumulative {
476            return self.type_selector.select_fraud(&mut self.rng);
477        }
478
479        cumulative += rates.error_rate;
480        if r < cumulative {
481            return self.type_selector.select_error(&mut self.rng);
482        }
483
484        cumulative += rates.process_issue_rate;
485        if r < cumulative {
486            return self.type_selector.select_process_issue(&mut self.rng);
487        }
488
489        cumulative += rates.statistical_rate;
490        if r < cumulative {
491            return self.type_selector.select_statistical(&mut self.rng);
492        }
493
494        self.type_selector.select_relational(&mut self.rng)
495    }
496
497    /// Injects an anomaly into an entry.
498    fn inject_anomaly(
499        &mut self,
500        entry: &mut JournalEntry,
501        anomaly_type: AnomalyType,
502    ) -> Option<LabeledAnomaly> {
503        // Check if strategy can be applied
504        if !self.strategies.can_apply(entry, &anomaly_type) {
505            return None;
506        }
507
508        // Apply the strategy
509        let result = self
510            .strategies
511            .apply_strategy(entry, &anomaly_type, &mut self.rng);
512
513        if !result.success {
514            return None;
515        }
516
517        // Update document anomaly count
518        *self
519            .document_anomaly_counts
520            .entry(entry.document_number().clone())
521            .or_insert(0) += 1;
522
523        // Update statistics
524        let category = anomaly_type.category().to_string();
525        let type_name = anomaly_type.type_name();
526
527        *self.stats.by_category.entry(category).or_insert(0) += 1;
528        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
529        *self
530            .stats
531            .by_company
532            .entry(entry.company_code().to_string())
533            .or_insert(0) += 1;
534
535        // Generate label
536        if self.config.generate_labels {
537            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
538
539            // Update entry header with anomaly tracking fields
540            entry.header.is_anomaly = true;
541            entry.header.anomaly_id = Some(anomaly_id.clone());
542            entry.header.anomaly_type = Some(type_name.clone());
543
544            // Also set fraud flag if this is a fraud anomaly
545            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
546                entry.header.is_fraud = true;
547                if let AnomalyType::Fraud(ref ft) = anomaly_type {
548                    entry.header.fraud_type = Some(*ft);
549                }
550            }
551
552            let mut label = LabeledAnomaly::new(
553                anomaly_id,
554                anomaly_type.clone(),
555                entry.document_number().clone(),
556                "JE".to_string(),
557                entry.company_code().to_string(),
558                entry.posting_date(),
559            )
560            .with_description(&result.description)
561            .with_injection_strategy(&type_name);
562
563            // Add causal reason with injection context (provenance tracking)
564            let causal_reason = AnomalyCausalReason::RandomRate {
565                base_rate: self.config.rates.total_rate,
566            };
567            label = label.with_causal_reason(causal_reason);
568
569            // Add entity context metadata if contexts are populated
570            let context_multiplier = self.calculate_context_rate_multiplier(entry);
571            if (context_multiplier - 1.0).abs() > f64::EPSILON {
572                label = label.with_metadata(
573                    "entity_context_multiplier",
574                    &format!("{:.3}", context_multiplier),
575                );
576                label = label.with_metadata(
577                    "effective_rate",
578                    &format!(
579                        "{:.6}",
580                        (self.config.rates.total_rate * context_multiplier).min(1.0)
581                    ),
582                );
583            }
584
585            // Add monetary impact
586            if let Some(impact) = result.monetary_impact {
587                label = label.with_monetary_impact(impact);
588            }
589
590            // Add related entities
591            for entity in &result.related_entities {
592                label = label.with_related_entity(entity);
593            }
594
595            // Add metadata
596            for (key, value) in &result.metadata {
597                label = label.with_metadata(key, value);
598            }
599
600            // Assign cluster and update causal reason if in cluster
601            if let Some(cluster_id) =
602                self.cluster_manager
603                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
604            {
605                label = label.with_cluster(&cluster_id);
606                // Update causal reason to reflect cluster membership
607                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
608                    cluster_id: cluster_id.clone(),
609                });
610            }
611
612            return Some(label);
613        }
614
615        None
616    }
617
618    /// Injects a specific anomaly type into an entry.
619    pub fn inject_specific(
620        &mut self,
621        entry: &mut JournalEntry,
622        anomaly_type: AnomalyType,
623    ) -> Option<LabeledAnomaly> {
624        self.inject_anomaly(entry, anomaly_type)
625    }
626
627    /// Creates a self-approval anomaly.
628    pub fn create_self_approval(
629        &mut self,
630        entry: &mut JournalEntry,
631        user_id: &str,
632    ) -> Option<LabeledAnomaly> {
633        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
634
635        let label = LabeledAnomaly::new(
636            format!("ANO{:08}", self.labels.len() + 1),
637            anomaly_type,
638            entry.document_number().clone(),
639            "JE".to_string(),
640            entry.company_code().to_string(),
641            entry.posting_date(),
642        )
643        .with_description(&format!("User {} approved their own transaction", user_id))
644        .with_related_entity(user_id)
645        .with_injection_strategy("ManualSelfApproval")
646        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
647            target_type: "User".to_string(),
648            target_id: user_id.to_string(),
649        });
650
651        // Set entry header anomaly tracking fields
652        entry.header.is_anomaly = true;
653        entry.header.is_fraud = true;
654        entry.header.anomaly_id = Some(label.anomaly_id.clone());
655        entry.header.anomaly_type = Some("SelfApproval".to_string());
656        entry.header.fraud_type = Some(FraudType::SelfApproval);
657
658        // Set approver = requester
659        entry.header.created_by = user_id.to_string();
660
661        self.labels.push(label.clone());
662        Some(label)
663    }
664
665    /// Creates a segregation of duties violation.
666    pub fn create_sod_violation(
667        &mut self,
668        entry: &mut JournalEntry,
669        user_id: &str,
670        conflicting_duties: (&str, &str),
671    ) -> Option<LabeledAnomaly> {
672        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
673
674        let label = LabeledAnomaly::new(
675            format!("ANO{:08}", self.labels.len() + 1),
676            anomaly_type,
677            entry.document_number().clone(),
678            "JE".to_string(),
679            entry.company_code().to_string(),
680            entry.posting_date(),
681        )
682        .with_description(&format!(
683            "User {} performed conflicting duties: {} and {}",
684            user_id, conflicting_duties.0, conflicting_duties.1
685        ))
686        .with_related_entity(user_id)
687        .with_metadata("duty1", conflicting_duties.0)
688        .with_metadata("duty2", conflicting_duties.1)
689        .with_injection_strategy("ManualSoDViolation")
690        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
691            target_type: "User".to_string(),
692            target_id: user_id.to_string(),
693        });
694
695        // Set entry header anomaly tracking fields
696        entry.header.is_anomaly = true;
697        entry.header.is_fraud = true;
698        entry.header.anomaly_id = Some(label.anomaly_id.clone());
699        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
700        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
701
702        self.labels.push(label.clone());
703        Some(label)
704    }
705
706    /// Creates an intercompany mismatch anomaly.
707    pub fn create_ic_mismatch(
708        &mut self,
709        entry: &mut JournalEntry,
710        matching_company: &str,
711        expected_amount: Decimal,
712        actual_amount: Decimal,
713    ) -> Option<LabeledAnomaly> {
714        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
715
716        let label = LabeledAnomaly::new(
717            format!("ANO{:08}", self.labels.len() + 1),
718            anomaly_type,
719            entry.document_number().clone(),
720            "JE".to_string(),
721            entry.company_code().to_string(),
722            entry.posting_date(),
723        )
724        .with_description(&format!(
725            "Intercompany mismatch with {}: expected {} but got {}",
726            matching_company, expected_amount, actual_amount
727        ))
728        .with_related_entity(matching_company)
729        .with_monetary_impact(actual_amount - expected_amount)
730        .with_metadata("expected_amount", &expected_amount.to_string())
731        .with_metadata("actual_amount", &actual_amount.to_string())
732        .with_injection_strategy("ManualICMismatch")
733        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
734            target_type: "Intercompany".to_string(),
735            target_id: matching_company.to_string(),
736        });
737
738        // Set entry header anomaly tracking fields
739        entry.header.is_anomaly = true;
740        entry.header.anomaly_id = Some(label.anomaly_id.clone());
741        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
742
743        self.labels.push(label.clone());
744        Some(label)
745    }
746
    /// Returns all generated labels.
    ///
    /// The slice borrows the injector's internal label list; it is emptied by
    /// `reset`.
    pub fn get_labels(&self) -> &[LabeledAnomaly] {
        &self.labels
    }
751
    /// Returns the anomaly summary.
    ///
    /// The summary is rebuilt from the stored labels on every call rather
    /// than cached.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
756
    /// Returns injection statistics.
    ///
    /// The counters accumulate until `reset` replaces them with defaults.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
761
762    /// Clears all labels and resets statistics.
763    pub fn reset(&mut self) {
764        self.labels.clear();
765        self.document_anomaly_counts.clear();
766        self.stats = InjectorStats::default();
767        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
768
769        // Reset enhanced components
770        self.near_miss_labels.clear();
771        self.scheme_actions.clear();
772        self.difficulty_distribution.clear();
773
774        if let Some(ref mut baseline) = self.behavioral_baseline {
775            *baseline =
776                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
777        }
778    }
779
    /// Returns the number of clusters created.
    ///
    /// Delegates to the cluster manager, which `reset` replaces with a fresh one.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
784
785    // =========================================================================
786    // Entity Context API
787    // =========================================================================
788
    /// Sets entity contexts for risk-adjusted anomaly injection.
    ///
    /// When entity contexts are provided, the injector adjusts anomaly injection
    /// rates based on entity risk factors. Entries involving high-risk vendors,
    /// new employees, or sensitive accounts will have higher effective injection
    /// rates.
    ///
    /// All three maps are replaced wholesale; nothing is merged with
    /// previously set contexts. Pass empty HashMaps to clear them.
    pub fn set_entity_contexts(
        &mut self,
        vendors: HashMap<String, VendorContext>,
        employees: HashMap<String, EmployeeContext>,
        accounts: HashMap<String, AccountContext>,
    ) {
        self.vendor_contexts = vendors;
        self.employee_contexts = employees;
        self.account_contexts = accounts;
    }
807
808    /// Returns a reference to the vendor context map.
809    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
810        &self.vendor_contexts
811    }
812
813    /// Returns a reference to the employee context map.
814    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
815        &self.employee_contexts
816    }
817
818    /// Returns a reference to the account context map.
819    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
820        &self.account_contexts
821    }
822
823    /// Calculates a rate multiplier from the entity context maps alone (no
824    /// `EntityAwareInjector` needed). This provides a lightweight fallback
825    /// when context-aware injection is not fully enabled but context maps
826    /// have been populated.
827    ///
828    /// The multiplier is the product of individual entity risk factors found
829    /// in the context maps for the given journal entry. If no contexts match,
830    /// returns 1.0 (no adjustment).
831    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
832        if self.vendor_contexts.is_empty()
833            && self.employee_contexts.is_empty()
834            && self.account_contexts.is_empty()
835        {
836            return 1.0;
837        }
838
839        let mut multiplier = 1.0;
840
841        // Vendor lookup via reference field
842        if let Some(ref vendor_ref) = entry.header.reference {
843            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
844                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
845                if ctx.is_new {
846                    multiplier *= 2.0;
847                }
848                if ctx.is_dormant_reactivation {
849                    multiplier *= 1.5;
850                }
851            }
852        }
853
854        // Employee lookup via created_by
855        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
856            if ctx.is_new {
857                multiplier *= 1.5;
858            }
859            if ctx.is_volume_fatigued {
860                multiplier *= 1.3;
861            }
862            if ctx.is_overtime {
863                multiplier *= 1.2;
864            }
865        }
866
867        // Account lookup via first line's GL account
868        if let Some(first_line) = entry.lines.first() {
869            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
870                if ctx.is_high_risk {
871                    multiplier *= 2.0;
872                }
873            }
874        }
875
876        multiplier
877    }
878
879    // =========================================================================
880    // Enhanced Features API (v0.3.0+)
881    // =========================================================================
882
883    /// Advances all active fraud schemes by one time step.
884    ///
885    /// Call this method once per simulated day to generate scheme actions.
886    /// Returns the scheme actions generated for this date.
887    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
888        if let Some(ref mut advancer) = self.scheme_advancer {
889            let context = SchemeContext::new(date, company_code);
890            let actions = advancer.advance_all(&context);
891            self.scheme_actions.extend(actions.clone());
892            actions
893        } else {
894            Vec::new()
895        }
896    }
897
898    /// Potentially starts a new fraud scheme based on probabilities.
899    ///
900    /// Call this method periodically (e.g., once per period) to allow new
901    /// schemes to start based on configured probabilities.
902    /// Returns the scheme ID if a scheme was started.
903    pub fn maybe_start_scheme(
904        &mut self,
905        date: NaiveDate,
906        company_code: &str,
907        available_users: Vec<String>,
908        available_accounts: Vec<String>,
909        available_counterparties: Vec<String>,
910    ) -> Option<uuid::Uuid> {
911        if let Some(ref mut advancer) = self.scheme_advancer {
912            let mut context = SchemeContext::new(date, company_code);
913            context.available_users = available_users;
914            context.available_accounts = available_accounts;
915            context.available_counterparties = available_counterparties;
916
917            advancer.maybe_start_scheme(&context)
918        } else {
919            None
920        }
921    }
922
923    /// Returns all near-miss labels generated.
924    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
925        &self.near_miss_labels
926    }
927
928    /// Returns all scheme actions generated.
929    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
930        &self.scheme_actions
931    }
932
933    /// Returns the detection difficulty distribution.
934    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
935        &self.difficulty_distribution
936    }
937
938    /// Checks for behavioral deviations for an entity with an observation.
939    pub fn check_behavioral_deviations(
940        &self,
941        entity_id: &str,
942        observation: &super::context::Observation,
943    ) -> Vec<super::context::BehavioralDeviation> {
944        if let Some(ref baseline) = self.behavioral_baseline {
945            baseline.check_deviation(entity_id, observation)
946        } else {
947            Vec::new()
948        }
949    }
950
951    /// Gets the baseline for an entity.
952    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
953        if let Some(ref baseline) = self.behavioral_baseline {
954            baseline.get_baseline(entity_id)
955        } else {
956            None
957        }
958    }
959
960    /// Returns the number of active schemes.
961    pub fn active_scheme_count(&self) -> usize {
962        if let Some(ref advancer) = self.scheme_advancer {
963            advancer.active_scheme_count()
964        } else {
965            0
966        }
967    }
968
969    /// Returns whether enhanced features are enabled.
970    pub fn has_enhanced_features(&self) -> bool {
971        self.scheme_advancer.is_some()
972            || self.near_miss_generator.is_some()
973            || self.difficulty_calculator.is_some()
974            || self.entity_aware_injector.is_some()
975    }
976}
977
978/// Builder for AnomalyInjectorConfig.
979pub struct AnomalyInjectorConfigBuilder {
980    config: AnomalyInjectorConfig,
981}
982
983impl AnomalyInjectorConfigBuilder {
984    /// Creates a new builder with default configuration.
985    pub fn new() -> Self {
986        Self {
987            config: AnomalyInjectorConfig::default(),
988        }
989    }
990
991    /// Sets the total anomaly rate.
992    pub fn with_total_rate(mut self, rate: f64) -> Self {
993        self.config.rates.total_rate = rate;
994        self
995    }
996
997    /// Sets the fraud rate (proportion of anomalies).
998    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
999        self.config.rates.fraud_rate = rate;
1000        self
1001    }
1002
1003    /// Sets the error rate (proportion of anomalies).
1004    pub fn with_error_rate(mut self, rate: f64) -> Self {
1005        self.config.rates.error_rate = rate;
1006        self
1007    }
1008
1009    /// Sets the random seed.
1010    pub fn with_seed(mut self, seed: u64) -> Self {
1011        self.config.seed = seed;
1012        self
1013    }
1014
1015    /// Sets the temporal pattern.
1016    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1017        self.config.patterns.temporal_pattern = pattern;
1018        self
1019    }
1020
1021    /// Enables or disables label generation.
1022    pub fn with_labels(mut self, generate: bool) -> Self {
1023        self.config.generate_labels = generate;
1024        self
1025    }
1026
1027    /// Sets target companies.
1028    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1029        self.config.target_companies = companies;
1030        self
1031    }
1032
1033    /// Sets the date range.
1034    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1035        self.config.date_range = Some((start, end));
1036        self
1037    }
1038
1039    // =========================================================================
1040    // Enhanced Features Configuration (v0.3.0+)
1041    // =========================================================================
1042
1043    /// Enables multi-stage fraud scheme generation.
1044    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1045        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1046        self.config.enhanced.scheme_probability = probability;
1047        self
1048    }
1049
1050    /// Enables near-miss generation.
1051    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1052        self.config.enhanced.near_miss_enabled = enabled;
1053        self.config.enhanced.near_miss_proportion = proportion;
1054        self
1055    }
1056
1057    /// Sets approval thresholds for threshold-proximity near-misses.
1058    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1059        self.config.enhanced.approval_thresholds = thresholds;
1060        self
1061    }
1062
1063    /// Enables correlated anomaly injection.
1064    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1065        self.config.enhanced.correlated_injection_enabled = enabled;
1066        self
1067    }
1068
1069    /// Enables temporal clustering (period-end spikes).
1070    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1071        self.config.enhanced.temporal_clustering_enabled = enabled;
1072        self.config.enhanced.period_end_multiplier = multiplier;
1073        self
1074    }
1075
1076    /// Enables detection difficulty classification.
1077    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1078        self.config.enhanced.difficulty_classification_enabled = enabled;
1079        self
1080    }
1081
1082    /// Enables context-aware injection.
1083    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1084        self.config.enhanced.context_aware_enabled = enabled;
1085        self
1086    }
1087
1088    /// Sets behavioral baseline configuration.
1089    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1090        self.config.enhanced.behavioral_baseline_config = config;
1091        self
1092    }
1093
1094    /// Enables all enhanced features with default settings.
1095    pub fn with_all_enhanced_features(mut self) -> Self {
1096        self.config.enhanced.multi_stage_schemes_enabled = true;
1097        self.config.enhanced.scheme_probability = 0.02;
1098        self.config.enhanced.correlated_injection_enabled = true;
1099        self.config.enhanced.temporal_clustering_enabled = true;
1100        self.config.enhanced.period_end_multiplier = 2.5;
1101        self.config.enhanced.near_miss_enabled = true;
1102        self.config.enhanced.near_miss_proportion = 0.30;
1103        self.config.enhanced.difficulty_classification_enabled = true;
1104        self.config.enhanced.context_aware_enabled = true;
1105        self.config.enhanced.behavioral_baseline_config.enabled = true;
1106        self
1107    }
1108
1109    /// Builds the configuration.
1110    pub fn build(self) -> AnomalyInjectorConfig {
1111        self.config
1112    }
1113}
1114
1115impl Default for AnomalyInjectorConfigBuilder {
1116    fn default() -> Self {
1117        Self::new()
1118    }
1119}
1120
1121#[cfg(test)]
1122#[allow(clippy::unwrap_used)]
1123mod tests {
1124    use super::*;
1125    use chrono::NaiveDate;
1126    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1127    use rust_decimal_macros::dec;
1128
1129    fn create_test_entry(doc_num: &str) -> JournalEntry {
1130        let mut entry = JournalEntry::new_simple(
1131            doc_num.to_string(),
1132            "1000".to_string(),
1133            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1134            "Test Entry".to_string(),
1135        );
1136
1137        entry.add_line(JournalEntryLine {
1138            line_number: 1,
1139            gl_account: "5000".to_string(),
1140            debit_amount: dec!(1000),
1141            ..Default::default()
1142        });
1143
1144        entry.add_line(JournalEntryLine {
1145            line_number: 2,
1146            gl_account: "1000".to_string(),
1147            credit_amount: dec!(1000),
1148            ..Default::default()
1149        });
1150
1151        entry
1152    }
1153
1154    #[test]
1155    fn test_anomaly_injector_basic() {
1156        let config = AnomalyInjectorConfigBuilder::new()
1157            .with_total_rate(0.5) // High rate for testing
1158            .with_seed(42)
1159            .build();
1160
1161        let mut injector = AnomalyInjector::new(config);
1162
1163        let mut entries: Vec<_> = (0..100)
1164            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1165            .collect();
1166
1167        let result = injector.process_entries(&mut entries);
1168
1169        // With 50% rate, we should have some anomalies
1170        assert!(result.anomalies_injected > 0);
1171        assert!(!result.labels.is_empty());
1172        assert_eq!(result.labels.len(), result.anomalies_injected);
1173    }
1174
1175    #[test]
1176    fn test_specific_injection() {
1177        let config = AnomalyInjectorConfig::default();
1178        let mut injector = AnomalyInjector::new(config);
1179
1180        let mut entry = create_test_entry("JE001");
1181        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1182
1183        let label = injector.inject_specific(&mut entry, anomaly_type);
1184
1185        assert!(label.is_some());
1186        let label = label.unwrap();
1187        // document_id is the UUID string from the journal entry header
1188        assert!(!label.document_id.is_empty());
1189        assert_eq!(label.document_id, entry.document_number());
1190    }
1191
1192    #[test]
1193    fn test_self_approval_injection() {
1194        let config = AnomalyInjectorConfig::default();
1195        let mut injector = AnomalyInjector::new(config);
1196
1197        let mut entry = create_test_entry("JE001");
1198        let label = injector.create_self_approval(&mut entry, "USER001");
1199
1200        assert!(label.is_some());
1201        let label = label.unwrap();
1202        assert!(matches!(
1203            label.anomaly_type,
1204            AnomalyType::Fraud(FraudType::SelfApproval)
1205        ));
1206        assert!(label.related_entities.contains(&"USER001".to_string()));
1207    }
1208
1209    #[test]
1210    fn test_company_filtering() {
1211        let config = AnomalyInjectorConfigBuilder::new()
1212            .with_total_rate(1.0) // Inject all
1213            .with_target_companies(vec!["2000".to_string()])
1214            .build();
1215
1216        let mut injector = AnomalyInjector::new(config);
1217
1218        let mut entries = vec![
1219            create_test_entry("JE001"), // company 1000
1220            create_test_entry("JE002"), // company 1000
1221        ];
1222
1223        let result = injector.process_entries(&mut entries);
1224
1225        // No anomalies because entries are in company 1000, not 2000
1226        assert_eq!(result.anomalies_injected, 0);
1227    }
1228
1229    // =========================================================================
1230    // Entity Context Tests
1231    // =========================================================================
1232
1233    /// Helper to create a test entry with specific vendor reference and employee.
1234    fn create_test_entry_with_context(
1235        doc_num: &str,
1236        vendor_ref: Option<&str>,
1237        employee_id: &str,
1238        gl_account: &str,
1239    ) -> JournalEntry {
1240        let mut entry = JournalEntry::new_simple(
1241            doc_num.to_string(),
1242            "1000".to_string(),
1243            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1244            "Test Entry".to_string(),
1245        );
1246
1247        entry.header.reference = vendor_ref.map(|v| v.to_string());
1248        entry.header.created_by = employee_id.to_string();
1249
1250        entry.add_line(JournalEntryLine {
1251            line_number: 1,
1252            gl_account: gl_account.to_string(),
1253            debit_amount: dec!(1000),
1254            ..Default::default()
1255        });
1256
1257        entry.add_line(JournalEntryLine {
1258            line_number: 2,
1259            gl_account: "1000".to_string(),
1260            credit_amount: dec!(1000),
1261            ..Default::default()
1262        });
1263
1264        entry
1265    }
1266
1267    #[test]
1268    fn test_set_entity_contexts() {
1269        let config = AnomalyInjectorConfig::default();
1270        let mut injector = AnomalyInjector::new(config);
1271
1272        // Initially empty
1273        assert!(injector.vendor_contexts().is_empty());
1274        assert!(injector.employee_contexts().is_empty());
1275        assert!(injector.account_contexts().is_empty());
1276
1277        // Set contexts
1278        let mut vendors = HashMap::new();
1279        vendors.insert(
1280            "V001".to_string(),
1281            VendorContext {
1282                vendor_id: "V001".to_string(),
1283                is_new: true,
1284                ..Default::default()
1285            },
1286        );
1287
1288        let mut employees = HashMap::new();
1289        employees.insert(
1290            "EMP001".to_string(),
1291            EmployeeContext {
1292                employee_id: "EMP001".to_string(),
1293                is_new: true,
1294                ..Default::default()
1295            },
1296        );
1297
1298        let mut accounts = HashMap::new();
1299        accounts.insert(
1300            "8100".to_string(),
1301            AccountContext {
1302                account_code: "8100".to_string(),
1303                is_high_risk: true,
1304                ..Default::default()
1305            },
1306        );
1307
1308        injector.set_entity_contexts(vendors, employees, accounts);
1309
1310        assert_eq!(injector.vendor_contexts().len(), 1);
1311        assert_eq!(injector.employee_contexts().len(), 1);
1312        assert_eq!(injector.account_contexts().len(), 1);
1313        assert!(injector.vendor_contexts().contains_key("V001"));
1314        assert!(injector.employee_contexts().contains_key("EMP001"));
1315        assert!(injector.account_contexts().contains_key("8100"));
1316    }
1317
1318    #[test]
1319    fn test_default_behavior_no_contexts() {
1320        // Without any entity contexts, the base rate is used unchanged.
1321        let config = AnomalyInjectorConfigBuilder::new()
1322            .with_total_rate(0.5)
1323            .with_seed(42)
1324            .build();
1325
1326        let mut injector = AnomalyInjector::new(config);
1327
1328        let mut entries: Vec<_> = (0..200)
1329            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1330            .collect();
1331
1332        let result = injector.process_entries(&mut entries);
1333
1334        // With 50% base rate and no context, expect roughly 50% injection
1335        // Allow wide margin for randomness
1336        assert!(result.anomalies_injected > 0);
1337        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1338        assert!(
1339            rate > 0.2 && rate < 0.8,
1340            "Expected ~50% rate, got {:.2}%",
1341            rate * 100.0
1342        );
1343    }
1344
1345    #[test]
1346    fn test_entity_context_increases_injection_rate() {
1347        // With high-risk entity contexts, the effective rate should be higher
1348        // than the base rate, leading to more anomalies being injected.
1349        let base_rate = 0.10; // Low base rate
1350
1351        // Run without contexts
1352        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1353            .with_total_rate(base_rate)
1354            .with_seed(123)
1355            .build();
1356
1357        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1358
1359        let mut entries_no_ctx: Vec<_> = (0..500)
1360            .map(|i| {
1361                create_test_entry_with_context(
1362                    &format!("JE{:04}", i),
1363                    Some("V001"),
1364                    "EMP001",
1365                    "8100",
1366                )
1367            })
1368            .collect();
1369
1370        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1371
1372        // Run with high-risk contexts (same seed for comparable randomness)
1373        let config_ctx = AnomalyInjectorConfigBuilder::new()
1374            .with_total_rate(base_rate)
1375            .with_seed(123)
1376            .build();
1377
1378        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1379
1380        // Set up high-risk contexts
1381        let mut vendors = HashMap::new();
1382        vendors.insert(
1383            "V001".to_string(),
1384            VendorContext {
1385                vendor_id: "V001".to_string(),
1386                is_new: true,                  // 2.0x multiplier
1387                is_dormant_reactivation: true, // 1.5x multiplier
1388                ..Default::default()
1389            },
1390        );
1391
1392        let mut employees = HashMap::new();
1393        employees.insert(
1394            "EMP001".to_string(),
1395            EmployeeContext {
1396                employee_id: "EMP001".to_string(),
1397                is_new: true, // 1.5x multiplier
1398                ..Default::default()
1399            },
1400        );
1401
1402        let mut accounts = HashMap::new();
1403        accounts.insert(
1404            "8100".to_string(),
1405            AccountContext {
1406                account_code: "8100".to_string(),
1407                is_high_risk: true, // 2.0x multiplier
1408                ..Default::default()
1409            },
1410        );
1411
1412        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1413
1414        let mut entries_ctx: Vec<_> = (0..500)
1415            .map(|i| {
1416                create_test_entry_with_context(
1417                    &format!("JE{:04}", i),
1418                    Some("V001"),
1419                    "EMP001",
1420                    "8100",
1421                )
1422            })
1423            .collect();
1424
1425        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1426
1427        // The context-enhanced run should inject more anomalies
1428        assert!(
1429            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1430            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1431            result_ctx.anomalies_injected,
1432            result_no_ctx.anomalies_injected,
1433        );
1434    }
1435
1436    #[test]
1437    fn test_risk_score_multiplication() {
1438        // Verify the calculate_context_rate_multiplier produces correct values.
1439        let config = AnomalyInjectorConfig::default();
1440        let mut injector = AnomalyInjector::new(config);
1441
1442        // No contexts: multiplier should be 1.0
1443        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1444        assert!(
1445            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1446        );
1447
1448        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1449        let mut vendors = HashMap::new();
1450        vendors.insert(
1451            "V_RISKY".to_string(),
1452            VendorContext {
1453                vendor_id: "V_RISKY".to_string(),
1454                is_new: true,
1455                ..Default::default()
1456            },
1457        );
1458
1459        let mut accounts = HashMap::new();
1460        accounts.insert(
1461            "9000".to_string(),
1462            AccountContext {
1463                account_code: "9000".to_string(),
1464                is_high_risk: true,
1465                ..Default::default()
1466            },
1467        );
1468
1469        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1470
1471        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1472        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1473        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1474        assert!(
1475            (multiplier - 4.0).abs() < f64::EPSILON,
1476            "Expected 4.0x multiplier, got {}",
1477            multiplier,
1478        );
1479
1480        // Entry with only vendor context match (no account match)
1481        let entry_vendor_only =
1482            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1483        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1484        assert!(
1485            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1486            "Expected 2.0x multiplier (vendor only), got {}",
1487            multiplier_vendor,
1488        );
1489
1490        // Entry with no matching contexts
1491        let entry_no_match =
1492            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1493        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1494        assert!(
1495            (multiplier_none - 1.0).abs() < f64::EPSILON,
1496            "Expected 1.0x multiplier (no match), got {}",
1497            multiplier_none,
1498        );
1499    }
1500
1501    #[test]
1502    fn test_employee_context_multiplier() {
1503        let config = AnomalyInjectorConfig::default();
1504        let mut injector = AnomalyInjector::new(config);
1505
1506        let mut employees = HashMap::new();
1507        employees.insert(
1508            "EMP_NEW".to_string(),
1509            EmployeeContext {
1510                employee_id: "EMP_NEW".to_string(),
1511                is_new: true,             // 1.5x
1512                is_volume_fatigued: true, // 1.3x
1513                is_overtime: true,        // 1.2x
1514                ..Default::default()
1515            },
1516        );
1517
1518        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1519
1520        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1521        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1522
1523        // 1.5 * 1.3 * 1.2 = 2.34
1524        let expected = 1.5 * 1.3 * 1.2;
1525        assert!(
1526            (multiplier - expected).abs() < 0.01,
1527            "Expected {:.3}x multiplier, got {:.3}",
1528            expected,
1529            multiplier,
1530        );
1531    }
1532
1533    #[test]
1534    fn test_entity_contexts_persist_across_reset() {
1535        let config = AnomalyInjectorConfig::default();
1536        let mut injector = AnomalyInjector::new(config);
1537
1538        let mut vendors = HashMap::new();
1539        vendors.insert(
1540            "V001".to_string(),
1541            VendorContext {
1542                vendor_id: "V001".to_string(),
1543                is_new: true,
1544                ..Default::default()
1545            },
1546        );
1547
1548        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1549        assert_eq!(injector.vendor_contexts().len(), 1);
1550
1551        // Reset clears labels and stats but not entity contexts
1552        injector.reset();
1553        assert_eq!(injector.vendor_contexts().len(), 1);
1554    }
1555
1556    #[test]
1557    fn test_set_empty_contexts_clears() {
1558        let config = AnomalyInjectorConfig::default();
1559        let mut injector = AnomalyInjector::new(config);
1560
1561        let mut vendors = HashMap::new();
1562        vendors.insert(
1563            "V001".to_string(),
1564            VendorContext {
1565                vendor_id: "V001".to_string(),
1566                ..Default::default()
1567            },
1568        );
1569
1570        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1571        assert_eq!(injector.vendor_contexts().len(), 1);
1572
1573        // Setting empty maps clears
1574        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1575        assert!(injector.vendor_contexts().is_empty());
1576    }
1577
1578    #[test]
1579    fn test_dormant_vendor_multiplier() {
1580        let config = AnomalyInjectorConfig::default();
1581        let mut injector = AnomalyInjector::new(config);
1582
1583        let mut vendors = HashMap::new();
1584        vendors.insert(
1585            "V_DORMANT".to_string(),
1586            VendorContext {
1587                vendor_id: "V_DORMANT".to_string(),
1588                is_dormant_reactivation: true, // 1.5x
1589                ..Default::default()
1590            },
1591        );
1592
1593        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1594
1595        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1596        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1597        assert!(
1598            (multiplier - 1.5).abs() < f64::EPSILON,
1599            "Expected 1.5x multiplier for dormant vendor, got {}",
1600            multiplier,
1601        );
1602    }
1603}