Skip to main content

datasynth_generators/anomaly/
injector.rs

1//! Main anomaly injection engine.
2//!
3//! The injector coordinates anomaly generation across all data types,
4//! managing rates, patterns, clustering, and label generation.
5//!
6//! ## Enhanced Features (v0.3.0+)
7//!
8//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
9//! - **Correlated injection**: Co-occurrence patterns and error cascades
10//! - **Near-miss generation**: Suspicious but legitimate transactions
11//! - **Detection difficulty classification**: Trivial to expert levels
12//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use rand::Rng;
16use rand::SeedableRng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20
21use datasynth_core::models::{
22    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
23    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
24    RelationalAnomalyType,
25};
26
27use super::context::{
28    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
29    EntityAwareInjector, VendorContext,
30};
31use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
32use super::difficulty::DifficultyCalculator;
33use super::near_miss::{NearMissConfig, NearMissGenerator};
34use super::patterns::{
35    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
36    TemporalPattern,
37};
38use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
39use super::schemes::{SchemeAction, SchemeContext};
40use super::strategies::{DuplicationStrategy, StrategyCollection};
41use super::types::AnomalyTypeSelector;
42
43/// Configuration for the anomaly injector.
44#[derive(Debug, Clone)]
45pub struct AnomalyInjectorConfig {
46    /// Rate configuration.
47    pub rates: AnomalyRateConfig,
48    /// Pattern configuration.
49    pub patterns: AnomalyPatternConfig,
50    /// Random seed for reproducibility.
51    pub seed: u64,
52    /// Whether to generate labels.
53    pub generate_labels: bool,
54    /// Whether to allow duplicate injection.
55    pub allow_duplicates: bool,
56    /// Maximum anomalies per document.
57    pub max_anomalies_per_document: usize,
58    /// Company codes to target (empty = all).
59    pub target_companies: Vec<String>,
60    /// Date range for injection.
61    pub date_range: Option<(NaiveDate, NaiveDate)>,
62    /// Enhanced features configuration.
63    pub enhanced: EnhancedInjectionConfig,
64}
65
66/// Enhanced injection configuration for v0.3.0+ features.
67#[derive(Debug, Clone, Default)]
68pub struct EnhancedInjectionConfig {
69    /// Enable multi-stage fraud scheme generation.
70    pub multi_stage_schemes_enabled: bool,
71    /// Probability of starting a new scheme per perpetrator per year.
72    pub scheme_probability: f64,
73    /// Enable correlated anomaly injection.
74    pub correlated_injection_enabled: bool,
75    /// Enable temporal clustering (period-end spikes).
76    pub temporal_clustering_enabled: bool,
77    /// Period-end anomaly rate multiplier.
78    pub period_end_multiplier: f64,
79    /// Enable near-miss generation.
80    pub near_miss_enabled: bool,
81    /// Proportion of anomalies that are near-misses.
82    pub near_miss_proportion: f64,
83    /// Approval thresholds for threshold-proximity near-misses.
84    pub approval_thresholds: Vec<Decimal>,
85    /// Enable detection difficulty classification.
86    pub difficulty_classification_enabled: bool,
87    /// Enable context-aware injection.
88    pub context_aware_enabled: bool,
89    /// Behavioral baseline configuration.
90    pub behavioral_baseline_config: BehavioralBaselineConfig,
91}
92
93impl Default for AnomalyInjectorConfig {
94    fn default() -> Self {
95        Self {
96            rates: AnomalyRateConfig::default(),
97            patterns: AnomalyPatternConfig::default(),
98            seed: 42,
99            generate_labels: true,
100            allow_duplicates: true,
101            max_anomalies_per_document: 2,
102            target_companies: Vec::new(),
103            date_range: None,
104            enhanced: EnhancedInjectionConfig::default(),
105        }
106    }
107}
108
109/// Result of an injection batch.
110#[derive(Debug, Clone)]
111pub struct InjectionBatchResult {
112    /// Number of entries processed.
113    pub entries_processed: usize,
114    /// Number of anomalies injected.
115    pub anomalies_injected: usize,
116    /// Number of duplicates created.
117    pub duplicates_created: usize,
118    /// Labels generated.
119    pub labels: Vec<LabeledAnomaly>,
120    /// Summary of anomalies.
121    pub summary: AnomalySummary,
122    /// Entries that were modified (document numbers).
123    pub modified_documents: Vec<String>,
124    /// Near-miss labels (suspicious but legitimate transactions).
125    pub near_miss_labels: Vec<NearMissLabel>,
126    /// Multi-stage scheme actions generated.
127    pub scheme_actions: Vec<SchemeAction>,
128    /// Difficulty distribution summary.
129    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
130}
131
132/// Main anomaly injection engine.
133#[allow(dead_code)]
134pub struct AnomalyInjector {
135    config: AnomalyInjectorConfig,
136    rng: ChaCha8Rng,
137    type_selector: AnomalyTypeSelector,
138    strategies: StrategyCollection,
139    cluster_manager: ClusterManager,
140    entity_targeting: EntityTargetingManager,
141    /// Tracking which documents already have anomalies.
142    document_anomaly_counts: HashMap<String, usize>,
143    /// All generated labels.
144    labels: Vec<LabeledAnomaly>,
145    /// Statistics.
146    stats: InjectorStats,
147    // Enhanced components (v0.3.0+)
148    /// Multi-stage fraud scheme advancer.
149    scheme_advancer: Option<SchemeAdvancer>,
150    /// Near-miss generator.
151    near_miss_generator: Option<NearMissGenerator>,
152    /// Near-miss labels generated.
153    near_miss_labels: Vec<NearMissLabel>,
154    /// Co-occurrence pattern handler.
155    co_occurrence_handler: Option<AnomalyCoOccurrence>,
156    /// Temporal cluster generator.
157    temporal_cluster_generator: Option<TemporalClusterGenerator>,
158    /// Difficulty calculator.
159    difficulty_calculator: Option<DifficultyCalculator>,
160    /// Entity-aware injector.
161    entity_aware_injector: Option<EntityAwareInjector>,
162    /// Behavioral baseline tracker.
163    behavioral_baseline: Option<BehavioralBaseline>,
164    /// Scheme actions generated.
165    scheme_actions: Vec<SchemeAction>,
166    /// Difficulty distribution.
167    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
168    // Entity context lookup maps for risk-adjusted injection rates
169    /// Vendor contexts keyed by vendor ID.
170    vendor_contexts: HashMap<String, VendorContext>,
171    /// Employee contexts keyed by employee ID.
172    employee_contexts: HashMap<String, EmployeeContext>,
173    /// Account contexts keyed by account code.
174    account_contexts: HashMap<String, AccountContext>,
175}
176
/// Running counters kept by the injector.
///
/// All counters start at zero and all maps start empty (`Default` is derived).
#[derive(Debug, Clone, Default)]
#[allow(dead_code)]
pub struct InjectorStats {
    /// Entries seen by batch processing, including ones later filtered out.
    total_processed: usize,
    /// Anomalies injected by batch processing.
    total_injected: usize,
    /// Applied anomalies per category name.
    by_category: HashMap<String, usize>,
    /// Applied anomalies per anomaly type name.
    by_type: HashMap<String, usize>,
    /// Applied anomalies per company code.
    by_company: HashMap<String, usize>,
    /// Entries skipped by the rate roll.
    // NOTE(review): no update of this counter is visible in the reviewed part
    // of the file — confirm it is maintained elsewhere.
    skipped_rate: usize,
    /// Entries skipped because their posting date is outside the date range.
    skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    skipped_company: usize,
    /// Entries skipped because their document reached the anomaly limit.
    skipped_max_per_doc: usize,
}
191
192impl AnomalyInjector {
193    /// Creates a new anomaly injector.
194    pub fn new(config: AnomalyInjectorConfig) -> Self {
195        let mut rng = ChaCha8Rng::seed_from_u64(config.seed);
196        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
197        let entity_targeting =
198            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
199
200        // Initialize enhanced components based on configuration
201        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
202            let scheme_config = SchemeAdvancerConfig {
203                embezzlement_probability: config.enhanced.scheme_probability,
204                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
205                kickback_probability: config.enhanced.scheme_probability * 0.5,
206                seed: rng.gen(),
207                ..Default::default()
208            };
209            Some(SchemeAdvancer::new(scheme_config))
210        } else {
211            None
212        };
213
214        let near_miss_generator = if config.enhanced.near_miss_enabled {
215            let near_miss_config = NearMissConfig {
216                proportion: config.enhanced.near_miss_proportion,
217                seed: rng.gen(),
218                ..Default::default()
219            };
220            Some(NearMissGenerator::new(near_miss_config))
221        } else {
222            None
223        };
224
225        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
226            Some(AnomalyCoOccurrence::new())
227        } else {
228            None
229        };
230
231        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
232            Some(TemporalClusterGenerator::new())
233        } else {
234            None
235        };
236
237        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
238            Some(DifficultyCalculator::new())
239        } else {
240            None
241        };
242
243        let entity_aware_injector = if config.enhanced.context_aware_enabled {
244            Some(EntityAwareInjector::default())
245        } else {
246            None
247        };
248
249        let behavioral_baseline = if config.enhanced.context_aware_enabled
250            && config.enhanced.behavioral_baseline_config.enabled
251        {
252            Some(BehavioralBaseline::new(
253                config.enhanced.behavioral_baseline_config.clone(),
254            ))
255        } else {
256            None
257        };
258
259        Self {
260            config,
261            rng,
262            type_selector: AnomalyTypeSelector::new(),
263            strategies: StrategyCollection::default(),
264            cluster_manager,
265            entity_targeting,
266            document_anomaly_counts: HashMap::new(),
267            labels: Vec::new(),
268            stats: InjectorStats::default(),
269            scheme_advancer,
270            near_miss_generator,
271            near_miss_labels: Vec::new(),
272            co_occurrence_handler,
273            temporal_cluster_generator,
274            difficulty_calculator,
275            entity_aware_injector,
276            behavioral_baseline,
277            scheme_actions: Vec::new(),
278            difficulty_distribution: HashMap::new(),
279            vendor_contexts: HashMap::new(),
280            employee_contexts: HashMap::new(),
281            account_contexts: HashMap::new(),
282        }
283    }
284
285    /// Processes a batch of journal entries, potentially injecting anomalies.
286    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
287        let mut modified_documents = Vec::new();
288        let mut duplicates = Vec::new();
289
290        for entry in entries.iter_mut() {
291            self.stats.total_processed += 1;
292
293            // Update behavioral baseline if enabled
294            if let Some(ref mut baseline) = self.behavioral_baseline {
295                use super::context::Observation;
296                // Record the observation for baseline building
297                let entity_id = entry.header.created_by.clone();
298                let observation =
299                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
300                baseline.record_observation(&entity_id, observation);
301            }
302
303            // Check if we should process this entry
304            if !self.should_process(entry) {
305                continue;
306            }
307
308            // Calculate effective rate (temporal clustering is applied later per-type)
309            let base_rate = self.config.rates.total_rate;
310
311            // Calculate entity-aware rate adjustment using context lookup maps
312            let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
313                let employee_id = &entry.header.created_by;
314                let first_account = entry
315                    .lines
316                    .first()
317                    .map(|l| l.gl_account.as_str())
318                    .unwrap_or("");
319                // Look up vendor from the entry's reference field (vendor ID convention)
320                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
321
322                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
323                let employee_ctx = self.employee_contexts.get(employee_id);
324                let account_ctx = self.account_contexts.get(first_account);
325
326                let multiplier =
327                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
328                (base_rate * multiplier).min(1.0)
329            } else {
330                // No entity-aware injector: fall back to context maps alone
331                self.calculate_context_rate_multiplier(entry) * base_rate
332            };
333
334            // Determine if we inject an anomaly
335            if should_inject_anomaly(
336                effective_rate,
337                entry.posting_date(),
338                &self.config.patterns.temporal_pattern,
339                &mut self.rng,
340            ) {
341                // Check if this should be a near-miss instead
342                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
343                    // Record the transaction for near-duplicate detection
344                    let account = entry
345                        .lines
346                        .first()
347                        .map(|l| l.gl_account.clone())
348                        .unwrap_or_default();
349                    near_miss_gen.record_transaction(
350                        entry.document_number().clone(),
351                        entry.posting_date(),
352                        entry.total_debit(),
353                        &account,
354                        None,
355                    );
356
357                    // Check if this could be a near-miss
358                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
359                        entry.document_number().clone(),
360                        entry.posting_date(),
361                        entry.total_debit(),
362                        &account,
363                        None,
364                        &self.config.enhanced.approval_thresholds,
365                    ) {
366                        self.near_miss_labels.push(near_miss_label);
367                        continue; // Skip actual anomaly injection
368                    }
369                }
370
371                // Select anomaly category based on rates
372                let anomaly_type = self.select_anomaly_category();
373
374                // Apply the anomaly
375                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
376                    // Calculate detection difficulty if enabled
377                    if let Some(ref calculator) = self.difficulty_calculator {
378                        let difficulty = calculator.calculate(&label);
379
380                        // Store difficulty in metadata
381                        label = label
382                            .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
383                        label = label.with_metadata(
384                            "difficulty_score",
385                            &difficulty.difficulty_score().to_string(),
386                        );
387
388                        // Update difficulty distribution
389                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
390                    }
391
392                    modified_documents.push(entry.document_number().clone());
393                    self.labels.push(label);
394                    self.stats.total_injected += 1;
395                }
396
397                // Check for duplicate injection
398                if self.config.allow_duplicates
399                    && matches!(
400                        self.labels.last().map(|l| &l.anomaly_type),
401                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
402                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
403                    )
404                {
405                    let dup_strategy = DuplicationStrategy::default();
406                    let duplicate = dup_strategy.duplicate(entry, &mut self.rng);
407                    duplicates.push(duplicate);
408                }
409            }
410        }
411
412        // Count duplicates
413        let duplicates_created = duplicates.len();
414
415        // Build summary
416        let summary = AnomalySummary::from_anomalies(&self.labels);
417
418        InjectionBatchResult {
419            entries_processed: self.stats.total_processed,
420            anomalies_injected: self.stats.total_injected,
421            duplicates_created,
422            labels: self.labels.clone(),
423            summary,
424            modified_documents,
425            near_miss_labels: self.near_miss_labels.clone(),
426            scheme_actions: self.scheme_actions.clone(),
427            difficulty_distribution: self.difficulty_distribution.clone(),
428        }
429    }
430
431    /// Checks if an entry should be processed.
432    fn should_process(&mut self, entry: &JournalEntry) -> bool {
433        // Check company filter
434        if !self.config.target_companies.is_empty()
435            && !self
436                .config
437                .target_companies
438                .iter()
439                .any(|c| c == entry.company_code())
440        {
441            self.stats.skipped_company += 1;
442            return false;
443        }
444
445        // Check date range
446        if let Some((start, end)) = self.config.date_range {
447            if entry.posting_date() < start || entry.posting_date() > end {
448                self.stats.skipped_date += 1;
449                return false;
450            }
451        }
452
453        // Check max anomalies per document
454        let current_count = self
455            .document_anomaly_counts
456            .get(&entry.document_number())
457            .copied()
458            .unwrap_or(0);
459        if current_count >= self.config.max_anomalies_per_document {
460            self.stats.skipped_max_per_doc += 1;
461            return false;
462        }
463
464        true
465    }
466
467    /// Selects an anomaly category based on configured rates.
468    fn select_anomaly_category(&mut self) -> AnomalyType {
469        let r = self.rng.gen::<f64>();
470        let rates = &self.config.rates;
471
472        let mut cumulative = 0.0;
473
474        cumulative += rates.fraud_rate;
475        if r < cumulative {
476            return self.type_selector.select_fraud(&mut self.rng);
477        }
478
479        cumulative += rates.error_rate;
480        if r < cumulative {
481            return self.type_selector.select_error(&mut self.rng);
482        }
483
484        cumulative += rates.process_issue_rate;
485        if r < cumulative {
486            return self.type_selector.select_process_issue(&mut self.rng);
487        }
488
489        cumulative += rates.statistical_rate;
490        if r < cumulative {
491            return self.type_selector.select_statistical(&mut self.rng);
492        }
493
494        self.type_selector.select_relational(&mut self.rng)
495    }
496
497    /// Injects an anomaly into an entry.
498    fn inject_anomaly(
499        &mut self,
500        entry: &mut JournalEntry,
501        anomaly_type: AnomalyType,
502    ) -> Option<LabeledAnomaly> {
503        // Check if strategy can be applied
504        if !self.strategies.can_apply(entry, &anomaly_type) {
505            return None;
506        }
507
508        // Apply the strategy
509        let result = self
510            .strategies
511            .apply_strategy(entry, &anomaly_type, &mut self.rng);
512
513        if !result.success {
514            return None;
515        }
516
517        // Update document anomaly count
518        *self
519            .document_anomaly_counts
520            .entry(entry.document_number().clone())
521            .or_insert(0) += 1;
522
523        // Update statistics
524        let category = anomaly_type.category().to_string();
525        let type_name = anomaly_type.type_name();
526
527        *self.stats.by_category.entry(category).or_insert(0) += 1;
528        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
529        *self
530            .stats
531            .by_company
532            .entry(entry.company_code().to_string())
533            .or_insert(0) += 1;
534
535        // Generate label
536        if self.config.generate_labels {
537            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
538
539            // Update entry header with anomaly tracking fields
540            entry.header.is_anomaly = true;
541            entry.header.anomaly_id = Some(anomaly_id.clone());
542            entry.header.anomaly_type = Some(type_name.clone());
543
544            // Also set fraud flag if this is a fraud anomaly
545            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
546                entry.header.is_fraud = true;
547                if let AnomalyType::Fraud(ref ft) = anomaly_type {
548                    entry.header.fraud_type = Some(*ft);
549                }
550            }
551
552            let mut label = LabeledAnomaly::new(
553                anomaly_id,
554                anomaly_type.clone(),
555                entry.document_number().clone(),
556                "JE".to_string(),
557                entry.company_code().to_string(),
558                entry.posting_date(),
559            )
560            .with_description(&result.description)
561            .with_injection_strategy(&type_name);
562
563            // Add causal reason with injection context (provenance tracking)
564            let causal_reason = AnomalyCausalReason::RandomRate {
565                base_rate: self.config.rates.total_rate,
566            };
567            label = label.with_causal_reason(causal_reason);
568
569            // Add entity context metadata if contexts are populated
570            let context_multiplier = self.calculate_context_rate_multiplier(entry);
571            if (context_multiplier - 1.0).abs() > f64::EPSILON {
572                label = label.with_metadata(
573                    "entity_context_multiplier",
574                    &format!("{:.3}", context_multiplier),
575                );
576                label = label.with_metadata(
577                    "effective_rate",
578                    &format!(
579                        "{:.6}",
580                        (self.config.rates.total_rate * context_multiplier).min(1.0)
581                    ),
582                );
583            }
584
585            // Add monetary impact
586            if let Some(impact) = result.monetary_impact {
587                label = label.with_monetary_impact(impact);
588            }
589
590            // Add related entities
591            for entity in &result.related_entities {
592                label = label.with_related_entity(entity);
593            }
594
595            // Add metadata
596            for (key, value) in &result.metadata {
597                label = label.with_metadata(key, value);
598            }
599
600            // Assign cluster and update causal reason if in cluster
601            if let Some(cluster_id) =
602                self.cluster_manager
603                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
604            {
605                label = label.with_cluster(&cluster_id);
606                // Update causal reason to reflect cluster membership
607                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
608                    cluster_id: cluster_id.clone(),
609                });
610            }
611
612            return Some(label);
613        }
614
615        None
616    }
617
618    /// Injects a specific anomaly type into an entry.
619    pub fn inject_specific(
620        &mut self,
621        entry: &mut JournalEntry,
622        anomaly_type: AnomalyType,
623    ) -> Option<LabeledAnomaly> {
624        self.inject_anomaly(entry, anomaly_type)
625    }
626
627    /// Creates a self-approval anomaly.
628    pub fn create_self_approval(
629        &mut self,
630        entry: &mut JournalEntry,
631        user_id: &str,
632    ) -> Option<LabeledAnomaly> {
633        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
634
635        let label = LabeledAnomaly::new(
636            format!("ANO{:08}", self.labels.len() + 1),
637            anomaly_type,
638            entry.document_number().clone(),
639            "JE".to_string(),
640            entry.company_code().to_string(),
641            entry.posting_date(),
642        )
643        .with_description(&format!("User {} approved their own transaction", user_id))
644        .with_related_entity(user_id)
645        .with_injection_strategy("ManualSelfApproval")
646        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
647            target_type: "User".to_string(),
648            target_id: user_id.to_string(),
649        });
650
651        // Set entry header anomaly tracking fields
652        entry.header.is_anomaly = true;
653        entry.header.is_fraud = true;
654        entry.header.anomaly_id = Some(label.anomaly_id.clone());
655        entry.header.anomaly_type = Some("SelfApproval".to_string());
656        entry.header.fraud_type = Some(FraudType::SelfApproval);
657
658        // Set approver = requester
659        entry.header.created_by = user_id.to_string();
660
661        self.labels.push(label.clone());
662        Some(label)
663    }
664
665    /// Creates a segregation of duties violation.
666    pub fn create_sod_violation(
667        &mut self,
668        entry: &mut JournalEntry,
669        user_id: &str,
670        conflicting_duties: (&str, &str),
671    ) -> Option<LabeledAnomaly> {
672        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
673
674        let label = LabeledAnomaly::new(
675            format!("ANO{:08}", self.labels.len() + 1),
676            anomaly_type,
677            entry.document_number().clone(),
678            "JE".to_string(),
679            entry.company_code().to_string(),
680            entry.posting_date(),
681        )
682        .with_description(&format!(
683            "User {} performed conflicting duties: {} and {}",
684            user_id, conflicting_duties.0, conflicting_duties.1
685        ))
686        .with_related_entity(user_id)
687        .with_metadata("duty1", conflicting_duties.0)
688        .with_metadata("duty2", conflicting_duties.1)
689        .with_injection_strategy("ManualSoDViolation")
690        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
691            target_type: "User".to_string(),
692            target_id: user_id.to_string(),
693        });
694
695        // Set entry header anomaly tracking fields
696        entry.header.is_anomaly = true;
697        entry.header.is_fraud = true;
698        entry.header.anomaly_id = Some(label.anomaly_id.clone());
699        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
700        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
701
702        self.labels.push(label.clone());
703        Some(label)
704    }
705
706    /// Creates an intercompany mismatch anomaly.
707    pub fn create_ic_mismatch(
708        &mut self,
709        entry: &mut JournalEntry,
710        matching_company: &str,
711        expected_amount: Decimal,
712        actual_amount: Decimal,
713    ) -> Option<LabeledAnomaly> {
714        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
715
716        let label = LabeledAnomaly::new(
717            format!("ANO{:08}", self.labels.len() + 1),
718            anomaly_type,
719            entry.document_number().clone(),
720            "JE".to_string(),
721            entry.company_code().to_string(),
722            entry.posting_date(),
723        )
724        .with_description(&format!(
725            "Intercompany mismatch with {}: expected {} but got {}",
726            matching_company, expected_amount, actual_amount
727        ))
728        .with_related_entity(matching_company)
729        .with_monetary_impact(actual_amount - expected_amount)
730        .with_metadata("expected_amount", &expected_amount.to_string())
731        .with_metadata("actual_amount", &actual_amount.to_string())
732        .with_injection_strategy("ManualICMismatch")
733        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
734            target_type: "Intercompany".to_string(),
735            target_id: matching_company.to_string(),
736        });
737
738        // Set entry header anomaly tracking fields
739        entry.header.is_anomaly = true;
740        entry.header.anomaly_id = Some(label.anomaly_id.clone());
741        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
742
743        self.labels.push(label.clone());
744        Some(label)
745    }
746
747    /// Returns all generated labels.
748    pub fn get_labels(&self) -> &[LabeledAnomaly] {
749        &self.labels
750    }
751
752    /// Returns the anomaly summary.
753    pub fn get_summary(&self) -> AnomalySummary {
754        AnomalySummary::from_anomalies(&self.labels)
755    }
756
757    /// Returns injection statistics.
758    pub fn get_stats(&self) -> &InjectorStats {
759        &self.stats
760    }
761
762    /// Clears all labels and resets statistics.
763    pub fn reset(&mut self) {
764        self.labels.clear();
765        self.document_anomaly_counts.clear();
766        self.stats = InjectorStats::default();
767        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
768
769        // Reset enhanced components
770        self.near_miss_labels.clear();
771        self.scheme_actions.clear();
772        self.difficulty_distribution.clear();
773
774        if let Some(ref mut baseline) = self.behavioral_baseline {
775            *baseline =
776                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
777        }
778    }
779
780    /// Returns the number of clusters created.
781    pub fn cluster_count(&self) -> usize {
782        self.cluster_manager.cluster_count()
783    }
784
785    // =========================================================================
786    // Entity Context API
787    // =========================================================================
788
789    /// Sets entity contexts for risk-adjusted anomaly injection.
790    ///
791    /// When entity contexts are provided, the injector adjusts anomaly injection
792    /// rates based on entity risk factors. Entries involving high-risk vendors,
793    /// new employees, or sensitive accounts will have higher effective injection
794    /// rates.
795    ///
796    /// Pass empty HashMaps to clear previously set contexts.
797    pub fn set_entity_contexts(
798        &mut self,
799        vendors: HashMap<String, VendorContext>,
800        employees: HashMap<String, EmployeeContext>,
801        accounts: HashMap<String, AccountContext>,
802    ) {
803        self.vendor_contexts = vendors;
804        self.employee_contexts = employees;
805        self.account_contexts = accounts;
806    }
807
808    /// Returns a reference to the vendor context map.
809    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
810        &self.vendor_contexts
811    }
812
813    /// Returns a reference to the employee context map.
814    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
815        &self.employee_contexts
816    }
817
818    /// Returns a reference to the account context map.
819    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
820        &self.account_contexts
821    }
822
823    /// Calculates a rate multiplier from the entity context maps alone (no
824    /// `EntityAwareInjector` needed). This provides a lightweight fallback
825    /// when context-aware injection is not fully enabled but context maps
826    /// have been populated.
827    ///
828    /// The multiplier is the product of individual entity risk factors found
829    /// in the context maps for the given journal entry. If no contexts match,
830    /// returns 1.0 (no adjustment).
831    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
832        if self.vendor_contexts.is_empty()
833            && self.employee_contexts.is_empty()
834            && self.account_contexts.is_empty()
835        {
836            return 1.0;
837        }
838
839        let mut multiplier = 1.0;
840
841        // Vendor lookup via reference field
842        if let Some(ref vendor_ref) = entry.header.reference {
843            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
844                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
845                if ctx.is_new {
846                    multiplier *= 2.0;
847                }
848                if ctx.is_dormant_reactivation {
849                    multiplier *= 1.5;
850                }
851            }
852        }
853
854        // Employee lookup via created_by
855        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
856            if ctx.is_new {
857                multiplier *= 1.5;
858            }
859            if ctx.is_volume_fatigued {
860                multiplier *= 1.3;
861            }
862            if ctx.is_overtime {
863                multiplier *= 1.2;
864            }
865        }
866
867        // Account lookup via first line's GL account
868        if let Some(first_line) = entry.lines.first() {
869            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
870                if ctx.is_high_risk {
871                    multiplier *= 2.0;
872                }
873            }
874        }
875
876        multiplier
877    }
878
879    // =========================================================================
880    // Enhanced Features API (v0.3.0+)
881    // =========================================================================
882
883    /// Advances all active fraud schemes by one time step.
884    ///
885    /// Call this method once per simulated day to generate scheme actions.
886    /// Returns the scheme actions generated for this date.
887    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
888        if let Some(ref mut advancer) = self.scheme_advancer {
889            let context = SchemeContext::new(date, company_code);
890            let actions = advancer.advance_all(&context);
891            self.scheme_actions.extend(actions.clone());
892            actions
893        } else {
894            Vec::new()
895        }
896    }
897
898    /// Potentially starts a new fraud scheme based on probabilities.
899    ///
900    /// Call this method periodically (e.g., once per period) to allow new
901    /// schemes to start based on configured probabilities.
902    /// Returns the scheme ID if a scheme was started.
903    pub fn maybe_start_scheme(
904        &mut self,
905        date: NaiveDate,
906        company_code: &str,
907        available_users: Vec<String>,
908        available_accounts: Vec<String>,
909        available_counterparties: Vec<String>,
910    ) -> Option<uuid::Uuid> {
911        if let Some(ref mut advancer) = self.scheme_advancer {
912            let mut context = SchemeContext::new(date, company_code);
913            context.available_users = available_users;
914            context.available_accounts = available_accounts;
915            context.available_counterparties = available_counterparties;
916
917            advancer.maybe_start_scheme(&context)
918        } else {
919            None
920        }
921    }
922
923    /// Returns all near-miss labels generated.
924    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
925        &self.near_miss_labels
926    }
927
928    /// Returns all scheme actions generated.
929    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
930        &self.scheme_actions
931    }
932
933    /// Returns the detection difficulty distribution.
934    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
935        &self.difficulty_distribution
936    }
937
938    /// Checks for behavioral deviations for an entity with an observation.
939    pub fn check_behavioral_deviations(
940        &self,
941        entity_id: &str,
942        observation: &super::context::Observation,
943    ) -> Vec<super::context::BehavioralDeviation> {
944        if let Some(ref baseline) = self.behavioral_baseline {
945            baseline.check_deviation(entity_id, observation)
946        } else {
947            Vec::new()
948        }
949    }
950
951    /// Gets the baseline for an entity.
952    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
953        if let Some(ref baseline) = self.behavioral_baseline {
954            baseline.get_baseline(entity_id)
955        } else {
956            None
957        }
958    }
959
960    /// Returns the number of active schemes.
961    pub fn active_scheme_count(&self) -> usize {
962        if let Some(ref advancer) = self.scheme_advancer {
963            advancer.active_scheme_count()
964        } else {
965            0
966        }
967    }
968
969    /// Returns whether enhanced features are enabled.
970    pub fn has_enhanced_features(&self) -> bool {
971        self.scheme_advancer.is_some()
972            || self.near_miss_generator.is_some()
973            || self.difficulty_calculator.is_some()
974            || self.entity_aware_injector.is_some()
975    }
976}
977
978/// Builder for AnomalyInjectorConfig.
979pub struct AnomalyInjectorConfigBuilder {
980    config: AnomalyInjectorConfig,
981}
982
983impl AnomalyInjectorConfigBuilder {
984    /// Creates a new builder with default configuration.
985    pub fn new() -> Self {
986        Self {
987            config: AnomalyInjectorConfig::default(),
988        }
989    }
990
991    /// Sets the total anomaly rate.
992    pub fn with_total_rate(mut self, rate: f64) -> Self {
993        self.config.rates.total_rate = rate;
994        self
995    }
996
997    /// Sets the fraud rate (proportion of anomalies).
998    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
999        self.config.rates.fraud_rate = rate;
1000        self
1001    }
1002
1003    /// Sets the error rate (proportion of anomalies).
1004    pub fn with_error_rate(mut self, rate: f64) -> Self {
1005        self.config.rates.error_rate = rate;
1006        self
1007    }
1008
1009    /// Sets the random seed.
1010    pub fn with_seed(mut self, seed: u64) -> Self {
1011        self.config.seed = seed;
1012        self
1013    }
1014
1015    /// Sets the temporal pattern.
1016    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1017        self.config.patterns.temporal_pattern = pattern;
1018        self
1019    }
1020
1021    /// Enables or disables label generation.
1022    pub fn with_labels(mut self, generate: bool) -> Self {
1023        self.config.generate_labels = generate;
1024        self
1025    }
1026
1027    /// Sets target companies.
1028    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1029        self.config.target_companies = companies;
1030        self
1031    }
1032
1033    /// Sets the date range.
1034    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1035        self.config.date_range = Some((start, end));
1036        self
1037    }
1038
1039    // =========================================================================
1040    // Enhanced Features Configuration (v0.3.0+)
1041    // =========================================================================
1042
1043    /// Enables multi-stage fraud scheme generation.
1044    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1045        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1046        self.config.enhanced.scheme_probability = probability;
1047        self
1048    }
1049
1050    /// Enables near-miss generation.
1051    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1052        self.config.enhanced.near_miss_enabled = enabled;
1053        self.config.enhanced.near_miss_proportion = proportion;
1054        self
1055    }
1056
1057    /// Sets approval thresholds for threshold-proximity near-misses.
1058    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1059        self.config.enhanced.approval_thresholds = thresholds;
1060        self
1061    }
1062
1063    /// Enables correlated anomaly injection.
1064    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1065        self.config.enhanced.correlated_injection_enabled = enabled;
1066        self
1067    }
1068
1069    /// Enables temporal clustering (period-end spikes).
1070    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1071        self.config.enhanced.temporal_clustering_enabled = enabled;
1072        self.config.enhanced.period_end_multiplier = multiplier;
1073        self
1074    }
1075
1076    /// Enables detection difficulty classification.
1077    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1078        self.config.enhanced.difficulty_classification_enabled = enabled;
1079        self
1080    }
1081
1082    /// Enables context-aware injection.
1083    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1084        self.config.enhanced.context_aware_enabled = enabled;
1085        self
1086    }
1087
1088    /// Sets behavioral baseline configuration.
1089    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1090        self.config.enhanced.behavioral_baseline_config = config;
1091        self
1092    }
1093
1094    /// Enables all enhanced features with default settings.
1095    pub fn with_all_enhanced_features(mut self) -> Self {
1096        self.config.enhanced.multi_stage_schemes_enabled = true;
1097        self.config.enhanced.scheme_probability = 0.02;
1098        self.config.enhanced.correlated_injection_enabled = true;
1099        self.config.enhanced.temporal_clustering_enabled = true;
1100        self.config.enhanced.period_end_multiplier = 2.5;
1101        self.config.enhanced.near_miss_enabled = true;
1102        self.config.enhanced.near_miss_proportion = 0.30;
1103        self.config.enhanced.difficulty_classification_enabled = true;
1104        self.config.enhanced.context_aware_enabled = true;
1105        self.config.enhanced.behavioral_baseline_config.enabled = true;
1106        self
1107    }
1108
1109    /// Builds the configuration.
1110    pub fn build(self) -> AnomalyInjectorConfig {
1111        self.config
1112    }
1113}
1114
1115impl Default for AnomalyInjectorConfigBuilder {
1116    fn default() -> Self {
1117        Self::new()
1118    }
1119}
1120
1121#[cfg(test)]
1122mod tests {
1123    use super::*;
1124    use chrono::NaiveDate;
1125    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1126    use rust_decimal_macros::dec;
1127
1128    fn create_test_entry(doc_num: &str) -> JournalEntry {
1129        let mut entry = JournalEntry::new_simple(
1130            doc_num.to_string(),
1131            "1000".to_string(),
1132            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1133            "Test Entry".to_string(),
1134        );
1135
1136        entry.add_line(JournalEntryLine {
1137            line_number: 1,
1138            gl_account: "5000".to_string(),
1139            debit_amount: dec!(1000),
1140            ..Default::default()
1141        });
1142
1143        entry.add_line(JournalEntryLine {
1144            line_number: 2,
1145            gl_account: "1000".to_string(),
1146            credit_amount: dec!(1000),
1147            ..Default::default()
1148        });
1149
1150        entry
1151    }
1152
1153    #[test]
1154    fn test_anomaly_injector_basic() {
1155        let config = AnomalyInjectorConfigBuilder::new()
1156            .with_total_rate(0.5) // High rate for testing
1157            .with_seed(42)
1158            .build();
1159
1160        let mut injector = AnomalyInjector::new(config);
1161
1162        let mut entries: Vec<_> = (0..100)
1163            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1164            .collect();
1165
1166        let result = injector.process_entries(&mut entries);
1167
1168        // With 50% rate, we should have some anomalies
1169        assert!(result.anomalies_injected > 0);
1170        assert!(!result.labels.is_empty());
1171        assert_eq!(result.labels.len(), result.anomalies_injected);
1172    }
1173
1174    #[test]
1175    fn test_specific_injection() {
1176        let config = AnomalyInjectorConfig::default();
1177        let mut injector = AnomalyInjector::new(config);
1178
1179        let mut entry = create_test_entry("JE001");
1180        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1181
1182        let label = injector.inject_specific(&mut entry, anomaly_type);
1183
1184        assert!(label.is_some());
1185        let label = label.unwrap();
1186        // document_id is the UUID string from the journal entry header
1187        assert!(!label.document_id.is_empty());
1188        assert_eq!(label.document_id, entry.document_number());
1189    }
1190
1191    #[test]
1192    fn test_self_approval_injection() {
1193        let config = AnomalyInjectorConfig::default();
1194        let mut injector = AnomalyInjector::new(config);
1195
1196        let mut entry = create_test_entry("JE001");
1197        let label = injector.create_self_approval(&mut entry, "USER001");
1198
1199        assert!(label.is_some());
1200        let label = label.unwrap();
1201        assert!(matches!(
1202            label.anomaly_type,
1203            AnomalyType::Fraud(FraudType::SelfApproval)
1204        ));
1205        assert!(label.related_entities.contains(&"USER001".to_string()));
1206    }
1207
1208    #[test]
1209    fn test_company_filtering() {
1210        let config = AnomalyInjectorConfigBuilder::new()
1211            .with_total_rate(1.0) // Inject all
1212            .with_target_companies(vec!["2000".to_string()])
1213            .build();
1214
1215        let mut injector = AnomalyInjector::new(config);
1216
1217        let mut entries = vec![
1218            create_test_entry("JE001"), // company 1000
1219            create_test_entry("JE002"), // company 1000
1220        ];
1221
1222        let result = injector.process_entries(&mut entries);
1223
1224        // No anomalies because entries are in company 1000, not 2000
1225        assert_eq!(result.anomalies_injected, 0);
1226    }
1227
1228    // =========================================================================
1229    // Entity Context Tests
1230    // =========================================================================
1231
1232    /// Helper to create a test entry with specific vendor reference and employee.
1233    fn create_test_entry_with_context(
1234        doc_num: &str,
1235        vendor_ref: Option<&str>,
1236        employee_id: &str,
1237        gl_account: &str,
1238    ) -> JournalEntry {
1239        let mut entry = JournalEntry::new_simple(
1240            doc_num.to_string(),
1241            "1000".to_string(),
1242            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1243            "Test Entry".to_string(),
1244        );
1245
1246        entry.header.reference = vendor_ref.map(|v| v.to_string());
1247        entry.header.created_by = employee_id.to_string();
1248
1249        entry.add_line(JournalEntryLine {
1250            line_number: 1,
1251            gl_account: gl_account.to_string(),
1252            debit_amount: dec!(1000),
1253            ..Default::default()
1254        });
1255
1256        entry.add_line(JournalEntryLine {
1257            line_number: 2,
1258            gl_account: "1000".to_string(),
1259            credit_amount: dec!(1000),
1260            ..Default::default()
1261        });
1262
1263        entry
1264    }
1265
1266    #[test]
1267    fn test_set_entity_contexts() {
1268        let config = AnomalyInjectorConfig::default();
1269        let mut injector = AnomalyInjector::new(config);
1270
1271        // Initially empty
1272        assert!(injector.vendor_contexts().is_empty());
1273        assert!(injector.employee_contexts().is_empty());
1274        assert!(injector.account_contexts().is_empty());
1275
1276        // Set contexts
1277        let mut vendors = HashMap::new();
1278        vendors.insert(
1279            "V001".to_string(),
1280            VendorContext {
1281                vendor_id: "V001".to_string(),
1282                is_new: true,
1283                ..Default::default()
1284            },
1285        );
1286
1287        let mut employees = HashMap::new();
1288        employees.insert(
1289            "EMP001".to_string(),
1290            EmployeeContext {
1291                employee_id: "EMP001".to_string(),
1292                is_new: true,
1293                ..Default::default()
1294            },
1295        );
1296
1297        let mut accounts = HashMap::new();
1298        accounts.insert(
1299            "8100".to_string(),
1300            AccountContext {
1301                account_code: "8100".to_string(),
1302                is_high_risk: true,
1303                ..Default::default()
1304            },
1305        );
1306
1307        injector.set_entity_contexts(vendors, employees, accounts);
1308
1309        assert_eq!(injector.vendor_contexts().len(), 1);
1310        assert_eq!(injector.employee_contexts().len(), 1);
1311        assert_eq!(injector.account_contexts().len(), 1);
1312        assert!(injector.vendor_contexts().contains_key("V001"));
1313        assert!(injector.employee_contexts().contains_key("EMP001"));
1314        assert!(injector.account_contexts().contains_key("8100"));
1315    }
1316
1317    #[test]
1318    fn test_default_behavior_no_contexts() {
1319        // Without any entity contexts, the base rate is used unchanged.
1320        let config = AnomalyInjectorConfigBuilder::new()
1321            .with_total_rate(0.5)
1322            .with_seed(42)
1323            .build();
1324
1325        let mut injector = AnomalyInjector::new(config);
1326
1327        let mut entries: Vec<_> = (0..200)
1328            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1329            .collect();
1330
1331        let result = injector.process_entries(&mut entries);
1332
1333        // With 50% base rate and no context, expect roughly 50% injection
1334        // Allow wide margin for randomness
1335        assert!(result.anomalies_injected > 0);
1336        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1337        assert!(
1338            rate > 0.2 && rate < 0.8,
1339            "Expected ~50% rate, got {:.2}%",
1340            rate * 100.0
1341        );
1342    }
1343
1344    #[test]
1345    fn test_entity_context_increases_injection_rate() {
1346        // With high-risk entity contexts, the effective rate should be higher
1347        // than the base rate, leading to more anomalies being injected.
1348        let base_rate = 0.10; // Low base rate
1349
1350        // Run without contexts
1351        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1352            .with_total_rate(base_rate)
1353            .with_seed(123)
1354            .build();
1355
1356        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1357
1358        let mut entries_no_ctx: Vec<_> = (0..500)
1359            .map(|i| {
1360                create_test_entry_with_context(
1361                    &format!("JE{:04}", i),
1362                    Some("V001"),
1363                    "EMP001",
1364                    "8100",
1365                )
1366            })
1367            .collect();
1368
1369        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1370
1371        // Run with high-risk contexts (same seed for comparable randomness)
1372        let config_ctx = AnomalyInjectorConfigBuilder::new()
1373            .with_total_rate(base_rate)
1374            .with_seed(123)
1375            .build();
1376
1377        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1378
1379        // Set up high-risk contexts
1380        let mut vendors = HashMap::new();
1381        vendors.insert(
1382            "V001".to_string(),
1383            VendorContext {
1384                vendor_id: "V001".to_string(),
1385                is_new: true,                  // 2.0x multiplier
1386                is_dormant_reactivation: true, // 1.5x multiplier
1387                ..Default::default()
1388            },
1389        );
1390
1391        let mut employees = HashMap::new();
1392        employees.insert(
1393            "EMP001".to_string(),
1394            EmployeeContext {
1395                employee_id: "EMP001".to_string(),
1396                is_new: true, // 1.5x multiplier
1397                ..Default::default()
1398            },
1399        );
1400
1401        let mut accounts = HashMap::new();
1402        accounts.insert(
1403            "8100".to_string(),
1404            AccountContext {
1405                account_code: "8100".to_string(),
1406                is_high_risk: true, // 2.0x multiplier
1407                ..Default::default()
1408            },
1409        );
1410
1411        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1412
1413        let mut entries_ctx: Vec<_> = (0..500)
1414            .map(|i| {
1415                create_test_entry_with_context(
1416                    &format!("JE{:04}", i),
1417                    Some("V001"),
1418                    "EMP001",
1419                    "8100",
1420                )
1421            })
1422            .collect();
1423
1424        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1425
1426        // The context-enhanced run should inject more anomalies
1427        assert!(
1428            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1429            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1430            result_ctx.anomalies_injected,
1431            result_no_ctx.anomalies_injected,
1432        );
1433    }
1434
1435    #[test]
1436    fn test_risk_score_multiplication() {
1437        // Verify the calculate_context_rate_multiplier produces correct values.
1438        let config = AnomalyInjectorConfig::default();
1439        let mut injector = AnomalyInjector::new(config);
1440
1441        // No contexts: multiplier should be 1.0
1442        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1443        assert!(
1444            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1445        );
1446
1447        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1448        let mut vendors = HashMap::new();
1449        vendors.insert(
1450            "V_RISKY".to_string(),
1451            VendorContext {
1452                vendor_id: "V_RISKY".to_string(),
1453                is_new: true,
1454                ..Default::default()
1455            },
1456        );
1457
1458        let mut accounts = HashMap::new();
1459        accounts.insert(
1460            "9000".to_string(),
1461            AccountContext {
1462                account_code: "9000".to_string(),
1463                is_high_risk: true,
1464                ..Default::default()
1465            },
1466        );
1467
1468        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1469
1470        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1471        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1472        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1473        assert!(
1474            (multiplier - 4.0).abs() < f64::EPSILON,
1475            "Expected 4.0x multiplier, got {}",
1476            multiplier,
1477        );
1478
1479        // Entry with only vendor context match (no account match)
1480        let entry_vendor_only =
1481            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1482        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1483        assert!(
1484            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1485            "Expected 2.0x multiplier (vendor only), got {}",
1486            multiplier_vendor,
1487        );
1488
1489        // Entry with no matching contexts
1490        let entry_no_match =
1491            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1492        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1493        assert!(
1494            (multiplier_none - 1.0).abs() < f64::EPSILON,
1495            "Expected 1.0x multiplier (no match), got {}",
1496            multiplier_none,
1497        );
1498    }
1499
1500    #[test]
1501    fn test_employee_context_multiplier() {
1502        let config = AnomalyInjectorConfig::default();
1503        let mut injector = AnomalyInjector::new(config);
1504
1505        let mut employees = HashMap::new();
1506        employees.insert(
1507            "EMP_NEW".to_string(),
1508            EmployeeContext {
1509                employee_id: "EMP_NEW".to_string(),
1510                is_new: true,             // 1.5x
1511                is_volume_fatigued: true, // 1.3x
1512                is_overtime: true,        // 1.2x
1513                ..Default::default()
1514            },
1515        );
1516
1517        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1518
1519        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1520        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1521
1522        // 1.5 * 1.3 * 1.2 = 2.34
1523        let expected = 1.5 * 1.3 * 1.2;
1524        assert!(
1525            (multiplier - expected).abs() < 0.01,
1526            "Expected {:.3}x multiplier, got {:.3}",
1527            expected,
1528            multiplier,
1529        );
1530    }
1531
1532    #[test]
1533    fn test_entity_contexts_persist_across_reset() {
1534        let config = AnomalyInjectorConfig::default();
1535        let mut injector = AnomalyInjector::new(config);
1536
1537        let mut vendors = HashMap::new();
1538        vendors.insert(
1539            "V001".to_string(),
1540            VendorContext {
1541                vendor_id: "V001".to_string(),
1542                is_new: true,
1543                ..Default::default()
1544            },
1545        );
1546
1547        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1548        assert_eq!(injector.vendor_contexts().len(), 1);
1549
1550        // Reset clears labels and stats but not entity contexts
1551        injector.reset();
1552        assert_eq!(injector.vendor_contexts().len(), 1);
1553    }
1554
1555    #[test]
1556    fn test_set_empty_contexts_clears() {
1557        let config = AnomalyInjectorConfig::default();
1558        let mut injector = AnomalyInjector::new(config);
1559
1560        let mut vendors = HashMap::new();
1561        vendors.insert(
1562            "V001".to_string(),
1563            VendorContext {
1564                vendor_id: "V001".to_string(),
1565                ..Default::default()
1566            },
1567        );
1568
1569        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1570        assert_eq!(injector.vendor_contexts().len(), 1);
1571
1572        // Setting empty maps clears
1573        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1574        assert!(injector.vendor_contexts().is_empty());
1575    }
1576
1577    #[test]
1578    fn test_dormant_vendor_multiplier() {
1579        let config = AnomalyInjectorConfig::default();
1580        let mut injector = AnomalyInjector::new(config);
1581
1582        let mut vendors = HashMap::new();
1583        vendors.insert(
1584            "V_DORMANT".to_string(),
1585            VendorContext {
1586                vendor_id: "V_DORMANT".to_string(),
1587                is_dormant_reactivation: true, // 1.5x
1588                ..Default::default()
1589            },
1590        );
1591
1592        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1593
1594        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1595        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1596        assert!(
1597            (multiplier - 1.5).abs() < f64::EPSILON,
1598            "Expected 1.5x multiplier for dormant vendor, got {}",
1599            multiplier,
1600        );
1601    }
1602}