Skip to main content

datasynth_generators/anomaly/
injector.rs

//! Main anomaly injection engine.
//!
//! The injector coordinates anomaly generation across all data types,
//! managing rates, patterns, clustering, and label generation.
//!
//! ## Enhanced Features (v0.3.0+)
//!
//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
//! - **Correlated injection**: Co-occurrence patterns and error cascades
//! - **Near-miss generation**: Suspicious but legitimate transactions
//! - **Detection difficulty classification**: Trivial to expert levels
//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::Rng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25    RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31    EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38    TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45/// Configuration for the anomaly injector.
46#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48    /// Rate configuration.
49    pub rates: AnomalyRateConfig,
50    /// Pattern configuration.
51    pub patterns: AnomalyPatternConfig,
52    /// Random seed for reproducibility.
53    pub seed: u64,
54    /// Whether to generate labels.
55    pub generate_labels: bool,
56    /// Whether to allow duplicate injection.
57    pub allow_duplicates: bool,
58    /// Maximum anomalies per document.
59    pub max_anomalies_per_document: usize,
60    /// Company codes to target (empty = all).
61    pub target_companies: Vec<String>,
62    /// Date range for injection.
63    pub date_range: Option<(NaiveDate, NaiveDate)>,
64    /// Enhanced features configuration.
65    pub enhanced: EnhancedInjectionConfig,
66}
67
68/// Enhanced injection configuration for v0.3.0+ features.
69#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71    /// Enable multi-stage fraud scheme generation.
72    pub multi_stage_schemes_enabled: bool,
73    /// Probability of starting a new scheme per perpetrator per year.
74    pub scheme_probability: f64,
75    /// Enable correlated anomaly injection.
76    pub correlated_injection_enabled: bool,
77    /// Enable temporal clustering (period-end spikes).
78    pub temporal_clustering_enabled: bool,
79    /// Period-end anomaly rate multiplier.
80    pub period_end_multiplier: f64,
81    /// Enable near-miss generation.
82    pub near_miss_enabled: bool,
83    /// Proportion of anomalies that are near-misses.
84    pub near_miss_proportion: f64,
85    /// Approval thresholds for threshold-proximity near-misses.
86    pub approval_thresholds: Vec<Decimal>,
87    /// Enable detection difficulty classification.
88    pub difficulty_classification_enabled: bool,
89    /// Enable context-aware injection.
90    pub context_aware_enabled: bool,
91    /// Behavioral baseline configuration.
92    pub behavioral_baseline_config: BehavioralBaselineConfig,
93}
94
95impl Default for AnomalyInjectorConfig {
96    fn default() -> Self {
97        Self {
98            rates: AnomalyRateConfig::default(),
99            patterns: AnomalyPatternConfig::default(),
100            seed: 42,
101            generate_labels: true,
102            allow_duplicates: true,
103            max_anomalies_per_document: 2,
104            target_companies: Vec::new(),
105            date_range: None,
106            enhanced: EnhancedInjectionConfig::default(),
107        }
108    }
109}
110
111/// Result of an injection batch.
112#[derive(Debug, Clone)]
113pub struct InjectionBatchResult {
114    /// Number of entries processed.
115    pub entries_processed: usize,
116    /// Number of anomalies injected.
117    pub anomalies_injected: usize,
118    /// Number of duplicates created.
119    pub duplicates_created: usize,
120    /// Labels generated.
121    pub labels: Vec<LabeledAnomaly>,
122    /// Summary of anomalies.
123    pub summary: AnomalySummary,
124    /// Entries that were modified (document numbers).
125    pub modified_documents: Vec<String>,
126    /// Near-miss labels (suspicious but legitimate transactions).
127    pub near_miss_labels: Vec<NearMissLabel>,
128    /// Multi-stage scheme actions generated.
129    pub scheme_actions: Vec<SchemeAction>,
130    /// Difficulty distribution summary.
131    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
132}
133
134/// Main anomaly injection engine.
135pub struct AnomalyInjector {
136    config: AnomalyInjectorConfig,
137    rng: ChaCha8Rng,
138    uuid_factory: DeterministicUuidFactory,
139    type_selector: AnomalyTypeSelector,
140    strategies: StrategyCollection,
141    cluster_manager: ClusterManager,
142    // Constructed from config; will be consumed when entity-aware injection
143    // patterns are integrated into the main inject loop.
144    #[allow(dead_code)]
145    entity_targeting: EntityTargetingManager,
146    /// Tracking which documents already have anomalies.
147    document_anomaly_counts: HashMap<String, usize>,
148    /// All generated labels.
149    labels: Vec<LabeledAnomaly>,
150    /// Statistics.
151    stats: InjectorStats,
152    // Enhanced components (v0.3.0+)
153    /// Multi-stage fraud scheme advancer.
154    scheme_advancer: Option<SchemeAdvancer>,
155    /// Near-miss generator.
156    near_miss_generator: Option<NearMissGenerator>,
157    /// Near-miss labels generated.
158    near_miss_labels: Vec<NearMissLabel>,
159    // Constructed when correlated_injection_enabled; pending integration.
160    #[allow(dead_code)]
161    co_occurrence_handler: Option<AnomalyCoOccurrence>,
162    // Constructed when temporal_clustering_enabled; pending integration.
163    #[allow(dead_code)]
164    temporal_cluster_generator: Option<TemporalClusterGenerator>,
165    /// Difficulty calculator.
166    difficulty_calculator: Option<DifficultyCalculator>,
167    /// Entity-aware injector.
168    entity_aware_injector: Option<EntityAwareInjector>,
169    /// Behavioral baseline tracker.
170    behavioral_baseline: Option<BehavioralBaseline>,
171    /// Scheme actions generated.
172    scheme_actions: Vec<SchemeAction>,
173    /// Difficulty distribution.
174    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
175    // Entity context lookup maps for risk-adjusted injection rates
176    /// Vendor contexts keyed by vendor ID.
177    vendor_contexts: HashMap<String, VendorContext>,
178    /// Employee contexts keyed by employee ID.
179    employee_contexts: HashMap<String, EmployeeContext>,
180    /// Account contexts keyed by account code.
181    account_contexts: HashMap<String, AccountContext>,
182}
183
/// Injection statistics tracking.
///
/// All counters start at zero (via [`Default`]) and accumulate until the
/// owning injector is reset.
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Total number of entries processed.
    pub total_processed: usize,
    /// Total number of anomalies injected.
    pub total_injected: usize,
    /// Anomalies injected by category (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Anomalies injected by specific type name.
    pub by_type: HashMap<String, usize>,
    /// Anomalies injected by company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped due to rate check.
    pub skipped_rate: usize,
    /// Entries skipped due to date range filter.
    pub skipped_date: usize,
    /// Entries skipped due to company filter.
    pub skipped_company: usize,
    /// Entries skipped due to max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
}
206
207impl AnomalyInjector {
208    /// Creates a new anomaly injector.
209    pub fn new(config: AnomalyInjectorConfig) -> Self {
210        let mut rng = seeded_rng(config.seed, 0);
211        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
212        let entity_targeting =
213            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
214
215        // Initialize enhanced components based on configuration
216        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
217            let scheme_config = SchemeAdvancerConfig {
218                embezzlement_probability: config.enhanced.scheme_probability,
219                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
220                kickback_probability: config.enhanced.scheme_probability * 0.5,
221                seed: rng.gen(),
222                ..Default::default()
223            };
224            Some(SchemeAdvancer::new(scheme_config))
225        } else {
226            None
227        };
228
229        let near_miss_generator = if config.enhanced.near_miss_enabled {
230            let near_miss_config = NearMissConfig {
231                proportion: config.enhanced.near_miss_proportion,
232                seed: rng.gen(),
233                ..Default::default()
234            };
235            Some(NearMissGenerator::new(near_miss_config))
236        } else {
237            None
238        };
239
240        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
241            Some(AnomalyCoOccurrence::new())
242        } else {
243            None
244        };
245
246        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
247            Some(TemporalClusterGenerator::new())
248        } else {
249            None
250        };
251
252        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
253            Some(DifficultyCalculator::new())
254        } else {
255            None
256        };
257
258        let entity_aware_injector = if config.enhanced.context_aware_enabled {
259            Some(EntityAwareInjector::default())
260        } else {
261            None
262        };
263
264        let behavioral_baseline = if config.enhanced.context_aware_enabled
265            && config.enhanced.behavioral_baseline_config.enabled
266        {
267            Some(BehavioralBaseline::new(
268                config.enhanced.behavioral_baseline_config.clone(),
269            ))
270        } else {
271            None
272        };
273
274        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
275
276        Self {
277            config,
278            rng,
279            uuid_factory,
280            type_selector: AnomalyTypeSelector::new(),
281            strategies: StrategyCollection::default(),
282            cluster_manager,
283            entity_targeting,
284            document_anomaly_counts: HashMap::new(),
285            labels: Vec::new(),
286            stats: InjectorStats::default(),
287            scheme_advancer,
288            near_miss_generator,
289            near_miss_labels: Vec::new(),
290            co_occurrence_handler,
291            temporal_cluster_generator,
292            difficulty_calculator,
293            entity_aware_injector,
294            behavioral_baseline,
295            scheme_actions: Vec::new(),
296            difficulty_distribution: HashMap::new(),
297            vendor_contexts: HashMap::new(),
298            employee_contexts: HashMap::new(),
299            account_contexts: HashMap::new(),
300        }
301    }
302
303    /// Processes a batch of journal entries, potentially injecting anomalies.
304    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
305        debug!(
306            entry_count = entries.len(),
307            total_rate = self.config.rates.total_rate,
308            seed = self.config.seed,
309            "Injecting anomalies into journal entries"
310        );
311
312        let mut modified_documents = Vec::new();
313        let mut duplicates = Vec::new();
314
315        for entry in entries.iter_mut() {
316            self.stats.total_processed += 1;
317
318            // Update behavioral baseline if enabled
319            if let Some(ref mut baseline) = self.behavioral_baseline {
320                use super::context::Observation;
321                // Record the observation for baseline building
322                let entity_id = entry.header.created_by.clone();
323                let observation =
324                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
325                baseline.record_observation(&entity_id, observation);
326            }
327
328            // Check if we should process this entry
329            if !self.should_process(entry) {
330                continue;
331            }
332
333            // Calculate effective rate (temporal clustering is applied later per-type)
334            let base_rate = self.config.rates.total_rate;
335
336            // Calculate entity-aware rate adjustment using context lookup maps
337            let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
338                let employee_id = &entry.header.created_by;
339                let first_account = entry
340                    .lines
341                    .first()
342                    .map(|l| l.gl_account.as_str())
343                    .unwrap_or("");
344                // Look up vendor from the entry's reference field (vendor ID convention)
345                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
346
347                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
348                let employee_ctx = self.employee_contexts.get(employee_id);
349                let account_ctx = self.account_contexts.get(first_account);
350
351                let multiplier =
352                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
353                (base_rate * multiplier).min(1.0)
354            } else {
355                // No entity-aware injector: fall back to context maps alone
356                self.calculate_context_rate_multiplier(entry) * base_rate
357            };
358
359            // Determine if we inject an anomaly
360            if should_inject_anomaly(
361                effective_rate,
362                entry.posting_date(),
363                &self.config.patterns.temporal_pattern,
364                &mut self.rng,
365            ) {
366                // Check if this should be a near-miss instead
367                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
368                    // Record the transaction for near-duplicate detection
369                    let account = entry
370                        .lines
371                        .first()
372                        .map(|l| l.gl_account.clone())
373                        .unwrap_or_default();
374                    near_miss_gen.record_transaction(
375                        entry.document_number().clone(),
376                        entry.posting_date(),
377                        entry.total_debit(),
378                        &account,
379                        None,
380                    );
381
382                    // Check if this could be a near-miss
383                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
384                        entry.document_number().clone(),
385                        entry.posting_date(),
386                        entry.total_debit(),
387                        &account,
388                        None,
389                        &self.config.enhanced.approval_thresholds,
390                    ) {
391                        self.near_miss_labels.push(near_miss_label);
392                        continue; // Skip actual anomaly injection
393                    }
394                }
395
396                // Select anomaly category based on rates
397                let anomaly_type = self.select_anomaly_category();
398
399                // Apply the anomaly
400                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
401                    // Calculate detection difficulty if enabled
402                    if let Some(ref calculator) = self.difficulty_calculator {
403                        let difficulty = calculator.calculate(&label);
404
405                        // Store difficulty in metadata
406                        label = label
407                            .with_metadata("detection_difficulty", &format!("{:?}", difficulty));
408                        label = label.with_metadata(
409                            "difficulty_score",
410                            &difficulty.difficulty_score().to_string(),
411                        );
412
413                        // Update difficulty distribution
414                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
415                    }
416
417                    modified_documents.push(entry.document_number().clone());
418                    self.labels.push(label);
419                    self.stats.total_injected += 1;
420                }
421
422                // Check for duplicate injection
423                if self.config.allow_duplicates
424                    && matches!(
425                        self.labels.last().map(|l| &l.anomaly_type),
426                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
427                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
428                    )
429                {
430                    let dup_strategy = DuplicationStrategy::default();
431                    let duplicate =
432                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
433                    duplicates.push(duplicate);
434                }
435            }
436        }
437
438        // Count duplicates
439        let duplicates_created = duplicates.len();
440
441        // Build summary
442        let summary = AnomalySummary::from_anomalies(&self.labels);
443
444        InjectionBatchResult {
445            entries_processed: self.stats.total_processed,
446            anomalies_injected: self.stats.total_injected,
447            duplicates_created,
448            labels: self.labels.clone(),
449            summary,
450            modified_documents,
451            near_miss_labels: self.near_miss_labels.clone(),
452            scheme_actions: self.scheme_actions.clone(),
453            difficulty_distribution: self.difficulty_distribution.clone(),
454        }
455    }
456
457    /// Checks if an entry should be processed.
458    fn should_process(&mut self, entry: &JournalEntry) -> bool {
459        // Check company filter
460        if !self.config.target_companies.is_empty()
461            && !self
462                .config
463                .target_companies
464                .iter()
465                .any(|c| c == entry.company_code())
466        {
467            self.stats.skipped_company += 1;
468            return false;
469        }
470
471        // Check date range
472        if let Some((start, end)) = self.config.date_range {
473            if entry.posting_date() < start || entry.posting_date() > end {
474                self.stats.skipped_date += 1;
475                return false;
476            }
477        }
478
479        // Check max anomalies per document
480        let current_count = self
481            .document_anomaly_counts
482            .get(&entry.document_number())
483            .copied()
484            .unwrap_or(0);
485        if current_count >= self.config.max_anomalies_per_document {
486            self.stats.skipped_max_per_doc += 1;
487            return false;
488        }
489
490        true
491    }
492
493    /// Selects an anomaly category based on configured rates.
494    fn select_anomaly_category(&mut self) -> AnomalyType {
495        let r = self.rng.gen::<f64>();
496        let rates = &self.config.rates;
497
498        let mut cumulative = 0.0;
499
500        cumulative += rates.fraud_rate;
501        if r < cumulative {
502            return self.type_selector.select_fraud(&mut self.rng);
503        }
504
505        cumulative += rates.error_rate;
506        if r < cumulative {
507            return self.type_selector.select_error(&mut self.rng);
508        }
509
510        cumulative += rates.process_issue_rate;
511        if r < cumulative {
512            return self.type_selector.select_process_issue(&mut self.rng);
513        }
514
515        cumulative += rates.statistical_rate;
516        if r < cumulative {
517            return self.type_selector.select_statistical(&mut self.rng);
518        }
519
520        self.type_selector.select_relational(&mut self.rng)
521    }
522
523    /// Injects an anomaly into an entry.
524    fn inject_anomaly(
525        &mut self,
526        entry: &mut JournalEntry,
527        anomaly_type: AnomalyType,
528    ) -> Option<LabeledAnomaly> {
529        // Check if strategy can be applied
530        if !self.strategies.can_apply(entry, &anomaly_type) {
531            return None;
532        }
533
534        // Apply the strategy
535        let result = self
536            .strategies
537            .apply_strategy(entry, &anomaly_type, &mut self.rng);
538
539        if !result.success {
540            return None;
541        }
542
543        // Update document anomaly count
544        *self
545            .document_anomaly_counts
546            .entry(entry.document_number().clone())
547            .or_insert(0) += 1;
548
549        // Update statistics
550        let category = anomaly_type.category().to_string();
551        let type_name = anomaly_type.type_name();
552
553        *self.stats.by_category.entry(category).or_insert(0) += 1;
554        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
555        *self
556            .stats
557            .by_company
558            .entry(entry.company_code().to_string())
559            .or_insert(0) += 1;
560
561        // Generate label
562        if self.config.generate_labels {
563            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
564
565            // Update entry header with anomaly tracking fields
566            entry.header.is_anomaly = true;
567            entry.header.anomaly_id = Some(anomaly_id.clone());
568            entry.header.anomaly_type = Some(type_name.clone());
569
570            // Also set fraud flag if this is a fraud anomaly
571            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
572                entry.header.is_fraud = true;
573                if let AnomalyType::Fraud(ref ft) = anomaly_type {
574                    entry.header.fraud_type = Some(*ft);
575                }
576            }
577
578            let mut label = LabeledAnomaly::new(
579                anomaly_id,
580                anomaly_type.clone(),
581                entry.document_number().clone(),
582                "JE".to_string(),
583                entry.company_code().to_string(),
584                entry.posting_date(),
585            )
586            .with_description(&result.description)
587            .with_injection_strategy(&type_name);
588
589            // Add causal reason with injection context (provenance tracking)
590            let causal_reason = AnomalyCausalReason::RandomRate {
591                base_rate: self.config.rates.total_rate,
592            };
593            label = label.with_causal_reason(causal_reason);
594
595            // Add entity context metadata if contexts are populated
596            let context_multiplier = self.calculate_context_rate_multiplier(entry);
597            if (context_multiplier - 1.0).abs() > f64::EPSILON {
598                label = label.with_metadata(
599                    "entity_context_multiplier",
600                    &format!("{:.3}", context_multiplier),
601                );
602                label = label.with_metadata(
603                    "effective_rate",
604                    &format!(
605                        "{:.6}",
606                        (self.config.rates.total_rate * context_multiplier).min(1.0)
607                    ),
608                );
609            }
610
611            // Add monetary impact
612            if let Some(impact) = result.monetary_impact {
613                label = label.with_monetary_impact(impact);
614            }
615
616            // Add related entities
617            for entity in &result.related_entities {
618                label = label.with_related_entity(entity);
619            }
620
621            // Add metadata
622            for (key, value) in &result.metadata {
623                label = label.with_metadata(key, value);
624            }
625
626            // Assign cluster and update causal reason if in cluster
627            if let Some(cluster_id) =
628                self.cluster_manager
629                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
630            {
631                label = label.with_cluster(&cluster_id);
632                // Update causal reason to reflect cluster membership
633                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
634                    cluster_id: cluster_id.clone(),
635                });
636            }
637
638            return Some(label);
639        }
640
641        None
642    }
643
644    /// Injects a specific anomaly type into an entry.
645    pub fn inject_specific(
646        &mut self,
647        entry: &mut JournalEntry,
648        anomaly_type: AnomalyType,
649    ) -> Option<LabeledAnomaly> {
650        self.inject_anomaly(entry, anomaly_type)
651    }
652
653    /// Creates a self-approval anomaly.
654    pub fn create_self_approval(
655        &mut self,
656        entry: &mut JournalEntry,
657        user_id: &str,
658    ) -> Option<LabeledAnomaly> {
659        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
660
661        let label = LabeledAnomaly::new(
662            format!("ANO{:08}", self.labels.len() + 1),
663            anomaly_type,
664            entry.document_number().clone(),
665            "JE".to_string(),
666            entry.company_code().to_string(),
667            entry.posting_date(),
668        )
669        .with_description(&format!("User {} approved their own transaction", user_id))
670        .with_related_entity(user_id)
671        .with_injection_strategy("ManualSelfApproval")
672        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
673            target_type: "User".to_string(),
674            target_id: user_id.to_string(),
675        });
676
677        // Set entry header anomaly tracking fields
678        entry.header.is_anomaly = true;
679        entry.header.is_fraud = true;
680        entry.header.anomaly_id = Some(label.anomaly_id.clone());
681        entry.header.anomaly_type = Some("SelfApproval".to_string());
682        entry.header.fraud_type = Some(FraudType::SelfApproval);
683
684        // Set approver = requester
685        entry.header.created_by = user_id.to_string();
686
687        self.labels.push(label.clone());
688        Some(label)
689    }
690
691    /// Creates a segregation of duties violation.
692    pub fn create_sod_violation(
693        &mut self,
694        entry: &mut JournalEntry,
695        user_id: &str,
696        conflicting_duties: (&str, &str),
697    ) -> Option<LabeledAnomaly> {
698        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
699
700        let label = LabeledAnomaly::new(
701            format!("ANO{:08}", self.labels.len() + 1),
702            anomaly_type,
703            entry.document_number().clone(),
704            "JE".to_string(),
705            entry.company_code().to_string(),
706            entry.posting_date(),
707        )
708        .with_description(&format!(
709            "User {} performed conflicting duties: {} and {}",
710            user_id, conflicting_duties.0, conflicting_duties.1
711        ))
712        .with_related_entity(user_id)
713        .with_metadata("duty1", conflicting_duties.0)
714        .with_metadata("duty2", conflicting_duties.1)
715        .with_injection_strategy("ManualSoDViolation")
716        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
717            target_type: "User".to_string(),
718            target_id: user_id.to_string(),
719        });
720
721        // Set entry header anomaly tracking fields
722        entry.header.is_anomaly = true;
723        entry.header.is_fraud = true;
724        entry.header.anomaly_id = Some(label.anomaly_id.clone());
725        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
726        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
727
728        self.labels.push(label.clone());
729        Some(label)
730    }
731
732    /// Creates an intercompany mismatch anomaly.
733    pub fn create_ic_mismatch(
734        &mut self,
735        entry: &mut JournalEntry,
736        matching_company: &str,
737        expected_amount: Decimal,
738        actual_amount: Decimal,
739    ) -> Option<LabeledAnomaly> {
740        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
741
742        let label = LabeledAnomaly::new(
743            format!("ANO{:08}", self.labels.len() + 1),
744            anomaly_type,
745            entry.document_number().clone(),
746            "JE".to_string(),
747            entry.company_code().to_string(),
748            entry.posting_date(),
749        )
750        .with_description(&format!(
751            "Intercompany mismatch with {}: expected {} but got {}",
752            matching_company, expected_amount, actual_amount
753        ))
754        .with_related_entity(matching_company)
755        .with_monetary_impact(actual_amount - expected_amount)
756        .with_metadata("expected_amount", &expected_amount.to_string())
757        .with_metadata("actual_amount", &actual_amount.to_string())
758        .with_injection_strategy("ManualICMismatch")
759        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
760            target_type: "Intercompany".to_string(),
761            target_id: matching_company.to_string(),
762        });
763
764        // Set entry header anomaly tracking fields
765        entry.header.is_anomaly = true;
766        entry.header.anomaly_id = Some(label.anomaly_id.clone());
767        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
768
769        self.labels.push(label.clone());
770        Some(label)
771    }
772
773    /// Returns all generated labels.
774    pub fn get_labels(&self) -> &[LabeledAnomaly] {
775        &self.labels
776    }
777
778    /// Returns the anomaly summary.
779    pub fn get_summary(&self) -> AnomalySummary {
780        AnomalySummary::from_anomalies(&self.labels)
781    }
782
783    /// Returns injection statistics.
784    pub fn get_stats(&self) -> &InjectorStats {
785        &self.stats
786    }
787
788    /// Clears all labels and resets statistics.
789    pub fn reset(&mut self) {
790        self.labels.clear();
791        self.document_anomaly_counts.clear();
792        self.stats = InjectorStats::default();
793        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
794
795        // Reset enhanced components
796        self.near_miss_labels.clear();
797        self.scheme_actions.clear();
798        self.difficulty_distribution.clear();
799
800        if let Some(ref mut baseline) = self.behavioral_baseline {
801            *baseline =
802                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
803        }
804    }
805
    /// Returns the number of clusters created.
    ///
    /// The value is whatever the cluster manager reports; since `reset`
    /// installs a new cluster manager, the count refers to the manager created
    /// most recently.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
810
811    // =========================================================================
812    // Entity Context API
813    // =========================================================================
814
815    /// Sets entity contexts for risk-adjusted anomaly injection.
816    ///
817    /// When entity contexts are provided, the injector adjusts anomaly injection
818    /// rates based on entity risk factors. Entries involving high-risk vendors,
819    /// new employees, or sensitive accounts will have higher effective injection
820    /// rates.
821    ///
822    /// Pass empty HashMaps to clear previously set contexts.
823    pub fn set_entity_contexts(
824        &mut self,
825        vendors: HashMap<String, VendorContext>,
826        employees: HashMap<String, EmployeeContext>,
827        accounts: HashMap<String, AccountContext>,
828    ) {
829        self.vendor_contexts = vendors;
830        self.employee_contexts = employees;
831        self.account_contexts = accounts;
832    }
833
    /// Returns a reference to the vendor context map.
    ///
    /// This is the map most recently passed as `vendors` to
    /// `set_entity_contexts`.
    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
        &self.vendor_contexts
    }
838
    /// Returns a reference to the employee context map.
    ///
    /// This is the map most recently passed as `employees` to
    /// `set_entity_contexts`.
    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
        &self.employee_contexts
    }
843
    /// Returns a reference to the account context map.
    ///
    /// This is the map most recently passed as `accounts` to
    /// `set_entity_contexts`.
    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
        &self.account_contexts
    }
848
849    /// Calculates a rate multiplier from the entity context maps alone (no
850    /// `EntityAwareInjector` needed). This provides a lightweight fallback
851    /// when context-aware injection is not fully enabled but context maps
852    /// have been populated.
853    ///
854    /// The multiplier is the product of individual entity risk factors found
855    /// in the context maps for the given journal entry. If no contexts match,
856    /// returns 1.0 (no adjustment).
857    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
858        if self.vendor_contexts.is_empty()
859            && self.employee_contexts.is_empty()
860            && self.account_contexts.is_empty()
861        {
862            return 1.0;
863        }
864
865        let mut multiplier = 1.0;
866
867        // Vendor lookup via reference field
868        if let Some(ref vendor_ref) = entry.header.reference {
869            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
870                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
871                if ctx.is_new {
872                    multiplier *= 2.0;
873                }
874                if ctx.is_dormant_reactivation {
875                    multiplier *= 1.5;
876                }
877            }
878        }
879
880        // Employee lookup via created_by
881        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
882            if ctx.is_new {
883                multiplier *= 1.5;
884            }
885            if ctx.is_volume_fatigued {
886                multiplier *= 1.3;
887            }
888            if ctx.is_overtime {
889                multiplier *= 1.2;
890            }
891        }
892
893        // Account lookup via first line's GL account
894        if let Some(first_line) = entry.lines.first() {
895            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
896                if ctx.is_high_risk {
897                    multiplier *= 2.0;
898                }
899            }
900        }
901
902        multiplier
903    }
904
905    // =========================================================================
906    // Enhanced Features API (v0.3.0+)
907    // =========================================================================
908
909    /// Advances all active fraud schemes by one time step.
910    ///
911    /// Call this method once per simulated day to generate scheme actions.
912    /// Returns the scheme actions generated for this date.
913    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
914        if let Some(ref mut advancer) = self.scheme_advancer {
915            let context = SchemeContext::new(date, company_code);
916            let actions = advancer.advance_all(&context);
917            self.scheme_actions.extend(actions.clone());
918            actions
919        } else {
920            Vec::new()
921        }
922    }
923
924    /// Potentially starts a new fraud scheme based on probabilities.
925    ///
926    /// Call this method periodically (e.g., once per period) to allow new
927    /// schemes to start based on configured probabilities.
928    /// Returns the scheme ID if a scheme was started.
929    pub fn maybe_start_scheme(
930        &mut self,
931        date: NaiveDate,
932        company_code: &str,
933        available_users: Vec<String>,
934        available_accounts: Vec<String>,
935        available_counterparties: Vec<String>,
936    ) -> Option<uuid::Uuid> {
937        if let Some(ref mut advancer) = self.scheme_advancer {
938            let mut context = SchemeContext::new(date, company_code);
939            context.available_users = available_users;
940            context.available_accounts = available_accounts;
941            context.available_counterparties = available_counterparties;
942
943            advancer.maybe_start_scheme(&context)
944        } else {
945            None
946        }
947    }
948
949    /// Returns all near-miss labels generated.
950    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
951        &self.near_miss_labels
952    }
953
954    /// Returns all scheme actions generated.
955    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
956        &self.scheme_actions
957    }
958
    /// Returns the detection difficulty distribution.
    ///
    /// The map associates each `AnomalyDetectionDifficulty` with a count and
    /// is emptied by `reset`.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
963
964    /// Checks for behavioral deviations for an entity with an observation.
965    pub fn check_behavioral_deviations(
966        &self,
967        entity_id: &str,
968        observation: &super::context::Observation,
969    ) -> Vec<super::context::BehavioralDeviation> {
970        if let Some(ref baseline) = self.behavioral_baseline {
971            baseline.check_deviation(entity_id, observation)
972        } else {
973            Vec::new()
974        }
975    }
976
977    /// Gets the baseline for an entity.
978    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
979        if let Some(ref baseline) = self.behavioral_baseline {
980            baseline.get_baseline(entity_id)
981        } else {
982            None
983        }
984    }
985
986    /// Returns the number of active schemes.
987    pub fn active_scheme_count(&self) -> usize {
988        if let Some(ref advancer) = self.scheme_advancer {
989            advancer.active_scheme_count()
990        } else {
991            0
992        }
993    }
994
995    /// Returns whether enhanced features are enabled.
996    pub fn has_enhanced_features(&self) -> bool {
997        self.scheme_advancer.is_some()
998            || self.near_miss_generator.is_some()
999            || self.difficulty_calculator.is_some()
1000            || self.entity_aware_injector.is_some()
1001    }
1002}
1003
1004/// Builder for AnomalyInjectorConfig.
1005pub struct AnomalyInjectorConfigBuilder {
1006    config: AnomalyInjectorConfig,
1007}
1008
1009impl AnomalyInjectorConfigBuilder {
1010    /// Creates a new builder with default configuration.
1011    pub fn new() -> Self {
1012        Self {
1013            config: AnomalyInjectorConfig::default(),
1014        }
1015    }
1016
1017    /// Sets the total anomaly rate.
1018    pub fn with_total_rate(mut self, rate: f64) -> Self {
1019        self.config.rates.total_rate = rate;
1020        self
1021    }
1022
1023    /// Sets the fraud rate (proportion of anomalies).
1024    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1025        self.config.rates.fraud_rate = rate;
1026        self
1027    }
1028
1029    /// Sets the error rate (proportion of anomalies).
1030    pub fn with_error_rate(mut self, rate: f64) -> Self {
1031        self.config.rates.error_rate = rate;
1032        self
1033    }
1034
1035    /// Sets the random seed.
1036    pub fn with_seed(mut self, seed: u64) -> Self {
1037        self.config.seed = seed;
1038        self
1039    }
1040
1041    /// Sets the temporal pattern.
1042    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1043        self.config.patterns.temporal_pattern = pattern;
1044        self
1045    }
1046
1047    /// Enables or disables label generation.
1048    pub fn with_labels(mut self, generate: bool) -> Self {
1049        self.config.generate_labels = generate;
1050        self
1051    }
1052
1053    /// Sets target companies.
1054    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1055        self.config.target_companies = companies;
1056        self
1057    }
1058
1059    /// Sets the date range.
1060    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1061        self.config.date_range = Some((start, end));
1062        self
1063    }
1064
1065    // =========================================================================
1066    // Enhanced Features Configuration (v0.3.0+)
1067    // =========================================================================
1068
1069    /// Enables multi-stage fraud scheme generation.
1070    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1071        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1072        self.config.enhanced.scheme_probability = probability;
1073        self
1074    }
1075
1076    /// Enables near-miss generation.
1077    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1078        self.config.enhanced.near_miss_enabled = enabled;
1079        self.config.enhanced.near_miss_proportion = proportion;
1080        self
1081    }
1082
1083    /// Sets approval thresholds for threshold-proximity near-misses.
1084    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1085        self.config.enhanced.approval_thresholds = thresholds;
1086        self
1087    }
1088
1089    /// Enables correlated anomaly injection.
1090    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1091        self.config.enhanced.correlated_injection_enabled = enabled;
1092        self
1093    }
1094
1095    /// Enables temporal clustering (period-end spikes).
1096    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1097        self.config.enhanced.temporal_clustering_enabled = enabled;
1098        self.config.enhanced.period_end_multiplier = multiplier;
1099        self
1100    }
1101
1102    /// Enables detection difficulty classification.
1103    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1104        self.config.enhanced.difficulty_classification_enabled = enabled;
1105        self
1106    }
1107
1108    /// Enables context-aware injection.
1109    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1110        self.config.enhanced.context_aware_enabled = enabled;
1111        self
1112    }
1113
1114    /// Sets behavioral baseline configuration.
1115    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1116        self.config.enhanced.behavioral_baseline_config = config;
1117        self
1118    }
1119
1120    /// Enables all enhanced features with default settings.
1121    pub fn with_all_enhanced_features(mut self) -> Self {
1122        self.config.enhanced.multi_stage_schemes_enabled = true;
1123        self.config.enhanced.scheme_probability = 0.02;
1124        self.config.enhanced.correlated_injection_enabled = true;
1125        self.config.enhanced.temporal_clustering_enabled = true;
1126        self.config.enhanced.period_end_multiplier = 2.5;
1127        self.config.enhanced.near_miss_enabled = true;
1128        self.config.enhanced.near_miss_proportion = 0.30;
1129        self.config.enhanced.difficulty_classification_enabled = true;
1130        self.config.enhanced.context_aware_enabled = true;
1131        self.config.enhanced.behavioral_baseline_config.enabled = true;
1132        self
1133    }
1134
1135    /// Builds the configuration.
1136    pub fn build(self) -> AnomalyInjectorConfig {
1137        self.config
1138    }
1139}
1140
1141impl Default for AnomalyInjectorConfigBuilder {
1142    fn default() -> Self {
1143        Self::new()
1144    }
1145}
1146
1147#[cfg(test)]
1148#[allow(clippy::unwrap_used)]
1149mod tests {
1150    use super::*;
1151    use chrono::NaiveDate;
1152    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1153    use rust_decimal_macros::dec;
1154
1155    fn create_test_entry(doc_num: &str) -> JournalEntry {
1156        let mut entry = JournalEntry::new_simple(
1157            doc_num.to_string(),
1158            "1000".to_string(),
1159            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1160            "Test Entry".to_string(),
1161        );
1162
1163        entry.add_line(JournalEntryLine {
1164            line_number: 1,
1165            gl_account: "5000".to_string(),
1166            debit_amount: dec!(1000),
1167            ..Default::default()
1168        });
1169
1170        entry.add_line(JournalEntryLine {
1171            line_number: 2,
1172            gl_account: "1000".to_string(),
1173            credit_amount: dec!(1000),
1174            ..Default::default()
1175        });
1176
1177        entry
1178    }
1179
1180    #[test]
1181    fn test_anomaly_injector_basic() {
1182        let config = AnomalyInjectorConfigBuilder::new()
1183            .with_total_rate(0.5) // High rate for testing
1184            .with_seed(42)
1185            .build();
1186
1187        let mut injector = AnomalyInjector::new(config);
1188
1189        let mut entries: Vec<_> = (0..100)
1190            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1191            .collect();
1192
1193        let result = injector.process_entries(&mut entries);
1194
1195        // With 50% rate, we should have some anomalies
1196        assert!(result.anomalies_injected > 0);
1197        assert!(!result.labels.is_empty());
1198        assert_eq!(result.labels.len(), result.anomalies_injected);
1199    }
1200
1201    #[test]
1202    fn test_specific_injection() {
1203        let config = AnomalyInjectorConfig::default();
1204        let mut injector = AnomalyInjector::new(config);
1205
1206        let mut entry = create_test_entry("JE001");
1207        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1208
1209        let label = injector.inject_specific(&mut entry, anomaly_type);
1210
1211        assert!(label.is_some());
1212        let label = label.unwrap();
1213        // document_id is the UUID string from the journal entry header
1214        assert!(!label.document_id.is_empty());
1215        assert_eq!(label.document_id, entry.document_number());
1216    }
1217
1218    #[test]
1219    fn test_self_approval_injection() {
1220        let config = AnomalyInjectorConfig::default();
1221        let mut injector = AnomalyInjector::new(config);
1222
1223        let mut entry = create_test_entry("JE001");
1224        let label = injector.create_self_approval(&mut entry, "USER001");
1225
1226        assert!(label.is_some());
1227        let label = label.unwrap();
1228        assert!(matches!(
1229            label.anomaly_type,
1230            AnomalyType::Fraud(FraudType::SelfApproval)
1231        ));
1232        assert!(label.related_entities.contains(&"USER001".to_string()));
1233    }
1234
1235    #[test]
1236    fn test_company_filtering() {
1237        let config = AnomalyInjectorConfigBuilder::new()
1238            .with_total_rate(1.0) // Inject all
1239            .with_target_companies(vec!["2000".to_string()])
1240            .build();
1241
1242        let mut injector = AnomalyInjector::new(config);
1243
1244        let mut entries = vec![
1245            create_test_entry("JE001"), // company 1000
1246            create_test_entry("JE002"), // company 1000
1247        ];
1248
1249        let result = injector.process_entries(&mut entries);
1250
1251        // No anomalies because entries are in company 1000, not 2000
1252        assert_eq!(result.anomalies_injected, 0);
1253    }
1254
1255    // =========================================================================
1256    // Entity Context Tests
1257    // =========================================================================
1258
1259    /// Helper to create a test entry with specific vendor reference and employee.
1260    fn create_test_entry_with_context(
1261        doc_num: &str,
1262        vendor_ref: Option<&str>,
1263        employee_id: &str,
1264        gl_account: &str,
1265    ) -> JournalEntry {
1266        let mut entry = JournalEntry::new_simple(
1267            doc_num.to_string(),
1268            "1000".to_string(),
1269            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1270            "Test Entry".to_string(),
1271        );
1272
1273        entry.header.reference = vendor_ref.map(|v| v.to_string());
1274        entry.header.created_by = employee_id.to_string();
1275
1276        entry.add_line(JournalEntryLine {
1277            line_number: 1,
1278            gl_account: gl_account.to_string(),
1279            debit_amount: dec!(1000),
1280            ..Default::default()
1281        });
1282
1283        entry.add_line(JournalEntryLine {
1284            line_number: 2,
1285            gl_account: "1000".to_string(),
1286            credit_amount: dec!(1000),
1287            ..Default::default()
1288        });
1289
1290        entry
1291    }
1292
1293    #[test]
1294    fn test_set_entity_contexts() {
1295        let config = AnomalyInjectorConfig::default();
1296        let mut injector = AnomalyInjector::new(config);
1297
1298        // Initially empty
1299        assert!(injector.vendor_contexts().is_empty());
1300        assert!(injector.employee_contexts().is_empty());
1301        assert!(injector.account_contexts().is_empty());
1302
1303        // Set contexts
1304        let mut vendors = HashMap::new();
1305        vendors.insert(
1306            "V001".to_string(),
1307            VendorContext {
1308                vendor_id: "V001".to_string(),
1309                is_new: true,
1310                ..Default::default()
1311            },
1312        );
1313
1314        let mut employees = HashMap::new();
1315        employees.insert(
1316            "EMP001".to_string(),
1317            EmployeeContext {
1318                employee_id: "EMP001".to_string(),
1319                is_new: true,
1320                ..Default::default()
1321            },
1322        );
1323
1324        let mut accounts = HashMap::new();
1325        accounts.insert(
1326            "8100".to_string(),
1327            AccountContext {
1328                account_code: "8100".to_string(),
1329                is_high_risk: true,
1330                ..Default::default()
1331            },
1332        );
1333
1334        injector.set_entity_contexts(vendors, employees, accounts);
1335
1336        assert_eq!(injector.vendor_contexts().len(), 1);
1337        assert_eq!(injector.employee_contexts().len(), 1);
1338        assert_eq!(injector.account_contexts().len(), 1);
1339        assert!(injector.vendor_contexts().contains_key("V001"));
1340        assert!(injector.employee_contexts().contains_key("EMP001"));
1341        assert!(injector.account_contexts().contains_key("8100"));
1342    }
1343
1344    #[test]
1345    fn test_default_behavior_no_contexts() {
1346        // Without any entity contexts, the base rate is used unchanged.
1347        let config = AnomalyInjectorConfigBuilder::new()
1348            .with_total_rate(0.5)
1349            .with_seed(42)
1350            .build();
1351
1352        let mut injector = AnomalyInjector::new(config);
1353
1354        let mut entries: Vec<_> = (0..200)
1355            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1356            .collect();
1357
1358        let result = injector.process_entries(&mut entries);
1359
1360        // With 50% base rate and no context, expect roughly 50% injection
1361        // Allow wide margin for randomness
1362        assert!(result.anomalies_injected > 0);
1363        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1364        assert!(
1365            rate > 0.2 && rate < 0.8,
1366            "Expected ~50% rate, got {:.2}%",
1367            rate * 100.0
1368        );
1369    }
1370
1371    #[test]
1372    fn test_entity_context_increases_injection_rate() {
1373        // With high-risk entity contexts, the effective rate should be higher
1374        // than the base rate, leading to more anomalies being injected.
1375        let base_rate = 0.10; // Low base rate
1376
1377        // Run without contexts
1378        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1379            .with_total_rate(base_rate)
1380            .with_seed(123)
1381            .build();
1382
1383        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1384
1385        let mut entries_no_ctx: Vec<_> = (0..500)
1386            .map(|i| {
1387                create_test_entry_with_context(
1388                    &format!("JE{:04}", i),
1389                    Some("V001"),
1390                    "EMP001",
1391                    "8100",
1392                )
1393            })
1394            .collect();
1395
1396        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1397
1398        // Run with high-risk contexts (same seed for comparable randomness)
1399        let config_ctx = AnomalyInjectorConfigBuilder::new()
1400            .with_total_rate(base_rate)
1401            .with_seed(123)
1402            .build();
1403
1404        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1405
1406        // Set up high-risk contexts
1407        let mut vendors = HashMap::new();
1408        vendors.insert(
1409            "V001".to_string(),
1410            VendorContext {
1411                vendor_id: "V001".to_string(),
1412                is_new: true,                  // 2.0x multiplier
1413                is_dormant_reactivation: true, // 1.5x multiplier
1414                ..Default::default()
1415            },
1416        );
1417
1418        let mut employees = HashMap::new();
1419        employees.insert(
1420            "EMP001".to_string(),
1421            EmployeeContext {
1422                employee_id: "EMP001".to_string(),
1423                is_new: true, // 1.5x multiplier
1424                ..Default::default()
1425            },
1426        );
1427
1428        let mut accounts = HashMap::new();
1429        accounts.insert(
1430            "8100".to_string(),
1431            AccountContext {
1432                account_code: "8100".to_string(),
1433                is_high_risk: true, // 2.0x multiplier
1434                ..Default::default()
1435            },
1436        );
1437
1438        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1439
1440        let mut entries_ctx: Vec<_> = (0..500)
1441            .map(|i| {
1442                create_test_entry_with_context(
1443                    &format!("JE{:04}", i),
1444                    Some("V001"),
1445                    "EMP001",
1446                    "8100",
1447                )
1448            })
1449            .collect();
1450
1451        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1452
1453        // The context-enhanced run should inject more anomalies
1454        assert!(
1455            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1456            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1457            result_ctx.anomalies_injected,
1458            result_no_ctx.anomalies_injected,
1459        );
1460    }
1461
1462    #[test]
1463    fn test_risk_score_multiplication() {
1464        // Verify the calculate_context_rate_multiplier produces correct values.
1465        let config = AnomalyInjectorConfig::default();
1466        let mut injector = AnomalyInjector::new(config);
1467
1468        // No contexts: multiplier should be 1.0
1469        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1470        assert!(
1471            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1472        );
1473
1474        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1475        let mut vendors = HashMap::new();
1476        vendors.insert(
1477            "V_RISKY".to_string(),
1478            VendorContext {
1479                vendor_id: "V_RISKY".to_string(),
1480                is_new: true,
1481                ..Default::default()
1482            },
1483        );
1484
1485        let mut accounts = HashMap::new();
1486        accounts.insert(
1487            "9000".to_string(),
1488            AccountContext {
1489                account_code: "9000".to_string(),
1490                is_high_risk: true,
1491                ..Default::default()
1492            },
1493        );
1494
1495        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1496
1497        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1498        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1499        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1500        assert!(
1501            (multiplier - 4.0).abs() < f64::EPSILON,
1502            "Expected 4.0x multiplier, got {}",
1503            multiplier,
1504        );
1505
1506        // Entry with only vendor context match (no account match)
1507        let entry_vendor_only =
1508            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1509        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1510        assert!(
1511            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1512            "Expected 2.0x multiplier (vendor only), got {}",
1513            multiplier_vendor,
1514        );
1515
1516        // Entry with no matching contexts
1517        let entry_no_match =
1518            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1519        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1520        assert!(
1521            (multiplier_none - 1.0).abs() < f64::EPSILON,
1522            "Expected 1.0x multiplier (no match), got {}",
1523            multiplier_none,
1524        );
1525    }
1526
1527    #[test]
1528    fn test_employee_context_multiplier() {
1529        let config = AnomalyInjectorConfig::default();
1530        let mut injector = AnomalyInjector::new(config);
1531
1532        let mut employees = HashMap::new();
1533        employees.insert(
1534            "EMP_NEW".to_string(),
1535            EmployeeContext {
1536                employee_id: "EMP_NEW".to_string(),
1537                is_new: true,             // 1.5x
1538                is_volume_fatigued: true, // 1.3x
1539                is_overtime: true,        // 1.2x
1540                ..Default::default()
1541            },
1542        );
1543
1544        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1545
1546        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1547        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1548
1549        // 1.5 * 1.3 * 1.2 = 2.34
1550        let expected = 1.5 * 1.3 * 1.2;
1551        assert!(
1552            (multiplier - expected).abs() < 0.01,
1553            "Expected {:.3}x multiplier, got {:.3}",
1554            expected,
1555            multiplier,
1556        );
1557    }
1558
1559    #[test]
1560    fn test_entity_contexts_persist_across_reset() {
1561        let config = AnomalyInjectorConfig::default();
1562        let mut injector = AnomalyInjector::new(config);
1563
1564        let mut vendors = HashMap::new();
1565        vendors.insert(
1566            "V001".to_string(),
1567            VendorContext {
1568                vendor_id: "V001".to_string(),
1569                is_new: true,
1570                ..Default::default()
1571            },
1572        );
1573
1574        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1575        assert_eq!(injector.vendor_contexts().len(), 1);
1576
1577        // Reset clears labels and stats but not entity contexts
1578        injector.reset();
1579        assert_eq!(injector.vendor_contexts().len(), 1);
1580    }
1581
1582    #[test]
1583    fn test_set_empty_contexts_clears() {
1584        let config = AnomalyInjectorConfig::default();
1585        let mut injector = AnomalyInjector::new(config);
1586
1587        let mut vendors = HashMap::new();
1588        vendors.insert(
1589            "V001".to_string(),
1590            VendorContext {
1591                vendor_id: "V001".to_string(),
1592                ..Default::default()
1593            },
1594        );
1595
1596        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1597        assert_eq!(injector.vendor_contexts().len(), 1);
1598
1599        // Setting empty maps clears
1600        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1601        assert!(injector.vendor_contexts().is_empty());
1602    }
1603
1604    #[test]
1605    fn test_dormant_vendor_multiplier() {
1606        let config = AnomalyInjectorConfig::default();
1607        let mut injector = AnomalyInjector::new(config);
1608
1609        let mut vendors = HashMap::new();
1610        vendors.insert(
1611            "V_DORMANT".to_string(),
1612            VendorContext {
1613                vendor_id: "V_DORMANT".to_string(),
1614                is_dormant_reactivation: true, // 1.5x
1615                ..Default::default()
1616            },
1617        );
1618
1619        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1620
1621        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1622        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1623        assert!(
1624            (multiplier - 1.5).abs() < f64::EPSILON,
1625            "Expected 1.5x multiplier for dormant vendor, got {}",
1626            multiplier,
1627        );
1628    }
1629}