Skip to main content

datasynth_generators/anomaly/
injector.rs

//! Main anomaly injection engine.
//!
//! The injector coordinates anomaly generation across all data types,
//! managing rates, patterns, clustering, and label generation.
//!
//! ## Enhanced Features (v0.3.0+)
//!
//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
//! - **Correlated injection**: Co-occurrence patterns and error cascades
//! - **Near-miss generation**: Suspicious but legitimate transactions
//! - **Detection difficulty classification**: Trivial to expert levels
//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::Rng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25    RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31    EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38    TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45/// Configuration for the anomaly injector.
46#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48    /// Rate configuration.
49    pub rates: AnomalyRateConfig,
50    /// Pattern configuration.
51    pub patterns: AnomalyPatternConfig,
52    /// Random seed for reproducibility.
53    pub seed: u64,
54    /// Whether to generate labels.
55    pub generate_labels: bool,
56    /// Whether to allow duplicate injection.
57    pub allow_duplicates: bool,
58    /// Maximum anomalies per document.
59    pub max_anomalies_per_document: usize,
60    /// Company codes to target (empty = all).
61    pub target_companies: Vec<String>,
62    /// Date range for injection.
63    pub date_range: Option<(NaiveDate, NaiveDate)>,
64    /// Enhanced features configuration.
65    pub enhanced: EnhancedInjectionConfig,
66}
67
68/// Enhanced injection configuration for v0.3.0+ features.
69#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71    /// Enable multi-stage fraud scheme generation.
72    pub multi_stage_schemes_enabled: bool,
73    /// Probability of starting a new scheme per perpetrator per year.
74    pub scheme_probability: f64,
75    /// Enable correlated anomaly injection.
76    pub correlated_injection_enabled: bool,
77    /// Enable temporal clustering (period-end spikes).
78    pub temporal_clustering_enabled: bool,
79    /// Period-end anomaly rate multiplier.
80    pub period_end_multiplier: f64,
81    /// Enable near-miss generation.
82    pub near_miss_enabled: bool,
83    /// Proportion of anomalies that are near-misses.
84    pub near_miss_proportion: f64,
85    /// Approval thresholds for threshold-proximity near-misses.
86    pub approval_thresholds: Vec<Decimal>,
87    /// Enable detection difficulty classification.
88    pub difficulty_classification_enabled: bool,
89    /// Enable context-aware injection.
90    pub context_aware_enabled: bool,
91    /// Behavioral baseline configuration.
92    pub behavioral_baseline_config: BehavioralBaselineConfig,
93}
94
95impl Default for AnomalyInjectorConfig {
96    fn default() -> Self {
97        Self {
98            rates: AnomalyRateConfig::default(),
99            patterns: AnomalyPatternConfig::default(),
100            seed: 42,
101            generate_labels: true,
102            allow_duplicates: true,
103            max_anomalies_per_document: 2,
104            target_companies: Vec::new(),
105            date_range: None,
106            enhanced: EnhancedInjectionConfig::default(),
107        }
108    }
109}
110
111/// Result of an injection batch.
112#[derive(Debug, Clone)]
113pub struct InjectionBatchResult {
114    /// Number of entries processed.
115    pub entries_processed: usize,
116    /// Number of anomalies injected.
117    pub anomalies_injected: usize,
118    /// Number of duplicates created.
119    pub duplicates_created: usize,
120    /// Labels generated.
121    pub labels: Vec<LabeledAnomaly>,
122    /// Summary of anomalies.
123    pub summary: AnomalySummary,
124    /// Entries that were modified (document numbers).
125    pub modified_documents: Vec<String>,
126    /// Near-miss labels (suspicious but legitimate transactions).
127    pub near_miss_labels: Vec<NearMissLabel>,
128    /// Multi-stage scheme actions generated.
129    pub scheme_actions: Vec<SchemeAction>,
130    /// Difficulty distribution summary.
131    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
132}
133
134/// Main anomaly injection engine.
135pub struct AnomalyInjector {
136    config: AnomalyInjectorConfig,
137    rng: ChaCha8Rng,
138    uuid_factory: DeterministicUuidFactory,
139    type_selector: AnomalyTypeSelector,
140    strategies: StrategyCollection,
141    cluster_manager: ClusterManager,
142    /// Constructed from config; will be consumed when entity-aware injection
143    /// patterns are integrated into the main inject loop (v0.4.0 roadmap).
144    #[allow(dead_code)]
145    entity_targeting: EntityTargetingManager,
146    /// Tracking which documents already have anomalies.
147    document_anomaly_counts: HashMap<String, usize>,
148    /// All generated labels.
149    labels: Vec<LabeledAnomaly>,
150    /// Statistics.
151    stats: InjectorStats,
152    // Enhanced components (v0.3.0+)
153    /// Multi-stage fraud scheme advancer.
154    scheme_advancer: Option<SchemeAdvancer>,
155    /// Near-miss generator.
156    near_miss_generator: Option<NearMissGenerator>,
157    /// Near-miss labels generated.
158    near_miss_labels: Vec<NearMissLabel>,
159    /// Constructed when `correlated_injection_enabled`; will drive correlated
160    /// anomaly pairs once the co-occurrence integration pass lands.
161    #[allow(dead_code)]
162    co_occurrence_handler: Option<AnomalyCoOccurrence>,
163    /// Constructed when `temporal_clustering_enabled`; will group anomalies
164    /// into temporal bursts once the clustering integration pass lands.
165    #[allow(dead_code)]
166    temporal_cluster_generator: Option<TemporalClusterGenerator>,
167    /// Difficulty calculator.
168    difficulty_calculator: Option<DifficultyCalculator>,
169    /// Entity-aware injector.
170    entity_aware_injector: Option<EntityAwareInjector>,
171    /// Behavioral baseline tracker.
172    behavioral_baseline: Option<BehavioralBaseline>,
173    /// Scheme actions generated.
174    scheme_actions: Vec<SchemeAction>,
175    /// Difficulty distribution.
176    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
177    // Entity context lookup maps for risk-adjusted injection rates
178    /// Vendor contexts keyed by vendor ID.
179    vendor_contexts: HashMap<String, VendorContext>,
180    /// Employee contexts keyed by employee ID.
181    employee_contexts: HashMap<String, EmployeeContext>,
182    /// Account contexts keyed by account code.
183    account_contexts: HashMap<String, AccountContext>,
184}
185
/// Counters describing what the injector has done so far.
///
/// All counters start at zero and all maps start empty (`Default`).
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// How many entries have been processed in total.
    pub total_processed: usize,
    /// How many anomalies have been injected in total.
    pub total_injected: usize,
    /// Injected anomalies per category (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Injected anomalies per specific type name.
    pub by_type: HashMap<String, usize>,
    /// Injected anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped because of the rate check.
    pub skipped_rate: usize,
    /// Entries skipped because of the date range filter.
    pub skipped_date: usize,
    /// Entries skipped because of the company filter.
    pub skipped_company: usize,
    /// Entries skipped because of the max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
}
208
209impl AnomalyInjector {
210    /// Creates a new anomaly injector.
211    pub fn new(config: AnomalyInjectorConfig) -> Self {
212        let mut rng = seeded_rng(config.seed, 0);
213        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
214        let entity_targeting =
215            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
216
217        // Initialize enhanced components based on configuration
218        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
219            let scheme_config = SchemeAdvancerConfig {
220                embezzlement_probability: config.enhanced.scheme_probability,
221                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
222                kickback_probability: config.enhanced.scheme_probability * 0.5,
223                seed: rng.random(),
224                ..Default::default()
225            };
226            Some(SchemeAdvancer::new(scheme_config))
227        } else {
228            None
229        };
230
231        let near_miss_generator = if config.enhanced.near_miss_enabled {
232            let near_miss_config = NearMissConfig {
233                proportion: config.enhanced.near_miss_proportion,
234                seed: rng.random(),
235                ..Default::default()
236            };
237            Some(NearMissGenerator::new(near_miss_config))
238        } else {
239            None
240        };
241
242        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
243            Some(AnomalyCoOccurrence::new())
244        } else {
245            None
246        };
247
248        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
249            Some(TemporalClusterGenerator::new())
250        } else {
251            None
252        };
253
254        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
255            Some(DifficultyCalculator::new())
256        } else {
257            None
258        };
259
260        let entity_aware_injector = if config.enhanced.context_aware_enabled {
261            Some(EntityAwareInjector::default())
262        } else {
263            None
264        };
265
266        let behavioral_baseline = if config.enhanced.context_aware_enabled
267            && config.enhanced.behavioral_baseline_config.enabled
268        {
269            Some(BehavioralBaseline::new(
270                config.enhanced.behavioral_baseline_config.clone(),
271            ))
272        } else {
273            None
274        };
275
276        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
277
278        Self {
279            config,
280            rng,
281            uuid_factory,
282            type_selector: AnomalyTypeSelector::new(),
283            strategies: StrategyCollection::default(),
284            cluster_manager,
285            entity_targeting,
286            document_anomaly_counts: HashMap::new(),
287            labels: Vec::new(),
288            stats: InjectorStats::default(),
289            scheme_advancer,
290            near_miss_generator,
291            near_miss_labels: Vec::new(),
292            co_occurrence_handler,
293            temporal_cluster_generator,
294            difficulty_calculator,
295            entity_aware_injector,
296            behavioral_baseline,
297            scheme_actions: Vec::new(),
298            difficulty_distribution: HashMap::new(),
299            vendor_contexts: HashMap::new(),
300            employee_contexts: HashMap::new(),
301            account_contexts: HashMap::new(),
302        }
303    }
304
305    /// Processes a batch of journal entries, potentially injecting anomalies.
306    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
307        debug!(
308            entry_count = entries.len(),
309            total_rate = self.config.rates.total_rate,
310            seed = self.config.seed,
311            "Injecting anomalies into journal entries"
312        );
313
314        let mut modified_documents = Vec::new();
315        let mut duplicates = Vec::new();
316
317        for entry in entries.iter_mut() {
318            self.stats.total_processed += 1;
319
320            // Update behavioral baseline if enabled
321            if let Some(ref mut baseline) = self.behavioral_baseline {
322                use super::context::Observation;
323                // Record the observation for baseline building
324                let entity_id = entry.header.created_by.clone();
325                let observation =
326                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
327                baseline.record_observation(&entity_id, observation);
328            }
329
330            // Check if we should process this entry
331            if !self.should_process(entry) {
332                continue;
333            }
334
335            // Calculate effective rate (temporal clustering is applied later per-type)
336            let base_rate = self.config.rates.total_rate;
337
338            // Calculate entity-aware rate adjustment using context lookup maps
339            let effective_rate = if let Some(ref injector) = self.entity_aware_injector {
340                let employee_id = &entry.header.created_by;
341                let first_account = entry
342                    .lines
343                    .first()
344                    .map(|l| l.gl_account.as_str())
345                    .unwrap_or("");
346                // Look up vendor from the entry's reference field (vendor ID convention)
347                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
348
349                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
350                let employee_ctx = self.employee_contexts.get(employee_id);
351                let account_ctx = self.account_contexts.get(first_account);
352
353                let multiplier =
354                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
355                (base_rate * multiplier).min(1.0)
356            } else {
357                // No entity-aware injector: fall back to context maps alone
358                self.calculate_context_rate_multiplier(entry) * base_rate
359            };
360
361            // Determine if we inject an anomaly
362            if should_inject_anomaly(
363                effective_rate,
364                entry.posting_date(),
365                &self.config.patterns.temporal_pattern,
366                &mut self.rng,
367            ) {
368                // Check if this should be a near-miss instead
369                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
370                    // Record the transaction for near-duplicate detection
371                    let account = entry
372                        .lines
373                        .first()
374                        .map(|l| l.gl_account.clone())
375                        .unwrap_or_default();
376                    near_miss_gen.record_transaction(
377                        entry.document_number().clone(),
378                        entry.posting_date(),
379                        entry.total_debit(),
380                        &account,
381                        None,
382                    );
383
384                    // Check if this could be a near-miss
385                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
386                        entry.document_number().clone(),
387                        entry.posting_date(),
388                        entry.total_debit(),
389                        &account,
390                        None,
391                        &self.config.enhanced.approval_thresholds,
392                    ) {
393                        self.near_miss_labels.push(near_miss_label);
394                        continue; // Skip actual anomaly injection
395                    }
396                }
397
398                // Select anomaly category based on rates
399                let anomaly_type = self.select_anomaly_category();
400
401                // Apply the anomaly
402                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type) {
403                    // Calculate detection difficulty if enabled
404                    if let Some(ref calculator) = self.difficulty_calculator {
405                        let difficulty = calculator.calculate(&label);
406
407                        // Store difficulty in metadata
408                        label =
409                            label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
410                        label = label.with_metadata(
411                            "difficulty_score",
412                            &difficulty.difficulty_score().to_string(),
413                        );
414
415                        // Update difficulty distribution
416                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
417                    }
418
419                    modified_documents.push(entry.document_number().clone());
420                    self.labels.push(label);
421                    self.stats.total_injected += 1;
422                }
423
424                // Check for duplicate injection
425                if self.config.allow_duplicates
426                    && matches!(
427                        self.labels.last().map(|l| &l.anomaly_type),
428                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
429                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
430                    )
431                {
432                    let dup_strategy = DuplicationStrategy::default();
433                    let duplicate =
434                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
435                    duplicates.push(duplicate);
436                }
437            }
438        }
439
440        // Count duplicates
441        let duplicates_created = duplicates.len();
442
443        // Build summary
444        let summary = AnomalySummary::from_anomalies(&self.labels);
445
446        InjectionBatchResult {
447            entries_processed: self.stats.total_processed,
448            anomalies_injected: self.stats.total_injected,
449            duplicates_created,
450            labels: self.labels.clone(),
451            summary,
452            modified_documents,
453            near_miss_labels: self.near_miss_labels.clone(),
454            scheme_actions: self.scheme_actions.clone(),
455            difficulty_distribution: self.difficulty_distribution.clone(),
456        }
457    }
458
459    /// Checks if an entry should be processed.
460    fn should_process(&mut self, entry: &JournalEntry) -> bool {
461        // Check company filter
462        if !self.config.target_companies.is_empty()
463            && !self
464                .config
465                .target_companies
466                .iter()
467                .any(|c| c == entry.company_code())
468        {
469            self.stats.skipped_company += 1;
470            return false;
471        }
472
473        // Check date range
474        if let Some((start, end)) = self.config.date_range {
475            if entry.posting_date() < start || entry.posting_date() > end {
476                self.stats.skipped_date += 1;
477                return false;
478            }
479        }
480
481        // Check max anomalies per document
482        let current_count = self
483            .document_anomaly_counts
484            .get(&entry.document_number())
485            .copied()
486            .unwrap_or(0);
487        if current_count >= self.config.max_anomalies_per_document {
488            self.stats.skipped_max_per_doc += 1;
489            return false;
490        }
491
492        true
493    }
494
495    /// Selects an anomaly category based on configured rates.
496    fn select_anomaly_category(&mut self) -> AnomalyType {
497        let r = self.rng.random::<f64>();
498        let rates = &self.config.rates;
499
500        let mut cumulative = 0.0;
501
502        cumulative += rates.fraud_rate;
503        if r < cumulative {
504            return self.type_selector.select_fraud(&mut self.rng);
505        }
506
507        cumulative += rates.error_rate;
508        if r < cumulative {
509            return self.type_selector.select_error(&mut self.rng);
510        }
511
512        cumulative += rates.process_issue_rate;
513        if r < cumulative {
514            return self.type_selector.select_process_issue(&mut self.rng);
515        }
516
517        cumulative += rates.statistical_rate;
518        if r < cumulative {
519            return self.type_selector.select_statistical(&mut self.rng);
520        }
521
522        self.type_selector.select_relational(&mut self.rng)
523    }
524
525    /// Injects an anomaly into an entry.
526    fn inject_anomaly(
527        &mut self,
528        entry: &mut JournalEntry,
529        anomaly_type: AnomalyType,
530    ) -> Option<LabeledAnomaly> {
531        // Check if strategy can be applied
532        if !self.strategies.can_apply(entry, &anomaly_type) {
533            return None;
534        }
535
536        // Apply the strategy
537        let result = self
538            .strategies
539            .apply_strategy(entry, &anomaly_type, &mut self.rng);
540
541        if !result.success {
542            return None;
543        }
544
545        // Update document anomaly count
546        *self
547            .document_anomaly_counts
548            .entry(entry.document_number().clone())
549            .or_insert(0) += 1;
550
551        // Update statistics
552        let category = anomaly_type.category().to_string();
553        let type_name = anomaly_type.type_name();
554
555        *self.stats.by_category.entry(category).or_insert(0) += 1;
556        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
557        *self
558            .stats
559            .by_company
560            .entry(entry.company_code().to_string())
561            .or_insert(0) += 1;
562
563        // Generate label
564        if self.config.generate_labels {
565            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
566
567            // Update entry header with anomaly tracking fields
568            entry.header.is_anomaly = true;
569            entry.header.anomaly_id = Some(anomaly_id.clone());
570            entry.header.anomaly_type = Some(type_name.clone());
571
572            // Also set fraud flag if this is a fraud anomaly
573            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
574                entry.header.is_fraud = true;
575                if let AnomalyType::Fraud(ref ft) = anomaly_type {
576                    entry.header.fraud_type = Some(*ft);
577                }
578            }
579
580            let mut label = LabeledAnomaly::new(
581                anomaly_id,
582                anomaly_type.clone(),
583                entry.document_number().clone(),
584                "JE".to_string(),
585                entry.company_code().to_string(),
586                entry.posting_date(),
587            )
588            .with_description(&result.description)
589            .with_injection_strategy(&type_name);
590
591            // Add causal reason with injection context (provenance tracking)
592            let causal_reason = AnomalyCausalReason::RandomRate {
593                base_rate: self.config.rates.total_rate,
594            };
595            label = label.with_causal_reason(causal_reason);
596
597            // Add entity context metadata if contexts are populated
598            let context_multiplier = self.calculate_context_rate_multiplier(entry);
599            if (context_multiplier - 1.0).abs() > f64::EPSILON {
600                label = label.with_metadata(
601                    "entity_context_multiplier",
602                    &format!("{context_multiplier:.3}"),
603                );
604                label = label.with_metadata(
605                    "effective_rate",
606                    &format!(
607                        "{:.6}",
608                        (self.config.rates.total_rate * context_multiplier).min(1.0)
609                    ),
610                );
611            }
612
613            // Add monetary impact
614            if let Some(impact) = result.monetary_impact {
615                label = label.with_monetary_impact(impact);
616            }
617
618            // Add related entities
619            for entity in &result.related_entities {
620                label = label.with_related_entity(entity);
621            }
622
623            // Add metadata
624            for (key, value) in &result.metadata {
625                label = label.with_metadata(key, value);
626            }
627
628            // Assign cluster and update causal reason if in cluster
629            if let Some(cluster_id) =
630                self.cluster_manager
631                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
632            {
633                label = label.with_cluster(&cluster_id);
634                // Update causal reason to reflect cluster membership
635                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
636                    cluster_id: cluster_id.clone(),
637                });
638            }
639
640            return Some(label);
641        }
642
643        None
644    }
645
646    /// Injects a specific anomaly type into an entry.
647    pub fn inject_specific(
648        &mut self,
649        entry: &mut JournalEntry,
650        anomaly_type: AnomalyType,
651    ) -> Option<LabeledAnomaly> {
652        self.inject_anomaly(entry, anomaly_type)
653    }
654
655    /// Creates a self-approval anomaly.
656    pub fn create_self_approval(
657        &mut self,
658        entry: &mut JournalEntry,
659        user_id: &str,
660    ) -> Option<LabeledAnomaly> {
661        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
662
663        let label = LabeledAnomaly::new(
664            format!("ANO{:08}", self.labels.len() + 1),
665            anomaly_type,
666            entry.document_number().clone(),
667            "JE".to_string(),
668            entry.company_code().to_string(),
669            entry.posting_date(),
670        )
671        .with_description(&format!("User {user_id} approved their own transaction"))
672        .with_related_entity(user_id)
673        .with_injection_strategy("ManualSelfApproval")
674        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
675            target_type: "User".to_string(),
676            target_id: user_id.to_string(),
677        });
678
679        // Set entry header anomaly tracking fields
680        entry.header.is_anomaly = true;
681        entry.header.is_fraud = true;
682        entry.header.anomaly_id = Some(label.anomaly_id.clone());
683        entry.header.anomaly_type = Some("SelfApproval".to_string());
684        entry.header.fraud_type = Some(FraudType::SelfApproval);
685
686        // Set approver = requester
687        entry.header.created_by = user_id.to_string();
688
689        self.labels.push(label.clone());
690        Some(label)
691    }
692
693    /// Creates a segregation of duties violation.
694    pub fn create_sod_violation(
695        &mut self,
696        entry: &mut JournalEntry,
697        user_id: &str,
698        conflicting_duties: (&str, &str),
699    ) -> Option<LabeledAnomaly> {
700        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
701
702        let label = LabeledAnomaly::new(
703            format!("ANO{:08}", self.labels.len() + 1),
704            anomaly_type,
705            entry.document_number().clone(),
706            "JE".to_string(),
707            entry.company_code().to_string(),
708            entry.posting_date(),
709        )
710        .with_description(&format!(
711            "User {} performed conflicting duties: {} and {}",
712            user_id, conflicting_duties.0, conflicting_duties.1
713        ))
714        .with_related_entity(user_id)
715        .with_metadata("duty1", conflicting_duties.0)
716        .with_metadata("duty2", conflicting_duties.1)
717        .with_injection_strategy("ManualSoDViolation")
718        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
719            target_type: "User".to_string(),
720            target_id: user_id.to_string(),
721        });
722
723        // Set entry header anomaly tracking fields
724        entry.header.is_anomaly = true;
725        entry.header.is_fraud = true;
726        entry.header.anomaly_id = Some(label.anomaly_id.clone());
727        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
728        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
729
730        self.labels.push(label.clone());
731        Some(label)
732    }
733
734    /// Creates an intercompany mismatch anomaly.
735    pub fn create_ic_mismatch(
736        &mut self,
737        entry: &mut JournalEntry,
738        matching_company: &str,
739        expected_amount: Decimal,
740        actual_amount: Decimal,
741    ) -> Option<LabeledAnomaly> {
742        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
743
744        let label = LabeledAnomaly::new(
745            format!("ANO{:08}", self.labels.len() + 1),
746            anomaly_type,
747            entry.document_number().clone(),
748            "JE".to_string(),
749            entry.company_code().to_string(),
750            entry.posting_date(),
751        )
752        .with_description(&format!(
753            "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
754        ))
755        .with_related_entity(matching_company)
756        .with_monetary_impact(actual_amount - expected_amount)
757        .with_metadata("expected_amount", &expected_amount.to_string())
758        .with_metadata("actual_amount", &actual_amount.to_string())
759        .with_injection_strategy("ManualICMismatch")
760        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
761            target_type: "Intercompany".to_string(),
762            target_id: matching_company.to_string(),
763        });
764
765        // Set entry header anomaly tracking fields
766        entry.header.is_anomaly = true;
767        entry.header.anomaly_id = Some(label.anomaly_id.clone());
768        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
769
770        self.labels.push(label.clone());
771        Some(label)
772    }
773
774    /// Returns all generated labels.
775    pub fn get_labels(&self) -> &[LabeledAnomaly] {
776        &self.labels
777    }
778
779    /// Returns the anomaly summary.
780    pub fn get_summary(&self) -> AnomalySummary {
781        AnomalySummary::from_anomalies(&self.labels)
782    }
783
784    /// Returns injection statistics.
785    pub fn get_stats(&self) -> &InjectorStats {
786        &self.stats
787    }
788
789    /// Clears all labels and resets statistics.
790    pub fn reset(&mut self) {
791        self.labels.clear();
792        self.document_anomaly_counts.clear();
793        self.stats = InjectorStats::default();
794        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
795
796        // Reset enhanced components
797        self.near_miss_labels.clear();
798        self.scheme_actions.clear();
799        self.difficulty_distribution.clear();
800
801        if let Some(ref mut baseline) = self.behavioral_baseline {
802            *baseline =
803                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
804        }
805    }
806
807    /// Returns the number of clusters created.
808    pub fn cluster_count(&self) -> usize {
809        self.cluster_manager.cluster_count()
810    }
811
812    // =========================================================================
813    // Entity Context API
814    // =========================================================================
815
816    /// Sets entity contexts for risk-adjusted anomaly injection.
817    ///
818    /// When entity contexts are provided, the injector adjusts anomaly injection
819    /// rates based on entity risk factors. Entries involving high-risk vendors,
820    /// new employees, or sensitive accounts will have higher effective injection
821    /// rates.
822    ///
823    /// Pass empty HashMaps to clear previously set contexts.
824    pub fn set_entity_contexts(
825        &mut self,
826        vendors: HashMap<String, VendorContext>,
827        employees: HashMap<String, EmployeeContext>,
828        accounts: HashMap<String, AccountContext>,
829    ) {
830        self.vendor_contexts = vendors;
831        self.employee_contexts = employees;
832        self.account_contexts = accounts;
833    }
834
835    /// Returns a reference to the vendor context map.
836    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
837        &self.vendor_contexts
838    }
839
840    /// Returns a reference to the employee context map.
841    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
842        &self.employee_contexts
843    }
844
845    /// Returns a reference to the account context map.
846    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
847        &self.account_contexts
848    }
849
850    /// Calculates a rate multiplier from the entity context maps alone (no
851    /// `EntityAwareInjector` needed). This provides a lightweight fallback
852    /// when context-aware injection is not fully enabled but context maps
853    /// have been populated.
854    ///
855    /// The multiplier is the product of individual entity risk factors found
856    /// in the context maps for the given journal entry. If no contexts match,
857    /// returns 1.0 (no adjustment).
858    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
859        if self.vendor_contexts.is_empty()
860            && self.employee_contexts.is_empty()
861            && self.account_contexts.is_empty()
862        {
863            return 1.0;
864        }
865
866        let mut multiplier = 1.0;
867
868        // Vendor lookup via reference field
869        if let Some(ref vendor_ref) = entry.header.reference {
870            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
871                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
872                if ctx.is_new {
873                    multiplier *= 2.0;
874                }
875                if ctx.is_dormant_reactivation {
876                    multiplier *= 1.5;
877                }
878            }
879        }
880
881        // Employee lookup via created_by
882        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
883            if ctx.is_new {
884                multiplier *= 1.5;
885            }
886            if ctx.is_volume_fatigued {
887                multiplier *= 1.3;
888            }
889            if ctx.is_overtime {
890                multiplier *= 1.2;
891            }
892        }
893
894        // Account lookup via first line's GL account
895        if let Some(first_line) = entry.lines.first() {
896            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
897                if ctx.is_high_risk {
898                    multiplier *= 2.0;
899                }
900            }
901        }
902
903        multiplier
904    }
905
906    // =========================================================================
907    // Enhanced Features API (v0.3.0+)
908    // =========================================================================
909
910    /// Advances all active fraud schemes by one time step.
911    ///
912    /// Call this method once per simulated day to generate scheme actions.
913    /// Returns the scheme actions generated for this date.
914    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
915        if let Some(ref mut advancer) = self.scheme_advancer {
916            let context = SchemeContext::new(date, company_code);
917            let actions = advancer.advance_all(&context);
918            self.scheme_actions.extend(actions.clone());
919            actions
920        } else {
921            Vec::new()
922        }
923    }
924
925    /// Potentially starts a new fraud scheme based on probabilities.
926    ///
927    /// Call this method periodically (e.g., once per period) to allow new
928    /// schemes to start based on configured probabilities.
929    /// Returns the scheme ID if a scheme was started.
930    pub fn maybe_start_scheme(
931        &mut self,
932        date: NaiveDate,
933        company_code: &str,
934        available_users: Vec<String>,
935        available_accounts: Vec<String>,
936        available_counterparties: Vec<String>,
937    ) -> Option<uuid::Uuid> {
938        if let Some(ref mut advancer) = self.scheme_advancer {
939            let mut context = SchemeContext::new(date, company_code);
940            context.available_users = available_users;
941            context.available_accounts = available_accounts;
942            context.available_counterparties = available_counterparties;
943
944            advancer.maybe_start_scheme(&context)
945        } else {
946            None
947        }
948    }
949
950    /// Returns all near-miss labels generated.
951    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
952        &self.near_miss_labels
953    }
954
955    /// Returns all scheme actions generated.
956    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
957        &self.scheme_actions
958    }
959
960    /// Returns the detection difficulty distribution.
961    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
962        &self.difficulty_distribution
963    }
964
965    /// Checks for behavioral deviations for an entity with an observation.
966    pub fn check_behavioral_deviations(
967        &self,
968        entity_id: &str,
969        observation: &super::context::Observation,
970    ) -> Vec<super::context::BehavioralDeviation> {
971        if let Some(ref baseline) = self.behavioral_baseline {
972            baseline.check_deviation(entity_id, observation)
973        } else {
974            Vec::new()
975        }
976    }
977
978    /// Gets the baseline for an entity.
979    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
980        if let Some(ref baseline) = self.behavioral_baseline {
981            baseline.get_baseline(entity_id)
982        } else {
983            None
984        }
985    }
986
987    /// Returns the number of active schemes.
988    pub fn active_scheme_count(&self) -> usize {
989        if let Some(ref advancer) = self.scheme_advancer {
990            advancer.active_scheme_count()
991        } else {
992            0
993        }
994    }
995
996    /// Returns whether enhanced features are enabled.
997    pub fn has_enhanced_features(&self) -> bool {
998        self.scheme_advancer.is_some()
999            || self.near_miss_generator.is_some()
1000            || self.difficulty_calculator.is_some()
1001            || self.entity_aware_injector.is_some()
1002    }
1003}
1004
1005/// Builder for AnomalyInjectorConfig.
1006pub struct AnomalyInjectorConfigBuilder {
1007    config: AnomalyInjectorConfig,
1008}
1009
1010impl AnomalyInjectorConfigBuilder {
1011    /// Creates a new builder with default configuration.
1012    pub fn new() -> Self {
1013        Self {
1014            config: AnomalyInjectorConfig::default(),
1015        }
1016    }
1017
1018    /// Sets the total anomaly rate.
1019    pub fn with_total_rate(mut self, rate: f64) -> Self {
1020        self.config.rates.total_rate = rate;
1021        self
1022    }
1023
1024    /// Sets the fraud rate (proportion of anomalies).
1025    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1026        self.config.rates.fraud_rate = rate;
1027        self
1028    }
1029
1030    /// Sets the error rate (proportion of anomalies).
1031    pub fn with_error_rate(mut self, rate: f64) -> Self {
1032        self.config.rates.error_rate = rate;
1033        self
1034    }
1035
1036    /// Sets the random seed.
1037    pub fn with_seed(mut self, seed: u64) -> Self {
1038        self.config.seed = seed;
1039        self
1040    }
1041
1042    /// Sets the temporal pattern.
1043    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1044        self.config.patterns.temporal_pattern = pattern;
1045        self
1046    }
1047
1048    /// Enables or disables label generation.
1049    pub fn with_labels(mut self, generate: bool) -> Self {
1050        self.config.generate_labels = generate;
1051        self
1052    }
1053
1054    /// Sets target companies.
1055    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1056        self.config.target_companies = companies;
1057        self
1058    }
1059
1060    /// Sets the date range.
1061    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1062        self.config.date_range = Some((start, end));
1063        self
1064    }
1065
1066    // =========================================================================
1067    // Enhanced Features Configuration (v0.3.0+)
1068    // =========================================================================
1069
1070    /// Enables multi-stage fraud scheme generation.
1071    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1072        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1073        self.config.enhanced.scheme_probability = probability;
1074        self
1075    }
1076
1077    /// Enables near-miss generation.
1078    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1079        self.config.enhanced.near_miss_enabled = enabled;
1080        self.config.enhanced.near_miss_proportion = proportion;
1081        self
1082    }
1083
1084    /// Sets approval thresholds for threshold-proximity near-misses.
1085    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1086        self.config.enhanced.approval_thresholds = thresholds;
1087        self
1088    }
1089
1090    /// Enables correlated anomaly injection.
1091    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1092        self.config.enhanced.correlated_injection_enabled = enabled;
1093        self
1094    }
1095
1096    /// Enables temporal clustering (period-end spikes).
1097    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1098        self.config.enhanced.temporal_clustering_enabled = enabled;
1099        self.config.enhanced.period_end_multiplier = multiplier;
1100        self
1101    }
1102
1103    /// Enables detection difficulty classification.
1104    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1105        self.config.enhanced.difficulty_classification_enabled = enabled;
1106        self
1107    }
1108
1109    /// Enables context-aware injection.
1110    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1111        self.config.enhanced.context_aware_enabled = enabled;
1112        self
1113    }
1114
1115    /// Sets behavioral baseline configuration.
1116    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1117        self.config.enhanced.behavioral_baseline_config = config;
1118        self
1119    }
1120
1121    /// Enables all enhanced features with default settings.
1122    pub fn with_all_enhanced_features(mut self) -> Self {
1123        self.config.enhanced.multi_stage_schemes_enabled = true;
1124        self.config.enhanced.scheme_probability = 0.02;
1125        self.config.enhanced.correlated_injection_enabled = true;
1126        self.config.enhanced.temporal_clustering_enabled = true;
1127        self.config.enhanced.period_end_multiplier = 2.5;
1128        self.config.enhanced.near_miss_enabled = true;
1129        self.config.enhanced.near_miss_proportion = 0.30;
1130        self.config.enhanced.difficulty_classification_enabled = true;
1131        self.config.enhanced.context_aware_enabled = true;
1132        self.config.enhanced.behavioral_baseline_config.enabled = true;
1133        self
1134    }
1135
1136    /// Builds the configuration.
1137    pub fn build(self) -> AnomalyInjectorConfig {
1138        self.config
1139    }
1140}
1141
1142impl Default for AnomalyInjectorConfigBuilder {
1143    fn default() -> Self {
1144        Self::new()
1145    }
1146}
1147
1148#[cfg(test)]
1149#[allow(clippy::unwrap_used)]
1150mod tests {
1151    use super::*;
1152    use chrono::NaiveDate;
1153    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1154    use rust_decimal_macros::dec;
1155
1156    fn create_test_entry(doc_num: &str) -> JournalEntry {
1157        let mut entry = JournalEntry::new_simple(
1158            doc_num.to_string(),
1159            "1000".to_string(),
1160            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1161            "Test Entry".to_string(),
1162        );
1163
1164        entry.add_line(JournalEntryLine {
1165            line_number: 1,
1166            gl_account: "5000".to_string(),
1167            debit_amount: dec!(1000),
1168            ..Default::default()
1169        });
1170
1171        entry.add_line(JournalEntryLine {
1172            line_number: 2,
1173            gl_account: "1000".to_string(),
1174            credit_amount: dec!(1000),
1175            ..Default::default()
1176        });
1177
1178        entry
1179    }
1180
1181    #[test]
1182    fn test_anomaly_injector_basic() {
1183        let config = AnomalyInjectorConfigBuilder::new()
1184            .with_total_rate(0.5) // High rate for testing
1185            .with_seed(42)
1186            .build();
1187
1188        let mut injector = AnomalyInjector::new(config);
1189
1190        let mut entries: Vec<_> = (0..100)
1191            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1192            .collect();
1193
1194        let result = injector.process_entries(&mut entries);
1195
1196        // With 50% rate, we should have some anomalies
1197        assert!(result.anomalies_injected > 0);
1198        assert!(!result.labels.is_empty());
1199        assert_eq!(result.labels.len(), result.anomalies_injected);
1200    }
1201
1202    #[test]
1203    fn test_specific_injection() {
1204        let config = AnomalyInjectorConfig::default();
1205        let mut injector = AnomalyInjector::new(config);
1206
1207        let mut entry = create_test_entry("JE001");
1208        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1209
1210        let label = injector.inject_specific(&mut entry, anomaly_type);
1211
1212        assert!(label.is_some());
1213        let label = label.unwrap();
1214        // document_id is the UUID string from the journal entry header
1215        assert!(!label.document_id.is_empty());
1216        assert_eq!(label.document_id, entry.document_number());
1217    }
1218
1219    #[test]
1220    fn test_self_approval_injection() {
1221        let config = AnomalyInjectorConfig::default();
1222        let mut injector = AnomalyInjector::new(config);
1223
1224        let mut entry = create_test_entry("JE001");
1225        let label = injector.create_self_approval(&mut entry, "USER001");
1226
1227        assert!(label.is_some());
1228        let label = label.unwrap();
1229        assert!(matches!(
1230            label.anomaly_type,
1231            AnomalyType::Fraud(FraudType::SelfApproval)
1232        ));
1233        assert!(label.related_entities.contains(&"USER001".to_string()));
1234    }
1235
1236    #[test]
1237    fn test_company_filtering() {
1238        let config = AnomalyInjectorConfigBuilder::new()
1239            .with_total_rate(1.0) // Inject all
1240            .with_target_companies(vec!["2000".to_string()])
1241            .build();
1242
1243        let mut injector = AnomalyInjector::new(config);
1244
1245        let mut entries = vec![
1246            create_test_entry("JE001"), // company 1000
1247            create_test_entry("JE002"), // company 1000
1248        ];
1249
1250        let result = injector.process_entries(&mut entries);
1251
1252        // No anomalies because entries are in company 1000, not 2000
1253        assert_eq!(result.anomalies_injected, 0);
1254    }
1255
1256    // =========================================================================
1257    // Entity Context Tests
1258    // =========================================================================
1259
1260    /// Helper to create a test entry with specific vendor reference and employee.
1261    fn create_test_entry_with_context(
1262        doc_num: &str,
1263        vendor_ref: Option<&str>,
1264        employee_id: &str,
1265        gl_account: &str,
1266    ) -> JournalEntry {
1267        let mut entry = JournalEntry::new_simple(
1268            doc_num.to_string(),
1269            "1000".to_string(),
1270            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1271            "Test Entry".to_string(),
1272        );
1273
1274        entry.header.reference = vendor_ref.map(|v| v.to_string());
1275        entry.header.created_by = employee_id.to_string();
1276
1277        entry.add_line(JournalEntryLine {
1278            line_number: 1,
1279            gl_account: gl_account.to_string(),
1280            debit_amount: dec!(1000),
1281            ..Default::default()
1282        });
1283
1284        entry.add_line(JournalEntryLine {
1285            line_number: 2,
1286            gl_account: "1000".to_string(),
1287            credit_amount: dec!(1000),
1288            ..Default::default()
1289        });
1290
1291        entry
1292    }
1293
1294    #[test]
1295    fn test_set_entity_contexts() {
1296        let config = AnomalyInjectorConfig::default();
1297        let mut injector = AnomalyInjector::new(config);
1298
1299        // Initially empty
1300        assert!(injector.vendor_contexts().is_empty());
1301        assert!(injector.employee_contexts().is_empty());
1302        assert!(injector.account_contexts().is_empty());
1303
1304        // Set contexts
1305        let mut vendors = HashMap::new();
1306        vendors.insert(
1307            "V001".to_string(),
1308            VendorContext {
1309                vendor_id: "V001".to_string(),
1310                is_new: true,
1311                ..Default::default()
1312            },
1313        );
1314
1315        let mut employees = HashMap::new();
1316        employees.insert(
1317            "EMP001".to_string(),
1318            EmployeeContext {
1319                employee_id: "EMP001".to_string(),
1320                is_new: true,
1321                ..Default::default()
1322            },
1323        );
1324
1325        let mut accounts = HashMap::new();
1326        accounts.insert(
1327            "8100".to_string(),
1328            AccountContext {
1329                account_code: "8100".to_string(),
1330                is_high_risk: true,
1331                ..Default::default()
1332            },
1333        );
1334
1335        injector.set_entity_contexts(vendors, employees, accounts);
1336
1337        assert_eq!(injector.vendor_contexts().len(), 1);
1338        assert_eq!(injector.employee_contexts().len(), 1);
1339        assert_eq!(injector.account_contexts().len(), 1);
1340        assert!(injector.vendor_contexts().contains_key("V001"));
1341        assert!(injector.employee_contexts().contains_key("EMP001"));
1342        assert!(injector.account_contexts().contains_key("8100"));
1343    }
1344
1345    #[test]
1346    fn test_default_behavior_no_contexts() {
1347        // Without any entity contexts, the base rate is used unchanged.
1348        let config = AnomalyInjectorConfigBuilder::new()
1349            .with_total_rate(0.5)
1350            .with_seed(42)
1351            .build();
1352
1353        let mut injector = AnomalyInjector::new(config);
1354
1355        let mut entries: Vec<_> = (0..200)
1356            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1357            .collect();
1358
1359        let result = injector.process_entries(&mut entries);
1360
1361        // With 50% base rate and no context, expect roughly 50% injection
1362        // Allow wide margin for randomness
1363        assert!(result.anomalies_injected > 0);
1364        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1365        assert!(
1366            rate > 0.2 && rate < 0.8,
1367            "Expected ~50% rate, got {:.2}%",
1368            rate * 100.0
1369        );
1370    }
1371
1372    #[test]
1373    fn test_entity_context_increases_injection_rate() {
1374        // With high-risk entity contexts, the effective rate should be higher
1375        // than the base rate, leading to more anomalies being injected.
1376        let base_rate = 0.10; // Low base rate
1377
1378        // Run without contexts
1379        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1380            .with_total_rate(base_rate)
1381            .with_seed(123)
1382            .build();
1383
1384        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1385
1386        let mut entries_no_ctx: Vec<_> = (0..500)
1387            .map(|i| {
1388                create_test_entry_with_context(
1389                    &format!("JE{:04}", i),
1390                    Some("V001"),
1391                    "EMP001",
1392                    "8100",
1393                )
1394            })
1395            .collect();
1396
1397        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1398
1399        // Run with high-risk contexts (same seed for comparable randomness)
1400        let config_ctx = AnomalyInjectorConfigBuilder::new()
1401            .with_total_rate(base_rate)
1402            .with_seed(123)
1403            .build();
1404
1405        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1406
1407        // Set up high-risk contexts
1408        let mut vendors = HashMap::new();
1409        vendors.insert(
1410            "V001".to_string(),
1411            VendorContext {
1412                vendor_id: "V001".to_string(),
1413                is_new: true,                  // 2.0x multiplier
1414                is_dormant_reactivation: true, // 1.5x multiplier
1415                ..Default::default()
1416            },
1417        );
1418
1419        let mut employees = HashMap::new();
1420        employees.insert(
1421            "EMP001".to_string(),
1422            EmployeeContext {
1423                employee_id: "EMP001".to_string(),
1424                is_new: true, // 1.5x multiplier
1425                ..Default::default()
1426            },
1427        );
1428
1429        let mut accounts = HashMap::new();
1430        accounts.insert(
1431            "8100".to_string(),
1432            AccountContext {
1433                account_code: "8100".to_string(),
1434                is_high_risk: true, // 2.0x multiplier
1435                ..Default::default()
1436            },
1437        );
1438
1439        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1440
1441        let mut entries_ctx: Vec<_> = (0..500)
1442            .map(|i| {
1443                create_test_entry_with_context(
1444                    &format!("JE{:04}", i),
1445                    Some("V001"),
1446                    "EMP001",
1447                    "8100",
1448                )
1449            })
1450            .collect();
1451
1452        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1453
1454        // The context-enhanced run should inject more anomalies
1455        assert!(
1456            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1457            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1458            result_ctx.anomalies_injected,
1459            result_no_ctx.anomalies_injected,
1460        );
1461    }
1462
1463    #[test]
1464    fn test_risk_score_multiplication() {
1465        // Verify the calculate_context_rate_multiplier produces correct values.
1466        let config = AnomalyInjectorConfig::default();
1467        let mut injector = AnomalyInjector::new(config);
1468
1469        // No contexts: multiplier should be 1.0
1470        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1471        assert!(
1472            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1473        );
1474
1475        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1476        let mut vendors = HashMap::new();
1477        vendors.insert(
1478            "V_RISKY".to_string(),
1479            VendorContext {
1480                vendor_id: "V_RISKY".to_string(),
1481                is_new: true,
1482                ..Default::default()
1483            },
1484        );
1485
1486        let mut accounts = HashMap::new();
1487        accounts.insert(
1488            "9000".to_string(),
1489            AccountContext {
1490                account_code: "9000".to_string(),
1491                is_high_risk: true,
1492                ..Default::default()
1493            },
1494        );
1495
1496        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1497
1498        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1499        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1500        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1501        assert!(
1502            (multiplier - 4.0).abs() < f64::EPSILON,
1503            "Expected 4.0x multiplier, got {}",
1504            multiplier,
1505        );
1506
1507        // Entry with only vendor context match (no account match)
1508        let entry_vendor_only =
1509            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1510        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1511        assert!(
1512            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1513            "Expected 2.0x multiplier (vendor only), got {}",
1514            multiplier_vendor,
1515        );
1516
1517        // Entry with no matching contexts
1518        let entry_no_match =
1519            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1520        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1521        assert!(
1522            (multiplier_none - 1.0).abs() < f64::EPSILON,
1523            "Expected 1.0x multiplier (no match), got {}",
1524            multiplier_none,
1525        );
1526    }
1527
1528    #[test]
1529    fn test_employee_context_multiplier() {
1530        let config = AnomalyInjectorConfig::default();
1531        let mut injector = AnomalyInjector::new(config);
1532
1533        let mut employees = HashMap::new();
1534        employees.insert(
1535            "EMP_NEW".to_string(),
1536            EmployeeContext {
1537                employee_id: "EMP_NEW".to_string(),
1538                is_new: true,             // 1.5x
1539                is_volume_fatigued: true, // 1.3x
1540                is_overtime: true,        // 1.2x
1541                ..Default::default()
1542            },
1543        );
1544
1545        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1546
1547        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1548        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1549
1550        // 1.5 * 1.3 * 1.2 = 2.34
1551        let expected = 1.5 * 1.3 * 1.2;
1552        assert!(
1553            (multiplier - expected).abs() < 0.01,
1554            "Expected {:.3}x multiplier, got {:.3}",
1555            expected,
1556            multiplier,
1557        );
1558    }
1559
1560    #[test]
1561    fn test_entity_contexts_persist_across_reset() {
1562        let config = AnomalyInjectorConfig::default();
1563        let mut injector = AnomalyInjector::new(config);
1564
1565        let mut vendors = HashMap::new();
1566        vendors.insert(
1567            "V001".to_string(),
1568            VendorContext {
1569                vendor_id: "V001".to_string(),
1570                is_new: true,
1571                ..Default::default()
1572            },
1573        );
1574
1575        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1576        assert_eq!(injector.vendor_contexts().len(), 1);
1577
1578        // Reset clears labels and stats but not entity contexts
1579        injector.reset();
1580        assert_eq!(injector.vendor_contexts().len(), 1);
1581    }
1582
1583    #[test]
1584    fn test_set_empty_contexts_clears() {
1585        let config = AnomalyInjectorConfig::default();
1586        let mut injector = AnomalyInjector::new(config);
1587
1588        let mut vendors = HashMap::new();
1589        vendors.insert(
1590            "V001".to_string(),
1591            VendorContext {
1592                vendor_id: "V001".to_string(),
1593                ..Default::default()
1594            },
1595        );
1596
1597        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1598        assert_eq!(injector.vendor_contexts().len(), 1);
1599
1600        // Setting empty maps clears
1601        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1602        assert!(injector.vendor_contexts().is_empty());
1603    }
1604
1605    #[test]
1606    fn test_dormant_vendor_multiplier() {
1607        let config = AnomalyInjectorConfig::default();
1608        let mut injector = AnomalyInjector::new(config);
1609
1610        let mut vendors = HashMap::new();
1611        vendors.insert(
1612            "V_DORMANT".to_string(),
1613            VendorContext {
1614                vendor_id: "V_DORMANT".to_string(),
1615                is_dormant_reactivation: true, // 1.5x
1616                ..Default::default()
1617            },
1618        );
1619
1620        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1621
1622        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1623        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1624        assert!(
1625            (multiplier - 1.5).abs() < f64::EPSILON,
1626            "Expected 1.5x multiplier for dormant vendor, got {}",
1627            multiplier,
1628        );
1629    }
1630}