Skip to main content

datasynth_generators/anomaly/
injector.rs

1//! Main anomaly injection engine.
2//!
3//! The injector coordinates anomaly generation across all data types,
4//! managing rates, patterns, clustering, and label generation.
5//!
6//! ## Enhanced Features (v0.3.0+)
7//!
8//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
9//! - **Correlated injection**: Co-occurrence patterns and error cascades
10//! - **Near-miss generation**: Suspicious but legitimate transactions
11//! - **Detection difficulty classification**: Trivial to expert levels
12//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::Rng;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25    RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31    EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38    TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45/// Configuration for the anomaly injector.
46#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48    /// Rate configuration.
49    pub rates: AnomalyRateConfig,
50    /// Pattern configuration.
51    pub patterns: AnomalyPatternConfig,
52    /// Random seed for reproducibility.
53    pub seed: u64,
54    /// Whether to generate labels.
55    pub generate_labels: bool,
56    /// Whether to allow duplicate injection.
57    pub allow_duplicates: bool,
58    /// Maximum anomalies per document.
59    pub max_anomalies_per_document: usize,
60    /// Company codes to target (empty = all).
61    pub target_companies: Vec<String>,
62    /// Date range for injection.
63    pub date_range: Option<(NaiveDate, NaiveDate)>,
64    /// Enhanced features configuration.
65    pub enhanced: EnhancedInjectionConfig,
66}
67
68/// Enhanced injection configuration for v0.3.0+ features.
69#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71    /// Enable multi-stage fraud scheme generation.
72    pub multi_stage_schemes_enabled: bool,
73    /// Probability of starting a new scheme per perpetrator per year.
74    pub scheme_probability: f64,
75    /// Enable correlated anomaly injection.
76    pub correlated_injection_enabled: bool,
77    /// Enable temporal clustering (period-end spikes).
78    pub temporal_clustering_enabled: bool,
79    /// Period-end anomaly rate multiplier.
80    pub period_end_multiplier: f64,
81    /// Enable near-miss generation.
82    pub near_miss_enabled: bool,
83    /// Proportion of anomalies that are near-misses.
84    pub near_miss_proportion: f64,
85    /// Approval thresholds for threshold-proximity near-misses.
86    pub approval_thresholds: Vec<Decimal>,
87    /// Enable detection difficulty classification.
88    pub difficulty_classification_enabled: bool,
89    /// Enable context-aware injection.
90    pub context_aware_enabled: bool,
91    /// Behavioral baseline configuration.
92    pub behavioral_baseline_config: BehavioralBaselineConfig,
93}
94
95impl Default for AnomalyInjectorConfig {
96    fn default() -> Self {
97        Self {
98            rates: AnomalyRateConfig::default(),
99            patterns: AnomalyPatternConfig::default(),
100            seed: 42,
101            generate_labels: true,
102            allow_duplicates: true,
103            max_anomalies_per_document: 2,
104            target_companies: Vec::new(),
105            date_range: None,
106            enhanced: EnhancedInjectionConfig::default(),
107        }
108    }
109}
110
111/// Result of an injection batch.
112#[derive(Debug, Clone)]
113pub struct InjectionBatchResult {
114    /// Number of entries processed.
115    pub entries_processed: usize,
116    /// Number of anomalies injected.
117    pub anomalies_injected: usize,
118    /// Number of duplicates created.
119    pub duplicates_created: usize,
120    /// Labels generated.
121    pub labels: Vec<LabeledAnomaly>,
122    /// Summary of anomalies.
123    pub summary: AnomalySummary,
124    /// Entries that were modified (document numbers).
125    pub modified_documents: Vec<String>,
126    /// Near-miss labels (suspicious but legitimate transactions).
127    pub near_miss_labels: Vec<NearMissLabel>,
128    /// Multi-stage scheme actions generated.
129    pub scheme_actions: Vec<SchemeAction>,
130    /// Difficulty distribution summary.
131    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
132}
133
134/// Main anomaly injection engine.
135pub struct AnomalyInjector {
136    config: AnomalyInjectorConfig,
137    rng: ChaCha8Rng,
138    uuid_factory: DeterministicUuidFactory,
139    type_selector: AnomalyTypeSelector,
140    strategies: StrategyCollection,
141    cluster_manager: ClusterManager,
142    /// Selects target entities for anomaly injection (RepeatOffender, etc.).
143    entity_targeting: EntityTargetingManager,
144    /// Tracking which documents already have anomalies.
145    document_anomaly_counts: HashMap<String, usize>,
146    /// All generated labels.
147    labels: Vec<LabeledAnomaly>,
148    /// Statistics.
149    stats: InjectorStats,
150    // Enhanced components (v0.3.0+)
151    /// Multi-stage fraud scheme advancer.
152    scheme_advancer: Option<SchemeAdvancer>,
153    /// Near-miss generator.
154    near_miss_generator: Option<NearMissGenerator>,
155    /// Near-miss labels generated.
156    near_miss_labels: Vec<NearMissLabel>,
157    /// Drives correlated anomaly pairs (e.g., FictitiousVendor + InvoiceManipulation).
158    co_occurrence_handler: Option<AnomalyCoOccurrence>,
159    /// Queued correlated anomalies waiting to be injected.
160    queued_co_occurrences: Vec<QueuedAnomaly>,
161    /// Groups anomalies into temporal bursts during period-end windows.
162    temporal_cluster_generator: Option<TemporalClusterGenerator>,
163    /// Difficulty calculator.
164    difficulty_calculator: Option<DifficultyCalculator>,
165    /// Entity-aware injector.
166    entity_aware_injector: Option<EntityAwareInjector>,
167    /// Behavioral baseline tracker.
168    behavioral_baseline: Option<BehavioralBaseline>,
169    /// Scheme actions generated.
170    scheme_actions: Vec<SchemeAction>,
171    /// Difficulty distribution.
172    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
173    // Entity context lookup maps for risk-adjusted injection rates
174    /// Vendor contexts keyed by vendor ID.
175    vendor_contexts: HashMap<String, VendorContext>,
176    /// Employee contexts keyed by employee ID.
177    employee_contexts: HashMap<String, EmployeeContext>,
178    /// Account contexts keyed by account code.
179    account_contexts: HashMap<String, AccountContext>,
180}
181
/// Running counters describing what the injector has done so far.
///
/// All counters start at zero / empty (`Default` is derived).
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// How many entries have been looked at.
    pub total_processed: usize,
    /// How many anomalies have been injected.
    pub total_injected: usize,
    /// Injected anomalies per category name (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Injected anomalies per specific anomaly type name.
    pub by_type: HashMap<String, usize>,
    /// Injected anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped because of the rate check.
    pub skipped_rate: usize,
    /// Entries skipped because they fall outside the configured date range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    pub skipped_company: usize,
    /// Entries skipped because the document already reached the
    /// max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
}
204
205/// A correlated anomaly queued for future injection.
206struct QueuedAnomaly {
207    /// Anomaly type to inject.
208    anomaly_type: AnomalyType,
209    /// Target entity (if same_entity was specified in the co-occurrence pattern).
210    target_entity: Option<String>,
211    /// Earliest date this can be injected.
212    earliest_date: NaiveDate,
213    /// Description from the co-occurrence pattern.
214    description: String,
215}
216
217impl AnomalyInjector {
218    /// Creates a new anomaly injector.
219    pub fn new(config: AnomalyInjectorConfig) -> Self {
220        let mut rng = seeded_rng(config.seed, 0);
221        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
222        let entity_targeting =
223            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
224
225        // Initialize enhanced components based on configuration
226        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
227            let scheme_config = SchemeAdvancerConfig {
228                embezzlement_probability: config.enhanced.scheme_probability,
229                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
230                kickback_probability: config.enhanced.scheme_probability * 0.5,
231                seed: rng.random(),
232                ..Default::default()
233            };
234            Some(SchemeAdvancer::new(scheme_config))
235        } else {
236            None
237        };
238
239        let near_miss_generator = if config.enhanced.near_miss_enabled {
240            let near_miss_config = NearMissConfig {
241                proportion: config.enhanced.near_miss_proportion,
242                seed: rng.random(),
243                ..Default::default()
244            };
245            Some(NearMissGenerator::new(near_miss_config))
246        } else {
247            None
248        };
249
250        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
251            Some(AnomalyCoOccurrence::new())
252        } else {
253            None
254        };
255
256        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
257            Some(TemporalClusterGenerator::new())
258        } else {
259            None
260        };
261
262        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
263            Some(DifficultyCalculator::new())
264        } else {
265            None
266        };
267
268        let entity_aware_injector = if config.enhanced.context_aware_enabled {
269            Some(EntityAwareInjector::default())
270        } else {
271            None
272        };
273
274        let behavioral_baseline = if config.enhanced.context_aware_enabled
275            && config.enhanced.behavioral_baseline_config.enabled
276        {
277            Some(BehavioralBaseline::new(
278                config.enhanced.behavioral_baseline_config.clone(),
279            ))
280        } else {
281            None
282        };
283
284        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
285
286        Self {
287            config,
288            rng,
289            uuid_factory,
290            type_selector: AnomalyTypeSelector::new(),
291            strategies: StrategyCollection::default(),
292            cluster_manager,
293            entity_targeting,
294            document_anomaly_counts: HashMap::new(),
295            labels: Vec::new(),
296            stats: InjectorStats::default(),
297            scheme_advancer,
298            near_miss_generator,
299            near_miss_labels: Vec::new(),
300            co_occurrence_handler,
301            queued_co_occurrences: Vec::new(),
302            temporal_cluster_generator,
303            difficulty_calculator,
304            entity_aware_injector,
305            behavioral_baseline,
306            scheme_actions: Vec::new(),
307            difficulty_distribution: HashMap::new(),
308            vendor_contexts: HashMap::new(),
309            employee_contexts: HashMap::new(),
310            account_contexts: HashMap::new(),
311        }
312    }
313
314    /// Processes a batch of journal entries, potentially injecting anomalies.
315    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
316        debug!(
317            entry_count = entries.len(),
318            total_rate = self.config.rates.total_rate,
319            seed = self.config.seed,
320            "Injecting anomalies into journal entries"
321        );
322
323        let mut modified_documents = Vec::new();
324        let mut duplicates = Vec::new();
325
326        for entry in entries.iter_mut() {
327            self.stats.total_processed += 1;
328
329            // Update behavioral baseline if enabled
330            if let Some(ref mut baseline) = self.behavioral_baseline {
331                use super::context::Observation;
332                // Record the observation for baseline building
333                let entity_id = entry.header.created_by.clone();
334                let observation =
335                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
336                baseline.record_observation(&entity_id, observation);
337            }
338
339            // Check if we should process this entry
340            if !self.should_process(entry) {
341                continue;
342            }
343
344            // --- Check queued co-occurrences first ---
345            let entry_date = entry.posting_date();
346            let ready_indices: Vec<usize> = self
347                .queued_co_occurrences
348                .iter()
349                .enumerate()
350                .filter(|(_, q)| entry_date >= q.earliest_date)
351                .map(|(i, _)| i)
352                .collect();
353
354            if let Some(&idx) = ready_indices.first() {
355                let queued = self.queued_co_occurrences.remove(idx);
356                if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
357                    label = label.with_metadata("co_occurrence", "true");
358                    label = label.with_metadata("co_occurrence_description", &queued.description);
359                    if let Some(ref target) = queued.target_entity {
360                        label = label.with_related_entity(target);
361                        label = label.with_metadata("co_occurrence_target", target);
362                    }
363                    modified_documents.push(entry.document_number().clone());
364                    self.labels.push(label);
365                    self.stats.total_injected += 1;
366                }
367                continue; // This entry was used for a queued co-occurrence
368            }
369
370            // Calculate effective rate
371            let base_rate = self.config.rates.total_rate;
372
373            // Calculate entity-aware rate adjustment using context lookup maps
374            let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
375                let employee_id = &entry.header.created_by;
376                let first_account = entry
377                    .lines
378                    .first()
379                    .map(|l| l.gl_account.as_str())
380                    .unwrap_or("");
381                // Look up vendor from the entry's reference field (vendor ID convention)
382                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
383
384                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
385                let employee_ctx = self.employee_contexts.get(employee_id);
386                let account_ctx = self.account_contexts.get(first_account);
387
388                let multiplier =
389                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
390                (base_rate * multiplier).min(1.0)
391            } else {
392                // No entity-aware injector: fall back to context maps alone
393                self.calculate_context_rate_multiplier(entry) * base_rate
394            };
395
396            // --- Temporal clustering: boost rate during period-end windows ---
397            if let Some(ref tcg) = self.temporal_cluster_generator {
398                let temporal_multiplier = tcg
399                    .get_active_clusters(entry_date)
400                    .iter()
401                    .map(|c| c.rate_multiplier)
402                    .fold(1.0_f64, f64::max);
403                effective_rate = (effective_rate * temporal_multiplier).min(1.0);
404            }
405
406            // Determine if we inject an anomaly
407            if should_inject_anomaly(
408                effective_rate,
409                entry_date,
410                &self.config.patterns.temporal_pattern,
411                &mut self.rng,
412            ) {
413                // Check if this should be a near-miss instead
414                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
415                    // Record the transaction for near-duplicate detection
416                    let account = entry
417                        .lines
418                        .first()
419                        .map(|l| l.gl_account.clone())
420                        .unwrap_or_default();
421                    near_miss_gen.record_transaction(
422                        entry.document_number().clone(),
423                        entry_date,
424                        entry.total_debit(),
425                        &account,
426                        None,
427                    );
428
429                    // Check if this could be a near-miss
430                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
431                        entry.document_number().clone(),
432                        entry_date,
433                        entry.total_debit(),
434                        &account,
435                        None,
436                        &self.config.enhanced.approval_thresholds,
437                    ) {
438                        self.near_miss_labels.push(near_miss_label);
439                        continue; // Skip actual anomaly injection
440                    }
441                }
442
443                // Select anomaly category based on rates
444                let anomaly_type = self.select_anomaly_category();
445
446                // --- Entity targeting: select and track target entity ---
447                let target_entity = {
448                    let mut candidates: Vec<String> =
449                        self.vendor_contexts.keys().cloned().collect();
450                    candidates.extend(self.employee_contexts.keys().cloned());
451                    if candidates.is_empty() {
452                        // Fall back to entry's reference field as a candidate
453                        if let Some(ref r) = entry.header.reference {
454                            candidates.push(r.clone());
455                        }
456                    }
457                    self.entity_targeting
458                        .select_entity(&candidates, &mut self.rng)
459                };
460
461                // Apply the anomaly
462                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
463                    // Add entity targeting metadata
464                    if let Some(ref entity_id) = target_entity {
465                        label = label.with_metadata("entity_target", entity_id);
466                        label = label.with_related_entity(entity_id);
467                        label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
468                            target_type: "Entity".to_string(),
469                            target_id: entity_id.clone(),
470                        });
471                    }
472
473                    // Calculate detection difficulty if enabled
474                    if let Some(ref calculator) = self.difficulty_calculator {
475                        let difficulty = calculator.calculate(&label);
476
477                        // Store difficulty in metadata
478                        label =
479                            label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
480                        label = label.with_metadata(
481                            "difficulty_score",
482                            &difficulty.difficulty_score().to_string(),
483                        );
484
485                        // Update difficulty distribution
486                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
487                    }
488
489                    modified_documents.push(entry.document_number().clone());
490                    self.labels.push(label);
491                    self.stats.total_injected += 1;
492
493                    // --- Co-occurrence: queue correlated anomalies ---
494                    if let Some(ref co_occ) = self.co_occurrence_handler {
495                        let correlated =
496                            co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
497                        for result in correlated {
498                            self.queued_co_occurrences.push(QueuedAnomaly {
499                                anomaly_type: result.anomaly_type,
500                                target_entity: if result.same_entity {
501                                    target_entity.clone()
502                                } else {
503                                    None
504                                },
505                                earliest_date: entry_date
506                                    + chrono::Duration::days(i64::from(result.lag_days)),
507                                description: result.description,
508                            });
509                        }
510                    }
511                }
512
513                // Check for duplicate injection
514                if self.config.allow_duplicates
515                    && matches!(
516                        self.labels.last().map(|l| &l.anomaly_type),
517                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
518                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
519                    )
520                {
521                    let dup_strategy = DuplicationStrategy::default();
522                    let duplicate =
523                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
524                    duplicates.push(duplicate);
525                }
526            }
527        }
528
529        // Count duplicates
530        let duplicates_created = duplicates.len();
531
532        // Build summary
533        let summary = AnomalySummary::from_anomalies(&self.labels);
534
535        InjectionBatchResult {
536            entries_processed: self.stats.total_processed,
537            anomalies_injected: self.stats.total_injected,
538            duplicates_created,
539            labels: self.labels.clone(),
540            summary,
541            modified_documents,
542            near_miss_labels: self.near_miss_labels.clone(),
543            scheme_actions: self.scheme_actions.clone(),
544            difficulty_distribution: self.difficulty_distribution.clone(),
545        }
546    }
547
548    /// Checks if an entry should be processed.
549    fn should_process(&mut self, entry: &JournalEntry) -> bool {
550        // Check company filter
551        if !self.config.target_companies.is_empty()
552            && !self
553                .config
554                .target_companies
555                .iter()
556                .any(|c| c == entry.company_code())
557        {
558            self.stats.skipped_company += 1;
559            return false;
560        }
561
562        // Check date range
563        if let Some((start, end)) = self.config.date_range {
564            if entry.posting_date() < start || entry.posting_date() > end {
565                self.stats.skipped_date += 1;
566                return false;
567            }
568        }
569
570        // Check max anomalies per document
571        let current_count = self
572            .document_anomaly_counts
573            .get(&entry.document_number())
574            .copied()
575            .unwrap_or(0);
576        if current_count >= self.config.max_anomalies_per_document {
577            self.stats.skipped_max_per_doc += 1;
578            return false;
579        }
580
581        true
582    }
583
584    /// Selects an anomaly category based on configured rates.
585    fn select_anomaly_category(&mut self) -> AnomalyType {
586        let r = self.rng.random::<f64>();
587        let rates = &self.config.rates;
588
589        let mut cumulative = 0.0;
590
591        cumulative += rates.fraud_rate;
592        if r < cumulative {
593            return self.type_selector.select_fraud(&mut self.rng);
594        }
595
596        cumulative += rates.error_rate;
597        if r < cumulative {
598            return self.type_selector.select_error(&mut self.rng);
599        }
600
601        cumulative += rates.process_issue_rate;
602        if r < cumulative {
603            return self.type_selector.select_process_issue(&mut self.rng);
604        }
605
606        cumulative += rates.statistical_rate;
607        if r < cumulative {
608            return self.type_selector.select_statistical(&mut self.rng);
609        }
610
611        self.type_selector.select_relational(&mut self.rng)
612    }
613
614    /// Injects an anomaly into an entry.
615    fn inject_anomaly(
616        &mut self,
617        entry: &mut JournalEntry,
618        anomaly_type: AnomalyType,
619    ) -> Option<LabeledAnomaly> {
620        // Check if strategy can be applied
621        if !self.strategies.can_apply(entry, &anomaly_type) {
622            return None;
623        }
624
625        // Apply the strategy
626        let result = self
627            .strategies
628            .apply_strategy(entry, &anomaly_type, &mut self.rng);
629
630        if !result.success {
631            return None;
632        }
633
634        // Update document anomaly count
635        *self
636            .document_anomaly_counts
637            .entry(entry.document_number().clone())
638            .or_insert(0) += 1;
639
640        // Update statistics
641        let category = anomaly_type.category().to_string();
642        let type_name = anomaly_type.type_name();
643
644        *self.stats.by_category.entry(category).or_insert(0) += 1;
645        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
646        *self
647            .stats
648            .by_company
649            .entry(entry.company_code().to_string())
650            .or_insert(0) += 1;
651
652        // Generate label
653        if self.config.generate_labels {
654            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
655
656            // Update entry header with anomaly tracking fields
657            entry.header.is_anomaly = true;
658            entry.header.anomaly_id = Some(anomaly_id.clone());
659            entry.header.anomaly_type = Some(type_name.clone());
660
661            // Also set fraud flag if this is a fraud anomaly
662            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
663                entry.header.is_fraud = true;
664                if let AnomalyType::Fraud(ref ft) = anomaly_type {
665                    entry.header.fraud_type = Some(*ft);
666                }
667            }
668
669            let mut label = LabeledAnomaly::new(
670                anomaly_id,
671                anomaly_type.clone(),
672                entry.document_number().clone(),
673                "JE".to_string(),
674                entry.company_code().to_string(),
675                entry.posting_date(),
676            )
677            .with_description(&result.description)
678            .with_injection_strategy(&type_name);
679
680            // Add causal reason with injection context (provenance tracking)
681            let causal_reason = AnomalyCausalReason::RandomRate {
682                base_rate: self.config.rates.total_rate,
683            };
684            label = label.with_causal_reason(causal_reason);
685
686            // Add entity context metadata if contexts are populated
687            let context_multiplier = self.calculate_context_rate_multiplier(entry);
688            if (context_multiplier - 1.0).abs() > f64::EPSILON {
689                label = label.with_metadata(
690                    "entity_context_multiplier",
691                    &format!("{context_multiplier:.3}"),
692                );
693                label = label.with_metadata(
694                    "effective_rate",
695                    &format!(
696                        "{:.6}",
697                        (self.config.rates.total_rate * context_multiplier).min(1.0)
698                    ),
699                );
700            }
701
702            // Add monetary impact
703            if let Some(impact) = result.monetary_impact {
704                label = label.with_monetary_impact(impact);
705            }
706
707            // Add related entities
708            for entity in &result.related_entities {
709                label = label.with_related_entity(entity);
710            }
711
712            // Add metadata
713            for (key, value) in &result.metadata {
714                label = label.with_metadata(key, value);
715            }
716
717            // Assign cluster and update causal reason if in cluster
718            if let Some(cluster_id) =
719                self.cluster_manager
720                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
721            {
722                label = label.with_cluster(&cluster_id);
723                // Update causal reason to reflect cluster membership
724                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
725                    cluster_id: cluster_id.clone(),
726                });
727            }
728
729            return Some(label);
730        }
731
732        None
733    }
734
735    /// Injects a specific anomaly type into an entry.
736    pub fn inject_specific(
737        &mut self,
738        entry: &mut JournalEntry,
739        anomaly_type: AnomalyType,
740    ) -> Option<LabeledAnomaly> {
741        self.inject_anomaly(entry, anomaly_type)
742    }
743
744    /// Creates a self-approval anomaly.
745    pub fn create_self_approval(
746        &mut self,
747        entry: &mut JournalEntry,
748        user_id: &str,
749    ) -> Option<LabeledAnomaly> {
750        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
751
752        let label = LabeledAnomaly::new(
753            format!("ANO{:08}", self.labels.len() + 1),
754            anomaly_type,
755            entry.document_number().clone(),
756            "JE".to_string(),
757            entry.company_code().to_string(),
758            entry.posting_date(),
759        )
760        .with_description(&format!("User {user_id} approved their own transaction"))
761        .with_related_entity(user_id)
762        .with_injection_strategy("ManualSelfApproval")
763        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
764            target_type: "User".to_string(),
765            target_id: user_id.to_string(),
766        });
767
768        // Set entry header anomaly tracking fields
769        entry.header.is_anomaly = true;
770        entry.header.is_fraud = true;
771        entry.header.anomaly_id = Some(label.anomaly_id.clone());
772        entry.header.anomaly_type = Some("SelfApproval".to_string());
773        entry.header.fraud_type = Some(FraudType::SelfApproval);
774
775        // Set approver = requester
776        entry.header.created_by = user_id.to_string();
777
778        self.labels.push(label.clone());
779        Some(label)
780    }
781
782    /// Creates a segregation of duties violation.
783    pub fn create_sod_violation(
784        &mut self,
785        entry: &mut JournalEntry,
786        user_id: &str,
787        conflicting_duties: (&str, &str),
788    ) -> Option<LabeledAnomaly> {
789        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
790
791        let label = LabeledAnomaly::new(
792            format!("ANO{:08}", self.labels.len() + 1),
793            anomaly_type,
794            entry.document_number().clone(),
795            "JE".to_string(),
796            entry.company_code().to_string(),
797            entry.posting_date(),
798        )
799        .with_description(&format!(
800            "User {} performed conflicting duties: {} and {}",
801            user_id, conflicting_duties.0, conflicting_duties.1
802        ))
803        .with_related_entity(user_id)
804        .with_metadata("duty1", conflicting_duties.0)
805        .with_metadata("duty2", conflicting_duties.1)
806        .with_injection_strategy("ManualSoDViolation")
807        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
808            target_type: "User".to_string(),
809            target_id: user_id.to_string(),
810        });
811
812        // Set entry header anomaly tracking fields
813        entry.header.is_anomaly = true;
814        entry.header.is_fraud = true;
815        entry.header.anomaly_id = Some(label.anomaly_id.clone());
816        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
817        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
818
819        self.labels.push(label.clone());
820        Some(label)
821    }
822
823    /// Creates an intercompany mismatch anomaly.
824    pub fn create_ic_mismatch(
825        &mut self,
826        entry: &mut JournalEntry,
827        matching_company: &str,
828        expected_amount: Decimal,
829        actual_amount: Decimal,
830    ) -> Option<LabeledAnomaly> {
831        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
832
833        let label = LabeledAnomaly::new(
834            format!("ANO{:08}", self.labels.len() + 1),
835            anomaly_type,
836            entry.document_number().clone(),
837            "JE".to_string(),
838            entry.company_code().to_string(),
839            entry.posting_date(),
840        )
841        .with_description(&format!(
842            "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
843        ))
844        .with_related_entity(matching_company)
845        .with_monetary_impact(actual_amount - expected_amount)
846        .with_metadata("expected_amount", &expected_amount.to_string())
847        .with_metadata("actual_amount", &actual_amount.to_string())
848        .with_injection_strategy("ManualICMismatch")
849        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
850            target_type: "Intercompany".to_string(),
851            target_id: matching_company.to_string(),
852        });
853
854        // Set entry header anomaly tracking fields
855        entry.header.is_anomaly = true;
856        entry.header.anomaly_id = Some(label.anomaly_id.clone());
857        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
858
859        self.labels.push(label.clone());
860        Some(label)
861    }
862
863    /// Returns all generated labels.
864    pub fn get_labels(&self) -> &[LabeledAnomaly] {
865        &self.labels
866    }
867
868    /// Returns the anomaly summary.
869    pub fn get_summary(&self) -> AnomalySummary {
870        AnomalySummary::from_anomalies(&self.labels)
871    }
872
873    /// Returns injection statistics.
874    pub fn get_stats(&self) -> &InjectorStats {
875        &self.stats
876    }
877
878    /// Clears all labels and resets statistics.
879    pub fn reset(&mut self) {
880        self.labels.clear();
881        self.document_anomaly_counts.clear();
882        self.stats = InjectorStats::default();
883        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
884
885        // Reset enhanced components
886        self.near_miss_labels.clear();
887        self.scheme_actions.clear();
888        self.difficulty_distribution.clear();
889
890        if let Some(ref mut baseline) = self.behavioral_baseline {
891            *baseline =
892                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
893        }
894    }
895
896    /// Returns the number of clusters created.
897    pub fn cluster_count(&self) -> usize {
898        self.cluster_manager.cluster_count()
899    }
900
901    // =========================================================================
902    // Entity Context API
903    // =========================================================================
904
905    /// Sets entity contexts for risk-adjusted anomaly injection.
906    ///
907    /// When entity contexts are provided, the injector adjusts anomaly injection
908    /// rates based on entity risk factors. Entries involving high-risk vendors,
909    /// new employees, or sensitive accounts will have higher effective injection
910    /// rates.
911    ///
912    /// Pass empty HashMaps to clear previously set contexts.
913    pub fn set_entity_contexts(
914        &mut self,
915        vendors: HashMap<String, VendorContext>,
916        employees: HashMap<String, EmployeeContext>,
917        accounts: HashMap<String, AccountContext>,
918    ) {
919        self.vendor_contexts = vendors;
920        self.employee_contexts = employees;
921        self.account_contexts = accounts;
922    }
923
924    /// Returns a reference to the vendor context map.
925    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
926        &self.vendor_contexts
927    }
928
929    /// Returns a reference to the employee context map.
930    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
931        &self.employee_contexts
932    }
933
934    /// Returns a reference to the account context map.
935    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
936        &self.account_contexts
937    }
938
939    /// Calculates a rate multiplier from the entity context maps alone (no
940    /// `EntityAwareInjector` needed). This provides a lightweight fallback
941    /// when context-aware injection is not fully enabled but context maps
942    /// have been populated.
943    ///
944    /// The multiplier is the product of individual entity risk factors found
945    /// in the context maps for the given journal entry. If no contexts match,
946    /// returns 1.0 (no adjustment).
947    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
948        if self.vendor_contexts.is_empty()
949            && self.employee_contexts.is_empty()
950            && self.account_contexts.is_empty()
951        {
952            return 1.0;
953        }
954
955        let mut multiplier = 1.0;
956
957        // Vendor lookup via reference field
958        if let Some(ref vendor_ref) = entry.header.reference {
959            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
960                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
961                if ctx.is_new {
962                    multiplier *= 2.0;
963                }
964                if ctx.is_dormant_reactivation {
965                    multiplier *= 1.5;
966                }
967            }
968        }
969
970        // Employee lookup via created_by
971        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
972            if ctx.is_new {
973                multiplier *= 1.5;
974            }
975            if ctx.is_volume_fatigued {
976                multiplier *= 1.3;
977            }
978            if ctx.is_overtime {
979                multiplier *= 1.2;
980            }
981        }
982
983        // Account lookup via first line's GL account
984        if let Some(first_line) = entry.lines.first() {
985            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
986                if ctx.is_high_risk {
987                    multiplier *= 2.0;
988                }
989            }
990        }
991
992        multiplier
993    }
994
995    // =========================================================================
996    // Enhanced Features API (v0.3.0+)
997    // =========================================================================
998
999    /// Advances all active fraud schemes by one time step.
1000    ///
1001    /// Call this method once per simulated day to generate scheme actions.
1002    /// Returns the scheme actions generated for this date.
1003    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1004        if let Some(ref mut advancer) = self.scheme_advancer {
1005            let context = SchemeContext::new(date, company_code);
1006            let actions = advancer.advance_all(&context);
1007            self.scheme_actions.extend(actions.clone());
1008            actions
1009        } else {
1010            Vec::new()
1011        }
1012    }
1013
1014    /// Potentially starts a new fraud scheme based on probabilities.
1015    ///
1016    /// Call this method periodically (e.g., once per period) to allow new
1017    /// schemes to start based on configured probabilities.
1018    /// Returns the scheme ID if a scheme was started.
1019    pub fn maybe_start_scheme(
1020        &mut self,
1021        date: NaiveDate,
1022        company_code: &str,
1023        available_users: Vec<String>,
1024        available_accounts: Vec<String>,
1025        available_counterparties: Vec<String>,
1026    ) -> Option<uuid::Uuid> {
1027        if let Some(ref mut advancer) = self.scheme_advancer {
1028            let mut context = SchemeContext::new(date, company_code);
1029            context.available_users = available_users;
1030            context.available_accounts = available_accounts;
1031            context.available_counterparties = available_counterparties;
1032
1033            advancer.maybe_start_scheme(&context)
1034        } else {
1035            None
1036        }
1037    }
1038
1039    /// Returns all near-miss labels generated.
1040    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1041        &self.near_miss_labels
1042    }
1043
1044    /// Returns all scheme actions generated.
1045    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1046        &self.scheme_actions
1047    }
1048
1049    /// Returns the detection difficulty distribution.
1050    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
1051        &self.difficulty_distribution
1052    }
1053
1054    /// Checks for behavioral deviations for an entity with an observation.
1055    pub fn check_behavioral_deviations(
1056        &self,
1057        entity_id: &str,
1058        observation: &super::context::Observation,
1059    ) -> Vec<super::context::BehavioralDeviation> {
1060        if let Some(ref baseline) = self.behavioral_baseline {
1061            baseline.check_deviation(entity_id, observation)
1062        } else {
1063            Vec::new()
1064        }
1065    }
1066
1067    /// Gets the baseline for an entity.
1068    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1069        if let Some(ref baseline) = self.behavioral_baseline {
1070            baseline.get_baseline(entity_id)
1071        } else {
1072            None
1073        }
1074    }
1075
1076    /// Returns the number of active schemes.
1077    pub fn active_scheme_count(&self) -> usize {
1078        if let Some(ref advancer) = self.scheme_advancer {
1079            advancer.active_scheme_count()
1080        } else {
1081            0
1082        }
1083    }
1084
1085    /// Returns whether enhanced features are enabled.
1086    pub fn has_enhanced_features(&self) -> bool {
1087        self.scheme_advancer.is_some()
1088            || self.near_miss_generator.is_some()
1089            || self.difficulty_calculator.is_some()
1090            || self.entity_aware_injector.is_some()
1091    }
1092}
1093
1094/// Builder for AnomalyInjectorConfig.
1095pub struct AnomalyInjectorConfigBuilder {
1096    config: AnomalyInjectorConfig,
1097}
1098
1099impl AnomalyInjectorConfigBuilder {
1100    /// Creates a new builder with default configuration.
1101    pub fn new() -> Self {
1102        Self {
1103            config: AnomalyInjectorConfig::default(),
1104        }
1105    }
1106
1107    /// Sets the total anomaly rate.
1108    pub fn with_total_rate(mut self, rate: f64) -> Self {
1109        self.config.rates.total_rate = rate;
1110        self
1111    }
1112
1113    /// Sets the fraud rate (proportion of anomalies).
1114    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1115        self.config.rates.fraud_rate = rate;
1116        self
1117    }
1118
1119    /// Sets the error rate (proportion of anomalies).
1120    pub fn with_error_rate(mut self, rate: f64) -> Self {
1121        self.config.rates.error_rate = rate;
1122        self
1123    }
1124
1125    /// Sets the random seed.
1126    pub fn with_seed(mut self, seed: u64) -> Self {
1127        self.config.seed = seed;
1128        self
1129    }
1130
1131    /// Sets the temporal pattern.
1132    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1133        self.config.patterns.temporal_pattern = pattern;
1134        self
1135    }
1136
1137    /// Enables or disables label generation.
1138    pub fn with_labels(mut self, generate: bool) -> Self {
1139        self.config.generate_labels = generate;
1140        self
1141    }
1142
1143    /// Sets target companies.
1144    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1145        self.config.target_companies = companies;
1146        self
1147    }
1148
1149    /// Sets the date range.
1150    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1151        self.config.date_range = Some((start, end));
1152        self
1153    }
1154
1155    // =========================================================================
1156    // Enhanced Features Configuration (v0.3.0+)
1157    // =========================================================================
1158
1159    /// Enables multi-stage fraud scheme generation.
1160    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1161        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1162        self.config.enhanced.scheme_probability = probability;
1163        self
1164    }
1165
1166    /// Enables near-miss generation.
1167    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1168        self.config.enhanced.near_miss_enabled = enabled;
1169        self.config.enhanced.near_miss_proportion = proportion;
1170        self
1171    }
1172
1173    /// Sets approval thresholds for threshold-proximity near-misses.
1174    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1175        self.config.enhanced.approval_thresholds = thresholds;
1176        self
1177    }
1178
1179    /// Enables correlated anomaly injection.
1180    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1181        self.config.enhanced.correlated_injection_enabled = enabled;
1182        self
1183    }
1184
1185    /// Enables temporal clustering (period-end spikes).
1186    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1187        self.config.enhanced.temporal_clustering_enabled = enabled;
1188        self.config.enhanced.period_end_multiplier = multiplier;
1189        self
1190    }
1191
1192    /// Enables detection difficulty classification.
1193    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1194        self.config.enhanced.difficulty_classification_enabled = enabled;
1195        self
1196    }
1197
1198    /// Enables context-aware injection.
1199    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1200        self.config.enhanced.context_aware_enabled = enabled;
1201        self
1202    }
1203
1204    /// Sets behavioral baseline configuration.
1205    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1206        self.config.enhanced.behavioral_baseline_config = config;
1207        self
1208    }
1209
1210    /// Enables all enhanced features with default settings.
1211    pub fn with_all_enhanced_features(mut self) -> Self {
1212        self.config.enhanced.multi_stage_schemes_enabled = true;
1213        self.config.enhanced.scheme_probability = 0.02;
1214        self.config.enhanced.correlated_injection_enabled = true;
1215        self.config.enhanced.temporal_clustering_enabled = true;
1216        self.config.enhanced.period_end_multiplier = 2.5;
1217        self.config.enhanced.near_miss_enabled = true;
1218        self.config.enhanced.near_miss_proportion = 0.30;
1219        self.config.enhanced.difficulty_classification_enabled = true;
1220        self.config.enhanced.context_aware_enabled = true;
1221        self.config.enhanced.behavioral_baseline_config.enabled = true;
1222        self
1223    }
1224
1225    /// Builds the configuration.
1226    pub fn build(self) -> AnomalyInjectorConfig {
1227        self.config
1228    }
1229}
1230
1231impl Default for AnomalyInjectorConfigBuilder {
1232    fn default() -> Self {
1233        Self::new()
1234    }
1235}
1236
1237#[cfg(test)]
1238#[allow(clippy::unwrap_used)]
1239mod tests {
1240    use super::*;
1241    use chrono::NaiveDate;
1242    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1243    use rust_decimal_macros::dec;
1244
1245    fn create_test_entry(doc_num: &str) -> JournalEntry {
1246        let mut entry = JournalEntry::new_simple(
1247            doc_num.to_string(),
1248            "1000".to_string(),
1249            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1250            "Test Entry".to_string(),
1251        );
1252
1253        entry.add_line(JournalEntryLine {
1254            line_number: 1,
1255            gl_account: "5000".to_string(),
1256            debit_amount: dec!(1000),
1257            ..Default::default()
1258        });
1259
1260        entry.add_line(JournalEntryLine {
1261            line_number: 2,
1262            gl_account: "1000".to_string(),
1263            credit_amount: dec!(1000),
1264            ..Default::default()
1265        });
1266
1267        entry
1268    }
1269
#[test]
fn test_anomaly_injector_basic() {
    // A 50% rate is high enough that a 100-entry batch should contain
    // anomalies.
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries = Vec::with_capacity(100);
    for i in 0..100 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let result = injector.process_entries(&mut entries);

    // Something was injected, and every injection produced exactly one label.
    assert!(result.anomalies_injected > 0);
    assert!(!result.labels.is_empty());
    assert_eq!(result.labels.len(), result.anomalies_injected);
}
1290
#[test]
fn test_specific_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let requested = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
    let outcome = injector.inject_specific(&mut entry, requested);

    assert!(outcome.is_some());
    let label = outcome.unwrap();

    // The label must point back at the entry it was injected into; the id
    // is the UUID string taken from the journal entry header.
    assert!(!label.document_id.is_empty());
    assert_eq!(label.document_id, entry.document_number());
}
1307
#[test]
fn test_self_approval_injection() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());
    let mut entry = create_test_entry("JE001");

    let outcome = injector.create_self_approval(&mut entry, "USER001");
    assert!(outcome.is_some());

    let label = outcome.unwrap();
    // The label is typed as self-approval fraud ...
    assert!(matches!(
        label.anomaly_type,
        AnomalyType::Fraud(FraudType::SelfApproval)
    ));
    // ... and lists the approving user as a related entity.
    assert!(label
        .related_entities
        .iter()
        .any(|entity| entity == "USER001"));
}
1324
#[test]
fn test_company_filtering() {
    // Rate 1.0 would hit every entry, but only company 2000 is targeted.
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(1.0)
            .with_target_companies(vec!["2000".to_string()])
            .build(),
    );

    // Both entries belong to company 1000.
    let mut entries: Vec<JournalEntry> = ["JE001", "JE002"]
        .iter()
        .map(|doc_num| create_test_entry(doc_num))
        .collect();

    let result = injector.process_entries(&mut entries);

    // Nothing is injected because no entry is in the targeted company.
    assert_eq!(result.anomalies_injected, 0);
}
1344
1345    // =========================================================================
1346    // Entity Context Tests
1347    // =========================================================================
1348
1349    /// Helper to create a test entry with specific vendor reference and employee.
1350    fn create_test_entry_with_context(
1351        doc_num: &str,
1352        vendor_ref: Option<&str>,
1353        employee_id: &str,
1354        gl_account: &str,
1355    ) -> JournalEntry {
1356        let mut entry = JournalEntry::new_simple(
1357            doc_num.to_string(),
1358            "1000".to_string(),
1359            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1360            "Test Entry".to_string(),
1361        );
1362
1363        entry.header.reference = vendor_ref.map(|v| v.to_string());
1364        entry.header.created_by = employee_id.to_string();
1365
1366        entry.add_line(JournalEntryLine {
1367            line_number: 1,
1368            gl_account: gl_account.to_string(),
1369            debit_amount: dec!(1000),
1370            ..Default::default()
1371        });
1372
1373        entry.add_line(JournalEntryLine {
1374            line_number: 2,
1375            gl_account: "1000".to_string(),
1376            credit_amount: dec!(1000),
1377            ..Default::default()
1378        });
1379
1380        entry
1381    }
1382
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // A fresh injector holds no contexts at all.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    // One entry per context kind.
    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendors, employees, accounts);

    // Each map now holds exactly the entry that was supplied.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1433
#[test]
fn test_default_behavior_no_contexts() {
    // With no entity contexts registered the base rate applies as-is.
    let mut injector = AnomalyInjector::new(
        AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build(),
    );

    let mut entries = Vec::with_capacity(200);
    for i in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let result = injector.process_entries(&mut entries);
    assert!(result.anomalies_injected > 0);

    // The observed rate should land near 50%; the band is deliberately wide
    // to absorb randomness.
    let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
    assert!(
        0.2 < rate && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1460
#[test]
fn test_entity_context_increases_injection_rate() {
    // High-risk entity contexts should raise the effective rate above the
    // base rate, so the same batch yields more anomalies once they are set.
    let base_rate = 0.10; // low on purpose, so the increase is visible

    // Both runs share rate and seed; only the registered contexts differ.
    let new_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };
    let new_batch = || -> Vec<JournalEntry> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Run 1: no contexts registered.
    let mut injector_no_ctx = new_injector();
    let mut entries_no_ctx = new_batch();
    let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);

    // Run 2: every entity the entries reference is flagged as risky.
    let mut injector_ctx = new_injector();

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,                  // 2.0x multiplier
            is_dormant_reactivation: true, // 1.5x multiplier
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true, // 1.5x multiplier
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true, // 2.0x multiplier
            ..Default::default()
        },
    )]);
    injector_ctx.set_entity_contexts(vendors, employees, accounts);

    let mut entries_ctx = new_batch();
    let result_ctx = injector_ctx.process_entries(&mut entries_ctx);

    // The run with contexts must inject strictly more anomalies.
    assert!(
        result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        result_ctx.anomalies_injected,
        result_no_ctx.anomalies_injected,
    );
}
1551
#[test]
fn test_risk_score_multiplication() {
    // Checks the exact values produced by calculate_context_rate_multiplier.
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing registered yet: the multiplier is the neutral 1.0.
    let plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let neutral = injector.calculate_context_rate_multiplier(&plain);
    assert!((neutral - 1.0).abs() < f64::EPSILON,);

    // Register one new vendor (2.0x) and one high-risk account (2.0x).
    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Vendor and account both match: 2.0 * 2.0 = 4.0.
    let both = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&both);
    assert!(
        (multiplier - 4.0).abs() < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Only the vendor matches; account 5000 is not registered.
    let vendor_only = create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only);
    assert!(
        (multiplier_vendor - 2.0).abs() < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor nor the account is registered.
    let unmatched = create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched);
    assert!(
        (multiplier_none - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1616
1617    #[test]
1618    fn test_employee_context_multiplier() {
1619        let config = AnomalyInjectorConfig::default();
1620        let mut injector = AnomalyInjector::new(config);
1621
1622        let mut employees = HashMap::new();
1623        employees.insert(
1624            "EMP_NEW".to_string(),
1625            EmployeeContext {
1626                employee_id: "EMP_NEW".to_string(),
1627                is_new: true,             // 1.5x
1628                is_volume_fatigued: true, // 1.3x
1629                is_overtime: true,        // 1.2x
1630                ..Default::default()
1631            },
1632        );
1633
1634        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1635
1636        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1637        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1638
1639        // 1.5 * 1.3 * 1.2 = 2.34
1640        let expected = 1.5 * 1.3 * 1.2;
1641        assert!(
1642            (multiplier - expected).abs() < 0.01,
1643            "Expected {:.3}x multiplier, got {:.3}",
1644            expected,
1645            multiplier,
1646        );
1647    }
1648
1649    #[test]
1650    fn test_entity_contexts_persist_across_reset() {
1651        let config = AnomalyInjectorConfig::default();
1652        let mut injector = AnomalyInjector::new(config);
1653
1654        let mut vendors = HashMap::new();
1655        vendors.insert(
1656            "V001".to_string(),
1657            VendorContext {
1658                vendor_id: "V001".to_string(),
1659                is_new: true,
1660                ..Default::default()
1661            },
1662        );
1663
1664        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1665        assert_eq!(injector.vendor_contexts().len(), 1);
1666
1667        // Reset clears labels and stats but not entity contexts
1668        injector.reset();
1669        assert_eq!(injector.vendor_contexts().len(), 1);
1670    }
1671
1672    #[test]
1673    fn test_set_empty_contexts_clears() {
1674        let config = AnomalyInjectorConfig::default();
1675        let mut injector = AnomalyInjector::new(config);
1676
1677        let mut vendors = HashMap::new();
1678        vendors.insert(
1679            "V001".to_string(),
1680            VendorContext {
1681                vendor_id: "V001".to_string(),
1682                ..Default::default()
1683            },
1684        );
1685
1686        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1687        assert_eq!(injector.vendor_contexts().len(), 1);
1688
1689        // Setting empty maps clears
1690        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1691        assert!(injector.vendor_contexts().is_empty());
1692    }
1693
1694    #[test]
1695    fn test_dormant_vendor_multiplier() {
1696        let config = AnomalyInjectorConfig::default();
1697        let mut injector = AnomalyInjector::new(config);
1698
1699        let mut vendors = HashMap::new();
1700        vendors.insert(
1701            "V_DORMANT".to_string(),
1702            VendorContext {
1703                vendor_id: "V_DORMANT".to_string(),
1704                is_dormant_reactivation: true, // 1.5x
1705                ..Default::default()
1706            },
1707        );
1708
1709        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1710
1711        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1712        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1713        assert!(
1714            (multiplier - 1.5).abs() < f64::EPSILON,
1715            "Expected 1.5x multiplier for dormant vendor, got {}",
1716            multiplier,
1717        );
1718    }
1719}