datasynth_generators/anomaly/
injector.rs

1//! Main anomaly injection engine.
2//!
3//! The injector coordinates anomaly generation across all data types,
4//! managing rates, patterns, clustering, and label generation.
5//!
6//! ## Enhanced Features (v0.3.0+)
7//!
8//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
9//! - **Correlated injection**: Co-occurrence patterns and error cascades
10//! - **Near-miss generation**: Suspicious but legitimate transactions
11//! - **Detection difficulty classification**: Trivial to expert levels
12//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::models::{
23    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
24    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
25    RelationalAnomalyType,
26};
27use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
28
29use super::context::{
30    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
31    EntityAwareInjector, VendorContext,
32};
33use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
34use super::difficulty::DifficultyCalculator;
35use super::near_miss::{NearMissConfig, NearMissGenerator};
36use super::patterns::{
37    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
38    TemporalPattern,
39};
40use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
41use super::schemes::{SchemeAction, SchemeContext};
42use super::strategies::{DuplicationStrategy, StrategyCollection};
43use super::types::AnomalyTypeSelector;
44
45/// Configuration for the anomaly injector.
46#[derive(Debug, Clone)]
47pub struct AnomalyInjectorConfig {
48    /// Rate configuration.
49    pub rates: AnomalyRateConfig,
50    /// Pattern configuration.
51    pub patterns: AnomalyPatternConfig,
52    /// Random seed for reproducibility.
53    pub seed: u64,
54    /// Whether to generate labels.
55    pub generate_labels: bool,
56    /// Whether to allow duplicate injection.
57    pub allow_duplicates: bool,
58    /// Maximum anomalies per document.
59    pub max_anomalies_per_document: usize,
60    /// Company codes to target (empty = all).
61    pub target_companies: Vec<String>,
62    /// Date range for injection.
63    pub date_range: Option<(NaiveDate, NaiveDate)>,
64    /// Enhanced features configuration.
65    pub enhanced: EnhancedInjectionConfig,
66}
67
68/// Enhanced injection configuration for v0.3.0+ features.
69#[derive(Debug, Clone, Default)]
70pub struct EnhancedInjectionConfig {
71    /// Enable multi-stage fraud scheme generation.
72    pub multi_stage_schemes_enabled: bool,
73    /// Probability of starting a new scheme per perpetrator per year.
74    pub scheme_probability: f64,
75    /// Enable correlated anomaly injection.
76    pub correlated_injection_enabled: bool,
77    /// Enable temporal clustering (period-end spikes).
78    pub temporal_clustering_enabled: bool,
79    /// Period-end anomaly rate multiplier.
80    pub period_end_multiplier: f64,
81    /// Enable near-miss generation.
82    pub near_miss_enabled: bool,
83    /// Proportion of anomalies that are near-misses.
84    pub near_miss_proportion: f64,
85    /// Approval thresholds for threshold-proximity near-misses.
86    pub approval_thresholds: Vec<Decimal>,
87    /// Enable detection difficulty classification.
88    pub difficulty_classification_enabled: bool,
89    /// Enable context-aware injection.
90    pub context_aware_enabled: bool,
91    /// Behavioral baseline configuration.
92    pub behavioral_baseline_config: BehavioralBaselineConfig,
93    /// Behavioral bias applied to fraud entries so canonical forensic
94    /// signals (weekend posting, round-dollar amounts, off-hours posting,
95    /// post-close adjustments) show measurable lift on fraud vs legitimate
96    /// populations. Defaults enable all four biases.
97    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
98}
99
/// Probabilities with which fraud entries are nudged towards canonical
/// forensic signals.
///
/// Fraud entries otherwise inherit the normal temporal and amount
/// distributions, so a detector trained on DataSynth output would see ~0
/// feature importance for the weekend / round-amount / off-hours /
/// post-close flags. These biases make those signals learnable.
#[derive(Debug, Clone)]
pub struct FraudBehavioralBiasConfig {
    /// Master switch; no behavioral bias is applied while this is `false`.
    pub enabled: bool,
    /// Chance that a fraud entry's posting date is moved onto a weekend.
    /// Normal data has ~10 % weekend activity, so 0.30 gives ~3× lift.
    pub weekend_bias: f64,
    /// Chance that a fraud entry's amount is rounded to a "suspicious"
    /// round-dollar value ($1 K, $5 K, $10 K, $25 K, $50 K, $100 K).
    pub round_dollar_bias: f64,
    /// Chance that a fraud entry's `created_at` is moved into off-hours
    /// (22:00–05:59 UTC). The baseline after-hours probability is ~5 %.
    pub off_hours_bias: f64,
    /// Chance that a fraud entry is flagged `is_post_close = true`.
    pub post_close_bias: f64,
}

impl Default for FraudBehavioralBiasConfig {
    /// Returns a configuration with the master switch on and all four
    /// biases set to their stock probabilities.
    fn default() -> Self {
        FraudBehavioralBiasConfig {
            enabled: true,
            weekend_bias: 0.30,
            round_dollar_bias: 0.40,
            off_hours_bias: 0.35,
            post_close_bias: 0.25,
        }
    }
}
132
133impl Default for AnomalyInjectorConfig {
134    fn default() -> Self {
135        Self {
136            rates: AnomalyRateConfig::default(),
137            patterns: AnomalyPatternConfig::default(),
138            seed: 42,
139            generate_labels: true,
140            allow_duplicates: true,
141            max_anomalies_per_document: 2,
142            target_companies: Vec::new(),
143            date_range: None,
144            enhanced: EnhancedInjectionConfig::default(),
145        }
146    }
147}
148
149/// Result of an injection batch.
150#[derive(Debug, Clone)]
151pub struct InjectionBatchResult {
152    /// Number of entries processed.
153    pub entries_processed: usize,
154    /// Number of anomalies injected.
155    pub anomalies_injected: usize,
156    /// Number of duplicates created.
157    pub duplicates_created: usize,
158    /// Labels generated.
159    pub labels: Vec<LabeledAnomaly>,
160    /// Summary of anomalies.
161    pub summary: AnomalySummary,
162    /// Entries that were modified (document numbers).
163    pub modified_documents: Vec<String>,
164    /// Near-miss labels (suspicious but legitimate transactions).
165    pub near_miss_labels: Vec<NearMissLabel>,
166    /// Multi-stage scheme actions generated.
167    pub scheme_actions: Vec<SchemeAction>,
168    /// Difficulty distribution summary.
169    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
170}
171
172/// Main anomaly injection engine.
173pub struct AnomalyInjector {
174    config: AnomalyInjectorConfig,
175    rng: ChaCha8Rng,
176    uuid_factory: DeterministicUuidFactory,
177    type_selector: AnomalyTypeSelector,
178    strategies: StrategyCollection,
179    cluster_manager: ClusterManager,
180    /// Selects target entities for anomaly injection (RepeatOffender, etc.).
181    entity_targeting: EntityTargetingManager,
182    /// Tracking which documents already have anomalies.
183    document_anomaly_counts: HashMap<String, usize>,
184    /// All generated labels.
185    labels: Vec<LabeledAnomaly>,
186    /// Statistics.
187    stats: InjectorStats,
188    // Enhanced components (v0.3.0+)
189    /// Multi-stage fraud scheme advancer.
190    scheme_advancer: Option<SchemeAdvancer>,
191    /// Near-miss generator.
192    near_miss_generator: Option<NearMissGenerator>,
193    /// Near-miss labels generated.
194    near_miss_labels: Vec<NearMissLabel>,
195    /// Drives correlated anomaly pairs (e.g., FictitiousVendor + InvoiceManipulation).
196    co_occurrence_handler: Option<AnomalyCoOccurrence>,
197    /// Queued correlated anomalies waiting to be injected.
198    queued_co_occurrences: Vec<QueuedAnomaly>,
199    /// Groups anomalies into temporal bursts during period-end windows.
200    temporal_cluster_generator: Option<TemporalClusterGenerator>,
201    /// Difficulty calculator.
202    difficulty_calculator: Option<DifficultyCalculator>,
203    /// Entity-aware injector.
204    entity_aware_injector: Option<EntityAwareInjector>,
205    /// Behavioral baseline tracker.
206    behavioral_baseline: Option<BehavioralBaseline>,
207    /// Scheme actions generated.
208    scheme_actions: Vec<SchemeAction>,
209    /// Difficulty distribution.
210    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
211    // Entity context lookup maps for risk-adjusted injection rates
212    /// Vendor contexts keyed by vendor ID.
213    vendor_contexts: HashMap<String, VendorContext>,
214    /// Employee contexts keyed by employee ID.
215    employee_contexts: HashMap<String, EmployeeContext>,
216    /// Account contexts keyed by account code.
217    account_contexts: HashMap<String, AccountContext>,
218}
219
/// Counters describing what the injector has done so far.
///
/// All counters start at zero and all maps start empty (derived `Default`).
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// How many entries have been processed in total.
    pub total_processed: usize,
    /// How many anomalies have been injected in total.
    pub total_injected: usize,
    /// Injected anomalies per category (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Injected anomalies per specific type name.
    pub by_type: HashMap<String, usize>,
    /// Injected anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped because of the rate check.
    pub skipped_rate: usize,
    /// Entries skipped because of the date range filter.
    pub skipped_date: usize,
    /// Entries skipped because of the company filter.
    pub skipped_company: usize,
    /// Entries skipped because of the max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
    /// Fraud entries to which the weekend-posting bias was applied.
    pub fraud_weekend_bias_applied: usize,
    /// Fraud entries to which the round-dollar amount bias was applied.
    pub fraud_round_dollar_bias_applied: usize,
    /// Fraud entries to which the off-hours `created_at` bias was applied.
    pub fraud_off_hours_bias_applied: usize,
    /// Fraud entries to which the post-close marking bias was applied.
    pub fraud_post_close_bias_applied: usize,
}
250
251/// A correlated anomaly queued for future injection.
252struct QueuedAnomaly {
253    /// Anomaly type to inject.
254    anomaly_type: AnomalyType,
255    /// Target entity (if same_entity was specified in the co-occurrence pattern).
256    target_entity: Option<String>,
257    /// Earliest date this can be injected.
258    earliest_date: NaiveDate,
259    /// Description from the co-occurrence pattern.
260    description: String,
261}
262
263impl AnomalyInjector {
264    /// Creates a new anomaly injector.
265    pub fn new(config: AnomalyInjectorConfig) -> Self {
266        let mut rng = seeded_rng(config.seed, 0);
267        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
268        let entity_targeting =
269            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
270
271        // Initialize enhanced components based on configuration
272        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
273            let scheme_config = SchemeAdvancerConfig {
274                embezzlement_probability: config.enhanced.scheme_probability,
275                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
276                kickback_probability: config.enhanced.scheme_probability * 0.5,
277                seed: rng.random(),
278                ..Default::default()
279            };
280            Some(SchemeAdvancer::new(scheme_config))
281        } else {
282            None
283        };
284
285        let near_miss_generator = if config.enhanced.near_miss_enabled {
286            let near_miss_config = NearMissConfig {
287                proportion: config.enhanced.near_miss_proportion,
288                seed: rng.random(),
289                ..Default::default()
290            };
291            Some(NearMissGenerator::new(near_miss_config))
292        } else {
293            None
294        };
295
296        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
297            Some(AnomalyCoOccurrence::new())
298        } else {
299            None
300        };
301
302        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
303            Some(TemporalClusterGenerator::new())
304        } else {
305            None
306        };
307
308        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
309            Some(DifficultyCalculator::new())
310        } else {
311            None
312        };
313
314        let entity_aware_injector = if config.enhanced.context_aware_enabled {
315            Some(EntityAwareInjector::default())
316        } else {
317            None
318        };
319
320        let behavioral_baseline = if config.enhanced.context_aware_enabled
321            && config.enhanced.behavioral_baseline_config.enabled
322        {
323            Some(BehavioralBaseline::new(
324                config.enhanced.behavioral_baseline_config.clone(),
325            ))
326        } else {
327            None
328        };
329
330        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
331
332        Self {
333            config,
334            rng,
335            uuid_factory,
336            type_selector: AnomalyTypeSelector::new(),
337            strategies: StrategyCollection::default(),
338            cluster_manager,
339            entity_targeting,
340            document_anomaly_counts: HashMap::new(),
341            labels: Vec::new(),
342            stats: InjectorStats::default(),
343            scheme_advancer,
344            near_miss_generator,
345            near_miss_labels: Vec::new(),
346            co_occurrence_handler,
347            queued_co_occurrences: Vec::new(),
348            temporal_cluster_generator,
349            difficulty_calculator,
350            entity_aware_injector,
351            behavioral_baseline,
352            scheme_actions: Vec::new(),
353            difficulty_distribution: HashMap::new(),
354            vendor_contexts: HashMap::new(),
355            employee_contexts: HashMap::new(),
356            account_contexts: HashMap::new(),
357        }
358    }
359
360    /// Processes a batch of journal entries, potentially injecting anomalies.
361    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
362        debug!(
363            entry_count = entries.len(),
364            total_rate = self.config.rates.total_rate,
365            seed = self.config.seed,
366            "Injecting anomalies into journal entries"
367        );
368
369        let mut modified_documents = Vec::new();
370        let mut duplicates = Vec::new();
371
372        for entry in entries.iter_mut() {
373            self.stats.total_processed += 1;
374
375            // Update behavioral baseline if enabled
376            if let Some(ref mut baseline) = self.behavioral_baseline {
377                use super::context::Observation;
378                // Record the observation for baseline building
379                let entity_id = entry.header.created_by.clone();
380                let observation =
381                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
382                baseline.record_observation(&entity_id, observation);
383            }
384
385            // Check if we should process this entry
386            if !self.should_process(entry) {
387                continue;
388            }
389
390            // --- Check queued co-occurrences first ---
391            let entry_date = entry.posting_date();
392            let ready_indices: Vec<usize> = self
393                .queued_co_occurrences
394                .iter()
395                .enumerate()
396                .filter(|(_, q)| entry_date >= q.earliest_date)
397                .map(|(i, _)| i)
398                .collect();
399
400            if let Some(&idx) = ready_indices.first() {
401                let queued = self.queued_co_occurrences.remove(idx);
402                if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
403                    label = label.with_metadata("co_occurrence", "true");
404                    label = label.with_metadata("co_occurrence_description", &queued.description);
405                    if let Some(ref target) = queued.target_entity {
406                        label = label.with_related_entity(target);
407                        label = label.with_metadata("co_occurrence_target", target);
408                    }
409                    modified_documents.push(entry.document_number().clone());
410                    self.labels.push(label);
411                    self.stats.total_injected += 1;
412                }
413                continue; // This entry was used for a queued co-occurrence
414            }
415
416            // Calculate effective rate
417            let base_rate = self.config.rates.total_rate;
418
419            // Calculate entity-aware rate adjustment using context lookup maps
420            let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
421                let employee_id = &entry.header.created_by;
422                let first_account = entry
423                    .lines
424                    .first()
425                    .map(|l| l.gl_account.as_str())
426                    .unwrap_or("");
427                // Look up vendor from the entry's reference field (vendor ID convention)
428                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
429
430                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
431                let employee_ctx = self.employee_contexts.get(employee_id);
432                let account_ctx = self.account_contexts.get(first_account);
433
434                let multiplier =
435                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
436                (base_rate * multiplier).min(1.0)
437            } else {
438                // No entity-aware injector: fall back to context maps alone
439                self.calculate_context_rate_multiplier(entry) * base_rate
440            };
441
442            // --- Temporal clustering: boost rate during period-end windows ---
443            if let Some(ref tcg) = self.temporal_cluster_generator {
444                let temporal_multiplier = tcg
445                    .get_active_clusters(entry_date)
446                    .iter()
447                    .map(|c| c.rate_multiplier)
448                    .fold(1.0_f64, f64::max);
449                effective_rate = (effective_rate * temporal_multiplier).min(1.0);
450            }
451
452            // Determine if we inject an anomaly
453            if should_inject_anomaly(
454                effective_rate,
455                entry_date,
456                &self.config.patterns.temporal_pattern,
457                &mut self.rng,
458            ) {
459                // Check if this should be a near-miss instead
460                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
461                    // Record the transaction for near-duplicate detection
462                    let account = entry
463                        .lines
464                        .first()
465                        .map(|l| l.gl_account.clone())
466                        .unwrap_or_default();
467                    near_miss_gen.record_transaction(
468                        entry.document_number().clone(),
469                        entry_date,
470                        entry.total_debit(),
471                        &account,
472                        None,
473                    );
474
475                    // Check if this could be a near-miss
476                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
477                        entry.document_number().clone(),
478                        entry_date,
479                        entry.total_debit(),
480                        &account,
481                        None,
482                        &self.config.enhanced.approval_thresholds,
483                    ) {
484                        self.near_miss_labels.push(near_miss_label);
485                        continue; // Skip actual anomaly injection
486                    }
487                }
488
489                // Select anomaly category based on rates
490                let anomaly_type = self.select_anomaly_category();
491
492                // --- Entity targeting: select and track target entity ---
493                let target_entity = {
494                    let mut candidates: Vec<String> =
495                        self.vendor_contexts.keys().cloned().collect();
496                    candidates.extend(self.employee_contexts.keys().cloned());
497                    if candidates.is_empty() {
498                        // Fall back to entry's reference field as a candidate
499                        if let Some(ref r) = entry.header.reference {
500                            candidates.push(r.clone());
501                        }
502                    }
503                    self.entity_targeting
504                        .select_entity(&candidates, &mut self.rng)
505                };
506
507                // Apply the anomaly
508                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
509                    // Add entity targeting metadata
510                    if let Some(ref entity_id) = target_entity {
511                        label = label.with_metadata("entity_target", entity_id);
512                        label = label.with_related_entity(entity_id);
513                        label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
514                            target_type: "Entity".to_string(),
515                            target_id: entity_id.clone(),
516                        });
517                    }
518
519                    // Calculate detection difficulty if enabled
520                    if let Some(ref calculator) = self.difficulty_calculator {
521                        let difficulty = calculator.calculate(&label);
522
523                        // Store difficulty in metadata
524                        label =
525                            label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
526                        label = label.with_metadata(
527                            "difficulty_score",
528                            &difficulty.difficulty_score().to_string(),
529                        );
530
531                        // Update difficulty distribution
532                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
533                    }
534
535                    modified_documents.push(entry.document_number().clone());
536                    self.labels.push(label);
537                    self.stats.total_injected += 1;
538
539                    // --- Co-occurrence: queue correlated anomalies ---
540                    if let Some(ref co_occ) = self.co_occurrence_handler {
541                        let correlated =
542                            co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
543                        for result in correlated {
544                            self.queued_co_occurrences.push(QueuedAnomaly {
545                                anomaly_type: result.anomaly_type,
546                                target_entity: if result.same_entity {
547                                    target_entity.clone()
548                                } else {
549                                    None
550                                },
551                                earliest_date: entry_date
552                                    + chrono::Duration::days(i64::from(result.lag_days)),
553                                description: result.description,
554                            });
555                        }
556                    }
557                }
558
559                // Check for duplicate injection
560                if self.config.allow_duplicates
561                    && matches!(
562                        self.labels.last().map(|l| &l.anomaly_type),
563                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
564                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
565                    )
566                {
567                    let dup_strategy = DuplicationStrategy::default();
568                    let duplicate =
569                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
570                    duplicates.push(duplicate);
571                }
572            }
573        }
574
575        // Count duplicates
576        let duplicates_created = duplicates.len();
577
578        // Build summary
579        let summary = AnomalySummary::from_anomalies(&self.labels);
580
581        InjectionBatchResult {
582            entries_processed: self.stats.total_processed,
583            anomalies_injected: self.stats.total_injected,
584            duplicates_created,
585            labels: self.labels.clone(),
586            summary,
587            modified_documents,
588            near_miss_labels: self.near_miss_labels.clone(),
589            scheme_actions: self.scheme_actions.clone(),
590            difficulty_distribution: self.difficulty_distribution.clone(),
591        }
592    }
593
594    /// Checks if an entry should be processed.
595    fn should_process(&mut self, entry: &JournalEntry) -> bool {
596        // Check company filter
597        if !self.config.target_companies.is_empty()
598            && !self
599                .config
600                .target_companies
601                .iter()
602                .any(|c| c == entry.company_code())
603        {
604            self.stats.skipped_company += 1;
605            return false;
606        }
607
608        // Check date range
609        if let Some((start, end)) = self.config.date_range {
610            if entry.posting_date() < start || entry.posting_date() > end {
611                self.stats.skipped_date += 1;
612                return false;
613            }
614        }
615
616        // Check max anomalies per document
617        let current_count = self
618            .document_anomaly_counts
619            .get(&entry.document_number())
620            .copied()
621            .unwrap_or(0);
622        if current_count >= self.config.max_anomalies_per_document {
623            self.stats.skipped_max_per_doc += 1;
624            return false;
625        }
626
627        true
628    }
629
630    /// Selects an anomaly category based on configured rates.
631    fn select_anomaly_category(&mut self) -> AnomalyType {
632        let r = self.rng.random::<f64>();
633        let rates = &self.config.rates;
634
635        let mut cumulative = 0.0;
636
637        cumulative += rates.fraud_rate;
638        if r < cumulative {
639            return self.type_selector.select_fraud(&mut self.rng);
640        }
641
642        cumulative += rates.error_rate;
643        if r < cumulative {
644            return self.type_selector.select_error(&mut self.rng);
645        }
646
647        cumulative += rates.process_issue_rate;
648        if r < cumulative {
649            return self.type_selector.select_process_issue(&mut self.rng);
650        }
651
652        cumulative += rates.statistical_rate;
653        if r < cumulative {
654            return self.type_selector.select_statistical(&mut self.rng);
655        }
656
657        self.type_selector.select_relational(&mut self.rng)
658    }
659
660    /// Injects an anomaly into an entry.
661    fn inject_anomaly(
662        &mut self,
663        entry: &mut JournalEntry,
664        anomaly_type: AnomalyType,
665    ) -> Option<LabeledAnomaly> {
666        // Check if strategy can be applied
667        if !self.strategies.can_apply(entry, &anomaly_type) {
668            return None;
669        }
670
671        // Apply the strategy
672        let result = self
673            .strategies
674            .apply_strategy(entry, &anomaly_type, &mut self.rng);
675
676        if !result.success {
677            return None;
678        }
679
680        // Update document anomaly count
681        *self
682            .document_anomaly_counts
683            .entry(entry.document_number().clone())
684            .or_insert(0) += 1;
685
686        // Update statistics
687        let category = anomaly_type.category().to_string();
688        let type_name = anomaly_type.type_name();
689
690        *self.stats.by_category.entry(category).or_insert(0) += 1;
691        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
692        *self
693            .stats
694            .by_company
695            .entry(entry.company_code().to_string())
696            .or_insert(0) += 1;
697
698        // Generate label
699        if self.config.generate_labels {
700            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
701
702            // Update entry header with anomaly tracking fields
703            entry.header.is_anomaly = true;
704            entry.header.anomaly_id = Some(anomaly_id.clone());
705            entry.header.anomaly_type = Some(type_name.clone());
706
707            // Also set fraud flag if this is a fraud anomaly
708            let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
709                Vec::new();
710            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
711                entry.header.is_fraud = true;
712                if let AnomalyType::Fraud(ref ft) = anomaly_type {
713                    entry.header.fraud_type = Some(*ft);
714                }
715                // Apply behavioral bias so forensic signals (weekend posting,
716                // round dollars, off-hours, post-close adjustments) are
717                // learnable from fraud-labeled data. The returned list of
718                // biases that fired is used below to emit secondary
719                // ProcessIssue labels.
720                secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
721            }
722
723            let mut label = LabeledAnomaly::new(
724                anomaly_id,
725                anomaly_type.clone(),
726                entry.document_number().clone(),
727                "JE".to_string(),
728                entry.company_code().to_string(),
729                entry.posting_date(),
730            )
731            .with_description(&result.description)
732            .with_injection_strategy(&type_name);
733
734            // Add causal reason with injection context (provenance tracking)
735            let causal_reason = AnomalyCausalReason::RandomRate {
736                base_rate: self.config.rates.total_rate,
737            };
738            label = label.with_causal_reason(causal_reason);
739
740            // Add entity context metadata if contexts are populated
741            let context_multiplier = self.calculate_context_rate_multiplier(entry);
742            if (context_multiplier - 1.0).abs() > f64::EPSILON {
743                label = label.with_metadata(
744                    "entity_context_multiplier",
745                    &format!("{context_multiplier:.3}"),
746                );
747                label = label.with_metadata(
748                    "effective_rate",
749                    &format!(
750                        "{:.6}",
751                        (self.config.rates.total_rate * context_multiplier).min(1.0)
752                    ),
753                );
754            }
755
756            // Add monetary impact
757            if let Some(impact) = result.monetary_impact {
758                label = label.with_monetary_impact(impact);
759            }
760
761            // Add related entities
762            for entity in &result.related_entities {
763                label = label.with_related_entity(entity);
764            }
765
766            // Add metadata
767            for (key, value) in &result.metadata {
768                label = label.with_metadata(key, value);
769            }
770
771            // Assign cluster and update causal reason if in cluster
772            if let Some(cluster_id) =
773                self.cluster_manager
774                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
775            {
776                label = label.with_cluster(&cluster_id);
777                // Update causal reason to reflect cluster membership
778                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
779                    cluster_id: cluster_id.clone(),
780                });
781            }
782
783            // Secondary ProcessIssue labels for each behavioural bias that
784            // fired — lets auditors query the labels stream for specific
785            // forensic patterns (WeekendPosting / AfterHoursPosting /
786            // PostClosePosting) rather than reconstructing them from header
787            // flags. `stats.total_injected` counts injection acts (primary
788            // labels); `labels.len()` may exceed it due to these children.
789            for issue_type in &secondary_process_issues {
790                let child_id = format!("ANO{:08}", self.labels.len() + 1);
791                let child = LabeledAnomaly::new(
792                    child_id,
793                    AnomalyType::ProcessIssue(*issue_type),
794                    entry.document_number().clone(),
795                    "JE".to_string(),
796                    entry.company_code().to_string(),
797                    entry.posting_date(),
798                )
799                .with_description("Forensic pattern from fraud behavioral bias")
800                .with_injection_strategy("behavioral_bias")
801                .with_parent_anomaly(&label.anomaly_id);
802                self.labels.push(child);
803            }
804
805            return Some(label);
806        }
807
808        None
809    }
810
811    /// Injects a specific anomaly type into an entry.
812    pub fn inject_specific(
813        &mut self,
814        entry: &mut JournalEntry,
815        anomaly_type: AnomalyType,
816    ) -> Option<LabeledAnomaly> {
817        self.inject_anomaly(entry, anomaly_type)
818    }
819
820    /// Creates a self-approval anomaly.
821    pub fn create_self_approval(
822        &mut self,
823        entry: &mut JournalEntry,
824        user_id: &str,
825    ) -> Option<LabeledAnomaly> {
826        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
827
828        let label = LabeledAnomaly::new(
829            format!("ANO{:08}", self.labels.len() + 1),
830            anomaly_type,
831            entry.document_number().clone(),
832            "JE".to_string(),
833            entry.company_code().to_string(),
834            entry.posting_date(),
835        )
836        .with_description(&format!("User {user_id} approved their own transaction"))
837        .with_related_entity(user_id)
838        .with_injection_strategy("ManualSelfApproval")
839        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
840            target_type: "User".to_string(),
841            target_id: user_id.to_string(),
842        });
843
844        // Set entry header anomaly tracking fields
845        entry.header.is_anomaly = true;
846        entry.header.is_fraud = true;
847        entry.header.anomaly_id = Some(label.anomaly_id.clone());
848        entry.header.anomaly_type = Some("SelfApproval".to_string());
849        entry.header.fraud_type = Some(FraudType::SelfApproval);
850
851        // Set approver = requester
852        entry.header.created_by = user_id.to_string();
853
854        self.labels.push(label.clone());
855        Some(label)
856    }
857
858    /// Creates a segregation of duties violation.
859    pub fn create_sod_violation(
860        &mut self,
861        entry: &mut JournalEntry,
862        user_id: &str,
863        conflicting_duties: (&str, &str),
864    ) -> Option<LabeledAnomaly> {
865        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
866
867        let label = LabeledAnomaly::new(
868            format!("ANO{:08}", self.labels.len() + 1),
869            anomaly_type,
870            entry.document_number().clone(),
871            "JE".to_string(),
872            entry.company_code().to_string(),
873            entry.posting_date(),
874        )
875        .with_description(&format!(
876            "User {} performed conflicting duties: {} and {}",
877            user_id, conflicting_duties.0, conflicting_duties.1
878        ))
879        .with_related_entity(user_id)
880        .with_metadata("duty1", conflicting_duties.0)
881        .with_metadata("duty2", conflicting_duties.1)
882        .with_injection_strategy("ManualSoDViolation")
883        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
884            target_type: "User".to_string(),
885            target_id: user_id.to_string(),
886        });
887
888        // Set entry header anomaly tracking fields
889        entry.header.is_anomaly = true;
890        entry.header.is_fraud = true;
891        entry.header.anomaly_id = Some(label.anomaly_id.clone());
892        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
893        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
894
895        self.labels.push(label.clone());
896        Some(label)
897    }
898
899    /// Creates an intercompany mismatch anomaly.
900    pub fn create_ic_mismatch(
901        &mut self,
902        entry: &mut JournalEntry,
903        matching_company: &str,
904        expected_amount: Decimal,
905        actual_amount: Decimal,
906    ) -> Option<LabeledAnomaly> {
907        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
908
909        let label = LabeledAnomaly::new(
910            format!("ANO{:08}", self.labels.len() + 1),
911            anomaly_type,
912            entry.document_number().clone(),
913            "JE".to_string(),
914            entry.company_code().to_string(),
915            entry.posting_date(),
916        )
917        .with_description(&format!(
918            "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
919        ))
920        .with_related_entity(matching_company)
921        .with_monetary_impact(actual_amount - expected_amount)
922        .with_metadata("expected_amount", &expected_amount.to_string())
923        .with_metadata("actual_amount", &actual_amount.to_string())
924        .with_injection_strategy("ManualICMismatch")
925        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
926            target_type: "Intercompany".to_string(),
927            target_id: matching_company.to_string(),
928        });
929
930        // Set entry header anomaly tracking fields
931        entry.header.is_anomaly = true;
932        entry.header.anomaly_id = Some(label.anomaly_id.clone());
933        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
934
935        self.labels.push(label.clone());
936        Some(label)
937    }
938
939    /// Returns all generated labels.
940    pub fn get_labels(&self) -> &[LabeledAnomaly] {
941        &self.labels
942    }
943
944    /// Returns the anomaly summary.
945    pub fn get_summary(&self) -> AnomalySummary {
946        AnomalySummary::from_anomalies(&self.labels)
947    }
948
949    /// Returns injection statistics.
950    pub fn get_stats(&self) -> &InjectorStats {
951        &self.stats
952    }
953
954    /// Clears all labels and resets statistics.
955    pub fn reset(&mut self) {
956        self.labels.clear();
957        self.document_anomaly_counts.clear();
958        self.stats = InjectorStats::default();
959        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
960
961        // Reset enhanced components
962        self.near_miss_labels.clear();
963        self.scheme_actions.clear();
964        self.difficulty_distribution.clear();
965
966        if let Some(ref mut baseline) = self.behavioral_baseline {
967            *baseline =
968                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
969        }
970    }
971
972    /// Returns the number of clusters created.
973    pub fn cluster_count(&self) -> usize {
974        self.cluster_manager.cluster_count()
975    }
976
977    // =========================================================================
978    // Entity Context API
979    // =========================================================================
980
981    /// Sets entity contexts for risk-adjusted anomaly injection.
982    ///
983    /// When entity contexts are provided, the injector adjusts anomaly injection
984    /// rates based on entity risk factors. Entries involving high-risk vendors,
985    /// new employees, or sensitive accounts will have higher effective injection
986    /// rates.
987    ///
988    /// Pass empty HashMaps to clear previously set contexts.
989    pub fn set_entity_contexts(
990        &mut self,
991        vendors: HashMap<String, VendorContext>,
992        employees: HashMap<String, EmployeeContext>,
993        accounts: HashMap<String, AccountContext>,
994    ) {
995        self.vendor_contexts = vendors;
996        self.employee_contexts = employees;
997        self.account_contexts = accounts;
998    }
999
1000    /// Returns a reference to the vendor context map.
1001    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
1002        &self.vendor_contexts
1003    }
1004
1005    /// Returns a reference to the employee context map.
1006    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
1007        &self.employee_contexts
1008    }
1009
1010    /// Returns a reference to the account context map.
1011    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
1012        &self.account_contexts
1013    }
1014
1015    /// Calculates a rate multiplier from the entity context maps alone (no
1016    /// `EntityAwareInjector` needed). This provides a lightweight fallback
1017    /// when context-aware injection is not fully enabled but context maps
1018    /// have been populated.
1019    ///
1020    /// The multiplier is the product of individual entity risk factors found
1021    /// in the context maps for the given journal entry. If no contexts match,
1022    /// returns 1.0 (no adjustment).
1023    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1024        if self.vendor_contexts.is_empty()
1025            && self.employee_contexts.is_empty()
1026            && self.account_contexts.is_empty()
1027        {
1028            return 1.0;
1029        }
1030
1031        let mut multiplier = 1.0;
1032
1033        // Vendor lookup via reference field
1034        if let Some(ref vendor_ref) = entry.header.reference {
1035            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1036                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
1037                if ctx.is_new {
1038                    multiplier *= 2.0;
1039                }
1040                if ctx.is_dormant_reactivation {
1041                    multiplier *= 1.5;
1042                }
1043            }
1044        }
1045
1046        // Employee lookup via created_by
1047        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1048            if ctx.is_new {
1049                multiplier *= 1.5;
1050            }
1051            if ctx.is_volume_fatigued {
1052                multiplier *= 1.3;
1053            }
1054            if ctx.is_overtime {
1055                multiplier *= 1.2;
1056            }
1057        }
1058
1059        // Account lookup via first line's GL account
1060        if let Some(first_line) = entry.lines.first() {
1061            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1062                if ctx.is_high_risk {
1063                    multiplier *= 2.0;
1064                }
1065            }
1066        }
1067
1068        multiplier
1069    }
1070
1071    /// Apply behavioral bias to a fraud-labeled entry so canonical forensic
1072    /// signals (weekend posting, round dollars, off-hours timestamps,
1073    /// post-close adjustments) have measurable lift over legitimate data.
1074    ///
1075    /// Each bias is applied independently per the configured probabilities.
1076    /// Amount rounding is only applied to two-line entries where the balance
1077    /// can be preserved by rounding both the debit and credit to the same
1078    /// value.
1079    ///
1080    /// Returns the [`ProcessIssueType`] variants corresponding to each bias
1081    /// that fired. Callers emit these as secondary ProcessIssue labels so
1082    /// auditors can filter for specific forensic patterns.
1083    fn apply_fraud_behavioral_bias(
1084        &mut self,
1085        entry: &mut JournalEntry,
1086    ) -> Vec<datasynth_core::models::ProcessIssueType> {
1087        use chrono::{Datelike, Duration, TimeZone, Utc, Weekday};
1088        use datasynth_core::models::ProcessIssueType;
1089
1090        let mut fired: Vec<ProcessIssueType> = Vec::new();
1091
1092        let cfg = &self.config.enhanced.fraud_behavioral_bias;
1093        if !cfg.enabled {
1094            return fired;
1095        }
1096
1097        // --- Weekend bias ---
1098        if cfg.weekend_bias > 0.0 && self.rng.random::<f64>() < cfg.weekend_bias {
1099            let original = entry.header.posting_date;
1100            let days_to_weekend = match original.weekday() {
1101                Weekday::Mon => 5,
1102                Weekday::Tue => 4,
1103                Weekday::Wed => 3,
1104                Weekday::Thu => 2,
1105                Weekday::Fri => 1,
1106                Weekday::Sat | Weekday::Sun => 0,
1107            };
1108            let extra = if self.rng.random_bool(0.5) { 0 } else { 1 };
1109            entry.header.posting_date = original + Duration::days(days_to_weekend + extra);
1110            self.stats.fraud_weekend_bias_applied += 1;
1111            fired.push(ProcessIssueType::WeekendPosting);
1112        }
1113
1114        // --- Round-dollar bias (safe: only 2-line entries with matched sides).
1115        if cfg.round_dollar_bias > 0.0 && self.rng.random::<f64>() < cfg.round_dollar_bias {
1116            const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1117            if entry.lines.len() == 2 {
1118                let (debit_idx, credit_idx) = if entry.lines[0].is_debit() {
1119                    (0, 1)
1120                } else {
1121                    (1, 0)
1122                };
1123                let current = entry.lines[debit_idx]
1124                    .debit_amount
1125                    .max(entry.lines[credit_idx].credit_amount);
1126                if current > Decimal::ZERO {
1127                    // Pick a round target close to current magnitude.
1128                    let current_f64: f64 = current.try_into().unwrap_or(0.0);
1129                    let target = ROUND_TARGETS
1130                        .iter()
1131                        .min_by(|a, b| {
1132                            let da = (**a as f64 - current_f64).abs();
1133                            let db = (**b as f64 - current_f64).abs();
1134                            da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
1135                        })
1136                        .copied()
1137                        .unwrap_or(1_000);
1138                    let rounded = Decimal::from(target);
1139                    entry.lines[debit_idx].debit_amount = rounded;
1140                    entry.lines[debit_idx].credit_amount = Decimal::ZERO;
1141                    entry.lines[credit_idx].debit_amount = Decimal::ZERO;
1142                    entry.lines[credit_idx].credit_amount = rounded;
1143                    self.stats.fraud_round_dollar_bias_applied += 1;
1144                }
1145            }
1146        }
1147
1148        // --- Off-hours bias (22:00–05:59 UTC) ---
1149        if cfg.off_hours_bias > 0.0 && self.rng.random::<f64>() < cfg.off_hours_bias {
1150            // Pick an hour in [22, 23] ∪ [0, 5]
1151            let hour: u32 = if self.rng.random_bool(0.5) {
1152                self.rng.random_range(22..24)
1153            } else {
1154                self.rng.random_range(0..6)
1155            };
1156            let minute: u32 = self.rng.random_range(0..60);
1157            let second: u32 = self.rng.random_range(0..60);
1158            if let chrono::LocalResult::Single(new_ts) = Utc.with_ymd_and_hms(
1159                entry.header.posting_date.year(),
1160                entry.header.posting_date.month(),
1161                entry.header.posting_date.day(),
1162                hour,
1163                minute,
1164                second,
1165            ) {
1166                entry.header.created_at = new_ts;
1167                self.stats.fraud_off_hours_bias_applied += 1;
1168                fired.push(ProcessIssueType::AfterHoursPosting);
1169            }
1170        }
1171
1172        // --- Post-close marking bias ---
1173        if cfg.post_close_bias > 0.0
1174            && self.rng.random::<f64>() < cfg.post_close_bias
1175            && !entry.header.is_post_close
1176        {
1177            entry.header.is_post_close = true;
1178            self.stats.fraud_post_close_bias_applied += 1;
1179            fired.push(ProcessIssueType::PostClosePosting);
1180        }
1181
1182        fired
1183    }
1184
1185    // =========================================================================
1186    // Enhanced Features API (v0.3.0+)
1187    // =========================================================================
1188
1189    /// Advances all active fraud schemes by one time step.
1190    ///
1191    /// Call this method once per simulated day to generate scheme actions.
1192    /// Returns the scheme actions generated for this date.
1193    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1194        if let Some(ref mut advancer) = self.scheme_advancer {
1195            let context = SchemeContext::new(date, company_code);
1196            let actions = advancer.advance_all(&context);
1197            self.scheme_actions.extend(actions.clone());
1198            actions
1199        } else {
1200            Vec::new()
1201        }
1202    }
1203
1204    /// Potentially starts a new fraud scheme based on probabilities.
1205    ///
1206    /// Call this method periodically (e.g., once per period) to allow new
1207    /// schemes to start based on configured probabilities.
1208    /// Returns the scheme ID if a scheme was started.
1209    pub fn maybe_start_scheme(
1210        &mut self,
1211        date: NaiveDate,
1212        company_code: &str,
1213        available_users: Vec<String>,
1214        available_accounts: Vec<String>,
1215        available_counterparties: Vec<String>,
1216    ) -> Option<uuid::Uuid> {
1217        if let Some(ref mut advancer) = self.scheme_advancer {
1218            let mut context = SchemeContext::new(date, company_code);
1219            context.available_users = available_users;
1220            context.available_accounts = available_accounts;
1221            context.available_counterparties = available_counterparties;
1222
1223            advancer.maybe_start_scheme(&context)
1224        } else {
1225            None
1226        }
1227    }
1228
1229    /// Returns all near-miss labels generated.
1230    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1231        &self.near_miss_labels
1232    }
1233
1234    /// Returns all scheme actions generated.
1235    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1236        &self.scheme_actions
1237    }
1238
1239    /// Returns the detection difficulty distribution.
1240    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
1241        &self.difficulty_distribution
1242    }
1243
1244    /// Checks for behavioral deviations for an entity with an observation.
1245    pub fn check_behavioral_deviations(
1246        &self,
1247        entity_id: &str,
1248        observation: &super::context::Observation,
1249    ) -> Vec<super::context::BehavioralDeviation> {
1250        if let Some(ref baseline) = self.behavioral_baseline {
1251            baseline.check_deviation(entity_id, observation)
1252        } else {
1253            Vec::new()
1254        }
1255    }
1256
1257    /// Gets the baseline for an entity.
1258    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1259        if let Some(ref baseline) = self.behavioral_baseline {
1260            baseline.get_baseline(entity_id)
1261        } else {
1262            None
1263        }
1264    }
1265
1266    /// Returns the number of active schemes.
1267    pub fn active_scheme_count(&self) -> usize {
1268        if let Some(ref advancer) = self.scheme_advancer {
1269            advancer.active_scheme_count()
1270        } else {
1271            0
1272        }
1273    }
1274
1275    /// Returns whether enhanced features are enabled.
1276    pub fn has_enhanced_features(&self) -> bool {
1277        self.scheme_advancer.is_some()
1278            || self.near_miss_generator.is_some()
1279            || self.difficulty_calculator.is_some()
1280            || self.entity_aware_injector.is_some()
1281    }
1282}
1283
1284/// Builder for AnomalyInjectorConfig.
1285pub struct AnomalyInjectorConfigBuilder {
1286    config: AnomalyInjectorConfig,
1287}
1288
1289impl AnomalyInjectorConfigBuilder {
1290    /// Creates a new builder with default configuration.
1291    pub fn new() -> Self {
1292        Self {
1293            config: AnomalyInjectorConfig::default(),
1294        }
1295    }
1296
1297    /// Sets the total anomaly rate.
1298    pub fn with_total_rate(mut self, rate: f64) -> Self {
1299        self.config.rates.total_rate = rate;
1300        self
1301    }
1302
1303    /// Sets the fraud rate (proportion of anomalies).
1304    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1305        self.config.rates.fraud_rate = rate;
1306        self
1307    }
1308
1309    /// Sets the error rate (proportion of anomalies).
1310    pub fn with_error_rate(mut self, rate: f64) -> Self {
1311        self.config.rates.error_rate = rate;
1312        self
1313    }
1314
1315    /// Sets the random seed.
1316    pub fn with_seed(mut self, seed: u64) -> Self {
1317        self.config.seed = seed;
1318        self
1319    }
1320
1321    /// Sets the temporal pattern.
1322    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1323        self.config.patterns.temporal_pattern = pattern;
1324        self
1325    }
1326
1327    /// Enables or disables label generation.
1328    pub fn with_labels(mut self, generate: bool) -> Self {
1329        self.config.generate_labels = generate;
1330        self
1331    }
1332
1333    /// Sets target companies.
1334    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1335        self.config.target_companies = companies;
1336        self
1337    }
1338
1339    /// Sets the date range.
1340    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1341        self.config.date_range = Some((start, end));
1342        self
1343    }
1344
1345    // =========================================================================
1346    // Enhanced Features Configuration (v0.3.0+)
1347    // =========================================================================
1348
1349    /// Enables multi-stage fraud scheme generation.
1350    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1351        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1352        self.config.enhanced.scheme_probability = probability;
1353        self
1354    }
1355
1356    /// Enables near-miss generation.
1357    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1358        self.config.enhanced.near_miss_enabled = enabled;
1359        self.config.enhanced.near_miss_proportion = proportion;
1360        self
1361    }
1362
1363    /// Sets approval thresholds for threshold-proximity near-misses.
1364    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1365        self.config.enhanced.approval_thresholds = thresholds;
1366        self
1367    }
1368
1369    /// Enables correlated anomaly injection.
1370    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1371        self.config.enhanced.correlated_injection_enabled = enabled;
1372        self
1373    }
1374
1375    /// Enables temporal clustering (period-end spikes).
1376    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1377        self.config.enhanced.temporal_clustering_enabled = enabled;
1378        self.config.enhanced.period_end_multiplier = multiplier;
1379        self
1380    }
1381
1382    /// Enables detection difficulty classification.
1383    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1384        self.config.enhanced.difficulty_classification_enabled = enabled;
1385        self
1386    }
1387
1388    /// Enables context-aware injection.
1389    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1390        self.config.enhanced.context_aware_enabled = enabled;
1391        self
1392    }
1393
1394    /// Sets behavioral baseline configuration.
1395    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1396        self.config.enhanced.behavioral_baseline_config = config;
1397        self
1398    }
1399
1400    /// Enables all enhanced features with default settings.
1401    pub fn with_all_enhanced_features(mut self) -> Self {
1402        self.config.enhanced.multi_stage_schemes_enabled = true;
1403        self.config.enhanced.scheme_probability = 0.02;
1404        self.config.enhanced.correlated_injection_enabled = true;
1405        self.config.enhanced.temporal_clustering_enabled = true;
1406        self.config.enhanced.period_end_multiplier = 2.5;
1407        self.config.enhanced.near_miss_enabled = true;
1408        self.config.enhanced.near_miss_proportion = 0.30;
1409        self.config.enhanced.difficulty_classification_enabled = true;
1410        self.config.enhanced.context_aware_enabled = true;
1411        self.config.enhanced.behavioral_baseline_config.enabled = true;
1412        self
1413    }
1414
1415    /// Builds the configuration.
1416    pub fn build(self) -> AnomalyInjectorConfig {
1417        self.config
1418    }
1419}
1420
1421impl Default for AnomalyInjectorConfigBuilder {
1422    fn default() -> Self {
1423        Self::new()
1424    }
1425}
1426
1427#[cfg(test)]
1428#[allow(clippy::unwrap_used)]
1429mod tests {
1430    use super::*;
1431    use chrono::NaiveDate;
1432    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1433    use rust_decimal_macros::dec;
1434
1435    fn create_test_entry(doc_num: &str) -> JournalEntry {
1436        let mut entry = JournalEntry::new_simple(
1437            doc_num.to_string(),
1438            "1000".to_string(),
1439            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1440            "Test Entry".to_string(),
1441        );
1442
1443        entry.add_line(JournalEntryLine {
1444            line_number: 1,
1445            gl_account: "5000".to_string(),
1446            debit_amount: dec!(1000),
1447            ..Default::default()
1448        });
1449
1450        entry.add_line(JournalEntryLine {
1451            line_number: 2,
1452            gl_account: "1000".to_string(),
1453            credit_amount: dec!(1000),
1454            ..Default::default()
1455        });
1456
1457        entry
1458    }
1459
1460    #[test]
1461    fn test_anomaly_injector_basic() {
1462        let config = AnomalyInjectorConfigBuilder::new()
1463            .with_total_rate(0.5) // High rate for testing
1464            .with_seed(42)
1465            .build();
1466
1467        let mut injector = AnomalyInjector::new(config);
1468
1469        let mut entries: Vec<_> = (0..100)
1470            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1471            .collect();
1472
1473        let result = injector.process_entries(&mut entries);
1474
1475        // With 50% rate, we should have some anomalies
1476        assert!(result.anomalies_injected > 0);
1477        assert!(!result.labels.is_empty());
1478        // `anomalies_injected` counts primary injection acts. `labels` also
1479        // includes secondary `ProcessIssue` labels emitted for each fraud
1480        // behavioural bias that fires, so `labels.len()` is always ≥ the
1481        // primary count.
1482        assert!(result.labels.len() >= result.anomalies_injected);
1483    }
1484
1485    #[test]
1486    fn test_specific_injection() {
1487        let config = AnomalyInjectorConfig::default();
1488        let mut injector = AnomalyInjector::new(config);
1489
1490        let mut entry = create_test_entry("JE001");
1491        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1492
1493        let label = injector.inject_specific(&mut entry, anomaly_type);
1494
1495        assert!(label.is_some());
1496        let label = label.unwrap();
1497        // document_id is the UUID string from the journal entry header
1498        assert!(!label.document_id.is_empty());
1499        assert_eq!(label.document_id, entry.document_number());
1500    }
1501
1502    #[test]
1503    fn test_self_approval_injection() {
1504        let config = AnomalyInjectorConfig::default();
1505        let mut injector = AnomalyInjector::new(config);
1506
1507        let mut entry = create_test_entry("JE001");
1508        let label = injector.create_self_approval(&mut entry, "USER001");
1509
1510        assert!(label.is_some());
1511        let label = label.unwrap();
1512        assert!(matches!(
1513            label.anomaly_type,
1514            AnomalyType::Fraud(FraudType::SelfApproval)
1515        ));
1516        assert!(label.related_entities.contains(&"USER001".to_string()));
1517    }
1518
/// With injection restricted to company "2000", entries produced by
/// `create_test_entry` receive no anomalies even at a total rate of 1.0.
#[test]
fn test_company_filtering() {
    let targeted_config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(1.0) // Inject all eligible entries
        .with_target_companies(vec!["2000".to_string()])
        .build();
    let mut injector = AnomalyInjector::new(targeted_config);

    // Both test entries are in company 1000, which is not a target company.
    let first = create_test_entry("JE001");
    let second = create_test_entry("JE002");
    let mut entries = vec![first, second];

    let outcome = injector.process_entries(&mut entries);

    // Nothing may be injected because no entry belongs to company 2000.
    assert_eq!(outcome.anomalies_injected, 0);
}
1538
1539    // =========================================================================
1540    // Entity Context Tests
1541    // =========================================================================
1542
1543    /// Helper to create a test entry with specific vendor reference and employee.
1544    fn create_test_entry_with_context(
1545        doc_num: &str,
1546        vendor_ref: Option<&str>,
1547        employee_id: &str,
1548        gl_account: &str,
1549    ) -> JournalEntry {
1550        let mut entry = JournalEntry::new_simple(
1551            doc_num.to_string(),
1552            "1000".to_string(),
1553            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1554            "Test Entry".to_string(),
1555        );
1556
1557        entry.header.reference = vendor_ref.map(|v| v.to_string());
1558        entry.header.created_by = employee_id.to_string();
1559
1560        entry.add_line(JournalEntryLine {
1561            line_number: 1,
1562            gl_account: gl_account.to_string(),
1563            debit_amount: dec!(1000),
1564            ..Default::default()
1565        });
1566
1567        entry.add_line(JournalEntryLine {
1568            line_number: 2,
1569            gl_account: "1000".to_string(),
1570            credit_amount: dec!(1000),
1571            ..Default::default()
1572        });
1573
1574        entry
1575    }
1576
/// A fresh injector exposes no entity contexts; after `set_entity_contexts`
/// each of the three maps holds exactly the single entry that was supplied.
#[test]
fn test_set_entity_contexts() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Nothing is registered before contexts are supplied.
    assert!(injector.vendor_contexts().is_empty());
    assert!(injector.employee_contexts().is_empty());
    assert!(injector.account_contexts().is_empty());

    // One context per entity kind.
    let vendor_map = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let employee_map = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let account_map = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);

    injector.set_entity_contexts(vendor_map, employee_map, account_map);

    // Each accessor now reports exactly the registered key.
    assert_eq!(injector.vendor_contexts().len(), 1);
    assert_eq!(injector.employee_contexts().len(), 1);
    assert_eq!(injector.account_contexts().len(), 1);
    assert!(injector.vendor_contexts().contains_key("V001"));
    assert!(injector.employee_contexts().contains_key("EMP001"));
    assert!(injector.account_contexts().contains_key("8100"));
}
1627
/// Without entity contexts the configured base rate applies unchanged:
/// a 50% rate over 200 entries must land somewhere between 20% and 80%.
#[test]
fn test_default_behavior_no_contexts() {
    let half_rate_config = AnomalyInjectorConfigBuilder::new()
        .with_total_rate(0.5)
        .with_seed(42)
        .build();
    let mut injector = AnomalyInjector::new(half_rate_config);

    let mut entries = Vec::with_capacity(200);
    for i in 0..200 {
        entries.push(create_test_entry(&format!("JE{:04}", i)));
    }

    let result = injector.process_entries(&mut entries);

    // At least one injection, and an observed rate near the configured 50%.
    // The band is intentionally wide to absorb sampling noise.
    assert!(result.anomalies_injected > 0);
    let injected = result.anomalies_injected as f64;
    let processed = result.entries_processed as f64;
    let rate = injected / processed;
    assert!(
        rate > 0.2 && rate < 0.8,
        "Expected ~50% rate, got {:.2}%",
        rate * 100.0
    );
}
1654
/// High-risk vendor, employee and account contexts must push the number of
/// injected anomalies above what the same seed and base rate produce when
/// no contexts are registered.
#[test]
fn test_entity_context_increases_injection_rate() {
    let base_rate = 0.10; // Low base rate

    // Both runs use an identical configuration (same rate, same seed) ...
    let build_injector = || {
        AnomalyInjector::new(
            AnomalyInjectorConfigBuilder::new()
                .with_total_rate(base_rate)
                .with_seed(123)
                .build(),
        )
    };
    // ... and identical input: 500 entries that all reference vendor V001,
    // employee EMP001 and account 8100.
    let build_entries = || -> Vec<_> {
        (0..500)
            .map(|i| {
                create_test_entry_with_context(
                    &format!("JE{:04}", i),
                    Some("V001"),
                    "EMP001",
                    "8100",
                )
            })
            .collect()
    };

    // Baseline run: no entity contexts.
    let mut plain_injector = build_injector();
    let mut plain_entries = build_entries();
    let plain_result = plain_injector.process_entries(&mut plain_entries);

    // Context run: every referenced entity is flagged as high risk.
    let mut risky_injector = build_injector();

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,                  // 2.0x multiplier
            is_dormant_reactivation: true, // 1.5x multiplier
            ..Default::default()
        },
    )]);
    let employees = HashMap::from([(
        "EMP001".to_string(),
        EmployeeContext {
            employee_id: "EMP001".to_string(),
            is_new: true, // 1.5x multiplier
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "8100".to_string(),
        AccountContext {
            account_code: "8100".to_string(),
            is_high_risk: true, // 2.0x multiplier
            ..Default::default()
        },
    )]);
    risky_injector.set_entity_contexts(vendors, employees, accounts);

    let mut risky_entries = build_entries();
    let risky_result = risky_injector.process_entries(&mut risky_entries);

    // The context-enhanced run should inject strictly more anomalies.
    assert!(
        risky_result.anomalies_injected > plain_result.anomalies_injected,
        "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
        risky_result.anomalies_injected,
        plain_result.anomalies_injected,
    );
}
1745
/// `calculate_context_rate_multiplier` combines the factors of all matching
/// contexts multiplicatively: 1.0 with no match, 2.0 for a new vendor, and
/// 2.0 x 2.0 = 4.0 when a high-risk account matches as well.
#[test]
fn test_risk_score_multiplication() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    // Before any contexts are registered the multiplier is neutral (1.0).
    let plain_entry = create_test_entry_with_context("JE001", None, "USER1", "5000");
    let neutral = injector.calculate_context_rate_multiplier(&plain_entry);
    assert!((neutral - 1.0).abs() < f64::EPSILON);

    // Register a new vendor (2.0x) and a high-risk account (2.0x).
    let vendors = HashMap::from([(
        "V_RISKY".to_string(),
        VendorContext {
            vendor_id: "V_RISKY".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    let accounts = HashMap::from([(
        "9000".to_string(),
        AccountContext {
            account_code: "9000".to_string(),
            is_high_risk: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), accounts);

    // Vendor and account both match: 2.0 * 2.0 = 4.0.
    let risky_entry = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
    let multiplier = injector.calculate_context_rate_multiplier(&risky_entry);
    assert!(
        (multiplier - 4.0).abs() < f64::EPSILON,
        "Expected 4.0x multiplier, got {}",
        multiplier,
    );

    // Only the vendor matches; account 5000 has no context.
    let vendor_only_entry =
        create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
    let multiplier_vendor = injector.calculate_context_rate_multiplier(&vendor_only_entry);
    assert!(
        (multiplier_vendor - 2.0).abs() < f64::EPSILON,
        "Expected 2.0x multiplier (vendor only), got {}",
        multiplier_vendor,
    );

    // Neither the vendor reference nor the account has a registered context.
    let unmatched_entry =
        create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
    let multiplier_none = injector.calculate_context_rate_multiplier(&unmatched_entry);
    assert!(
        (multiplier_none - 1.0).abs() < f64::EPSILON,
        "Expected 1.0x multiplier (no match), got {}",
        multiplier_none,
    );
}
1810
/// An employee flagged as new, volume-fatigued and on overtime contributes
/// the product of all three factors (1.5 * 1.3 * 1.2) to the multiplier.
#[test]
fn test_employee_context_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let stressed_employee = EmployeeContext {
        employee_id: "EMP_NEW".to_string(),
        is_new: true,             // 1.5x
        is_volume_fatigued: true, // 1.3x
        is_overtime: true,        // 1.2x
        ..Default::default()
    };
    let employees = HashMap::from([("EMP_NEW".to_string(), stressed_employee)]);
    injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());

    // No vendor reference and an account without context: only the
    // employee factors can contribute.
    let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);

    // 1.5 * 1.3 * 1.2 = 2.34
    let expected = 1.5 * 1.3 * 1.2;
    let deviation = (multiplier - expected).abs();
    assert!(
        deviation < 0.01,
        "Expected {:.3}x multiplier, got {:.3}",
        expected,
        multiplier,
    );
}
1842
/// `reset()` must leave previously registered entity contexts in place.
#[test]
fn test_entity_contexts_persist_across_reset() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            is_new: true,
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // The vendor context registered above must survive the reset.
    injector.reset();
    assert_eq!(injector.vendor_contexts().len(), 1);
}
1865
/// Calling `set_entity_contexts` with empty maps replaces, and thereby
/// clears, whatever vendor contexts were registered before.
#[test]
fn test_set_empty_contexts_clears() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let vendors = HashMap::from([(
        "V001".to_string(),
        VendorContext {
            vendor_id: "V001".to_string(),
            ..Default::default()
        },
    )]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
    assert_eq!(injector.vendor_contexts().len(), 1);

    // A second call with empty maps wipes the earlier registration.
    injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
    assert!(injector.vendor_contexts().is_empty());
}
1887
/// A vendor flagged only as a dormant reactivation yields a 1.5x multiplier.
#[test]
fn test_dormant_vendor_multiplier() {
    let mut injector = AnomalyInjector::new(AnomalyInjectorConfig::default());

    let dormant_vendor = VendorContext {
        vendor_id: "V_DORMANT".to_string(),
        is_dormant_reactivation: true, // 1.5x
        ..Default::default()
    };
    let vendors = HashMap::from([("V_DORMANT".to_string(), dormant_vendor)]);
    injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());

    // Employee USER1 and account 5000 have no contexts, so only the vendor
    // factor applies.
    let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
    let multiplier = injector.calculate_context_rate_multiplier(&entry);
    assert!(
        (multiplier - 1.5).abs() < f64::EPSILON,
        "Expected 1.5x multiplier for dormant vendor, got {}",
        multiplier,
    );
}
1913
1914    // =========================================================================
1915    // Fraud Behavioral Bias Tests
1916    // =========================================================================
1917
/// With every bias probability at 1.0, a single fraud injection must show
/// all four behavioural signals — weekend posting date, round-dollar amount,
/// off-hours `created_at`, and the post-close flag — and each bias counter
/// in the stats must read exactly 1. This is the "all biases fire"
/// guarantee that downstream ML classifiers rely on.
#[test]
fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
    use chrono::{Datelike, Timelike, Weekday};
    use datasynth_core::models::FraudType;

    let always_fire = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = always_fire;
    let mut injector = AnomalyInjector::new(config);

    // Start on a Monday with a non-round amount (1237) so that both the
    // weekend shift and the round-dollar rewrite are observable.
    let monday = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = JournalEntry::new_simple(
        "JE001".to_string(),
        "1000".to_string(),
        monday,
        "Test Entry".to_string(),
    );
    let debit_line = JournalEntryLine {
        line_number: 1,
        gl_account: "5000".to_string(),
        debit_amount: dec!(1237),
        ..Default::default()
    };
    let credit_line = JournalEntryLine {
        line_number: 2,
        gl_account: "1000".to_string(),
        credit_amount: dec!(1237),
        ..Default::default()
    };
    entry.add_line(debit_line);
    entry.add_line(credit_line);

    let _ = injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    // Weekend bias: the posting date now falls on Saturday or Sunday.
    let posted_on = entry.header.posting_date.weekday();
    assert!(
        matches!(posted_on, Weekday::Sat | Weekday::Sun),
        "expected weekend posting date, got {:?}",
        posted_on
    );

    // Round-dollar bias: the entry stays balanced and its total is one of
    // the known round targets.
    let round_targets = [
        dec!(1_000),
        dec!(5_000),
        dec!(10_000),
        dec!(25_000),
        dec!(50_000),
        dec!(100_000),
    ];
    let debit_total: Decimal = entry.lines.iter().map(|line| line.debit_amount).sum();
    let credit_total: Decimal = entry.lines.iter().map(|line| line.credit_amount).sum();
    assert_eq!(debit_total, credit_total, "entry must remain balanced");
    assert!(
        round_targets.contains(&debit_total),
        "expected round-dollar total, got {}",
        debit_total
    );

    // Off-hours bias: the creation hour lies outside 06:00-21:59.
    let created_hour = entry.header.created_at.hour();
    assert!(
        !(6..22).contains(&created_hour),
        "expected off-hours timestamp, got hour {}",
        created_hour
    );

    // Post-close bias: the header flag is set.
    assert!(entry.header.is_post_close);

    // Each bias was counted exactly once.
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 1);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
    assert_eq!(stats.fraud_off_hours_bias_applied, 1);
    assert_eq!(stats.fraud_post_close_bias_applied, 1);
}
2003
/// With the feature enabled but every bias probability at 0.0, a fraud
/// injection leaves the posting date and the post-close flag alone and no
/// bias counter moves — the feature can be fully opted out of.
#[test]
fn fraud_behavioral_bias_rate_zero_applies_nothing() {
    use datasynth_core::models::FraudType;

    let never_fire = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 0.0,
        round_dollar_bias: 0.0,
        off_hours_bias: 0.0,
        post_close_bias: 0.0,
    };
    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = never_fire;
    let mut injector = AnomalyInjector::new(config);

    // Pin the posting date to a Monday so an unwanted weekend shift would
    // be detectable.
    let monday = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap();
    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = monday;

    let _ = injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));

    // The entry keeps its date and is not marked post-close.
    assert_eq!(entry.header.posting_date, monday);
    assert!(!entry.header.is_post_close);

    // None of the four bias counters was incremented.
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
    assert_eq!(stats.fraud_off_hours_bias_applied, 0);
    assert_eq!(stats.fraud_post_close_bias_applied, 0);
}
2034
/// Non-fraud anomalies (errors, process issues, etc.) are not touched by
/// the bias — only `AnomalyType::Fraud(_)` triggers it.
///
/// All four bias counters are checked, mirroring the rate-zero test, so a
/// regression in any single bias (not just the weekend shift) is caught.
#[test]
fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
    let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); // Monday
    let mut config = AnomalyInjectorConfig::default();
    // Every bias is set to always fire, so any application to a non-fraud
    // anomaly would show up deterministically.
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 1.0,
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut injector = AnomalyInjector::new(config);
    let mut entry = create_test_entry("JE001");
    entry.header.posting_date = original_date;

    let _ = injector.inject_specific(
        &mut entry,
        AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
    );

    // The posting date was not shifted to a weekend.
    assert_eq!(entry.header.posting_date, original_date);

    // No fraud bias of any kind was recorded for the statistical anomaly.
    let stats = injector.get_stats();
    assert_eq!(stats.fraud_weekend_bias_applied, 0);
    assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
    assert_eq!(stats.fraud_off_hours_bias_applied, 0);
    assert_eq!(stats.fraud_post_close_bias_applied, 0);
}
2061
/// When behavioural biases fire on a fraud entry, secondary
/// `ProcessIssue` labels should be pushed into the labels stream so
/// auditors can filter for specific forensic patterns.
///
/// The primary fraud label is returned by `inject_specific` and is not
/// part of `get_labels()`, so the stream is expected to hold only the
/// three secondary labels.
#[test]
fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
    use datasynth_core::models::{FraudType, ProcessIssueType};

    let mut config = AnomalyInjectorConfig::default();
    config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
        enabled: true,
        weekend_bias: 1.0,
        round_dollar_bias: 0.0, // round-dollar does not emit a process-issue label
        off_hours_bias: 1.0,
        post_close_bias: 1.0,
    };
    let mut injector = AnomalyInjector::new(config);
    let mut entry = JournalEntry::new_simple(
        "JE001".into(),
        "1000".into(),
        NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), // Monday
        "Test".into(),
    );
    entry.add_line(JournalEntryLine {
        line_number: 1,
        gl_account: "5000".into(),
        debit_amount: dec!(1000),
        ..Default::default()
    });
    entry.add_line(JournalEntryLine {
        line_number: 2,
        gl_account: "1000".into(),
        credit_amount: dec!(1000),
        ..Default::default()
    });

    let primary = injector
        .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
        .expect("fraud label should be produced");

    // Only the 3 secondary process-issue labels are in the stream; the
    // primary fraud label is the return value above.
    let labels = injector.get_labels();
    assert_eq!(
        labels.len(),
        3,
        "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
    );

    // Check each expected secondary label individually so a failure names
    // the one that is missing; compare by reference, no cloning needed.
    for issue in [
        ProcessIssueType::WeekendPosting,
        ProcessIssueType::AfterHoursPosting,
        ProcessIssueType::PostClosePosting,
    ] {
        let expected = AnomalyType::ProcessIssue(issue);
        assert!(
            labels.iter().any(|label| label.anomaly_type == expected),
            "missing secondary label {:?}",
            expected
        );
    }

    assert_eq!(
        primary.anomaly_type,
        AnomalyType::Fraud(FraudType::FictitiousEntry)
    );
}
2121}
datasynth_generators/anomaly/injector.rs

datasynth_generators/anomaly/
injector.rs