datasynth_generators/anomaly/
injector.rs

//! Main anomaly injection engine.
//!
//! The injector coordinates anomaly generation across all data types,
//! managing rates, patterns, clustering, and label generation.
//!
//! ## Enhanced Features (v0.3.0+)
//!
//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
//! - **Correlated injection**: Co-occurrence patterns and error cascades
//! - **Near-miss generation**: Suspicious but legitimate transactions
//! - **Detection difficulty classification**: Trivial to expert levels
//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_core::utils::seeded_rng;
16use rand::RngExt;
17use rand_chacha::ChaCha8Rng;
18use rust_decimal::Decimal;
19use std::collections::HashMap;
20use tracing::debug;
21
22use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
23use datasynth_core::models::{
24    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
25    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
26    RelationalAnomalyType,
27};
28use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
29
30use super::context::{
31    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
32    EntityAwareInjector, VendorContext,
33};
34use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
35use super::difficulty::DifficultyCalculator;
36use super::near_miss::{NearMissConfig, NearMissGenerator};
37use super::patterns::{
38    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
39    TemporalPattern,
40};
41use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
42use super::schemes::{SchemeAction, SchemeContext};
43use super::strategies::{DuplicationStrategy, StrategyCollection};
44use super::types::AnomalyTypeSelector;
45
46/// Configuration for the anomaly injector.
47#[derive(Debug, Clone)]
48pub struct AnomalyInjectorConfig {
49    /// Rate configuration.
50    pub rates: AnomalyRateConfig,
51    /// Pattern configuration.
52    pub patterns: AnomalyPatternConfig,
53    /// Random seed for reproducibility.
54    pub seed: u64,
55    /// Whether to generate labels.
56    pub generate_labels: bool,
57    /// Whether to allow duplicate injection.
58    pub allow_duplicates: bool,
59    /// Maximum anomalies per document.
60    pub max_anomalies_per_document: usize,
61    /// Company codes to target (empty = all).
62    pub target_companies: Vec<String>,
63    /// Date range for injection.
64    pub date_range: Option<(NaiveDate, NaiveDate)>,
65    /// Enhanced features configuration.
66    pub enhanced: EnhancedInjectionConfig,
67}
68
69/// Enhanced injection configuration for v0.3.0+ features.
70#[derive(Debug, Clone, Default)]
71pub struct EnhancedInjectionConfig {
72    /// Enable multi-stage fraud scheme generation.
73    pub multi_stage_schemes_enabled: bool,
74    /// Probability of starting a new scheme per perpetrator per year.
75    pub scheme_probability: f64,
76    /// Enable correlated anomaly injection.
77    pub correlated_injection_enabled: bool,
78    /// Enable temporal clustering (period-end spikes).
79    pub temporal_clustering_enabled: bool,
80    /// Period-end anomaly rate multiplier.
81    pub period_end_multiplier: f64,
82    /// Enable near-miss generation.
83    pub near_miss_enabled: bool,
84    /// Proportion of anomalies that are near-misses.
85    pub near_miss_proportion: f64,
86    /// Approval thresholds for threshold-proximity near-misses.
87    pub approval_thresholds: Vec<Decimal>,
88    /// Enable detection difficulty classification.
89    pub difficulty_classification_enabled: bool,
90    /// Enable context-aware injection.
91    pub context_aware_enabled: bool,
92    /// Behavioral baseline configuration.
93    pub behavioral_baseline_config: BehavioralBaselineConfig,
94    /// Behavioral bias applied to fraud entries so canonical forensic
95    /// signals (weekend posting, round-dollar amounts, off-hours posting,
96    /// post-close adjustments) show measurable lift on fraud vs legitimate
97    /// populations. Defaults enable all four biases.
98    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
99}
100
101impl Default for AnomalyInjectorConfig {
102    fn default() -> Self {
103        Self {
104            rates: AnomalyRateConfig::default(),
105            patterns: AnomalyPatternConfig::default(),
106            seed: 42,
107            generate_labels: true,
108            allow_duplicates: true,
109            max_anomalies_per_document: 2,
110            target_companies: Vec::new(),
111            date_range: None,
112            enhanced: EnhancedInjectionConfig::default(),
113        }
114    }
115}
116
117/// Result of an injection batch.
118#[derive(Debug, Clone)]
119pub struct InjectionBatchResult {
120    /// Number of entries processed.
121    pub entries_processed: usize,
122    /// Number of anomalies injected.
123    pub anomalies_injected: usize,
124    /// Number of duplicates created.
125    pub duplicates_created: usize,
126    /// Labels generated.
127    pub labels: Vec<LabeledAnomaly>,
128    /// Summary of anomalies.
129    pub summary: AnomalySummary,
130    /// Entries that were modified (document numbers).
131    pub modified_documents: Vec<String>,
132    /// Near-miss labels (suspicious but legitimate transactions).
133    pub near_miss_labels: Vec<NearMissLabel>,
134    /// Multi-stage scheme actions generated.
135    pub scheme_actions: Vec<SchemeAction>,
136    /// Difficulty distribution summary.
137    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
138}
139
140/// Main anomaly injection engine.
141pub struct AnomalyInjector {
142    config: AnomalyInjectorConfig,
143    rng: ChaCha8Rng,
144    uuid_factory: DeterministicUuidFactory,
145    type_selector: AnomalyTypeSelector,
146    strategies: StrategyCollection,
147    cluster_manager: ClusterManager,
148    /// Selects target entities for anomaly injection (RepeatOffender, etc.).
149    entity_targeting: EntityTargetingManager,
150    /// Tracking which documents already have anomalies.
151    document_anomaly_counts: HashMap<String, usize>,
152    /// All generated labels.
153    labels: Vec<LabeledAnomaly>,
154    /// Statistics.
155    stats: InjectorStats,
156    // Enhanced components (v0.3.0+)
157    /// Multi-stage fraud scheme advancer.
158    scheme_advancer: Option<SchemeAdvancer>,
159    /// Near-miss generator.
160    near_miss_generator: Option<NearMissGenerator>,
161    /// Near-miss labels generated.
162    near_miss_labels: Vec<NearMissLabel>,
163    /// Drives correlated anomaly pairs (e.g., FictitiousVendor + InvoiceManipulation).
164    co_occurrence_handler: Option<AnomalyCoOccurrence>,
165    /// Queued correlated anomalies waiting to be injected.
166    queued_co_occurrences: Vec<QueuedAnomaly>,
167    /// Groups anomalies into temporal bursts during period-end windows.
168    temporal_cluster_generator: Option<TemporalClusterGenerator>,
169    /// Difficulty calculator.
170    difficulty_calculator: Option<DifficultyCalculator>,
171    /// Entity-aware injector.
172    entity_aware_injector: Option<EntityAwareInjector>,
173    /// Behavioral baseline tracker.
174    behavioral_baseline: Option<BehavioralBaseline>,
175    /// Scheme actions generated.
176    scheme_actions: Vec<SchemeAction>,
177    /// Difficulty distribution.
178    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
179    // Entity context lookup maps for risk-adjusted injection rates
180    /// Vendor contexts keyed by vendor ID.
181    vendor_contexts: HashMap<String, VendorContext>,
182    /// Employee contexts keyed by employee ID.
183    employee_contexts: HashMap<String, EmployeeContext>,
184    /// Account contexts keyed by account code.
185    account_contexts: HashMap<String, AccountContext>,
186}
187
/// Running counters kept by the injector while it processes entries.
///
/// All counters start at zero and all maps start empty (`Default` is
/// derived).
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Entries seen, counted before any filtering.
    pub total_processed: usize,
    /// Anomalies injected.
    pub total_injected: usize,
    /// Injected anomalies per category name (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Injected anomalies per specific type name.
    pub by_type: HashMap<String, usize>,
    /// Injected anomalies per company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped by the rate check.
    pub skipped_rate: usize,
    /// Entries skipped because their posting date is outside the date range.
    pub skipped_date: usize,
    /// Entries skipped because their company is not targeted.
    pub skipped_company: usize,
    /// Entries skipped because their document already reached the
    /// max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
    /// Fraud entries that received weekend-posting bias.
    pub fraud_weekend_bias_applied: usize,
    /// Fraud entries that received round-dollar amount bias.
    pub fraud_round_dollar_bias_applied: usize,
    /// Fraud entries that received off-hours created_at bias.
    pub fraud_off_hours_bias_applied: usize,
    /// Fraud entries that received post-close marking bias.
    pub fraud_post_close_bias_applied: usize,
}
218
219/// A correlated anomaly queued for future injection.
220struct QueuedAnomaly {
221    /// Anomaly type to inject.
222    anomaly_type: AnomalyType,
223    /// Target entity (if same_entity was specified in the co-occurrence pattern).
224    target_entity: Option<String>,
225    /// Earliest date this can be injected.
226    earliest_date: NaiveDate,
227    /// Description from the co-occurrence pattern.
228    description: String,
229}
230
231impl AnomalyInjector {
232    /// Creates a new anomaly injector.
233    pub fn new(config: AnomalyInjectorConfig) -> Self {
234        let mut rng = seeded_rng(config.seed, 0);
235        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
236        let entity_targeting =
237            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
238
239        // Initialize enhanced components based on configuration
240        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
241            let scheme_config = SchemeAdvancerConfig {
242                embezzlement_probability: config.enhanced.scheme_probability,
243                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
244                kickback_probability: config.enhanced.scheme_probability * 0.5,
245                seed: rng.random(),
246                ..Default::default()
247            };
248            Some(SchemeAdvancer::new(scheme_config))
249        } else {
250            None
251        };
252
253        let near_miss_generator = if config.enhanced.near_miss_enabled {
254            let near_miss_config = NearMissConfig {
255                proportion: config.enhanced.near_miss_proportion,
256                seed: rng.random(),
257                ..Default::default()
258            };
259            Some(NearMissGenerator::new(near_miss_config))
260        } else {
261            None
262        };
263
264        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
265            Some(AnomalyCoOccurrence::new())
266        } else {
267            None
268        };
269
270        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
271            Some(TemporalClusterGenerator::new())
272        } else {
273            None
274        };
275
276        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
277            Some(DifficultyCalculator::new())
278        } else {
279            None
280        };
281
282        let entity_aware_injector = if config.enhanced.context_aware_enabled {
283            Some(EntityAwareInjector::default())
284        } else {
285            None
286        };
287
288        let behavioral_baseline = if config.enhanced.context_aware_enabled
289            && config.enhanced.behavioral_baseline_config.enabled
290        {
291            Some(BehavioralBaseline::new(
292                config.enhanced.behavioral_baseline_config.clone(),
293            ))
294        } else {
295            None
296        };
297
298        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
299
300        Self {
301            config,
302            rng,
303            uuid_factory,
304            type_selector: AnomalyTypeSelector::new(),
305            strategies: StrategyCollection::default(),
306            cluster_manager,
307            entity_targeting,
308            document_anomaly_counts: HashMap::new(),
309            labels: Vec::new(),
310            stats: InjectorStats::default(),
311            scheme_advancer,
312            near_miss_generator,
313            near_miss_labels: Vec::new(),
314            co_occurrence_handler,
315            queued_co_occurrences: Vec::new(),
316            temporal_cluster_generator,
317            difficulty_calculator,
318            entity_aware_injector,
319            behavioral_baseline,
320            scheme_actions: Vec::new(),
321            difficulty_distribution: HashMap::new(),
322            vendor_contexts: HashMap::new(),
323            employee_contexts: HashMap::new(),
324            account_contexts: HashMap::new(),
325        }
326    }
327
328    /// Processes a batch of journal entries, potentially injecting anomalies.
329    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
330        debug!(
331            entry_count = entries.len(),
332            total_rate = self.config.rates.total_rate,
333            seed = self.config.seed,
334            "Injecting anomalies into journal entries"
335        );
336
337        let mut modified_documents = Vec::new();
338        let mut duplicates = Vec::new();
339
340        for entry in entries.iter_mut() {
341            self.stats.total_processed += 1;
342
343            // Update behavioral baseline if enabled
344            if let Some(ref mut baseline) = self.behavioral_baseline {
345                use super::context::Observation;
346                // Record the observation for baseline building
347                let entity_id = entry.header.created_by.clone();
348                let observation =
349                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
350                baseline.record_observation(&entity_id, observation);
351            }
352
353            // Check if we should process this entry
354            if !self.should_process(entry) {
355                continue;
356            }
357
358            // --- Check queued co-occurrences first ---
359            let entry_date = entry.posting_date();
360            let ready_indices: Vec<usize> = self
361                .queued_co_occurrences
362                .iter()
363                .enumerate()
364                .filter(|(_, q)| entry_date >= q.earliest_date)
365                .map(|(i, _)| i)
366                .collect();
367
368            if let Some(&idx) = ready_indices.first() {
369                let queued = self.queued_co_occurrences.remove(idx);
370                if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
371                    label = label.with_metadata("co_occurrence", "true");
372                    label = label.with_metadata("co_occurrence_description", &queued.description);
373                    if let Some(ref target) = queued.target_entity {
374                        label = label.with_related_entity(target);
375                        label = label.with_metadata("co_occurrence_target", target);
376                    }
377                    modified_documents.push(entry.document_number().clone());
378                    self.labels.push(label);
379                    self.stats.total_injected += 1;
380                }
381                continue; // This entry was used for a queued co-occurrence
382            }
383
384            // Calculate effective rate
385            let base_rate = self.config.rates.total_rate;
386
387            // Calculate entity-aware rate adjustment using context lookup maps
388            let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
389                let employee_id = &entry.header.created_by;
390                let first_account = entry
391                    .lines
392                    .first()
393                    .map(|l| l.gl_account.as_str())
394                    .unwrap_or("");
395                // Look up vendor from the entry's reference field (vendor ID convention)
396                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
397
398                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
399                let employee_ctx = self.employee_contexts.get(employee_id);
400                let account_ctx = self.account_contexts.get(first_account);
401
402                let multiplier =
403                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
404                (base_rate * multiplier).min(1.0)
405            } else {
406                // No entity-aware injector: fall back to context maps alone
407                self.calculate_context_rate_multiplier(entry) * base_rate
408            };
409
410            // --- Temporal clustering: boost rate during period-end windows ---
411            if let Some(ref tcg) = self.temporal_cluster_generator {
412                let temporal_multiplier = tcg
413                    .get_active_clusters(entry_date)
414                    .iter()
415                    .map(|c| c.rate_multiplier)
416                    .fold(1.0_f64, f64::max);
417                effective_rate = (effective_rate * temporal_multiplier).min(1.0);
418            }
419
420            // Determine if we inject an anomaly
421            if should_inject_anomaly(
422                effective_rate,
423                entry_date,
424                &self.config.patterns.temporal_pattern,
425                &mut self.rng,
426            ) {
427                // Check if this should be a near-miss instead
428                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
429                    // Record the transaction for near-duplicate detection
430                    let account = entry
431                        .lines
432                        .first()
433                        .map(|l| l.gl_account.clone())
434                        .unwrap_or_default();
435                    near_miss_gen.record_transaction(
436                        entry.document_number().clone(),
437                        entry_date,
438                        entry.total_debit(),
439                        &account,
440                        None,
441                    );
442
443                    // Check if this could be a near-miss
444                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
445                        entry.document_number().clone(),
446                        entry_date,
447                        entry.total_debit(),
448                        &account,
449                        None,
450                        &self.config.enhanced.approval_thresholds,
451                    ) {
452                        self.near_miss_labels.push(near_miss_label);
453                        continue; // Skip actual anomaly injection
454                    }
455                }
456
457                // Select anomaly category based on rates
458                let anomaly_type = self.select_anomaly_category();
459
460                // --- Entity targeting: select and track target entity ---
461                let target_entity = {
462                    let mut candidates: Vec<String> =
463                        self.vendor_contexts.keys().cloned().collect();
464                    candidates.extend(self.employee_contexts.keys().cloned());
465                    if candidates.is_empty() {
466                        // Fall back to entry's reference field as a candidate
467                        if let Some(ref r) = entry.header.reference {
468                            candidates.push(r.clone());
469                        }
470                    }
471                    self.entity_targeting
472                        .select_entity(&candidates, &mut self.rng)
473                };
474
475                // Apply the anomaly
476                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
477                    // Add entity targeting metadata
478                    if let Some(ref entity_id) = target_entity {
479                        label = label.with_metadata("entity_target", entity_id);
480                        label = label.with_related_entity(entity_id);
481                        label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
482                            target_type: "Entity".to_string(),
483                            target_id: entity_id.clone(),
484                        });
485                    }
486
487                    // Calculate detection difficulty if enabled
488                    if let Some(ref calculator) = self.difficulty_calculator {
489                        let difficulty = calculator.calculate(&label);
490
491                        // Store difficulty in metadata
492                        label =
493                            label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
494                        label = label.with_metadata(
495                            "difficulty_score",
496                            &difficulty.difficulty_score().to_string(),
497                        );
498
499                        // Update difficulty distribution
500                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
501                    }
502
503                    modified_documents.push(entry.document_number().clone());
504                    self.labels.push(label);
505                    self.stats.total_injected += 1;
506
507                    // --- Co-occurrence: queue correlated anomalies ---
508                    if let Some(ref co_occ) = self.co_occurrence_handler {
509                        let correlated =
510                            co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
511                        for result in correlated {
512                            self.queued_co_occurrences.push(QueuedAnomaly {
513                                anomaly_type: result.anomaly_type,
514                                target_entity: if result.same_entity {
515                                    target_entity.clone()
516                                } else {
517                                    None
518                                },
519                                earliest_date: entry_date
520                                    + chrono::Duration::days(i64::from(result.lag_days)),
521                                description: result.description,
522                            });
523                        }
524                    }
525                }
526
527                // Check for duplicate injection.
528                //
529                // v5.31 C1 Phase 7+: skip duplication on IC injector
530                // JEs. Cloning an IC JE produces two JEs on the same
531                // entity carrying the same ic_pair_id, which makes the
532                // group IC matcher see 3 observed sides for that pair
533                // (the partner + both copies on this side) and fail
534                // hard with "expected at most 2 (one seller + one
535                // buyer)". IC postings are deterministic by manifest
536                // contract; duplication is a fraud-typology signal that
537                // doesn't apply to them.
538                if self.config.allow_duplicates
539                    && entry.header.ic_pair_id.is_none()
540                    && matches!(
541                        self.labels.last().map(|l| &l.anomaly_type),
542                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
543                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
544                    )
545                {
546                    let dup_strategy = DuplicationStrategy::default();
547                    let duplicate =
548                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
549                    duplicates.push(duplicate);
550                }
551            }
552        }
553
554        // Count duplicates
555        let duplicates_created = duplicates.len();
556
557        // Build summary
558        let summary = AnomalySummary::from_anomalies(&self.labels);
559
560        InjectionBatchResult {
561            entries_processed: self.stats.total_processed,
562            anomalies_injected: self.stats.total_injected,
563            duplicates_created,
564            labels: self.labels.clone(),
565            summary,
566            modified_documents,
567            near_miss_labels: self.near_miss_labels.clone(),
568            scheme_actions: self.scheme_actions.clone(),
569            difficulty_distribution: self.difficulty_distribution.clone(),
570        }
571    }
572
573    /// Checks if an entry should be processed.
574    fn should_process(&mut self, entry: &JournalEntry) -> bool {
575        // Check company filter
576        if !self.config.target_companies.is_empty()
577            && !self
578                .config
579                .target_companies
580                .iter()
581                .any(|c| c == entry.company_code())
582        {
583            self.stats.skipped_company += 1;
584            return false;
585        }
586
587        // Check date range
588        if let Some((start, end)) = self.config.date_range {
589            if entry.posting_date() < start || entry.posting_date() > end {
590                self.stats.skipped_date += 1;
591                return false;
592            }
593        }
594
595        // Check max anomalies per document
596        let current_count = self
597            .document_anomaly_counts
598            .get(&entry.document_number())
599            .copied()
600            .unwrap_or(0);
601        if current_count >= self.config.max_anomalies_per_document {
602            self.stats.skipped_max_per_doc += 1;
603            return false;
604        }
605
606        true
607    }
608
609    /// Selects an anomaly category based on configured rates.
610    fn select_anomaly_category(&mut self) -> AnomalyType {
611        let r = self.rng.random::<f64>();
612        let rates = &self.config.rates;
613
614        let mut cumulative = 0.0;
615
616        cumulative += rates.fraud_rate;
617        if r < cumulative {
618            return self.type_selector.select_fraud(&mut self.rng);
619        }
620
621        cumulative += rates.error_rate;
622        if r < cumulative {
623            return self.type_selector.select_error(&mut self.rng);
624        }
625
626        cumulative += rates.process_issue_rate;
627        if r < cumulative {
628            return self.type_selector.select_process_issue(&mut self.rng);
629        }
630
631        cumulative += rates.statistical_rate;
632        if r < cumulative {
633            return self.type_selector.select_statistical(&mut self.rng);
634        }
635
636        self.type_selector.select_relational(&mut self.rng)
637    }
638
639    /// Injects an anomaly into an entry.
640    fn inject_anomaly(
641        &mut self,
642        entry: &mut JournalEntry,
643        anomaly_type: AnomalyType,
644    ) -> Option<LabeledAnomaly> {
645        // Check if strategy can be applied
646        if !self.strategies.can_apply(entry, &anomaly_type) {
647            return None;
648        }
649
650        // Apply the strategy
651        let result = self
652            .strategies
653            .apply_strategy(entry, &anomaly_type, &mut self.rng);
654
655        if !result.success {
656            return None;
657        }
658
659        // Update document anomaly count
660        *self
661            .document_anomaly_counts
662            .entry(entry.document_number().clone())
663            .or_insert(0) += 1;
664
665        // Update statistics
666        let category = anomaly_type.category().to_string();
667        let type_name = anomaly_type.type_name();
668
669        *self.stats.by_category.entry(category).or_insert(0) += 1;
670        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
671        *self
672            .stats
673            .by_company
674            .entry(entry.company_code().to_string())
675            .or_insert(0) += 1;
676
677        // Generate label
678        if self.config.generate_labels {
679            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
680
681            // Update entry header with anomaly tracking fields
682            entry.header.is_anomaly = true;
683            entry.header.anomaly_id = Some(anomaly_id.clone());
684            entry.header.anomaly_type = Some(type_name.clone());
685
686            // Also set fraud flag if this is a fraud anomaly
687            let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
688                Vec::new();
689            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
690                entry.header.is_fraud = true;
691                if let AnomalyType::Fraud(ref ft) = anomaly_type {
692                    entry.header.fraud_type = Some(*ft);
693                }
694                // Apply behavioral bias so forensic signals (weekend posting,
695                // round dollars, off-hours, post-close adjustments) are
696                // learnable from fraud-labeled data. The returned list of
697                // biases that fired is used below to emit secondary
698                // ProcessIssue labels.
699                secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
700            }
701
702            let mut label = LabeledAnomaly::new(
703                anomaly_id,
704                anomaly_type.clone(),
705                entry.document_number().clone(),
706                "JE".to_string(),
707                entry.company_code().to_string(),
708                entry.posting_date(),
709            )
710            .with_description(&result.description)
711            .with_injection_strategy(&type_name);
712
713            // Add causal reason with injection context (provenance tracking)
714            let causal_reason = AnomalyCausalReason::RandomRate {
715                base_rate: self.config.rates.total_rate,
716            };
717            label = label.with_causal_reason(causal_reason);
718
719            // Add entity context metadata if contexts are populated
720            let context_multiplier = self.calculate_context_rate_multiplier(entry);
721            if (context_multiplier - 1.0).abs() > f64::EPSILON {
722                label = label.with_metadata(
723                    "entity_context_multiplier",
724                    &format!("{context_multiplier:.3}"),
725                );
726                label = label.with_metadata(
727                    "effective_rate",
728                    &format!(
729                        "{:.6}",
730                        (self.config.rates.total_rate * context_multiplier).min(1.0)
731                    ),
732                );
733            }
734
735            // Add monetary impact
736            if let Some(impact) = result.monetary_impact {
737                label = label.with_monetary_impact(impact);
738            }
739
740            // Add related entities
741            for entity in &result.related_entities {
742                label = label.with_related_entity(entity);
743            }
744
745            // Add metadata
746            for (key, value) in &result.metadata {
747                label = label.with_metadata(key, value);
748            }
749
750            // Assign cluster and update causal reason if in cluster
751            if let Some(cluster_id) =
752                self.cluster_manager
753                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
754            {
755                label = label.with_cluster(&cluster_id);
756                // Update causal reason to reflect cluster membership
757                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
758                    cluster_id: cluster_id.clone(),
759                });
760            }
761
762            // Secondary ProcessIssue labels for each behavioural bias that
763            // fired — lets auditors query the labels stream for specific
764            // forensic patterns (WeekendPosting / AfterHoursPosting /
765            // PostClosePosting) rather than reconstructing them from header
766            // flags. `stats.total_injected` counts injection acts (primary
767            // labels); `labels.len()` may exceed it due to these children.
768            for issue_type in &secondary_process_issues {
769                let child_id = format!("ANO{:08}", self.labels.len() + 1);
770                let child = LabeledAnomaly::new(
771                    child_id,
772                    AnomalyType::ProcessIssue(*issue_type),
773                    entry.document_number().clone(),
774                    "JE".to_string(),
775                    entry.company_code().to_string(),
776                    entry.posting_date(),
777                )
778                .with_description("Forensic pattern from fraud behavioral bias")
779                .with_injection_strategy("behavioral_bias")
780                .with_parent_anomaly(&label.anomaly_id);
781                self.labels.push(child);
782            }
783
784            return Some(label);
785        }
786
787        None
788    }
789
    /// Injects a specific anomaly type into an entry.
    ///
    /// Thin public wrapper: `entry` and `anomaly_type` are forwarded unchanged
    /// to `inject_anomaly`, and its result is returned as-is (`None` when that
    /// method does not produce a label).
    pub fn inject_specific(
        &mut self,
        entry: &mut JournalEntry,
        anomaly_type: AnomalyType,
    ) -> Option<LabeledAnomaly> {
        self.inject_anomaly(entry, anomaly_type)
    }
798
799    /// Creates a self-approval anomaly.
800    pub fn create_self_approval(
801        &mut self,
802        entry: &mut JournalEntry,
803        user_id: &str,
804    ) -> Option<LabeledAnomaly> {
805        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
806
807        let label = LabeledAnomaly::new(
808            format!("ANO{:08}", self.labels.len() + 1),
809            anomaly_type,
810            entry.document_number().clone(),
811            "JE".to_string(),
812            entry.company_code().to_string(),
813            entry.posting_date(),
814        )
815        .with_description(&format!("User {user_id} approved their own transaction"))
816        .with_related_entity(user_id)
817        .with_injection_strategy("ManualSelfApproval")
818        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
819            target_type: "User".to_string(),
820            target_id: user_id.to_string(),
821        });
822
823        // Set entry header anomaly tracking fields
824        entry.header.is_anomaly = true;
825        entry.header.is_fraud = true;
826        entry.header.anomaly_id = Some(label.anomaly_id.clone());
827        entry.header.anomaly_type = Some("SelfApproval".to_string());
828        entry.header.fraud_type = Some(FraudType::SelfApproval);
829
830        // Set approver = requester
831        entry.header.created_by = user_id.to_string();
832
833        // Apply canonical behavioral biases so self-approval frauds have
834        // the same forensic lift as other fraud paths.
835        self.apply_fraud_behavioral_bias(entry);
836
837        self.labels.push(label.clone());
838        Some(label)
839    }
840
841    /// Creates a segregation of duties violation.
842    pub fn create_sod_violation(
843        &mut self,
844        entry: &mut JournalEntry,
845        user_id: &str,
846        conflicting_duties: (&str, &str),
847    ) -> Option<LabeledAnomaly> {
848        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
849
850        let label = LabeledAnomaly::new(
851            format!("ANO{:08}", self.labels.len() + 1),
852            anomaly_type,
853            entry.document_number().clone(),
854            "JE".to_string(),
855            entry.company_code().to_string(),
856            entry.posting_date(),
857        )
858        .with_description(&format!(
859            "User {} performed conflicting duties: {} and {}",
860            user_id, conflicting_duties.0, conflicting_duties.1
861        ))
862        .with_related_entity(user_id)
863        .with_metadata("duty1", conflicting_duties.0)
864        .with_metadata("duty2", conflicting_duties.1)
865        .with_injection_strategy("ManualSoDViolation")
866        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
867            target_type: "User".to_string(),
868            target_id: user_id.to_string(),
869        });
870
871        // Set entry header anomaly tracking fields
872        entry.header.is_anomaly = true;
873        entry.header.is_fraud = true;
874        entry.header.anomaly_id = Some(label.anomaly_id.clone());
875        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
876        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
877
878        // Apply canonical behavioral biases.
879        self.apply_fraud_behavioral_bias(entry);
880
881        self.labels.push(label.clone());
882        Some(label)
883    }
884
885    /// Creates an intercompany mismatch anomaly.
886    pub fn create_ic_mismatch(
887        &mut self,
888        entry: &mut JournalEntry,
889        matching_company: &str,
890        expected_amount: Decimal,
891        actual_amount: Decimal,
892    ) -> Option<LabeledAnomaly> {
893        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
894
895        let label = LabeledAnomaly::new(
896            format!("ANO{:08}", self.labels.len() + 1),
897            anomaly_type,
898            entry.document_number().clone(),
899            "JE".to_string(),
900            entry.company_code().to_string(),
901            entry.posting_date(),
902        )
903        .with_description(&format!(
904            "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
905        ))
906        .with_related_entity(matching_company)
907        .with_monetary_impact(actual_amount - expected_amount)
908        .with_metadata("expected_amount", &expected_amount.to_string())
909        .with_metadata("actual_amount", &actual_amount.to_string())
910        .with_injection_strategy("ManualICMismatch")
911        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
912            target_type: "Intercompany".to_string(),
913            target_id: matching_company.to_string(),
914        });
915
916        // Set entry header anomaly tracking fields
917        entry.header.is_anomaly = true;
918        entry.header.anomaly_id = Some(label.anomaly_id.clone());
919        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
920
921        self.labels.push(label.clone());
922        Some(label)
923    }
924
    /// Returns all generated labels.
    ///
    /// Borrows the injector's internal label list as a slice; nothing is
    /// copied. The list holds every label pushed so far, in insertion order.
    pub fn get_labels(&self) -> &[LabeledAnomaly] {
        &self.labels
    }
929
    /// Returns the anomaly summary.
    ///
    /// The summary is built on every call by passing the currently recorded
    /// labels to `AnomalySummary::from_anomalies`; it is not cached.
    pub fn get_summary(&self) -> AnomalySummary {
        AnomalySummary::from_anomalies(&self.labels)
    }
934
    /// Returns injection statistics.
    ///
    /// Borrows the injector's running `InjectorStats`; the counters reflect
    /// everything accumulated since construction or the last `reset`.
    pub fn get_stats(&self) -> &InjectorStats {
        &self.stats
    }
939
940    /// Clears all labels and resets statistics.
941    pub fn reset(&mut self) {
942        self.labels.clear();
943        self.document_anomaly_counts.clear();
944        self.stats = InjectorStats::default();
945        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
946
947        // Reset enhanced components
948        self.near_miss_labels.clear();
949        self.scheme_actions.clear();
950        self.difficulty_distribution.clear();
951
952        if let Some(ref mut baseline) = self.behavioral_baseline {
953            *baseline =
954                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
955        }
956    }
957
    /// Returns the number of clusters created.
    ///
    /// Delegates to the cluster manager's own `cluster_count`.
    pub fn cluster_count(&self) -> usize {
        self.cluster_manager.cluster_count()
    }
962
963    // =========================================================================
964    // Entity Context API
965    // =========================================================================
966
967    /// Sets entity contexts for risk-adjusted anomaly injection.
968    ///
969    /// When entity contexts are provided, the injector adjusts anomaly injection
970    /// rates based on entity risk factors. Entries involving high-risk vendors,
971    /// new employees, or sensitive accounts will have higher effective injection
972    /// rates.
973    ///
974    /// Pass empty HashMaps to clear previously set contexts.
975    pub fn set_entity_contexts(
976        &mut self,
977        vendors: HashMap<String, VendorContext>,
978        employees: HashMap<String, EmployeeContext>,
979        accounts: HashMap<String, AccountContext>,
980    ) {
981        self.vendor_contexts = vendors;
982        self.employee_contexts = employees;
983        self.account_contexts = accounts;
984    }
985
    /// Returns a reference to the vendor context map.
    ///
    /// This is the map most recently stored by `set_entity_contexts`.
    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
        &self.vendor_contexts
    }
990
    /// Returns a reference to the employee context map.
    ///
    /// This is the map most recently stored by `set_entity_contexts`.
    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
        &self.employee_contexts
    }
995
    /// Returns a reference to the account context map.
    ///
    /// This is the map most recently stored by `set_entity_contexts`.
    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
        &self.account_contexts
    }
1000
1001    /// Calculates a rate multiplier from the entity context maps alone (no
1002    /// `EntityAwareInjector` needed). This provides a lightweight fallback
1003    /// when context-aware injection is not fully enabled but context maps
1004    /// have been populated.
1005    ///
1006    /// The multiplier is the product of individual entity risk factors found
1007    /// in the context maps for the given journal entry. If no contexts match,
1008    /// returns 1.0 (no adjustment).
1009    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1010        if self.vendor_contexts.is_empty()
1011            && self.employee_contexts.is_empty()
1012            && self.account_contexts.is_empty()
1013        {
1014            return 1.0;
1015        }
1016
1017        let mut multiplier = 1.0;
1018
1019        // Vendor lookup via reference field
1020        if let Some(ref vendor_ref) = entry.header.reference {
1021            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1022                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
1023                if ctx.is_new {
1024                    multiplier *= 2.0;
1025                }
1026                if ctx.is_dormant_reactivation {
1027                    multiplier *= 1.5;
1028                }
1029            }
1030        }
1031
1032        // Employee lookup via created_by
1033        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1034            if ctx.is_new {
1035                multiplier *= 1.5;
1036            }
1037            if ctx.is_volume_fatigued {
1038                multiplier *= 1.3;
1039            }
1040            if ctx.is_overtime {
1041                multiplier *= 1.2;
1042            }
1043        }
1044
1045        // Account lookup via first line's GL account
1046        if let Some(first_line) = entry.lines.first() {
1047            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1048                if ctx.is_high_risk {
1049                    multiplier *= 2.0;
1050                }
1051            }
1052        }
1053
1054        multiplier
1055    }
1056
1057    /// Apply behavioral bias to a fraud-labeled entry so canonical forensic
1058    /// signals (weekend posting, round dollars, off-hours timestamps,
1059    /// post-close adjustments) have measurable lift over legitimate data.
1060    ///
1061    /// Delegates to [`datasynth_core::fraud_bias::apply_fraud_behavioral_bias`]
1062    /// and updates per-bias counters on `stats`. Returns the [`ProcessIssueType`]
1063    /// variants corresponding to each bias that fired so callers can emit
1064    /// secondary labels.
1065    fn apply_fraud_behavioral_bias(
1066        &mut self,
1067        entry: &mut JournalEntry,
1068    ) -> Vec<datasynth_core::models::ProcessIssueType> {
1069        use datasynth_core::models::ProcessIssueType;
1070
1071        let cfg = self.config.enhanced.fraud_behavioral_bias;
1072        let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1073        for issue in &fired {
1074            match issue {
1075                ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1076                ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1077                ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1078                _ => {}
1079            }
1080        }
1081        // `round_dollar_bias` doesn't emit a ProcessIssueType (no canonical
1082        // process-issue label for it) — detect by matching the max line
1083        // amount against the set of round targets. If the bias fired,
1084        // the max amount is exactly one of [1K, 5K, 10K, 25K, 50K, 100K].
1085        if cfg.round_dollar_bias > 0.0 {
1086            const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1087            let max_amt: Decimal = entry
1088                .lines
1089                .iter()
1090                .map(|l| l.debit_amount.max(l.credit_amount))
1091                .max()
1092                .unwrap_or(Decimal::ZERO);
1093            if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1094                self.stats.fraud_round_dollar_bias_applied += 1;
1095            }
1096        }
1097        fired
1098    }
1099
1100    // =========================================================================
1101    // Enhanced Features API (v0.3.0+)
1102    // =========================================================================
1103
1104    /// Advances all active fraud schemes by one time step.
1105    ///
1106    /// Call this method once per simulated day to generate scheme actions.
1107    /// Returns the scheme actions generated for this date.
1108    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1109        if let Some(ref mut advancer) = self.scheme_advancer {
1110            let context = SchemeContext::new(date, company_code);
1111            let actions = advancer.advance_all(&context);
1112            self.scheme_actions.extend(actions.clone());
1113            actions
1114        } else {
1115            Vec::new()
1116        }
1117    }
1118
1119    /// Potentially starts a new fraud scheme based on probabilities.
1120    ///
1121    /// Call this method periodically (e.g., once per period) to allow new
1122    /// schemes to start based on configured probabilities.
1123    /// Returns the scheme ID if a scheme was started.
1124    pub fn maybe_start_scheme(
1125        &mut self,
1126        date: NaiveDate,
1127        company_code: &str,
1128        available_users: Vec<String>,
1129        available_accounts: Vec<String>,
1130        available_counterparties: Vec<String>,
1131    ) -> Option<uuid::Uuid> {
1132        if let Some(ref mut advancer) = self.scheme_advancer {
1133            let mut context = SchemeContext::new(date, company_code);
1134            context.available_users = available_users;
1135            context.available_accounts = available_accounts;
1136            context.available_counterparties = available_counterparties;
1137
1138            advancer.maybe_start_scheme(&context)
1139        } else {
1140            None
1141        }
1142    }
1143
    /// Returns all near-miss labels generated.
    ///
    /// Borrows the injector's near-miss label list as a slice; the list is
    /// emptied by `reset`.
    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
        &self.near_miss_labels
    }
1148
    /// Returns all scheme actions generated.
    ///
    /// Borrows the history that `advance_schemes` appends to; the history is
    /// emptied by `reset`.
    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
        &self.scheme_actions
    }
1153
    /// Returns the detection difficulty distribution.
    ///
    /// The map is keyed by `AnomalyDetectionDifficulty` with a `usize` count
    /// per level; it is emptied by `reset`.
    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
        &self.difficulty_distribution
    }
1158
1159    /// Checks for behavioral deviations for an entity with an observation.
1160    pub fn check_behavioral_deviations(
1161        &self,
1162        entity_id: &str,
1163        observation: &super::context::Observation,
1164    ) -> Vec<super::context::BehavioralDeviation> {
1165        if let Some(ref baseline) = self.behavioral_baseline {
1166            baseline.check_deviation(entity_id, observation)
1167        } else {
1168            Vec::new()
1169        }
1170    }
1171
1172    /// Gets the baseline for an entity.
1173    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1174        if let Some(ref baseline) = self.behavioral_baseline {
1175            baseline.get_baseline(entity_id)
1176        } else {
1177            None
1178        }
1179    }
1180
1181    /// Returns the number of active schemes.
1182    pub fn active_scheme_count(&self) -> usize {
1183        if let Some(ref advancer) = self.scheme_advancer {
1184            advancer.active_scheme_count()
1185        } else {
1186            0
1187        }
1188    }
1189
1190    /// Returns whether enhanced features are enabled.
1191    pub fn has_enhanced_features(&self) -> bool {
1192        self.scheme_advancer.is_some()
1193            || self.near_miss_generator.is_some()
1194            || self.difficulty_calculator.is_some()
1195            || self.entity_aware_injector.is_some()
1196    }
1197}
1198
/// Builder for AnomalyInjectorConfig.
///
/// Wraps a configuration value that the `with_*` methods modify in place;
/// `build` hands the finished configuration back to the caller.
pub struct AnomalyInjectorConfigBuilder {
    /// Configuration under construction.
    config: AnomalyInjectorConfig,
}
1203
1204impl AnomalyInjectorConfigBuilder {
1205    /// Creates a new builder with default configuration.
1206    pub fn new() -> Self {
1207        Self {
1208            config: AnomalyInjectorConfig::default(),
1209        }
1210    }
1211
1212    /// Sets the total anomaly rate.
1213    pub fn with_total_rate(mut self, rate: f64) -> Self {
1214        self.config.rates.total_rate = rate;
1215        self
1216    }
1217
1218    /// Sets the fraud rate (proportion of anomalies).
1219    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1220        self.config.rates.fraud_rate = rate;
1221        self
1222    }
1223
1224    /// Sets the error rate (proportion of anomalies).
1225    pub fn with_error_rate(mut self, rate: f64) -> Self {
1226        self.config.rates.error_rate = rate;
1227        self
1228    }
1229
1230    /// Sets the random seed.
1231    pub fn with_seed(mut self, seed: u64) -> Self {
1232        self.config.seed = seed;
1233        self
1234    }
1235
1236    /// Sets the temporal pattern.
1237    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1238        self.config.patterns.temporal_pattern = pattern;
1239        self
1240    }
1241
1242    /// Enables or disables label generation.
1243    pub fn with_labels(mut self, generate: bool) -> Self {
1244        self.config.generate_labels = generate;
1245        self
1246    }
1247
1248    /// Sets target companies.
1249    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1250        self.config.target_companies = companies;
1251        self
1252    }
1253
1254    /// Sets the date range.
1255    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1256        self.config.date_range = Some((start, end));
1257        self
1258    }
1259
1260    // =========================================================================
1261    // Enhanced Features Configuration (v0.3.0+)
1262    // =========================================================================
1263
1264    /// Enables multi-stage fraud scheme generation.
1265    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1266        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1267        self.config.enhanced.scheme_probability = probability;
1268        self
1269    }
1270
1271    /// Enables near-miss generation.
1272    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1273        self.config.enhanced.near_miss_enabled = enabled;
1274        self.config.enhanced.near_miss_proportion = proportion;
1275        self
1276    }
1277
1278    /// Sets approval thresholds for threshold-proximity near-misses.
1279    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1280        self.config.enhanced.approval_thresholds = thresholds;
1281        self
1282    }
1283
1284    /// Enables correlated anomaly injection.
1285    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1286        self.config.enhanced.correlated_injection_enabled = enabled;
1287        self
1288    }
1289
1290    /// Enables temporal clustering (period-end spikes).
1291    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1292        self.config.enhanced.temporal_clustering_enabled = enabled;
1293        self.config.enhanced.period_end_multiplier = multiplier;
1294        self
1295    }
1296
1297    /// Enables detection difficulty classification.
1298    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1299        self.config.enhanced.difficulty_classification_enabled = enabled;
1300        self
1301    }
1302
1303    /// Enables context-aware injection.
1304    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1305        self.config.enhanced.context_aware_enabled = enabled;
1306        self
1307    }
1308
1309    /// Sets behavioral baseline configuration.
1310    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1311        self.config.enhanced.behavioral_baseline_config = config;
1312        self
1313    }
1314
1315    /// Enables all enhanced features with default settings.
1316    pub fn with_all_enhanced_features(mut self) -> Self {
1317        self.config.enhanced.multi_stage_schemes_enabled = true;
1318        self.config.enhanced.scheme_probability = 0.02;
1319        self.config.enhanced.correlated_injection_enabled = true;
1320        self.config.enhanced.temporal_clustering_enabled = true;
1321        self.config.enhanced.period_end_multiplier = 2.5;
1322        self.config.enhanced.near_miss_enabled = true;
1323        self.config.enhanced.near_miss_proportion = 0.30;
1324        self.config.enhanced.difficulty_classification_enabled = true;
1325        self.config.enhanced.context_aware_enabled = true;
1326        self.config.enhanced.behavioral_baseline_config.enabled = true;
1327        self
1328    }
1329
1330    /// Builds the configuration.
1331    pub fn build(self) -> AnomalyInjectorConfig {
1332        self.config
1333    }
1334}
1335
/// `Default` is equivalent to [`AnomalyInjectorConfigBuilder::new`].
impl Default for AnomalyInjectorConfigBuilder {
    fn default() -> Self {
        Self::new()
    }
}
1341
1342#[cfg(test)]
1343mod tests {
1344    use super::*;
1345    use chrono::NaiveDate;
1346    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1347    use rust_decimal_macros::dec;
1348
1349    fn create_test_entry(doc_num: &str) -> JournalEntry {
1350        let mut entry = JournalEntry::new_simple(
1351            doc_num.to_string(),
1352            "1000".to_string(),
1353            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1354            "Test Entry".to_string(),
1355        );
1356
1357        entry.add_line(JournalEntryLine {
1358            line_number: 1,
1359            gl_account: "5000".to_string(),
1360            debit_amount: dec!(1000),
1361            ..Default::default()
1362        });
1363
1364        entry.add_line(JournalEntryLine {
1365            line_number: 2,
1366            gl_account: "1000".to_string(),
1367            credit_amount: dec!(1000),
1368            ..Default::default()
1369        });
1370
1371        entry
1372    }
1373
    // Smoke test: a seeded injector with a 50% total rate, run over 100
    // entries, must inject at least one anomaly and produce labels.
    #[test]
    fn test_anomaly_injector_basic() {
        let config = AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5) // High rate for testing
            .with_seed(42)
            .build();

        let mut injector = AnomalyInjector::new(config);

        let mut entries: Vec<_> = (0..100)
            .map(|i| create_test_entry(&format!("JE{:04}", i)))
            .collect();

        let result = injector.process_entries(&mut entries);

        // With 50% rate, we should have some anomalies
        assert!(result.anomalies_injected > 0);
        assert!(!result.labels.is_empty());
        // `anomalies_injected` counts primary injection acts. `labels` also
        // includes secondary `ProcessIssue` labels emitted for each fraud
        // behavioural bias that fires, so `labels.len()` is always ≥ the
        // primary count.
        assert!(result.labels.len() >= result.anomalies_injected);
    }
1398
    // Forcing a specific anomaly type through `inject_specific` must yield a
    // label that refers back to the entry it was injected into.
    #[test]
    fn test_specific_injection() {
        let config = AnomalyInjectorConfig::default();
        let mut injector = AnomalyInjector::new(config);

        let mut entry = create_test_entry("JE001");
        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);

        let label = injector.inject_specific(&mut entry, anomaly_type);

        assert!(label.is_some());
        let label = label.unwrap();
        // document_id is the UUID string from the journal entry header
        assert!(!label.document_id.is_empty());
        assert_eq!(label.document_id, entry.document_number());
    }
1415
    // `create_self_approval` must return a SelfApproval fraud label that
    // lists the acting user among its related entities.
    #[test]
    fn test_self_approval_injection() {
        let config = AnomalyInjectorConfig::default();
        let mut injector = AnomalyInjector::new(config);

        let mut entry = create_test_entry("JE001");
        let label = injector.create_self_approval(&mut entry, "USER001");

        assert!(label.is_some());
        let label = label.unwrap();
        // The label type must be exactly Fraud(SelfApproval).
        assert!(matches!(
            label.anomaly_type,
            AnomalyType::Fraud(FraudType::SelfApproval)
        ));
        assert!(label.related_entities.contains(&"USER001".to_string()));
    }
1432
    // Entries belonging to a company that is not in `target_companies` must
    // never receive an anomaly, even at a 100% total rate.
    #[test]
    fn test_company_filtering() {
        let config = AnomalyInjectorConfigBuilder::new()
            .with_total_rate(1.0) // Inject all
            .with_target_companies(vec!["2000".to_string()])
            .build();

        let mut injector = AnomalyInjector::new(config);

        let mut entries = vec![
            create_test_entry("JE001"), // company 1000
            create_test_entry("JE002"), // company 1000
        ];

        let result = injector.process_entries(&mut entries);

        // No anomalies because entries are in company 1000, not 2000
        assert_eq!(result.anomalies_injected, 0);
    }
1452
1453    // =========================================================================
1454    // Entity Context Tests
1455    // =========================================================================
1456
1457    /// Helper to create a test entry with specific vendor reference and employee.
1458    fn create_test_entry_with_context(
1459        doc_num: &str,
1460        vendor_ref: Option<&str>,
1461        employee_id: &str,
1462        gl_account: &str,
1463    ) -> JournalEntry {
1464        let mut entry = JournalEntry::new_simple(
1465            doc_num.to_string(),
1466            "1000".to_string(),
1467            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1468            "Test Entry".to_string(),
1469        );
1470
1471        entry.header.reference = vendor_ref.map(|v| v.to_string());
1472        entry.header.created_by = employee_id.to_string();
1473
1474        entry.add_line(JournalEntryLine {
1475            line_number: 1,
1476            gl_account: gl_account.to_string(),
1477            debit_amount: dec!(1000),
1478            ..Default::default()
1479        });
1480
1481        entry.add_line(JournalEntryLine {
1482            line_number: 2,
1483            gl_account: "1000".to_string(),
1484            credit_amount: dec!(1000),
1485            ..Default::default()
1486        });
1487
1488        entry
1489    }
1490
    // The three context maps start empty and, after `set_entity_contexts`,
    // hold exactly the entries that were passed in.
    #[test]
    fn test_set_entity_contexts() {
        let config = AnomalyInjectorConfig::default();
        let mut injector = AnomalyInjector::new(config);

        // Initially empty
        assert!(injector.vendor_contexts().is_empty());
        assert!(injector.employee_contexts().is_empty());
        assert!(injector.account_contexts().is_empty());

        // Set contexts
        let mut vendors = HashMap::new();
        vendors.insert(
            "V001".to_string(),
            VendorContext {
                vendor_id: "V001".to_string(),
                is_new: true,
                ..Default::default()
            },
        );

        let mut employees = HashMap::new();
        employees.insert(
            "EMP001".to_string(),
            EmployeeContext {
                employee_id: "EMP001".to_string(),
                is_new: true,
                ..Default::default()
            },
        );

        let mut accounts = HashMap::new();
        accounts.insert(
            "8100".to_string(),
            AccountContext {
                account_code: "8100".to_string(),
                is_high_risk: true,
                ..Default::default()
            },
        );

        injector.set_entity_contexts(vendors, employees, accounts);

        // One entry per map, retrievable under the key it was inserted with.
        assert_eq!(injector.vendor_contexts().len(), 1);
        assert_eq!(injector.employee_contexts().len(), 1);
        assert_eq!(injector.account_contexts().len(), 1);
        assert!(injector.vendor_contexts().contains_key("V001"));
        assert!(injector.employee_contexts().contains_key("EMP001"));
        assert!(injector.account_contexts().contains_key("8100"));
    }
1541
    // Baseline for the entity-context tests: with no contexts installed the
    // observed injection rate should stay near the configured total rate.
    #[test]
    fn test_default_behavior_no_contexts() {
        // Without any entity contexts, the base rate is used unchanged.
        let config = AnomalyInjectorConfigBuilder::new()
            .with_total_rate(0.5)
            .with_seed(42)
            .build();

        let mut injector = AnomalyInjector::new(config);

        let mut entries: Vec<_> = (0..200)
            .map(|i| create_test_entry(&format!("JE{:04}", i)))
            .collect();

        let result = injector.process_entries(&mut entries);

        // With 50% base rate and no context, expect roughly 50% injection
        // Allow wide margin for randomness
        assert!(result.anomalies_injected > 0);
        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
        assert!(
            rate > 0.2 && rate < 0.8,
            "Expected ~50% rate, got {:.2}%",
            rate * 100.0
        );
    }
1568
1569    #[test]
1570    fn test_entity_context_increases_injection_rate() {
1571        // With high-risk entity contexts, the effective rate should be higher
1572        // than the base rate, leading to more anomalies being injected.
1573        let base_rate = 0.10; // Low base rate
1574
1575        // Run without contexts
1576        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1577            .with_total_rate(base_rate)
1578            .with_seed(123)
1579            .build();
1580
1581        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1582
1583        let mut entries_no_ctx: Vec<_> = (0..500)
1584            .map(|i| {
1585                create_test_entry_with_context(
1586                    &format!("JE{:04}", i),
1587                    Some("V001"),
1588                    "EMP001",
1589                    "8100",
1590                )
1591            })
1592            .collect();
1593
1594        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1595
1596        // Run with high-risk contexts (same seed for comparable randomness)
1597        let config_ctx = AnomalyInjectorConfigBuilder::new()
1598            .with_total_rate(base_rate)
1599            .with_seed(123)
1600            .build();
1601
1602        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1603
1604        // Set up high-risk contexts
1605        let mut vendors = HashMap::new();
1606        vendors.insert(
1607            "V001".to_string(),
1608            VendorContext {
1609                vendor_id: "V001".to_string(),
1610                is_new: true,                  // 2.0x multiplier
1611                is_dormant_reactivation: true, // 1.5x multiplier
1612                ..Default::default()
1613            },
1614        );
1615
1616        let mut employees = HashMap::new();
1617        employees.insert(
1618            "EMP001".to_string(),
1619            EmployeeContext {
1620                employee_id: "EMP001".to_string(),
1621                is_new: true, // 1.5x multiplier
1622                ..Default::default()
1623            },
1624        );
1625
1626        let mut accounts = HashMap::new();
1627        accounts.insert(
1628            "8100".to_string(),
1629            AccountContext {
1630                account_code: "8100".to_string(),
1631                is_high_risk: true, // 2.0x multiplier
1632                ..Default::default()
1633            },
1634        );
1635
1636        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1637
1638        let mut entries_ctx: Vec<_> = (0..500)
1639            .map(|i| {
1640                create_test_entry_with_context(
1641                    &format!("JE{:04}", i),
1642                    Some("V001"),
1643                    "EMP001",
1644                    "8100",
1645                )
1646            })
1647            .collect();
1648
1649        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1650
1651        // The context-enhanced run should inject more anomalies
1652        assert!(
1653            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1654            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1655            result_ctx.anomalies_injected,
1656            result_no_ctx.anomalies_injected,
1657        );
1658    }
1659
1660    #[test]
1661    fn test_risk_score_multiplication() {
1662        // Verify the calculate_context_rate_multiplier produces correct values.
1663        let config = AnomalyInjectorConfig::default();
1664        let mut injector = AnomalyInjector::new(config);
1665
1666        // No contexts: multiplier should be 1.0
1667        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1668        assert!(
1669            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1670        );
1671
1672        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1673        let mut vendors = HashMap::new();
1674        vendors.insert(
1675            "V_RISKY".to_string(),
1676            VendorContext {
1677                vendor_id: "V_RISKY".to_string(),
1678                is_new: true,
1679                ..Default::default()
1680            },
1681        );
1682
1683        let mut accounts = HashMap::new();
1684        accounts.insert(
1685            "9000".to_string(),
1686            AccountContext {
1687                account_code: "9000".to_string(),
1688                is_high_risk: true,
1689                ..Default::default()
1690            },
1691        );
1692
1693        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1694
1695        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1696        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1697        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1698        assert!(
1699            (multiplier - 4.0).abs() < f64::EPSILON,
1700            "Expected 4.0x multiplier, got {}",
1701            multiplier,
1702        );
1703
1704        // Entry with only vendor context match (no account match)
1705        let entry_vendor_only =
1706            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1707        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1708        assert!(
1709            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1710            "Expected 2.0x multiplier (vendor only), got {}",
1711            multiplier_vendor,
1712        );
1713
1714        // Entry with no matching contexts
1715        let entry_no_match =
1716            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1717        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1718        assert!(
1719            (multiplier_none - 1.0).abs() < f64::EPSILON,
1720            "Expected 1.0x multiplier (no match), got {}",
1721            multiplier_none,
1722        );
1723    }
1724
1725    #[test]
1726    fn test_employee_context_multiplier() {
1727        let config = AnomalyInjectorConfig::default();
1728        let mut injector = AnomalyInjector::new(config);
1729
1730        let mut employees = HashMap::new();
1731        employees.insert(
1732            "EMP_NEW".to_string(),
1733            EmployeeContext {
1734                employee_id: "EMP_NEW".to_string(),
1735                is_new: true,             // 1.5x
1736                is_volume_fatigued: true, // 1.3x
1737                is_overtime: true,        // 1.2x
1738                ..Default::default()
1739            },
1740        );
1741
1742        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1743
1744        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1745        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1746
1747        // 1.5 * 1.3 * 1.2 = 2.34
1748        let expected = 1.5 * 1.3 * 1.2;
1749        assert!(
1750            (multiplier - expected).abs() < 0.01,
1751            "Expected {:.3}x multiplier, got {:.3}",
1752            expected,
1753            multiplier,
1754        );
1755    }
1756
1757    #[test]
1758    fn test_entity_contexts_persist_across_reset() {
1759        let config = AnomalyInjectorConfig::default();
1760        let mut injector = AnomalyInjector::new(config);
1761
1762        let mut vendors = HashMap::new();
1763        vendors.insert(
1764            "V001".to_string(),
1765            VendorContext {
1766                vendor_id: "V001".to_string(),
1767                is_new: true,
1768                ..Default::default()
1769            },
1770        );
1771
1772        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1773        assert_eq!(injector.vendor_contexts().len(), 1);
1774
1775        // Reset clears labels and stats but not entity contexts
1776        injector.reset();
1777        assert_eq!(injector.vendor_contexts().len(), 1);
1778    }
1779
1780    #[test]
1781    fn test_set_empty_contexts_clears() {
1782        let config = AnomalyInjectorConfig::default();
1783        let mut injector = AnomalyInjector::new(config);
1784
1785        let mut vendors = HashMap::new();
1786        vendors.insert(
1787            "V001".to_string(),
1788            VendorContext {
1789                vendor_id: "V001".to_string(),
1790                ..Default::default()
1791            },
1792        );
1793
1794        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1795        assert_eq!(injector.vendor_contexts().len(), 1);
1796
1797        // Setting empty maps clears
1798        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1799        assert!(injector.vendor_contexts().is_empty());
1800    }
1801
1802    #[test]
1803    fn test_dormant_vendor_multiplier() {
1804        let config = AnomalyInjectorConfig::default();
1805        let mut injector = AnomalyInjector::new(config);
1806
1807        let mut vendors = HashMap::new();
1808        vendors.insert(
1809            "V_DORMANT".to_string(),
1810            VendorContext {
1811                vendor_id: "V_DORMANT".to_string(),
1812                is_dormant_reactivation: true, // 1.5x
1813                ..Default::default()
1814            },
1815        );
1816
1817        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1818
1819        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1820        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1821        assert!(
1822            (multiplier - 1.5).abs() < f64::EPSILON,
1823            "Expected 1.5x multiplier for dormant vendor, got {}",
1824            multiplier,
1825        );
1826    }
1827
1828    // =========================================================================
1829    // Fraud Behavioral Bias Tests
1830    // =========================================================================
1831
1832    /// When all biases are set to 1.0, every fraud entry gets every behavioral
1833    /// flag: weekend posting date, round-dollar amount, off-hours created_at,
1834    /// and post-close marking. This is the strong "all biases fire" guarantee
1835    /// that lets downstream ML classifiers learn these canonical signals.
1836    #[test]
1837    fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
1838        use chrono::{Datelike, Timelike, Weekday};
1839        use datasynth_core::models::FraudType;
1840
1841        let mut config = AnomalyInjectorConfig::default();
1842        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1843            enabled: true,
1844            weekend_bias: 1.0,
1845            round_dollar_bias: 1.0,
1846            off_hours_bias: 1.0,
1847            post_close_bias: 1.0,
1848        };
1849        let mut injector = AnomalyInjector::new(config);
1850
1851        // Use a Monday date so the weekend-shift always moves it.
1852        let mut entry = JournalEntry::new_simple(
1853            "JE001".to_string(),
1854            "1000".to_string(),
1855            NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), // Monday
1856            "Test Entry".to_string(),
1857        );
1858        entry.add_line(JournalEntryLine {
1859            line_number: 1,
1860            gl_account: "5000".to_string(),
1861            debit_amount: dec!(1237),
1862            ..Default::default()
1863        });
1864        entry.add_line(JournalEntryLine {
1865            line_number: 2,
1866            gl_account: "1000".to_string(),
1867            credit_amount: dec!(1237),
1868            ..Default::default()
1869        });
1870
1871        let _ =
1872            injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1873
1874        // Weekend: shifted to Sat or Sun.
1875        assert!(
1876            matches!(
1877                entry.header.posting_date.weekday(),
1878                Weekday::Sat | Weekday::Sun
1879            ),
1880            "expected weekend posting date, got {:?}",
1881            entry.header.posting_date.weekday()
1882        );
1883        // Round-dollar: exactly one of the known round targets.
1884        let debit_total: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
1885        let credit_total: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
1886        assert_eq!(debit_total, credit_total, "entry must remain balanced");
1887        assert!(
1888            [
1889                dec!(1_000),
1890                dec!(5_000),
1891                dec!(10_000),
1892                dec!(25_000),
1893                dec!(50_000),
1894                dec!(100_000)
1895            ]
1896            .contains(&debit_total),
1897            "expected round-dollar total, got {}",
1898            debit_total
1899        );
1900        // Off-hours: 22:00–05:59 UTC.
1901        let hour = entry.header.created_at.hour();
1902        assert!(
1903            !(6..22).contains(&hour),
1904            "expected off-hours timestamp, got hour {}",
1905            hour
1906        );
1907        // Post-close marked.
1908        assert!(entry.header.is_post_close);
1909
1910        // Stats reflect each bias application.
1911        let stats = injector.get_stats();
1912        assert_eq!(stats.fraud_weekend_bias_applied, 1);
1913        assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
1914        assert_eq!(stats.fraud_off_hours_bias_applied, 1);
1915        assert_eq!(stats.fraud_post_close_bias_applied, 1);
1916    }
1917
1918    /// When biases are all zero, no flags are applied even to fraud entries —
1919    /// the feature is fully opt-outable.
1920    #[test]
1921    fn fraud_behavioral_bias_rate_zero_applies_nothing() {
1922        use datasynth_core::models::FraudType;
1923
1924        let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); // Monday
1925        let mut config = AnomalyInjectorConfig::default();
1926        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1927            enabled: true,
1928            weekend_bias: 0.0,
1929            round_dollar_bias: 0.0,
1930            off_hours_bias: 0.0,
1931            post_close_bias: 0.0,
1932        };
1933        let mut injector = AnomalyInjector::new(config);
1934        let mut entry = create_test_entry("JE001");
1935        entry.header.posting_date = original_date;
1936
1937        let _ =
1938            injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1939
1940        assert_eq!(entry.header.posting_date, original_date);
1941        assert!(!entry.header.is_post_close);
1942        let stats = injector.get_stats();
1943        assert_eq!(stats.fraud_weekend_bias_applied, 0);
1944        assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
1945        assert_eq!(stats.fraud_off_hours_bias_applied, 0);
1946        assert_eq!(stats.fraud_post_close_bias_applied, 0);
1947    }
1948
1949    /// Non-fraud anomalies (errors, process issues, etc.) are not touched by
1950    /// the bias — only `AnomalyType::Fraud(_)` triggers it.
1951    #[test]
1952    fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
1953        let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); // Monday
1954        let mut config = AnomalyInjectorConfig::default();
1955        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1956            enabled: true,
1957            weekend_bias: 1.0,
1958            round_dollar_bias: 1.0,
1959            off_hours_bias: 1.0,
1960            post_close_bias: 1.0,
1961        };
1962        let mut injector = AnomalyInjector::new(config);
1963        let mut entry = create_test_entry("JE001");
1964        entry.header.posting_date = original_date;
1965
1966        let _ = injector.inject_specific(
1967            &mut entry,
1968            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
1969        );
1970
1971        assert_eq!(entry.header.posting_date, original_date);
1972        let stats = injector.get_stats();
1973        assert_eq!(stats.fraud_weekend_bias_applied, 0);
1974    }
1975
1976    /// When behavioural biases fire on a fraud entry, secondary
1977    /// `ProcessIssue` labels should be pushed into the labels stream so
1978    /// auditors can filter for specific forensic patterns.
1979    #[test]
1980    fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
1981        use datasynth_core::models::{FraudType, ProcessIssueType};
1982
1983        let mut config = AnomalyInjectorConfig::default();
1984        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1985            enabled: true,
1986            weekend_bias: 1.0,
1987            round_dollar_bias: 0.0, // round-dollar does not emit a process-issue label
1988            off_hours_bias: 1.0,
1989            post_close_bias: 1.0,
1990        };
1991        let mut injector = AnomalyInjector::new(config);
1992        let mut entry = JournalEntry::new_simple(
1993            "JE001".into(),
1994            "1000".into(),
1995            NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(),
1996            "Test".into(),
1997        );
1998        entry.add_line(JournalEntryLine {
1999            line_number: 1,
2000            gl_account: "5000".into(),
2001            debit_amount: dec!(1000),
2002            ..Default::default()
2003        });
2004        entry.add_line(JournalEntryLine {
2005            line_number: 2,
2006            gl_account: "1000".into(),
2007            credit_amount: dec!(1000),
2008            ..Default::default()
2009        });
2010
2011        let primary = injector
2012            .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
2013            .expect("fraud label should be produced");
2014
2015        // Primary fraud label + 3 secondary process-issue labels.
2016        let labels = injector.get_labels();
2017        assert_eq!(
2018            labels.len(),
2019            3,
2020            "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
2021        );
2022        let types: Vec<AnomalyType> = labels.iter().map(|l| l.anomaly_type.clone()).collect();
2023        assert!(types.contains(&AnomalyType::ProcessIssue(ProcessIssueType::WeekendPosting)));
2024        assert!(types.contains(&AnomalyType::ProcessIssue(
2025            ProcessIssueType::AfterHoursPosting
2026        )));
2027        assert!(types.contains(&AnomalyType::ProcessIssue(
2028            ProcessIssueType::PostClosePosting
2029        )));
2030        assert_eq!(
2031            primary.anomaly_type,
2032            AnomalyType::Fraud(FraudType::FictitiousEntry)
2033        );
2034    }
2035}
datasynth_generators/anomaly/injector.rs

datasynth_generators/anomaly/
injector.rs