datasynth_generators/anomaly/
injector.rs

1//! Main anomaly injection engine.
2//!
3//! The injector coordinates anomaly generation across all data types,
4//! managing rates, patterns, clustering, and label generation.
5//!
6//! ## Enhanced Features (v0.3.0+)
7//!
8//! - **Multi-stage fraud schemes**: Embezzlement, revenue manipulation, kickbacks
9//! - **Correlated injection**: Co-occurrence patterns and error cascades
10//! - **Near-miss generation**: Suspicious but legitimate transactions
11//! - **Detection difficulty classification**: Trivial to expert levels
12//! - **Context-aware injection**: Entity-specific anomaly patterns
13
14use chrono::NaiveDate;
15use datasynth_config::schema::FraudCampaignConfig;
16use datasynth_core::utils::seeded_rng;
17use rand::RngExt;
18use rand_chacha::ChaCha8Rng;
19use rust_decimal::Decimal;
20use std::collections::HashMap;
21use tracing::debug;
22
23use datasynth_core::fraud_bias::{apply_fraud_behavioral_bias, FraudBehavioralBiasConfig};
24use datasynth_core::models::{
25    AnomalyCausalReason, AnomalyDetectionDifficulty, AnomalyRateConfig, AnomalySummary,
26    AnomalyType, ErrorType, FraudType, JournalEntry, LabeledAnomaly, NearMissLabel,
27    RelationalAnomalyType,
28};
29use datasynth_core::uuid_factory::{DeterministicUuidFactory, GeneratorType};
30
31use super::context::{
32    AccountContext, BehavioralBaseline, BehavioralBaselineConfig, EmployeeContext,
33    EntityAwareInjector, VendorContext,
34};
35use super::correlation::{AnomalyCoOccurrence, TemporalClusterGenerator};
36use super::difficulty::DifficultyCalculator;
37use super::near_miss::{NearMissConfig, NearMissGenerator};
38use super::patterns::{
39    should_inject_anomaly, AnomalyPatternConfig, ClusterManager, EntityTargetingManager,
40    TemporalPattern,
41};
42use super::scheme_advancer::{SchemeAdvancer, SchemeAdvancerConfig};
43use super::schemes::{SchemeAction, SchemeContext};
44use super::strategies::{DuplicationStrategy, StrategyCollection};
45use super::types::AnomalyTypeSelector;
46
/// Configuration for the anomaly injector.
///
/// Passed by value to [`AnomalyInjector::new`], which keeps it for the
/// lifetime of the injector and derives its RNG and UUID factory from `seed`.
#[derive(Debug, Clone)]
pub struct AnomalyInjectorConfig {
    /// Rate configuration (overall injection rate plus per-category rates
    /// used when choosing between fraud, error, process-issue, statistical
    /// and relational anomalies).
    pub rates: AnomalyRateConfig,
    /// Pattern configuration (clustering, entity targeting and the temporal
    /// pattern consulted by the injection decision).
    pub patterns: AnomalyPatternConfig,
    /// Random seed for reproducibility. Seeds both the injector RNG and the
    /// deterministic UUID factory.
    pub seed: u64,
    /// Whether to generate labels.
    pub generate_labels: bool,
    /// Whether to allow duplicate injection, i.e. cloning an entry that was
    /// labelled as a duplicate entry / duplicate payment.
    pub allow_duplicates: bool,
    /// Maximum anomalies per document. Entries whose document number has
    /// already reached this count are skipped.
    pub max_anomalies_per_document: usize,
    /// Company codes to target (empty = all).
    pub target_companies: Vec<String>,
    /// Date range for injection as `(start, end)`, both inclusive. Entries
    /// posted outside the range are skipped; `None` disables the filter.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Enhanced features configuration.
    pub enhanced: EnhancedInjectionConfig,
}
69
/// Enhanced injection configuration for v0.3.0+ features.
///
/// `Default` is derived, so every `bool` flag starts as `false`, every `f64`
/// as `0.0` and `approval_thresholds` as an empty vector; the nested config
/// types use their own `Default` implementations.
#[derive(Debug, Clone, Default)]
pub struct EnhancedInjectionConfig {
    /// Enable multi-stage fraud scheme generation.
    pub multi_stage_schemes_enabled: bool,
    /// Probability of starting a new scheme per perpetrator per year.
    ///
    /// Used as-is for embezzlement and halved for revenue manipulation and
    /// kickbacks when the scheme advancer is built.
    pub scheme_probability: f64,
    /// Enable correlated anomaly injection.
    pub correlated_injection_enabled: bool,
    /// Enable temporal clustering (period-end spikes).
    pub temporal_clustering_enabled: bool,
    /// Period-end anomaly rate multiplier.
    pub period_end_multiplier: f64,
    /// Enable near-miss generation.
    pub near_miss_enabled: bool,
    /// Proportion of anomalies that are near-misses.
    pub near_miss_proportion: f64,
    /// Approval thresholds for threshold-proximity near-misses.
    pub approval_thresholds: Vec<Decimal>,
    /// Enable detection difficulty classification.
    pub difficulty_classification_enabled: bool,
    /// Enable context-aware injection.
    pub context_aware_enabled: bool,
    /// Behavioral baseline configuration.
    ///
    /// The baseline tracker is only created when both `context_aware_enabled`
    /// and this config's `enabled` flag are set.
    pub behavioral_baseline_config: BehavioralBaselineConfig,
    /// Behavioral bias applied to fraud entries so canonical forensic
    /// signals (weekend posting, round-dollar amounts, off-hours posting,
    /// post-close adjustments) show measurable lift on fraud vs legitimate
    /// populations. Defaults enable all four biases.
    pub fraud_behavioral_bias: FraudBehavioralBiasConfig,
    /// Persistent fraud campaigns — counterparty-pinned, relocation-structured fraud across
    /// periods. Off by default; see [`crate::anomaly::campaign`].
    pub fraud_campaign: FraudCampaignConfig,
}
104
105impl Default for AnomalyInjectorConfig {
106    fn default() -> Self {
107        Self {
108            rates: AnomalyRateConfig::default(),
109            patterns: AnomalyPatternConfig::default(),
110            seed: 42,
111            generate_labels: true,
112            allow_duplicates: true,
113            max_anomalies_per_document: 2,
114            target_companies: Vec::new(),
115            date_range: None,
116            enhanced: EnhancedInjectionConfig::default(),
117        }
118    }
119}
120
/// Result of an injection batch.
///
/// Returned by `AnomalyInjector::process_entries`. Several fields are
/// snapshots of the injector's accumulated state rather than per-batch
/// values; see the individual field notes.
#[derive(Debug, Clone)]
pub struct InjectionBatchResult {
    /// Number of entries processed. Filled from the injector's running
    /// `total_processed` counter, so it includes earlier batches handled by
    /// the same injector.
    pub entries_processed: usize,
    /// Number of anomalies injected. Filled from the injector's running
    /// `total_injected` counter (including campaign labels).
    pub anomalies_injected: usize,
    /// Number of duplicates created during this batch.
    pub duplicates_created: usize,
    /// Labels generated. A clone of every label the injector currently
    /// holds, not only those created by this batch.
    pub labels: Vec<LabeledAnomaly>,
    /// Summary of anomalies, computed over `labels`.
    pub summary: AnomalySummary,
    /// Entries that were modified (document numbers) in this batch.
    pub modified_documents: Vec<String>,
    /// Near-miss labels (suspicious but legitimate transactions).
    pub near_miss_labels: Vec<NearMissLabel>,
    /// Multi-stage scheme actions generated.
    pub scheme_actions: Vec<SchemeAction>,
    /// Difficulty distribution summary.
    pub difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    /// Synthetic prior-year carry-forward register (confirmed campaign counterparties) — the
    /// confirmation channel the memory arm consumes (§40/§59). Empty unless `carry_forward` is on.
    pub carry_forward: Vec<super::campaign::CarryForwardRecord>,
}
146
/// Main anomaly injection engine.
///
/// Holds the configuration, the seeded RNG and all bookkeeping (labels,
/// statistics, queued correlated anomalies) that persists across calls to
/// `process_entries`. Optional components are `None` when the matching
/// feature flag in the enhanced configuration is off.
pub struct AnomalyInjector {
    /// Configuration supplied at construction.
    config: AnomalyInjectorConfig,
    /// RNG seeded from `config.seed`; shared by the rate check, type
    /// selection, entity targeting and the injection strategies.
    rng: ChaCha8Rng,
    /// Deterministic UUID factory seeded from `config.seed`; handed to the
    /// duplication strategy.
    uuid_factory: DeterministicUuidFactory,
    /// Picks a concrete anomaly type within a chosen category.
    type_selector: AnomalyTypeSelector,
    /// Strategies that apply a chosen anomaly type to an entry.
    strategies: StrategyCollection,
    /// Cluster manager built from `config.patterns.clustering`.
    cluster_manager: ClusterManager,
    /// Selects target entities for anomaly injection (RepeatOffender, etc.).
    entity_targeting: EntityTargetingManager,
    /// Tracking which documents already have anomalies.
    document_anomaly_counts: HashMap<String, usize>,
    /// All generated labels.
    labels: Vec<LabeledAnomaly>,
    /// Statistics.
    stats: InjectorStats,
    // Enhanced components (v0.3.0+)
    /// Multi-stage fraud scheme advancer.
    scheme_advancer: Option<SchemeAdvancer>,
    /// Near-miss generator.
    near_miss_generator: Option<NearMissGenerator>,
    /// Near-miss labels generated.
    near_miss_labels: Vec<NearMissLabel>,
    /// Drives correlated anomaly pairs (e.g., FictitiousVendor + InvoiceManipulation).
    co_occurrence_handler: Option<AnomalyCoOccurrence>,
    /// Queued correlated anomalies waiting to be injected.
    queued_co_occurrences: Vec<QueuedAnomaly>,
    /// Groups anomalies into temporal bursts during period-end windows.
    temporal_cluster_generator: Option<TemporalClusterGenerator>,
    /// Difficulty calculator.
    difficulty_calculator: Option<DifficultyCalculator>,
    /// Entity-aware injector.
    entity_aware_injector: Option<EntityAwareInjector>,
    /// Behavioral baseline tracker.
    behavioral_baseline: Option<BehavioralBaseline>,
    /// Scheme actions generated.
    scheme_actions: Vec<SchemeAction>,
    /// Difficulty distribution.
    difficulty_distribution: HashMap<AnomalyDetectionDifficulty, usize>,
    // Entity context lookup maps for risk-adjusted injection rates
    /// Vendor contexts keyed by vendor ID.
    vendor_contexts: HashMap<String, VendorContext>,
    /// Employee contexts keyed by employee ID.
    employee_contexts: HashMap<String, EmployeeContext>,
    /// Account contexts keyed by account code.
    account_contexts: HashMap<String, AccountContext>,
}
194
/// Injection statistics tracking.
///
/// All counters start at zero (derived `Default`) and are only ever
/// incremented by the injector, so they accumulate across batches.
#[derive(Debug, Clone, Default)]
pub struct InjectorStats {
    /// Total number of entries processed.
    pub total_processed: usize,
    /// Total number of anomalies injected.
    pub total_injected: usize,
    /// Anomalies injected by category (e.g., "Fraud", "Error").
    pub by_category: HashMap<String, usize>,
    /// Anomalies injected by specific type name.
    pub by_type: HashMap<String, usize>,
    /// Anomalies injected by company code.
    pub by_company: HashMap<String, usize>,
    /// Entries skipped due to rate check.
    pub skipped_rate: usize,
    /// Entries skipped due to date range filter.
    pub skipped_date: usize,
    /// Entries skipped due to company filter.
    pub skipped_company: usize,
    /// Entries skipped due to max-anomalies-per-document limit.
    pub skipped_max_per_doc: usize,
    /// Fraud entries that received weekend-posting bias.
    pub fraud_weekend_bias_applied: usize,
    /// Fraud entries that received round-dollar amount bias.
    pub fraud_round_dollar_bias_applied: usize,
    /// Fraud entries that received off-hours created_at bias.
    pub fraud_off_hours_bias_applied: usize,
    /// Fraud entries that received post-close marking bias.
    pub fraud_post_close_bias_applied: usize,
}
225
/// A correlated anomaly queued for future injection.
///
/// Created when a freshly injected anomaly has correlated follow-ups; the
/// injector later applies it to the first eligible entry whose posting date
/// is on or after `earliest_date`.
struct QueuedAnomaly {
    /// Anomaly type to inject.
    anomaly_type: AnomalyType,
    /// Target entity (if same_entity was specified in the co-occurrence pattern).
    target_entity: Option<String>,
    /// Earliest date this can be injected (trigger entry date plus the
    /// pattern's lag in days).
    earliest_date: NaiveDate,
    /// Description from the co-occurrence pattern.
    description: String,
}
237
238impl AnomalyInjector {
239    /// Creates a new anomaly injector.
240    pub fn new(config: AnomalyInjectorConfig) -> Self {
241        let mut rng = seeded_rng(config.seed, 0);
242        let cluster_manager = ClusterManager::new(config.patterns.clustering.clone());
243        let entity_targeting =
244            EntityTargetingManager::new(config.patterns.entity_targeting.clone());
245
246        // Initialize enhanced components based on configuration
247        let scheme_advancer = if config.enhanced.multi_stage_schemes_enabled {
248            let scheme_config = SchemeAdvancerConfig {
249                embezzlement_probability: config.enhanced.scheme_probability,
250                revenue_manipulation_probability: config.enhanced.scheme_probability * 0.5,
251                kickback_probability: config.enhanced.scheme_probability * 0.5,
252                seed: rng.random(),
253                ..Default::default()
254            };
255            Some(SchemeAdvancer::new(scheme_config))
256        } else {
257            None
258        };
259
260        let near_miss_generator = if config.enhanced.near_miss_enabled {
261            let near_miss_config = NearMissConfig {
262                proportion: config.enhanced.near_miss_proportion,
263                seed: rng.random(),
264                ..Default::default()
265            };
266            Some(NearMissGenerator::new(near_miss_config))
267        } else {
268            None
269        };
270
271        let co_occurrence_handler = if config.enhanced.correlated_injection_enabled {
272            Some(AnomalyCoOccurrence::new())
273        } else {
274            None
275        };
276
277        let temporal_cluster_generator = if config.enhanced.temporal_clustering_enabled {
278            Some(TemporalClusterGenerator::new())
279        } else {
280            None
281        };
282
283        let difficulty_calculator = if config.enhanced.difficulty_classification_enabled {
284            Some(DifficultyCalculator::new())
285        } else {
286            None
287        };
288
289        let entity_aware_injector = if config.enhanced.context_aware_enabled {
290            Some(EntityAwareInjector::default())
291        } else {
292            None
293        };
294
295        let behavioral_baseline = if config.enhanced.context_aware_enabled
296            && config.enhanced.behavioral_baseline_config.enabled
297        {
298            Some(BehavioralBaseline::new(
299                config.enhanced.behavioral_baseline_config.clone(),
300            ))
301        } else {
302            None
303        };
304
305        let uuid_factory = DeterministicUuidFactory::new(config.seed, GeneratorType::Anomaly);
306
307        Self {
308            config,
309            rng,
310            uuid_factory,
311            type_selector: AnomalyTypeSelector::new(),
312            strategies: StrategyCollection::default(),
313            cluster_manager,
314            entity_targeting,
315            document_anomaly_counts: HashMap::new(),
316            labels: Vec::new(),
317            stats: InjectorStats::default(),
318            scheme_advancer,
319            near_miss_generator,
320            near_miss_labels: Vec::new(),
321            co_occurrence_handler,
322            queued_co_occurrences: Vec::new(),
323            temporal_cluster_generator,
324            difficulty_calculator,
325            entity_aware_injector,
326            behavioral_baseline,
327            scheme_actions: Vec::new(),
328            difficulty_distribution: HashMap::new(),
329            vendor_contexts: HashMap::new(),
330            employee_contexts: HashMap::new(),
331            account_contexts: HashMap::new(),
332        }
333    }
334
335    /// Processes a batch of journal entries, potentially injecting anomalies.
336    pub fn process_entries(&mut self, entries: &mut [JournalEntry]) -> InjectionBatchResult {
337        debug!(
338            entry_count = entries.len(),
339            total_rate = self.config.rates.total_rate,
340            seed = self.config.seed,
341            "Injecting anomalies into journal entries"
342        );
343
344        let mut modified_documents = Vec::new();
345        let mut duplicates = Vec::new();
346
347        for entry in entries.iter_mut() {
348            self.stats.total_processed += 1;
349
350            // Update behavioral baseline if enabled
351            if let Some(ref mut baseline) = self.behavioral_baseline {
352                use super::context::Observation;
353                // Record the observation for baseline building
354                let entity_id = entry.header.created_by.clone();
355                let observation =
356                    Observation::new(entry.posting_date()).with_amount(entry.total_debit());
357                baseline.record_observation(&entity_id, observation);
358            }
359
360            // Check if we should process this entry
361            if !self.should_process(entry) {
362                continue;
363            }
364
365            // --- Check queued co-occurrences first ---
366            let entry_date = entry.posting_date();
367            let ready_indices: Vec<usize> = self
368                .queued_co_occurrences
369                .iter()
370                .enumerate()
371                .filter(|(_, q)| entry_date >= q.earliest_date)
372                .map(|(i, _)| i)
373                .collect();
374
375            if let Some(&idx) = ready_indices.first() {
376                let queued = self.queued_co_occurrences.remove(idx);
377                if let Some(mut label) = self.inject_anomaly(entry, queued.anomaly_type) {
378                    label = label.with_metadata("co_occurrence", "true");
379                    label = label.with_metadata("co_occurrence_description", &queued.description);
380                    if let Some(ref target) = queued.target_entity {
381                        label = label.with_related_entity(target);
382                        label = label.with_metadata("co_occurrence_target", target);
383                    }
384                    modified_documents.push(entry.document_number().clone());
385                    self.labels.push(label);
386                    self.stats.total_injected += 1;
387                }
388                continue; // This entry was used for a queued co-occurrence
389            }
390
391            // Calculate effective rate
392            let base_rate = self.config.rates.total_rate;
393
394            // Calculate entity-aware rate adjustment using context lookup maps
395            let mut effective_rate = if let Some(ref injector) = self.entity_aware_injector {
396                let employee_id = &entry.header.created_by;
397                let first_account = entry
398                    .lines
399                    .first()
400                    .map(|l| l.gl_account.as_str())
401                    .unwrap_or("");
402                // Look up vendor from the entry's reference field (vendor ID convention)
403                let vendor_ref = entry.header.reference.as_deref().unwrap_or("");
404
405                let vendor_ctx = self.vendor_contexts.get(vendor_ref);
406                let employee_ctx = self.employee_contexts.get(employee_id);
407                let account_ctx = self.account_contexts.get(first_account);
408
409                let multiplier =
410                    injector.get_rate_multiplier(vendor_ctx, employee_ctx, account_ctx);
411                (base_rate * multiplier).min(1.0)
412            } else {
413                // No entity-aware injector: fall back to context maps alone
414                self.calculate_context_rate_multiplier(entry) * base_rate
415            };
416
417            // --- Temporal clustering: boost rate during period-end windows ---
418            if let Some(ref tcg) = self.temporal_cluster_generator {
419                let temporal_multiplier = tcg
420                    .get_active_clusters(entry_date)
421                    .iter()
422                    .map(|c| c.rate_multiplier)
423                    .fold(1.0_f64, f64::max);
424                effective_rate = (effective_rate * temporal_multiplier).min(1.0);
425            }
426
427            // Determine if we inject an anomaly
428            if should_inject_anomaly(
429                effective_rate,
430                entry_date,
431                &self.config.patterns.temporal_pattern,
432                &mut self.rng,
433            ) {
434                // Check if this should be a near-miss instead
435                if let Some(ref mut near_miss_gen) = self.near_miss_generator {
436                    // Record the transaction for near-duplicate detection
437                    let account = entry
438                        .lines
439                        .first()
440                        .map(|l| l.gl_account.clone())
441                        .unwrap_or_default();
442                    near_miss_gen.record_transaction(
443                        entry.document_number().clone(),
444                        entry_date,
445                        entry.total_debit(),
446                        &account,
447                        None,
448                    );
449
450                    // Check if this could be a near-miss
451                    if let Some(near_miss_label) = near_miss_gen.check_near_miss(
452                        entry.document_number().clone(),
453                        entry_date,
454                        entry.total_debit(),
455                        &account,
456                        None,
457                        &self.config.enhanced.approval_thresholds,
458                    ) {
459                        self.near_miss_labels.push(near_miss_label);
460                        continue; // Skip actual anomaly injection
461                    }
462                }
463
464                // Select anomaly category based on rates
465                let anomaly_type = self.select_anomaly_category();
466
467                // --- Entity targeting: select and track target entity ---
468                let target_entity = {
469                    let mut candidates: Vec<String> =
470                        self.vendor_contexts.keys().cloned().collect();
471                    candidates.extend(self.employee_contexts.keys().cloned());
472                    if candidates.is_empty() {
473                        // Fall back to entry's reference field as a candidate
474                        if let Some(ref r) = entry.header.reference {
475                            candidates.push(r.clone());
476                        }
477                    }
478                    self.entity_targeting
479                        .select_entity(&candidates, &mut self.rng)
480                };
481
482                // Apply the anomaly
483                if let Some(mut label) = self.inject_anomaly(entry, anomaly_type.clone()) {
484                    // Add entity targeting metadata
485                    if let Some(ref entity_id) = target_entity {
486                        label = label.with_metadata("entity_target", entity_id);
487                        label = label.with_related_entity(entity_id);
488                        label = label.with_causal_reason(AnomalyCausalReason::EntityTargeting {
489                            target_type: "Entity".to_string(),
490                            target_id: entity_id.clone(),
491                        });
492                    }
493
494                    // Calculate detection difficulty if enabled
495                    if let Some(ref calculator) = self.difficulty_calculator {
496                        let difficulty = calculator.calculate(&label);
497
498                        // Store difficulty in metadata
499                        label =
500                            label.with_metadata("detection_difficulty", &format!("{difficulty:?}"));
501                        label = label.with_metadata(
502                            "difficulty_score",
503                            &difficulty.difficulty_score().to_string(),
504                        );
505
506                        // Update difficulty distribution
507                        *self.difficulty_distribution.entry(difficulty).or_insert(0) += 1;
508                    }
509
510                    modified_documents.push(entry.document_number().clone());
511                    self.labels.push(label);
512                    self.stats.total_injected += 1;
513
514                    // --- Co-occurrence: queue correlated anomalies ---
515                    if let Some(ref co_occ) = self.co_occurrence_handler {
516                        let correlated =
517                            co_occ.get_correlated_anomalies(&anomaly_type, &mut self.rng);
518                        for result in correlated {
519                            self.queued_co_occurrences.push(QueuedAnomaly {
520                                anomaly_type: result.anomaly_type,
521                                target_entity: if result.same_entity {
522                                    target_entity.clone()
523                                } else {
524                                    None
525                                },
526                                earliest_date: entry_date
527                                    + chrono::Duration::days(i64::from(result.lag_days)),
528                                description: result.description,
529                            });
530                        }
531                    }
532                }
533
534                // Check for duplicate injection.
535                //
536                // v5.31 C1 Phase 7+: skip duplication on IC injector
537                // JEs. Cloning an IC JE produces two JEs on the same
538                // entity carrying the same ic_pair_id, which makes the
539                // group IC matcher see 3 observed sides for that pair
540                // (the partner + both copies on this side) and fail
541                // hard with "expected at most 2 (one seller + one
542                // buyer)". IC postings are deterministic by manifest
543                // contract; duplication is a fraud-typology signal that
544                // doesn't apply to them.
545                if self.config.allow_duplicates
546                    && entry.header.ic_pair_id.is_none()
547                    && matches!(
548                        self.labels.last().map(|l| &l.anomaly_type),
549                        Some(AnomalyType::Error(ErrorType::DuplicateEntry))
550                            | Some(AnomalyType::Fraud(FraudType::DuplicatePayment))
551                    )
552                {
553                    let dup_strategy = DuplicationStrategy::default();
554                    let duplicate =
555                        dup_strategy.duplicate(entry, &mut self.rng, &self.uuid_factory);
556                    duplicates.push(duplicate);
557                }
558            }
559        }
560
561        // --- Persistent fraud campaigns (A1): counterparty-pinned, relocation-structured fraud
562        // restructured across periods. Runs once over the whole batch after per-entry injection.
563        let mut carry_forward: Vec<super::campaign::CarryForwardRecord> = Vec::new();
564        if self.config.enhanced.fraud_campaign.is_active() {
565            let campaign_cfg = self.config.enhanced.fraud_campaign.clone();
566            let campaign_labels =
567                super::campaign::plan_campaigns(&campaign_cfg, entries, &mut self.rng);
568            self.stats.total_injected += campaign_labels.len();
569            for label in &campaign_labels {
570                modified_documents.push(label.document_id.clone());
571            }
572            // Synthetic prior-year carry-forward register (the confirmation channel, §40/§59):
573            // a partial/noisy set of confirmed campaign counterparties the audit memory carries.
574            if campaign_cfg.carry_forward.enabled {
575                let accounts: Vec<String> = {
576                    let mut set = std::collections::BTreeSet::new();
577                    for e in entries.iter() {
578                        for l in &e.lines {
579                            set.insert(l.gl_account.clone());
580                        }
581                    }
582                    set.into_iter().collect()
583                };
584                carry_forward = super::campaign::build_carry_forward_register(
585                    &campaign_labels,
586                    &campaign_cfg.carry_forward,
587                    &accounts,
588                    &mut self.rng,
589                );
590            }
591            self.labels.extend(campaign_labels);
592        }
593
594        // Count duplicates
595        let duplicates_created = duplicates.len();
596
597        // Build summary
598        let summary = AnomalySummary::from_anomalies(&self.labels);
599
600        InjectionBatchResult {
601            entries_processed: self.stats.total_processed,
602            anomalies_injected: self.stats.total_injected,
603            duplicates_created,
604            labels: self.labels.clone(),
605            summary,
606            modified_documents,
607            near_miss_labels: self.near_miss_labels.clone(),
608            scheme_actions: self.scheme_actions.clone(),
609            difficulty_distribution: self.difficulty_distribution.clone(),
610            carry_forward,
611        }
612    }
613
614    /// Checks if an entry should be processed.
615    fn should_process(&mut self, entry: &JournalEntry) -> bool {
616        // Check company filter
617        if !self.config.target_companies.is_empty()
618            && !self
619                .config
620                .target_companies
621                .iter()
622                .any(|c| c == entry.company_code())
623        {
624            self.stats.skipped_company += 1;
625            return false;
626        }
627
628        // Check date range
629        if let Some((start, end)) = self.config.date_range {
630            if entry.posting_date() < start || entry.posting_date() > end {
631                self.stats.skipped_date += 1;
632                return false;
633            }
634        }
635
636        // Check max anomalies per document
637        let current_count = self
638            .document_anomaly_counts
639            .get(&entry.document_number())
640            .copied()
641            .unwrap_or(0);
642        if current_count >= self.config.max_anomalies_per_document {
643            self.stats.skipped_max_per_doc += 1;
644            return false;
645        }
646
647        true
648    }
649
650    /// Selects an anomaly category based on configured rates.
651    fn select_anomaly_category(&mut self) -> AnomalyType {
652        let r = self.rng.random::<f64>();
653        let rates = &self.config.rates;
654
655        let mut cumulative = 0.0;
656
657        cumulative += rates.fraud_rate;
658        if r < cumulative {
659            return self.type_selector.select_fraud(&mut self.rng);
660        }
661
662        cumulative += rates.error_rate;
663        if r < cumulative {
664            return self.type_selector.select_error(&mut self.rng);
665        }
666
667        cumulative += rates.process_issue_rate;
668        if r < cumulative {
669            return self.type_selector.select_process_issue(&mut self.rng);
670        }
671
672        cumulative += rates.statistical_rate;
673        if r < cumulative {
674            return self.type_selector.select_statistical(&mut self.rng);
675        }
676
677        self.type_selector.select_relational(&mut self.rng)
678    }
679
680    /// Injects an anomaly into an entry.
681    fn inject_anomaly(
682        &mut self,
683        entry: &mut JournalEntry,
684        anomaly_type: AnomalyType,
685    ) -> Option<LabeledAnomaly> {
686        // Check if strategy can be applied
687        if !self.strategies.can_apply(entry, &anomaly_type) {
688            return None;
689        }
690
691        // Apply the strategy
692        let result = self
693            .strategies
694            .apply_strategy(entry, &anomaly_type, &mut self.rng);
695
696        if !result.success {
697            return None;
698        }
699
700        // Update document anomaly count
701        *self
702            .document_anomaly_counts
703            .entry(entry.document_number().clone())
704            .or_insert(0) += 1;
705
706        // Update statistics
707        let category = anomaly_type.category().to_string();
708        let type_name = anomaly_type.type_name();
709
710        *self.stats.by_category.entry(category).or_insert(0) += 1;
711        *self.stats.by_type.entry(type_name.clone()).or_insert(0) += 1;
712        *self
713            .stats
714            .by_company
715            .entry(entry.company_code().to_string())
716            .or_insert(0) += 1;
717
718        // Generate label
719        if self.config.generate_labels {
720            let anomaly_id = format!("ANO{:08}", self.labels.len() + 1);
721
722            // Update entry header with anomaly tracking fields
723            entry.header.is_anomaly = true;
724            entry.header.anomaly_id = Some(anomaly_id.clone());
725            entry.header.anomaly_type = Some(type_name.clone());
726
727            // Also set fraud flag if this is a fraud anomaly
728            let mut secondary_process_issues: Vec<datasynth_core::models::ProcessIssueType> =
729                Vec::new();
730            if matches!(anomaly_type, AnomalyType::Fraud(_)) {
731                entry.header.is_fraud = true;
732                if let AnomalyType::Fraud(ref ft) = anomaly_type {
733                    entry.header.fraud_type = Some(*ft);
734                }
735                // Apply behavioral bias so forensic signals (weekend posting,
736                // round dollars, off-hours, post-close adjustments) are
737                // learnable from fraud-labeled data. The returned list of
738                // biases that fired is used below to emit secondary
739                // ProcessIssue labels.
740                secondary_process_issues = self.apply_fraud_behavioral_bias(entry);
741            }
742
743            let mut label = LabeledAnomaly::new(
744                anomaly_id,
745                anomaly_type.clone(),
746                entry.document_number().clone(),
747                "JE".to_string(),
748                entry.company_code().to_string(),
749                entry.posting_date(),
750            )
751            .with_description(&result.description)
752            .with_injection_strategy(&type_name);
753
754            // Add causal reason with injection context (provenance tracking)
755            let causal_reason = AnomalyCausalReason::RandomRate {
756                base_rate: self.config.rates.total_rate,
757            };
758            label = label.with_causal_reason(causal_reason);
759
760            // Add entity context metadata if contexts are populated
761            let context_multiplier = self.calculate_context_rate_multiplier(entry);
762            if (context_multiplier - 1.0).abs() > f64::EPSILON {
763                label = label.with_metadata(
764                    "entity_context_multiplier",
765                    &format!("{context_multiplier:.3}"),
766                );
767                label = label.with_metadata(
768                    "effective_rate",
769                    &format!(
770                        "{:.6}",
771                        (self.config.rates.total_rate * context_multiplier).min(1.0)
772                    ),
773                );
774            }
775
776            // Add monetary impact
777            if let Some(impact) = result.monetary_impact {
778                label = label.with_monetary_impact(impact);
779            }
780
781            // Add related entities
782            for entity in &result.related_entities {
783                label = label.with_related_entity(entity);
784            }
785
786            // Add metadata
787            for (key, value) in &result.metadata {
788                label = label.with_metadata(key, value);
789            }
790
791            // Assign cluster and update causal reason if in cluster
792            if let Some(cluster_id) =
793                self.cluster_manager
794                    .assign_cluster(entry.posting_date(), &type_name, &mut self.rng)
795            {
796                label = label.with_cluster(&cluster_id);
797                // Update causal reason to reflect cluster membership
798                label = label.with_causal_reason(AnomalyCausalReason::ClusterMembership {
799                    cluster_id: cluster_id.clone(),
800                });
801            }
802
803            // Secondary ProcessIssue labels for each behavioural bias that
804            // fired — lets auditors query the labels stream for specific
805            // forensic patterns (WeekendPosting / AfterHoursPosting /
806            // PostClosePosting) rather than reconstructing them from header
807            // flags. `stats.total_injected` counts injection acts (primary
808            // labels); `labels.len()` may exceed it due to these children.
809            for issue_type in &secondary_process_issues {
810                let child_id = format!("ANO{:08}", self.labels.len() + 1);
811                let child = LabeledAnomaly::new(
812                    child_id,
813                    AnomalyType::ProcessIssue(*issue_type),
814                    entry.document_number().clone(),
815                    "JE".to_string(),
816                    entry.company_code().to_string(),
817                    entry.posting_date(),
818                )
819                .with_description("Forensic pattern from fraud behavioral bias")
820                .with_injection_strategy("behavioral_bias")
821                .with_parent_anomaly(&label.anomaly_id);
822                self.labels.push(child);
823            }
824
825            return Some(label);
826        }
827
828        None
829    }
830
831    /// Injects a specific anomaly type into an entry.
832    pub fn inject_specific(
833        &mut self,
834        entry: &mut JournalEntry,
835        anomaly_type: AnomalyType,
836    ) -> Option<LabeledAnomaly> {
837        self.inject_anomaly(entry, anomaly_type)
838    }
839
840    /// Creates a self-approval anomaly.
841    pub fn create_self_approval(
842        &mut self,
843        entry: &mut JournalEntry,
844        user_id: &str,
845    ) -> Option<LabeledAnomaly> {
846        let anomaly_type = AnomalyType::Fraud(FraudType::SelfApproval);
847
848        let label = LabeledAnomaly::new(
849            format!("ANO{:08}", self.labels.len() + 1),
850            anomaly_type,
851            entry.document_number().clone(),
852            "JE".to_string(),
853            entry.company_code().to_string(),
854            entry.posting_date(),
855        )
856        .with_description(&format!("User {user_id} approved their own transaction"))
857        .with_related_entity(user_id)
858        .with_injection_strategy("ManualSelfApproval")
859        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
860            target_type: "User".to_string(),
861            target_id: user_id.to_string(),
862        });
863
864        // Set entry header anomaly tracking fields
865        entry.header.is_anomaly = true;
866        entry.header.is_fraud = true;
867        entry.header.anomaly_id = Some(label.anomaly_id.clone());
868        entry.header.anomaly_type = Some("SelfApproval".to_string());
869        entry.header.fraud_type = Some(FraudType::SelfApproval);
870
871        // Set approver = requester
872        entry.header.created_by = user_id.to_string();
873
874        // Apply canonical behavioral biases so self-approval frauds have
875        // the same forensic lift as other fraud paths.
876        self.apply_fraud_behavioral_bias(entry);
877
878        self.labels.push(label.clone());
879        Some(label)
880    }
881
882    /// Creates a segregation of duties violation.
883    pub fn create_sod_violation(
884        &mut self,
885        entry: &mut JournalEntry,
886        user_id: &str,
887        conflicting_duties: (&str, &str),
888    ) -> Option<LabeledAnomaly> {
889        let anomaly_type = AnomalyType::Fraud(FraudType::SegregationOfDutiesViolation);
890
891        let label = LabeledAnomaly::new(
892            format!("ANO{:08}", self.labels.len() + 1),
893            anomaly_type,
894            entry.document_number().clone(),
895            "JE".to_string(),
896            entry.company_code().to_string(),
897            entry.posting_date(),
898        )
899        .with_description(&format!(
900            "User {} performed conflicting duties: {} and {}",
901            user_id, conflicting_duties.0, conflicting_duties.1
902        ))
903        .with_related_entity(user_id)
904        .with_metadata("duty1", conflicting_duties.0)
905        .with_metadata("duty2", conflicting_duties.1)
906        .with_injection_strategy("ManualSoDViolation")
907        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
908            target_type: "User".to_string(),
909            target_id: user_id.to_string(),
910        });
911
912        // Set entry header anomaly tracking fields
913        entry.header.is_anomaly = true;
914        entry.header.is_fraud = true;
915        entry.header.anomaly_id = Some(label.anomaly_id.clone());
916        entry.header.anomaly_type = Some("SegregationOfDutiesViolation".to_string());
917        entry.header.fraud_type = Some(FraudType::SegregationOfDutiesViolation);
918
919        // Apply canonical behavioral biases.
920        self.apply_fraud_behavioral_bias(entry);
921
922        self.labels.push(label.clone());
923        Some(label)
924    }
925
926    /// Creates an intercompany mismatch anomaly.
927    pub fn create_ic_mismatch(
928        &mut self,
929        entry: &mut JournalEntry,
930        matching_company: &str,
931        expected_amount: Decimal,
932        actual_amount: Decimal,
933    ) -> Option<LabeledAnomaly> {
934        let anomaly_type = AnomalyType::Relational(RelationalAnomalyType::UnmatchedIntercompany);
935
936        let label = LabeledAnomaly::new(
937            format!("ANO{:08}", self.labels.len() + 1),
938            anomaly_type,
939            entry.document_number().clone(),
940            "JE".to_string(),
941            entry.company_code().to_string(),
942            entry.posting_date(),
943        )
944        .with_description(&format!(
945            "Intercompany mismatch with {matching_company}: expected {expected_amount} but got {actual_amount}"
946        ))
947        .with_related_entity(matching_company)
948        .with_monetary_impact(actual_amount - expected_amount)
949        .with_metadata("expected_amount", &expected_amount.to_string())
950        .with_metadata("actual_amount", &actual_amount.to_string())
951        .with_injection_strategy("ManualICMismatch")
952        .with_causal_reason(AnomalyCausalReason::EntityTargeting {
953            target_type: "Intercompany".to_string(),
954            target_id: matching_company.to_string(),
955        });
956
957        // Set entry header anomaly tracking fields
958        entry.header.is_anomaly = true;
959        entry.header.anomaly_id = Some(label.anomaly_id.clone());
960        entry.header.anomaly_type = Some("UnmatchedIntercompany".to_string());
961
962        self.labels.push(label.clone());
963        Some(label)
964    }
965
966    /// Returns all generated labels.
967    pub fn get_labels(&self) -> &[LabeledAnomaly] {
968        &self.labels
969    }
970
971    /// Returns the anomaly summary.
972    pub fn get_summary(&self) -> AnomalySummary {
973        AnomalySummary::from_anomalies(&self.labels)
974    }
975
976    /// Returns injection statistics.
977    pub fn get_stats(&self) -> &InjectorStats {
978        &self.stats
979    }
980
981    /// Clears all labels and resets statistics.
982    pub fn reset(&mut self) {
983        self.labels.clear();
984        self.document_anomaly_counts.clear();
985        self.stats = InjectorStats::default();
986        self.cluster_manager = ClusterManager::new(self.config.patterns.clustering.clone());
987
988        // Reset enhanced components
989        self.near_miss_labels.clear();
990        self.scheme_actions.clear();
991        self.difficulty_distribution.clear();
992
993        if let Some(ref mut baseline) = self.behavioral_baseline {
994            *baseline =
995                BehavioralBaseline::new(self.config.enhanced.behavioral_baseline_config.clone());
996        }
997    }
998
999    /// Returns the number of clusters created.
1000    pub fn cluster_count(&self) -> usize {
1001        self.cluster_manager.cluster_count()
1002    }
1003
1004    // =========================================================================
1005    // Entity Context API
1006    // =========================================================================
1007
1008    /// Sets entity contexts for risk-adjusted anomaly injection.
1009    ///
1010    /// When entity contexts are provided, the injector adjusts anomaly injection
1011    /// rates based on entity risk factors. Entries involving high-risk vendors,
1012    /// new employees, or sensitive accounts will have higher effective injection
1013    /// rates.
1014    ///
1015    /// Pass empty HashMaps to clear previously set contexts.
1016    pub fn set_entity_contexts(
1017        &mut self,
1018        vendors: HashMap<String, VendorContext>,
1019        employees: HashMap<String, EmployeeContext>,
1020        accounts: HashMap<String, AccountContext>,
1021    ) {
1022        self.vendor_contexts = vendors;
1023        self.employee_contexts = employees;
1024        self.account_contexts = accounts;
1025    }
1026
1027    /// Returns a reference to the vendor context map.
1028    pub fn vendor_contexts(&self) -> &HashMap<String, VendorContext> {
1029        &self.vendor_contexts
1030    }
1031
1032    /// Returns a reference to the employee context map.
1033    pub fn employee_contexts(&self) -> &HashMap<String, EmployeeContext> {
1034        &self.employee_contexts
1035    }
1036
1037    /// Returns a reference to the account context map.
1038    pub fn account_contexts(&self) -> &HashMap<String, AccountContext> {
1039        &self.account_contexts
1040    }
1041
1042    /// Calculates a rate multiplier from the entity context maps alone (no
1043    /// `EntityAwareInjector` needed). This provides a lightweight fallback
1044    /// when context-aware injection is not fully enabled but context maps
1045    /// have been populated.
1046    ///
1047    /// The multiplier is the product of individual entity risk factors found
1048    /// in the context maps for the given journal entry. If no contexts match,
1049    /// returns 1.0 (no adjustment).
1050    fn calculate_context_rate_multiplier(&self, entry: &JournalEntry) -> f64 {
1051        if self.vendor_contexts.is_empty()
1052            && self.employee_contexts.is_empty()
1053            && self.account_contexts.is_empty()
1054        {
1055            return 1.0;
1056        }
1057
1058        let mut multiplier = 1.0;
1059
1060        // Vendor lookup via reference field
1061        if let Some(ref vendor_ref) = entry.header.reference {
1062            if let Some(ctx) = self.vendor_contexts.get(vendor_ref) {
1063                // New vendors get a 2.0x multiplier, dormant reactivations get 1.5x
1064                if ctx.is_new {
1065                    multiplier *= 2.0;
1066                }
1067                if ctx.is_dormant_reactivation {
1068                    multiplier *= 1.5;
1069                }
1070            }
1071        }
1072
1073        // Employee lookup via created_by
1074        if let Some(ctx) = self.employee_contexts.get(&entry.header.created_by) {
1075            if ctx.is_new {
1076                multiplier *= 1.5;
1077            }
1078            if ctx.is_volume_fatigued {
1079                multiplier *= 1.3;
1080            }
1081            if ctx.is_overtime {
1082                multiplier *= 1.2;
1083            }
1084        }
1085
1086        // Account lookup via first line's GL account
1087        if let Some(first_line) = entry.lines.first() {
1088            if let Some(ctx) = self.account_contexts.get(&first_line.gl_account) {
1089                if ctx.is_high_risk {
1090                    multiplier *= 2.0;
1091                }
1092            }
1093        }
1094
1095        multiplier
1096    }
1097
1098    /// Apply behavioral bias to a fraud-labeled entry so canonical forensic
1099    /// signals (weekend posting, round dollars, off-hours timestamps,
1100    /// post-close adjustments) have measurable lift over legitimate data.
1101    ///
1102    /// Delegates to [`datasynth_core::fraud_bias::apply_fraud_behavioral_bias`]
1103    /// and updates per-bias counters on `stats`. Returns the [`ProcessIssueType`]
1104    /// variants corresponding to each bias that fired so callers can emit
1105    /// secondary labels.
1106    fn apply_fraud_behavioral_bias(
1107        &mut self,
1108        entry: &mut JournalEntry,
1109    ) -> Vec<datasynth_core::models::ProcessIssueType> {
1110        use datasynth_core::models::ProcessIssueType;
1111
1112        let cfg = self.config.enhanced.fraud_behavioral_bias;
1113        let fired = apply_fraud_behavioral_bias(entry, &cfg, &mut self.rng);
1114        for issue in &fired {
1115            match issue {
1116                ProcessIssueType::WeekendPosting => self.stats.fraud_weekend_bias_applied += 1,
1117                ProcessIssueType::AfterHoursPosting => self.stats.fraud_off_hours_bias_applied += 1,
1118                ProcessIssueType::PostClosePosting => self.stats.fraud_post_close_bias_applied += 1,
1119                _ => {}
1120            }
1121        }
1122        // `round_dollar_bias` doesn't emit a ProcessIssueType (no canonical
1123        // process-issue label for it) — detect by matching the max line
1124        // amount against the set of round targets. If the bias fired,
1125        // the max amount is exactly one of [1K, 5K, 10K, 25K, 50K, 100K].
1126        if cfg.round_dollar_bias > 0.0 {
1127            const ROUND_TARGETS: &[i64] = &[1_000, 5_000, 10_000, 25_000, 50_000, 100_000];
1128            let max_amt: Decimal = entry
1129                .lines
1130                .iter()
1131                .map(|l| l.debit_amount.max(l.credit_amount))
1132                .max()
1133                .unwrap_or(Decimal::ZERO);
1134            if ROUND_TARGETS.iter().any(|t| max_amt == Decimal::from(*t)) {
1135                self.stats.fraud_round_dollar_bias_applied += 1;
1136            }
1137        }
1138        fired
1139    }
1140
1141    // =========================================================================
1142    // Enhanced Features API (v0.3.0+)
1143    // =========================================================================
1144
1145    /// Advances all active fraud schemes by one time step.
1146    ///
1147    /// Call this method once per simulated day to generate scheme actions.
1148    /// Returns the scheme actions generated for this date.
1149    pub fn advance_schemes(&mut self, date: NaiveDate, company_code: &str) -> Vec<SchemeAction> {
1150        if let Some(ref mut advancer) = self.scheme_advancer {
1151            let context = SchemeContext::new(date, company_code);
1152            let actions = advancer.advance_all(&context);
1153            self.scheme_actions.extend(actions.clone());
1154            actions
1155        } else {
1156            Vec::new()
1157        }
1158    }
1159
1160    /// Potentially starts a new fraud scheme based on probabilities.
1161    ///
1162    /// Call this method periodically (e.g., once per period) to allow new
1163    /// schemes to start based on configured probabilities.
1164    /// Returns the scheme ID if a scheme was started.
1165    pub fn maybe_start_scheme(
1166        &mut self,
1167        date: NaiveDate,
1168        company_code: &str,
1169        available_users: Vec<String>,
1170        available_accounts: Vec<String>,
1171        available_counterparties: Vec<String>,
1172    ) -> Option<uuid::Uuid> {
1173        if let Some(ref mut advancer) = self.scheme_advancer {
1174            let mut context = SchemeContext::new(date, company_code);
1175            context.available_users = available_users;
1176            context.available_accounts = available_accounts;
1177            context.available_counterparties = available_counterparties;
1178
1179            advancer.maybe_start_scheme(&context)
1180        } else {
1181            None
1182        }
1183    }
1184
1185    /// Returns all near-miss labels generated.
1186    pub fn get_near_miss_labels(&self) -> &[NearMissLabel] {
1187        &self.near_miss_labels
1188    }
1189
1190    /// Returns all scheme actions generated.
1191    pub fn get_scheme_actions(&self) -> &[SchemeAction] {
1192        &self.scheme_actions
1193    }
1194
1195    /// Returns the detection difficulty distribution.
1196    pub fn get_difficulty_distribution(&self) -> &HashMap<AnomalyDetectionDifficulty, usize> {
1197        &self.difficulty_distribution
1198    }
1199
1200    /// Checks for behavioral deviations for an entity with an observation.
1201    pub fn check_behavioral_deviations(
1202        &self,
1203        entity_id: &str,
1204        observation: &super::context::Observation,
1205    ) -> Vec<super::context::BehavioralDeviation> {
1206        if let Some(ref baseline) = self.behavioral_baseline {
1207            baseline.check_deviation(entity_id, observation)
1208        } else {
1209            Vec::new()
1210        }
1211    }
1212
1213    /// Gets the baseline for an entity.
1214    pub fn get_entity_baseline(&self, entity_id: &str) -> Option<&super::context::EntityBaseline> {
1215        if let Some(ref baseline) = self.behavioral_baseline {
1216            baseline.get_baseline(entity_id)
1217        } else {
1218            None
1219        }
1220    }
1221
1222    /// Returns the number of active schemes.
1223    pub fn active_scheme_count(&self) -> usize {
1224        if let Some(ref advancer) = self.scheme_advancer {
1225            advancer.active_scheme_count()
1226        } else {
1227            0
1228        }
1229    }
1230
1231    /// Returns whether enhanced features are enabled.
1232    pub fn has_enhanced_features(&self) -> bool {
1233        self.scheme_advancer.is_some()
1234            || self.near_miss_generator.is_some()
1235            || self.difficulty_calculator.is_some()
1236            || self.entity_aware_injector.is_some()
1237    }
1238}
1239
1240/// Builder for AnomalyInjectorConfig.
1241pub struct AnomalyInjectorConfigBuilder {
1242    config: AnomalyInjectorConfig,
1243}
1244
1245impl AnomalyInjectorConfigBuilder {
1246    /// Creates a new builder with default configuration.
1247    pub fn new() -> Self {
1248        Self {
1249            config: AnomalyInjectorConfig::default(),
1250        }
1251    }
1252
1253    /// Sets the total anomaly rate.
1254    pub fn with_total_rate(mut self, rate: f64) -> Self {
1255        self.config.rates.total_rate = rate;
1256        self
1257    }
1258
1259    /// Sets the fraud rate (proportion of anomalies).
1260    pub fn with_fraud_rate(mut self, rate: f64) -> Self {
1261        self.config.rates.fraud_rate = rate;
1262        self
1263    }
1264
1265    /// Sets the error rate (proportion of anomalies).
1266    pub fn with_error_rate(mut self, rate: f64) -> Self {
1267        self.config.rates.error_rate = rate;
1268        self
1269    }
1270
1271    /// Sets the random seed.
1272    pub fn with_seed(mut self, seed: u64) -> Self {
1273        self.config.seed = seed;
1274        self
1275    }
1276
1277    /// Sets the temporal pattern.
1278    pub fn with_temporal_pattern(mut self, pattern: TemporalPattern) -> Self {
1279        self.config.patterns.temporal_pattern = pattern;
1280        self
1281    }
1282
1283    /// Enables or disables label generation.
1284    pub fn with_labels(mut self, generate: bool) -> Self {
1285        self.config.generate_labels = generate;
1286        self
1287    }
1288
1289    /// Sets target companies.
1290    pub fn with_target_companies(mut self, companies: Vec<String>) -> Self {
1291        self.config.target_companies = companies;
1292        self
1293    }
1294
1295    /// Sets the date range.
1296    pub fn with_date_range(mut self, start: NaiveDate, end: NaiveDate) -> Self {
1297        self.config.date_range = Some((start, end));
1298        self
1299    }
1300
1301    // =========================================================================
1302    // Enhanced Features Configuration (v0.3.0+)
1303    // =========================================================================
1304
1305    /// Enables multi-stage fraud scheme generation.
1306    pub fn with_multi_stage_schemes(mut self, enabled: bool, probability: f64) -> Self {
1307        self.config.enhanced.multi_stage_schemes_enabled = enabled;
1308        self.config.enhanced.scheme_probability = probability;
1309        self
1310    }
1311
1312    /// Enables near-miss generation.
1313    pub fn with_near_misses(mut self, enabled: bool, proportion: f64) -> Self {
1314        self.config.enhanced.near_miss_enabled = enabled;
1315        self.config.enhanced.near_miss_proportion = proportion;
1316        self
1317    }
1318
1319    /// Sets approval thresholds for threshold-proximity near-misses.
1320    pub fn with_approval_thresholds(mut self, thresholds: Vec<Decimal>) -> Self {
1321        self.config.enhanced.approval_thresholds = thresholds;
1322        self
1323    }
1324
1325    /// Enables correlated anomaly injection.
1326    pub fn with_correlated_injection(mut self, enabled: bool) -> Self {
1327        self.config.enhanced.correlated_injection_enabled = enabled;
1328        self
1329    }
1330
1331    /// Enables temporal clustering (period-end spikes).
1332    pub fn with_temporal_clustering(mut self, enabled: bool, multiplier: f64) -> Self {
1333        self.config.enhanced.temporal_clustering_enabled = enabled;
1334        self.config.enhanced.period_end_multiplier = multiplier;
1335        self
1336    }
1337
1338    /// Enables detection difficulty classification.
1339    pub fn with_difficulty_classification(mut self, enabled: bool) -> Self {
1340        self.config.enhanced.difficulty_classification_enabled = enabled;
1341        self
1342    }
1343
1344    /// Enables context-aware injection.
1345    pub fn with_context_aware_injection(mut self, enabled: bool) -> Self {
1346        self.config.enhanced.context_aware_enabled = enabled;
1347        self
1348    }
1349
1350    /// Sets behavioral baseline configuration.
1351    pub fn with_behavioral_baseline(mut self, config: BehavioralBaselineConfig) -> Self {
1352        self.config.enhanced.behavioral_baseline_config = config;
1353        self
1354    }
1355
1356    /// Enables all enhanced features with default settings.
1357    pub fn with_all_enhanced_features(mut self) -> Self {
1358        self.config.enhanced.multi_stage_schemes_enabled = true;
1359        self.config.enhanced.scheme_probability = 0.02;
1360        self.config.enhanced.correlated_injection_enabled = true;
1361        self.config.enhanced.temporal_clustering_enabled = true;
1362        self.config.enhanced.period_end_multiplier = 2.5;
1363        self.config.enhanced.near_miss_enabled = true;
1364        self.config.enhanced.near_miss_proportion = 0.30;
1365        self.config.enhanced.difficulty_classification_enabled = true;
1366        self.config.enhanced.context_aware_enabled = true;
1367        self.config.enhanced.behavioral_baseline_config.enabled = true;
1368        self
1369    }
1370
1371    /// Builds the configuration.
1372    pub fn build(self) -> AnomalyInjectorConfig {
1373        self.config
1374    }
1375}
1376
1377impl Default for AnomalyInjectorConfigBuilder {
1378    fn default() -> Self {
1379        Self::new()
1380    }
1381}
1382
1383#[cfg(test)]
1384mod tests {
1385    use super::*;
1386    use chrono::NaiveDate;
1387    use datasynth_core::models::{JournalEntryLine, StatisticalAnomalyType};
1388    use rust_decimal_macros::dec;
1389
1390    fn create_test_entry(doc_num: &str) -> JournalEntry {
1391        let mut entry = JournalEntry::new_simple(
1392            doc_num.to_string(),
1393            "1000".to_string(),
1394            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1395            "Test Entry".to_string(),
1396        );
1397
1398        entry.add_line(JournalEntryLine {
1399            line_number: 1,
1400            gl_account: "5000".to_string(),
1401            debit_amount: dec!(1000),
1402            ..Default::default()
1403        });
1404
1405        entry.add_line(JournalEntryLine {
1406            line_number: 2,
1407            gl_account: "1000".to_string(),
1408            credit_amount: dec!(1000),
1409            ..Default::default()
1410        });
1411
1412        entry
1413    }
1414
1415    #[test]
1416    fn test_anomaly_injector_basic() {
1417        let config = AnomalyInjectorConfigBuilder::new()
1418            .with_total_rate(0.5) // High rate for testing
1419            .with_seed(42)
1420            .build();
1421
1422        let mut injector = AnomalyInjector::new(config);
1423
1424        let mut entries: Vec<_> = (0..100)
1425            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1426            .collect();
1427
1428        let result = injector.process_entries(&mut entries);
1429
1430        // With 50% rate, we should have some anomalies
1431        assert!(result.anomalies_injected > 0);
1432        assert!(!result.labels.is_empty());
1433        // `anomalies_injected` counts primary injection acts. `labels` also
1434        // includes secondary `ProcessIssue` labels emitted for each fraud
1435        // behavioural bias that fires, so `labels.len()` is always ≥ the
1436        // primary count.
1437        assert!(result.labels.len() >= result.anomalies_injected);
1438    }
1439
1440    #[test]
1441    fn test_specific_injection() {
1442        let config = AnomalyInjectorConfig::default();
1443        let mut injector = AnomalyInjector::new(config);
1444
1445        let mut entry = create_test_entry("JE001");
1446        let anomaly_type = AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount);
1447
1448        let label = injector.inject_specific(&mut entry, anomaly_type);
1449
1450        assert!(label.is_some());
1451        let label = label.unwrap();
1452        // document_id is the UUID string from the journal entry header
1453        assert!(!label.document_id.is_empty());
1454        assert_eq!(label.document_id, entry.document_number());
1455    }
1456
1457    #[test]
1458    fn test_self_approval_injection() {
1459        let config = AnomalyInjectorConfig::default();
1460        let mut injector = AnomalyInjector::new(config);
1461
1462        let mut entry = create_test_entry("JE001");
1463        let label = injector.create_self_approval(&mut entry, "USER001");
1464
1465        assert!(label.is_some());
1466        let label = label.unwrap();
1467        assert!(matches!(
1468            label.anomaly_type,
1469            AnomalyType::Fraud(FraudType::SelfApproval)
1470        ));
1471        assert!(label.related_entities.contains(&"USER001".to_string()));
1472    }
1473
1474    #[test]
1475    fn test_company_filtering() {
1476        let config = AnomalyInjectorConfigBuilder::new()
1477            .with_total_rate(1.0) // Inject all
1478            .with_target_companies(vec!["2000".to_string()])
1479            .build();
1480
1481        let mut injector = AnomalyInjector::new(config);
1482
1483        let mut entries = vec![
1484            create_test_entry("JE001"), // company 1000
1485            create_test_entry("JE002"), // company 1000
1486        ];
1487
1488        let result = injector.process_entries(&mut entries);
1489
1490        // No anomalies because entries are in company 1000, not 2000
1491        assert_eq!(result.anomalies_injected, 0);
1492    }
1493
1494    // =========================================================================
1495    // Entity Context Tests
1496    // =========================================================================
1497
1498    /// Helper to create a test entry with specific vendor reference and employee.
1499    fn create_test_entry_with_context(
1500        doc_num: &str,
1501        vendor_ref: Option<&str>,
1502        employee_id: &str,
1503        gl_account: &str,
1504    ) -> JournalEntry {
1505        let mut entry = JournalEntry::new_simple(
1506            doc_num.to_string(),
1507            "1000".to_string(),
1508            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
1509            "Test Entry".to_string(),
1510        );
1511
1512        entry.header.reference = vendor_ref.map(|v| v.to_string());
1513        entry.header.created_by = employee_id.to_string();
1514
1515        entry.add_line(JournalEntryLine {
1516            line_number: 1,
1517            gl_account: gl_account.to_string(),
1518            debit_amount: dec!(1000),
1519            ..Default::default()
1520        });
1521
1522        entry.add_line(JournalEntryLine {
1523            line_number: 2,
1524            gl_account: "1000".to_string(),
1525            credit_amount: dec!(1000),
1526            ..Default::default()
1527        });
1528
1529        entry
1530    }
1531
1532    #[test]
1533    fn test_set_entity_contexts() {
1534        let config = AnomalyInjectorConfig::default();
1535        let mut injector = AnomalyInjector::new(config);
1536
1537        // Initially empty
1538        assert!(injector.vendor_contexts().is_empty());
1539        assert!(injector.employee_contexts().is_empty());
1540        assert!(injector.account_contexts().is_empty());
1541
1542        // Set contexts
1543        let mut vendors = HashMap::new();
1544        vendors.insert(
1545            "V001".to_string(),
1546            VendorContext {
1547                vendor_id: "V001".to_string(),
1548                is_new: true,
1549                ..Default::default()
1550            },
1551        );
1552
1553        let mut employees = HashMap::new();
1554        employees.insert(
1555            "EMP001".to_string(),
1556            EmployeeContext {
1557                employee_id: "EMP001".to_string(),
1558                is_new: true,
1559                ..Default::default()
1560            },
1561        );
1562
1563        let mut accounts = HashMap::new();
1564        accounts.insert(
1565            "8100".to_string(),
1566            AccountContext {
1567                account_code: "8100".to_string(),
1568                is_high_risk: true,
1569                ..Default::default()
1570            },
1571        );
1572
1573        injector.set_entity_contexts(vendors, employees, accounts);
1574
1575        assert_eq!(injector.vendor_contexts().len(), 1);
1576        assert_eq!(injector.employee_contexts().len(), 1);
1577        assert_eq!(injector.account_contexts().len(), 1);
1578        assert!(injector.vendor_contexts().contains_key("V001"));
1579        assert!(injector.employee_contexts().contains_key("EMP001"));
1580        assert!(injector.account_contexts().contains_key("8100"));
1581    }
1582
1583    #[test]
1584    fn test_default_behavior_no_contexts() {
1585        // Without any entity contexts, the base rate is used unchanged.
1586        let config = AnomalyInjectorConfigBuilder::new()
1587            .with_total_rate(0.5)
1588            .with_seed(42)
1589            .build();
1590
1591        let mut injector = AnomalyInjector::new(config);
1592
1593        let mut entries: Vec<_> = (0..200)
1594            .map(|i| create_test_entry(&format!("JE{:04}", i)))
1595            .collect();
1596
1597        let result = injector.process_entries(&mut entries);
1598
1599        // With 50% base rate and no context, expect roughly 50% injection
1600        // Allow wide margin for randomness
1601        assert!(result.anomalies_injected > 0);
1602        let rate = result.anomalies_injected as f64 / result.entries_processed as f64;
1603        assert!(
1604            rate > 0.2 && rate < 0.8,
1605            "Expected ~50% rate, got {:.2}%",
1606            rate * 100.0
1607        );
1608    }
1609
1610    #[test]
1611    fn test_entity_context_increases_injection_rate() {
1612        // With high-risk entity contexts, the effective rate should be higher
1613        // than the base rate, leading to more anomalies being injected.
1614        let base_rate = 0.10; // Low base rate
1615
1616        // Run without contexts
1617        let config_no_ctx = AnomalyInjectorConfigBuilder::new()
1618            .with_total_rate(base_rate)
1619            .with_seed(123)
1620            .build();
1621
1622        let mut injector_no_ctx = AnomalyInjector::new(config_no_ctx);
1623
1624        let mut entries_no_ctx: Vec<_> = (0..500)
1625            .map(|i| {
1626                create_test_entry_with_context(
1627                    &format!("JE{:04}", i),
1628                    Some("V001"),
1629                    "EMP001",
1630                    "8100",
1631                )
1632            })
1633            .collect();
1634
1635        let result_no_ctx = injector_no_ctx.process_entries(&mut entries_no_ctx);
1636
1637        // Run with high-risk contexts (same seed for comparable randomness)
1638        let config_ctx = AnomalyInjectorConfigBuilder::new()
1639            .with_total_rate(base_rate)
1640            .with_seed(123)
1641            .build();
1642
1643        let mut injector_ctx = AnomalyInjector::new(config_ctx);
1644
1645        // Set up high-risk contexts
1646        let mut vendors = HashMap::new();
1647        vendors.insert(
1648            "V001".to_string(),
1649            VendorContext {
1650                vendor_id: "V001".to_string(),
1651                is_new: true,                  // 2.0x multiplier
1652                is_dormant_reactivation: true, // 1.5x multiplier
1653                ..Default::default()
1654            },
1655        );
1656
1657        let mut employees = HashMap::new();
1658        employees.insert(
1659            "EMP001".to_string(),
1660            EmployeeContext {
1661                employee_id: "EMP001".to_string(),
1662                is_new: true, // 1.5x multiplier
1663                ..Default::default()
1664            },
1665        );
1666
1667        let mut accounts = HashMap::new();
1668        accounts.insert(
1669            "8100".to_string(),
1670            AccountContext {
1671                account_code: "8100".to_string(),
1672                is_high_risk: true, // 2.0x multiplier
1673                ..Default::default()
1674            },
1675        );
1676
1677        injector_ctx.set_entity_contexts(vendors, employees, accounts);
1678
1679        let mut entries_ctx: Vec<_> = (0..500)
1680            .map(|i| {
1681                create_test_entry_with_context(
1682                    &format!("JE{:04}", i),
1683                    Some("V001"),
1684                    "EMP001",
1685                    "8100",
1686                )
1687            })
1688            .collect();
1689
1690        let result_ctx = injector_ctx.process_entries(&mut entries_ctx);
1691
1692        // The context-enhanced run should inject more anomalies
1693        assert!(
1694            result_ctx.anomalies_injected > result_no_ctx.anomalies_injected,
1695            "Expected more anomalies with high-risk contexts: {} (with ctx) vs {} (without ctx)",
1696            result_ctx.anomalies_injected,
1697            result_no_ctx.anomalies_injected,
1698        );
1699    }
1700
1701    #[test]
1702    fn test_risk_score_multiplication() {
1703        // Verify the calculate_context_rate_multiplier produces correct values.
1704        let config = AnomalyInjectorConfig::default();
1705        let mut injector = AnomalyInjector::new(config);
1706
1707        // No contexts: multiplier should be 1.0
1708        let entry_plain = create_test_entry_with_context("JE001", None, "USER1", "5000");
1709        assert!(
1710            (injector.calculate_context_rate_multiplier(&entry_plain) - 1.0).abs() < f64::EPSILON,
1711        );
1712
1713        // Set up a new vendor (2.0x) + high-risk account (2.0x) = 4.0x
1714        let mut vendors = HashMap::new();
1715        vendors.insert(
1716            "V_RISKY".to_string(),
1717            VendorContext {
1718                vendor_id: "V_RISKY".to_string(),
1719                is_new: true,
1720                ..Default::default()
1721            },
1722        );
1723
1724        let mut accounts = HashMap::new();
1725        accounts.insert(
1726            "9000".to_string(),
1727            AccountContext {
1728                account_code: "9000".to_string(),
1729                is_high_risk: true,
1730                ..Default::default()
1731            },
1732        );
1733
1734        injector.set_entity_contexts(vendors, HashMap::new(), accounts);
1735
1736        let entry_risky = create_test_entry_with_context("JE002", Some("V_RISKY"), "USER1", "9000");
1737        let multiplier = injector.calculate_context_rate_multiplier(&entry_risky);
1738        // new vendor = 2.0x, high-risk account = 2.0x => 4.0x
1739        assert!(
1740            (multiplier - 4.0).abs() < f64::EPSILON,
1741            "Expected 4.0x multiplier, got {}",
1742            multiplier,
1743        );
1744
1745        // Entry with only vendor context match (no account match)
1746        let entry_vendor_only =
1747            create_test_entry_with_context("JE003", Some("V_RISKY"), "USER1", "5000");
1748        let multiplier_vendor = injector.calculate_context_rate_multiplier(&entry_vendor_only);
1749        assert!(
1750            (multiplier_vendor - 2.0).abs() < f64::EPSILON,
1751            "Expected 2.0x multiplier (vendor only), got {}",
1752            multiplier_vendor,
1753        );
1754
1755        // Entry with no matching contexts
1756        let entry_no_match =
1757            create_test_entry_with_context("JE004", Some("V_SAFE"), "USER1", "5000");
1758        let multiplier_none = injector.calculate_context_rate_multiplier(&entry_no_match);
1759        assert!(
1760            (multiplier_none - 1.0).abs() < f64::EPSILON,
1761            "Expected 1.0x multiplier (no match), got {}",
1762            multiplier_none,
1763        );
1764    }
1765
1766    #[test]
1767    fn test_employee_context_multiplier() {
1768        let config = AnomalyInjectorConfig::default();
1769        let mut injector = AnomalyInjector::new(config);
1770
1771        let mut employees = HashMap::new();
1772        employees.insert(
1773            "EMP_NEW".to_string(),
1774            EmployeeContext {
1775                employee_id: "EMP_NEW".to_string(),
1776                is_new: true,             // 1.5x
1777                is_volume_fatigued: true, // 1.3x
1778                is_overtime: true,        // 1.2x
1779                ..Default::default()
1780            },
1781        );
1782
1783        injector.set_entity_contexts(HashMap::new(), employees, HashMap::new());
1784
1785        let entry = create_test_entry_with_context("JE001", None, "EMP_NEW", "5000");
1786        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1787
1788        // 1.5 * 1.3 * 1.2 = 2.34
1789        let expected = 1.5 * 1.3 * 1.2;
1790        assert!(
1791            (multiplier - expected).abs() < 0.01,
1792            "Expected {:.3}x multiplier, got {:.3}",
1793            expected,
1794            multiplier,
1795        );
1796    }
1797
1798    #[test]
1799    fn test_entity_contexts_persist_across_reset() {
1800        let config = AnomalyInjectorConfig::default();
1801        let mut injector = AnomalyInjector::new(config);
1802
1803        let mut vendors = HashMap::new();
1804        vendors.insert(
1805            "V001".to_string(),
1806            VendorContext {
1807                vendor_id: "V001".to_string(),
1808                is_new: true,
1809                ..Default::default()
1810            },
1811        );
1812
1813        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1814        assert_eq!(injector.vendor_contexts().len(), 1);
1815
1816        // Reset clears labels and stats but not entity contexts
1817        injector.reset();
1818        assert_eq!(injector.vendor_contexts().len(), 1);
1819    }
1820
1821    #[test]
1822    fn test_set_empty_contexts_clears() {
1823        let config = AnomalyInjectorConfig::default();
1824        let mut injector = AnomalyInjector::new(config);
1825
1826        let mut vendors = HashMap::new();
1827        vendors.insert(
1828            "V001".to_string(),
1829            VendorContext {
1830                vendor_id: "V001".to_string(),
1831                ..Default::default()
1832            },
1833        );
1834
1835        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1836        assert_eq!(injector.vendor_contexts().len(), 1);
1837
1838        // Setting empty maps clears
1839        injector.set_entity_contexts(HashMap::new(), HashMap::new(), HashMap::new());
1840        assert!(injector.vendor_contexts().is_empty());
1841    }
1842
1843    #[test]
1844    fn test_dormant_vendor_multiplier() {
1845        let config = AnomalyInjectorConfig::default();
1846        let mut injector = AnomalyInjector::new(config);
1847
1848        let mut vendors = HashMap::new();
1849        vendors.insert(
1850            "V_DORMANT".to_string(),
1851            VendorContext {
1852                vendor_id: "V_DORMANT".to_string(),
1853                is_dormant_reactivation: true, // 1.5x
1854                ..Default::default()
1855            },
1856        );
1857
1858        injector.set_entity_contexts(vendors, HashMap::new(), HashMap::new());
1859
1860        let entry = create_test_entry_with_context("JE001", Some("V_DORMANT"), "USER1", "5000");
1861        let multiplier = injector.calculate_context_rate_multiplier(&entry);
1862        assert!(
1863            (multiplier - 1.5).abs() < f64::EPSILON,
1864            "Expected 1.5x multiplier for dormant vendor, got {}",
1865            multiplier,
1866        );
1867    }
1868
1869    // =========================================================================
1870    // Fraud Behavioral Bias Tests
1871    // =========================================================================
1872
1873    /// When all biases are set to 1.0, every fraud entry gets every behavioral
1874    /// flag: weekend posting date, round-dollar amount, off-hours created_at,
1875    /// and post-close marking. This is the strong "all biases fire" guarantee
1876    /// that lets downstream ML classifiers learn these canonical signals.
1877    #[test]
1878    fn fraud_behavioral_bias_applies_all_flags_at_rate_one() {
1879        use chrono::{Datelike, Timelike, Weekday};
1880        use datasynth_core::models::FraudType;
1881
1882        let mut config = AnomalyInjectorConfig::default();
1883        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1884            enabled: true,
1885            weekend_bias: 1.0,
1886            round_dollar_bias: 1.0,
1887            off_hours_bias: 1.0,
1888            post_close_bias: 1.0,
1889        };
1890        let mut injector = AnomalyInjector::new(config);
1891
1892        // Use a Monday date so the weekend-shift always moves it.
1893        let mut entry = JournalEntry::new_simple(
1894            "JE001".to_string(),
1895            "1000".to_string(),
1896            NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(), // Monday
1897            "Test Entry".to_string(),
1898        );
1899        entry.add_line(JournalEntryLine {
1900            line_number: 1,
1901            gl_account: "5000".to_string(),
1902            debit_amount: dec!(1237),
1903            ..Default::default()
1904        });
1905        entry.add_line(JournalEntryLine {
1906            line_number: 2,
1907            gl_account: "1000".to_string(),
1908            credit_amount: dec!(1237),
1909            ..Default::default()
1910        });
1911
1912        let _ =
1913            injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1914
1915        // Weekend: shifted to Sat or Sun.
1916        assert!(
1917            matches!(
1918                entry.header.posting_date.weekday(),
1919                Weekday::Sat | Weekday::Sun
1920            ),
1921            "expected weekend posting date, got {:?}",
1922            entry.header.posting_date.weekday()
1923        );
1924        // Round-dollar: exactly one of the known round targets.
1925        let debit_total: Decimal = entry.lines.iter().map(|l| l.debit_amount).sum();
1926        let credit_total: Decimal = entry.lines.iter().map(|l| l.credit_amount).sum();
1927        assert_eq!(debit_total, credit_total, "entry must remain balanced");
1928        assert!(
1929            [
1930                dec!(1_000),
1931                dec!(5_000),
1932                dec!(10_000),
1933                dec!(25_000),
1934                dec!(50_000),
1935                dec!(100_000)
1936            ]
1937            .contains(&debit_total),
1938            "expected round-dollar total, got {}",
1939            debit_total
1940        );
1941        // Off-hours: 22:00–05:59 UTC.
1942        let hour = entry.header.created_at.hour();
1943        assert!(
1944            !(6..22).contains(&hour),
1945            "expected off-hours timestamp, got hour {}",
1946            hour
1947        );
1948        // Post-close marked.
1949        assert!(entry.header.is_post_close);
1950
1951        // Stats reflect each bias application.
1952        let stats = injector.get_stats();
1953        assert_eq!(stats.fraud_weekend_bias_applied, 1);
1954        assert_eq!(stats.fraud_round_dollar_bias_applied, 1);
1955        assert_eq!(stats.fraud_off_hours_bias_applied, 1);
1956        assert_eq!(stats.fraud_post_close_bias_applied, 1);
1957    }
1958
1959    /// When biases are all zero, no flags are applied even to fraud entries —
1960    /// the feature is fully opt-outable.
1961    #[test]
1962    fn fraud_behavioral_bias_rate_zero_applies_nothing() {
1963        use datasynth_core::models::FraudType;
1964
1965        let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); // Monday
1966        let mut config = AnomalyInjectorConfig::default();
1967        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1968            enabled: true,
1969            weekend_bias: 0.0,
1970            round_dollar_bias: 0.0,
1971            off_hours_bias: 0.0,
1972            post_close_bias: 0.0,
1973        };
1974        let mut injector = AnomalyInjector::new(config);
1975        let mut entry = create_test_entry("JE001");
1976        entry.header.posting_date = original_date;
1977
1978        let _ =
1979            injector.inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry));
1980
1981        assert_eq!(entry.header.posting_date, original_date);
1982        assert!(!entry.header.is_post_close);
1983        let stats = injector.get_stats();
1984        assert_eq!(stats.fraud_weekend_bias_applied, 0);
1985        assert_eq!(stats.fraud_round_dollar_bias_applied, 0);
1986        assert_eq!(stats.fraud_off_hours_bias_applied, 0);
1987        assert_eq!(stats.fraud_post_close_bias_applied, 0);
1988    }
1989
1990    /// Non-fraud anomalies (errors, process issues, etc.) are not touched by
1991    /// the bias — only `AnomalyType::Fraud(_)` triggers it.
1992    #[test]
1993    fn fraud_behavioral_bias_skips_non_fraud_anomalies() {
1994        let original_date = NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(); // Monday
1995        let mut config = AnomalyInjectorConfig::default();
1996        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
1997            enabled: true,
1998            weekend_bias: 1.0,
1999            round_dollar_bias: 1.0,
2000            off_hours_bias: 1.0,
2001            post_close_bias: 1.0,
2002        };
2003        let mut injector = AnomalyInjector::new(config);
2004        let mut entry = create_test_entry("JE001");
2005        entry.header.posting_date = original_date;
2006
2007        let _ = injector.inject_specific(
2008            &mut entry,
2009            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
2010        );
2011
2012        assert_eq!(entry.header.posting_date, original_date);
2013        let stats = injector.get_stats();
2014        assert_eq!(stats.fraud_weekend_bias_applied, 0);
2015    }
2016
2017    /// When behavioural biases fire on a fraud entry, secondary
2018    /// `ProcessIssue` labels should be pushed into the labels stream so
2019    /// auditors can filter for specific forensic patterns.
2020    #[test]
2021    fn fraud_behavioral_bias_emits_secondary_process_issue_labels() {
2022        use datasynth_core::models::{FraudType, ProcessIssueType};
2023
2024        let mut config = AnomalyInjectorConfig::default();
2025        config.enhanced.fraud_behavioral_bias = FraudBehavioralBiasConfig {
2026            enabled: true,
2027            weekend_bias: 1.0,
2028            round_dollar_bias: 0.0, // round-dollar does not emit a process-issue label
2029            off_hours_bias: 1.0,
2030            post_close_bias: 1.0,
2031        };
2032        let mut injector = AnomalyInjector::new(config);
2033        let mut entry = JournalEntry::new_simple(
2034            "JE001".into(),
2035            "1000".into(),
2036            NaiveDate::from_ymd_opt(2024, 6, 10).unwrap(),
2037            "Test".into(),
2038        );
2039        entry.add_line(JournalEntryLine {
2040            line_number: 1,
2041            gl_account: "5000".into(),
2042            debit_amount: dec!(1000),
2043            ..Default::default()
2044        });
2045        entry.add_line(JournalEntryLine {
2046            line_number: 2,
2047            gl_account: "1000".into(),
2048            credit_amount: dec!(1000),
2049            ..Default::default()
2050        });
2051
2052        let primary = injector
2053            .inject_specific(&mut entry, AnomalyType::Fraud(FraudType::FictitiousEntry))
2054            .expect("fraud label should be produced");
2055
2056        // Primary fraud label + 3 secondary process-issue labels.
2057        let labels = injector.get_labels();
2058        assert_eq!(
2059            labels.len(),
2060            3,
2061            "expected 3 secondary ProcessIssue labels; primary is returned, not pushed"
2062        );
2063        let types: Vec<AnomalyType> = labels.iter().map(|l| l.anomaly_type.clone()).collect();
2064        assert!(types.contains(&AnomalyType::ProcessIssue(ProcessIssueType::WeekendPosting)));
2065        assert!(types.contains(&AnomalyType::ProcessIssue(
2066            ProcessIssueType::AfterHoursPosting
2067        )));
2068        assert!(types.contains(&AnomalyType::ProcessIssue(
2069            ProcessIssueType::PostClosePosting
2070        )));
2071        assert_eq!(
2072            primary.anomaly_type,
2073            AnomalyType::Fraud(FraudType::FictitiousEntry)
2074        );
2075    }
2076}
datasynth_generators/anomaly/injector.rs

datasynth_generators/anomaly/
injector.rs