datasynth_core/models/
anomaly.rs

//! Anomaly types and labels for synthetic data generation.
//!
//! This module provides comprehensive anomaly classification for:
//! - Fraud detection training
//! - Error detection systems
//! - Process compliance monitoring
//! - Statistical anomaly detection
//! - Graph-based anomaly detection
9
10use chrono::{NaiveDate, NaiveDateTime};
11use rust_decimal::Decimal;
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14
15/// Causal reason explaining why an anomaly was injected.
16///
17/// This enables provenance tracking for understanding the "why" behind each anomaly.
18#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
19pub enum AnomalyCausalReason {
20    /// Injected due to random rate selection.
21    RandomRate {
22        /// Base rate used for selection.
23        base_rate: f64,
24    },
25    /// Injected due to temporal pattern matching.
26    TemporalPattern {
27        /// Name of the temporal pattern (e.g., "year_end_spike", "month_end").
28        pattern_name: String,
29    },
30    /// Injected based on entity targeting rules.
31    EntityTargeting {
32        /// Type of entity targeted (e.g., "vendor", "user", "account").
33        target_type: String,
34        /// ID of the targeted entity.
35        target_id: String,
36    },
37    /// Part of an anomaly cluster.
38    ClusterMembership {
39        /// ID of the cluster this anomaly belongs to.
40        cluster_id: String,
41    },
42    /// Part of a multi-step scenario.
43    ScenarioStep {
44        /// Type of scenario (e.g., "kickback_scheme", "round_tripping").
45        scenario_type: String,
46        /// Step number within the scenario.
47        step_number: u32,
48    },
49    /// Injected based on data quality profile.
50    DataQualityProfile {
51        /// Profile name (e.g., "noisy", "legacy", "clean").
52        profile: String,
53    },
54    /// Injected for ML training balance.
55    MLTrainingBalance {
56        /// Target class being balanced.
57        target_class: String,
58    },
59}
60
61/// Structured injection strategy with captured parameters.
62///
63/// Unlike the string-based `injection_strategy` field, this enum captures
64/// the exact parameters used during injection for full reproducibility.
65#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
66pub enum InjectionStrategy {
67    /// Amount was manipulated by a factor.
68    AmountManipulation {
69        /// Original amount before manipulation.
70        original: Decimal,
71        /// Multiplication factor applied.
72        factor: f64,
73    },
74    /// Amount adjusted to avoid a threshold.
75    ThresholdAvoidance {
76        /// Threshold being avoided.
77        threshold: Decimal,
78        /// Final amount after adjustment.
79        adjusted_amount: Decimal,
80    },
81    /// Date was backdated or forward-dated.
82    DateShift {
83        /// Number of days shifted (negative = backdated).
84        days_shifted: i32,
85        /// Original date before shift.
86        original_date: NaiveDate,
87    },
88    /// User approved their own transaction.
89    SelfApproval {
90        /// User who created and approved.
91        user_id: String,
92    },
93    /// Segregation of duties violation.
94    SoDViolation {
95        /// First duty involved.
96        duty1: String,
97        /// Second duty involved.
98        duty2: String,
99        /// User who performed both duties.
100        violating_user: String,
101    },
102    /// Exact duplicate of another document.
103    ExactDuplicate {
104        /// ID of the original document.
105        original_doc_id: String,
106    },
107    /// Near-duplicate with small variations.
108    NearDuplicate {
109        /// ID of the original document.
110        original_doc_id: String,
111        /// Fields that were varied.
112        varied_fields: Vec<String>,
113    },
114    /// Circular flow of funds/goods.
115    CircularFlow {
116        /// Chain of entities involved.
117        entity_chain: Vec<String>,
118    },
119    /// Split transaction to avoid threshold.
120    SplitTransaction {
121        /// Original total amount.
122        original_amount: Decimal,
123        /// Number of splits.
124        split_count: u32,
125        /// IDs of the split documents.
126        split_doc_ids: Vec<String>,
127    },
128    /// Round number manipulation.
129    RoundNumbering {
130        /// Original precise amount.
131        original_amount: Decimal,
132        /// Rounded amount.
133        rounded_amount: Decimal,
134    },
135    /// Timing manipulation (weekend, after-hours, etc.).
136    TimingManipulation {
137        /// Type of timing issue.
138        timing_type: String,
139        /// Original timestamp.
140        original_time: Option<NaiveDateTime>,
141    },
142    /// Account misclassification.
143    AccountMisclassification {
144        /// Correct account.
145        correct_account: String,
146        /// Incorrect account used.
147        incorrect_account: String,
148    },
149    /// Missing required field.
150    MissingField {
151        /// Name of the missing field.
152        field_name: String,
153    },
154    /// Custom injection strategy.
155    Custom {
156        /// Strategy name.
157        name: String,
158        /// Additional parameters.
159        parameters: HashMap<String, String>,
160    },
161}
162
163impl InjectionStrategy {
164    /// Returns a human-readable description of the strategy.
165    pub fn description(&self) -> String {
166        match self {
167            InjectionStrategy::AmountManipulation { factor, .. } => {
168                format!("Amount multiplied by {factor:.2}")
169            }
170            InjectionStrategy::ThresholdAvoidance { threshold, .. } => {
171                format!("Amount adjusted to avoid {threshold} threshold")
172            }
173            InjectionStrategy::DateShift { days_shifted, .. } => {
174                if *days_shifted < 0 {
175                    format!("Date backdated by {} days", days_shifted.abs())
176                } else {
177                    format!("Date forward-dated by {days_shifted} days")
178                }
179            }
180            InjectionStrategy::SelfApproval { user_id } => {
181                format!("Self-approval by user {user_id}")
182            }
183            InjectionStrategy::SoDViolation { duty1, duty2, .. } => {
184                format!("SoD violation: {duty1} and {duty2}")
185            }
186            InjectionStrategy::ExactDuplicate { original_doc_id } => {
187                format!("Exact duplicate of {original_doc_id}")
188            }
189            InjectionStrategy::NearDuplicate {
190                original_doc_id,
191                varied_fields,
192            } => {
193                format!("Near-duplicate of {original_doc_id} (varied: {varied_fields:?})")
194            }
195            InjectionStrategy::CircularFlow { entity_chain } => {
196                format!("Circular flow through {} entities", entity_chain.len())
197            }
198            InjectionStrategy::SplitTransaction { split_count, .. } => {
199                format!("Split into {split_count} transactions")
200            }
201            InjectionStrategy::RoundNumbering { .. } => "Amount rounded to even number".to_string(),
202            InjectionStrategy::TimingManipulation { timing_type, .. } => {
203                format!("Timing manipulation: {timing_type}")
204            }
205            InjectionStrategy::AccountMisclassification {
206                correct_account,
207                incorrect_account,
208            } => {
209                format!("Misclassified from {correct_account} to {incorrect_account}")
210            }
211            InjectionStrategy::MissingField { field_name } => {
212                format!("Missing required field: {field_name}")
213            }
214            InjectionStrategy::Custom { name, .. } => format!("Custom: {name}"),
215        }
216    }
217
218    /// Returns the strategy type name.
219    pub fn strategy_type(&self) -> &'static str {
220        match self {
221            InjectionStrategy::AmountManipulation { .. } => "AmountManipulation",
222            InjectionStrategy::ThresholdAvoidance { .. } => "ThresholdAvoidance",
223            InjectionStrategy::DateShift { .. } => "DateShift",
224            InjectionStrategy::SelfApproval { .. } => "SelfApproval",
225            InjectionStrategy::SoDViolation { .. } => "SoDViolation",
226            InjectionStrategy::ExactDuplicate { .. } => "ExactDuplicate",
227            InjectionStrategy::NearDuplicate { .. } => "NearDuplicate",
228            InjectionStrategy::CircularFlow { .. } => "CircularFlow",
229            InjectionStrategy::SplitTransaction { .. } => "SplitTransaction",
230            InjectionStrategy::RoundNumbering { .. } => "RoundNumbering",
231            InjectionStrategy::TimingManipulation { .. } => "TimingManipulation",
232            InjectionStrategy::AccountMisclassification { .. } => "AccountMisclassification",
233            InjectionStrategy::MissingField { .. } => "MissingField",
234            InjectionStrategy::Custom { .. } => "Custom",
235        }
236    }
237}
238
239/// Primary anomaly classification.
240#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
241pub enum AnomalyType {
242    /// Fraudulent activity.
243    Fraud(FraudType),
244    /// Data entry or processing error.
245    Error(ErrorType),
246    /// Process or control issue.
247    ProcessIssue(ProcessIssueType),
248    /// Statistical anomaly.
249    Statistical(StatisticalAnomalyType),
250    /// Relational/graph anomaly.
251    Relational(RelationalAnomalyType),
252    /// Custom anomaly type.
253    Custom(String),
254}
255
256impl AnomalyType {
257    /// Returns the category name.
258    pub fn category(&self) -> &'static str {
259        match self {
260            AnomalyType::Fraud(_) => "Fraud",
261            AnomalyType::Error(_) => "Error",
262            AnomalyType::ProcessIssue(_) => "ProcessIssue",
263            AnomalyType::Statistical(_) => "Statistical",
264            AnomalyType::Relational(_) => "Relational",
265            AnomalyType::Custom(_) => "Custom",
266        }
267    }
268
269    /// Returns the specific type name.
270    pub fn type_name(&self) -> String {
271        match self {
272            AnomalyType::Fraud(t) => format!("{t:?}"),
273            AnomalyType::Error(t) => format!("{t:?}"),
274            AnomalyType::ProcessIssue(t) => format!("{t:?}"),
275            AnomalyType::Statistical(t) => format!("{t:?}"),
276            AnomalyType::Relational(t) => format!("{t:?}"),
277            AnomalyType::Custom(s) => s.clone(),
278        }
279    }
280
281    /// Returns the severity level (1-5, 5 being most severe).
282    pub fn severity(&self) -> u8 {
283        match self {
284            AnomalyType::Fraud(t) => t.severity(),
285            AnomalyType::Error(t) => t.severity(),
286            AnomalyType::ProcessIssue(t) => t.severity(),
287            AnomalyType::Statistical(t) => t.severity(),
288            AnomalyType::Relational(t) => t.severity(),
289            AnomalyType::Custom(_) => 3,
290        }
291    }
292
293    /// Returns whether this anomaly is typically intentional.
294    pub fn is_intentional(&self) -> bool {
295        matches!(self, AnomalyType::Fraud(_))
296    }
297}
298
299/// Fraud types for detection training.
300#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
301pub enum FraudType {
302    // Journal Entry Fraud
303    /// Fictitious journal entry with no business purpose.
304    FictitiousEntry,
305    /// Fictitious transaction (alias for FictitiousEntry).
306    FictitiousTransaction,
307    /// Round-dollar amounts suggesting manual manipulation.
308    RoundDollarManipulation,
309    /// Entry posted just below approval threshold.
310    JustBelowThreshold,
311    /// Revenue recognition manipulation.
312    RevenueManipulation,
313    /// Expense capitalization fraud.
314    ImproperCapitalization,
315    /// Improperly capitalizing expenses as assets.
316    ExpenseCapitalization,
317    /// Cookie jar reserves manipulation.
318    ReserveManipulation,
319    /// Round-tripping funds through suspense/clearing accounts.
320    SuspenseAccountAbuse,
321    /// Splitting transactions to stay below approval thresholds.
322    SplitTransaction,
323    /// Unusual timing (weekend, holiday, after-hours postings).
324    TimingAnomaly,
325    /// Posting to unauthorized accounts.
326    UnauthorizedAccess,
327
328    // Approval Fraud
329    /// User approving their own request.
330    SelfApproval,
331    /// Approval beyond authorized limit.
332    ExceededApprovalLimit,
333    /// Segregation of duties violation.
334    SegregationOfDutiesViolation,
335    /// Approval by unauthorized user.
336    UnauthorizedApproval,
337    /// Collusion between approver and requester.
338    CollusiveApproval,
339
340    // Vendor/Payment Fraud
341    /// Fictitious vendor.
342    FictitiousVendor,
343    /// Duplicate payment to vendor.
344    DuplicatePayment,
345    /// Payment to shell company.
346    ShellCompanyPayment,
347    /// Kickback scheme.
348    Kickback,
349    /// Kickback scheme (alias).
350    KickbackScheme,
351    /// Unauthorized customer/vendor discount (sweethearting, side deals).
352    UnauthorizedDiscount,
353    /// Round-tripping funds through multiple entities or accounts to
354    /// inflate apparent activity or obscure origin.
355    RoundTripping,
356    /// Invoice manipulation.
357    InvoiceManipulation,
358
359    // Asset Fraud
360    /// Misappropriation of assets.
361    AssetMisappropriation,
362    /// Inventory theft.
363    InventoryTheft,
364    /// Ghost employee.
365    GhostEmployee,
366
367    // Financial Statement Fraud
368    /// Premature revenue recognition.
369    PrematureRevenue,
370    /// Understated liabilities.
371    UnderstatedLiabilities,
372    /// Overstated assets.
373    OverstatedAssets,
374    /// Channel stuffing.
375    ChannelStuffing,
376
377    // Accounting Standards Violations (ASC 606 / IFRS 15 - Revenue)
378    /// Improper revenue recognition timing (ASC 606/IFRS 15).
379    ImproperRevenueRecognition,
380    /// Multiple performance obligations not properly separated.
381    ImproperPoAllocation,
382    /// Variable consideration not properly estimated.
383    VariableConsiderationManipulation,
384    /// Contract modifications not properly accounted for.
385    ContractModificationMisstatement,
386
387    // Accounting Standards Violations (ASC 842 / IFRS 16 - Leases)
388    /// Lease classification manipulation (operating vs finance).
389    LeaseClassificationManipulation,
390    /// Off-balance sheet lease fraud.
391    OffBalanceSheetLease,
392    /// Lease liability understatement.
393    LeaseLiabilityUnderstatement,
394    /// ROU asset misstatement.
395    RouAssetMisstatement,
396
397    // Accounting Standards Violations (ASC 820 / IFRS 13 - Fair Value)
398    /// Fair value hierarchy misclassification.
399    FairValueHierarchyManipulation,
400    /// Level 3 input manipulation.
401    Level3InputManipulation,
402    /// Valuation technique manipulation.
403    ValuationTechniqueManipulation,
404
405    // Accounting Standards Violations (ASC 360 / IAS 36 - Impairment)
406    /// Delayed impairment recognition.
407    DelayedImpairment,
408    /// Improperly avoiding impairment testing.
409    ImpairmentTestAvoidance,
410    /// Cash flow projection manipulation for impairment.
411    CashFlowProjectionManipulation,
412    /// Improper impairment reversal (IFRS only).
413    ImproperImpairmentReversal,
414
415    // Sourcing/Procurement Fraud (S2C)
416    /// Bid rigging or collusion among bidders.
417    BidRigging,
418    /// Contracts with phantom/shell vendors.
419    PhantomVendorContract,
420    /// Splitting contracts to avoid approval thresholds.
421    SplitContractThreshold,
422    /// Conflict of interest in sourcing decisions.
423    ConflictOfInterestSourcing,
424
425    // HR/Payroll Fraud (H2R)
426    /// Ghost employee on payroll.
427    GhostEmployeePayroll,
428    /// Payroll inflation/unauthorized raises.
429    PayrollInflation,
430    /// Duplicate expense report submission.
431    DuplicateExpenseReport,
432    /// Fictitious expense claims.
433    FictitiousExpense,
434    /// Splitting expenses to avoid approval threshold.
435    SplitExpenseToAvoidApproval,
436
437    // O2C Fraud
438    /// Revenue timing manipulation via quotes.
439    RevenueTimingManipulation,
440    /// Overriding quote prices without authorization.
441    QuotePriceOverride,
442}
443
444impl FraudType {
445    /// Returns severity level (1-5).
446    pub fn severity(&self) -> u8 {
447        match self {
448            FraudType::RoundDollarManipulation => 2,
449            FraudType::JustBelowThreshold => 3,
450            FraudType::SelfApproval => 3,
451            FraudType::ExceededApprovalLimit => 3,
452            FraudType::DuplicatePayment => 3,
453            FraudType::FictitiousEntry => 4,
454            FraudType::RevenueManipulation => 5,
455            FraudType::FictitiousVendor => 5,
456            FraudType::ShellCompanyPayment => 5,
457            FraudType::AssetMisappropriation => 5,
458            FraudType::SegregationOfDutiesViolation => 4,
459            FraudType::CollusiveApproval => 5,
460            // Accounting Standards Violations (Revenue - ASC 606/IFRS 15)
461            FraudType::ImproperRevenueRecognition => 5,
462            FraudType::ImproperPoAllocation => 4,
463            FraudType::VariableConsiderationManipulation => 4,
464            FraudType::ContractModificationMisstatement => 3,
465            // Accounting Standards Violations (Leases - ASC 842/IFRS 16)
466            FraudType::LeaseClassificationManipulation => 4,
467            FraudType::OffBalanceSheetLease => 5,
468            FraudType::LeaseLiabilityUnderstatement => 4,
469            FraudType::RouAssetMisstatement => 3,
470            // Accounting Standards Violations (Fair Value - ASC 820/IFRS 13)
471            FraudType::FairValueHierarchyManipulation => 4,
472            FraudType::Level3InputManipulation => 5,
473            FraudType::ValuationTechniqueManipulation => 4,
474            // Accounting Standards Violations (Impairment - ASC 360/IAS 36)
475            FraudType::DelayedImpairment => 4,
476            FraudType::ImpairmentTestAvoidance => 4,
477            FraudType::CashFlowProjectionManipulation => 5,
478            FraudType::ImproperImpairmentReversal => 3,
479            _ => 4,
480        }
481    }
482}
483
484/// Error types for error detection.
485#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
486pub enum ErrorType {
487    // Data Entry Errors
488    /// Duplicate document entry.
489    DuplicateEntry,
490    /// Reversed debit/credit amounts.
491    ReversedAmount,
492    /// Transposed digits in amount.
493    TransposedDigits,
494    /// Wrong decimal placement.
495    DecimalError,
496    /// Missing required field.
497    MissingField,
498    /// Invalid account code.
499    InvalidAccount,
500
501    // Timing Errors
502    /// Posted to wrong period.
503    WrongPeriod,
504    /// Backdated entry.
505    BackdatedEntry,
506    /// Future-dated entry.
507    FutureDatedEntry,
508    /// Cutoff error.
509    CutoffError,
510
511    // Classification Errors
512    /// Wrong account classification.
513    MisclassifiedAccount,
514    /// Wrong cost center.
515    WrongCostCenter,
516    /// Wrong company code.
517    WrongCompanyCode,
518
519    // Calculation Errors
520    /// Unbalanced journal entry.
521    UnbalancedEntry,
522    /// Rounding error.
523    RoundingError,
524    /// Currency conversion error.
525    CurrencyError,
526    /// Tax calculation error.
527    TaxCalculationError,
528
529    // Accounting Standards Errors (Non-Fraudulent)
530    /// Wrong revenue recognition timing (honest mistake).
531    RevenueTimingError,
532    /// Performance obligation allocation error.
533    PoAllocationError,
534    /// Lease classification error (operating vs finance).
535    LeaseClassificationError,
536    /// Lease calculation error (PV, amortization).
537    LeaseCalculationError,
538    /// Fair value measurement error.
539    FairValueError,
540    /// Impairment calculation error.
541    ImpairmentCalculationError,
542    /// Discount rate error.
543    DiscountRateError,
544    /// Framework application error (IFRS vs GAAP).
545    FrameworkApplicationError,
546}
547
548impl ErrorType {
549    /// Returns severity level (1-5).
550    pub fn severity(&self) -> u8 {
551        match self {
552            ErrorType::RoundingError => 1,
553            ErrorType::MissingField => 2,
554            ErrorType::TransposedDigits => 2,
555            ErrorType::DecimalError => 3,
556            ErrorType::DuplicateEntry => 3,
557            ErrorType::ReversedAmount => 3,
558            ErrorType::WrongPeriod => 4,
559            ErrorType::UnbalancedEntry => 5,
560            ErrorType::CurrencyError => 4,
561            // Accounting Standards Errors
562            ErrorType::RevenueTimingError => 4,
563            ErrorType::PoAllocationError => 3,
564            ErrorType::LeaseClassificationError => 3,
565            ErrorType::LeaseCalculationError => 3,
566            ErrorType::FairValueError => 4,
567            ErrorType::ImpairmentCalculationError => 4,
568            ErrorType::DiscountRateError => 3,
569            ErrorType::FrameworkApplicationError => 4,
570            _ => 3,
571        }
572    }
573}
574
575/// Process issue types.
576#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
577pub enum ProcessIssueType {
578    // Approval Issues
579    /// Approval skipped entirely.
580    SkippedApproval,
581    /// Late approval (after posting).
582    LateApproval,
583    /// Missing supporting documentation.
584    MissingDocumentation,
585    /// Incomplete approval chain.
586    IncompleteApprovalChain,
587
588    // Timing Issues
589    /// Late posting.
590    LatePosting,
591    /// Posting outside business hours.
592    AfterHoursPosting,
593    /// Weekend/holiday posting.
594    WeekendPosting,
595    /// Rushed period-end posting.
596    RushedPeriodEnd,
597    /// Entry posted after the period-end close date (ISA 240.32).
598    /// Distinct from `RushedPeriodEnd` which flags pre-close volume spikes —
599    /// this variant specifically marks post-close adjustments.
600    PostClosePosting,
601
602    // Control Issues
603    /// Manual override of system control.
604    ManualOverride,
605    /// Unusual user access pattern.
606    UnusualAccess,
607    /// System bypass.
608    SystemBypass,
609    /// Batch processing anomaly.
610    BatchAnomaly,
611
612    // Documentation Issues
613    /// Vague or missing description.
614    VagueDescription,
615    /// Changed after posting.
616    PostFactoChange,
617    /// Incomplete audit trail.
618    IncompleteAuditTrail,
619
620    // Sourcing/Procurement Issues (S2C)
621    /// Purchasing outside of contracts (maverick spend).
622    MaverickSpend,
623    /// Purchasing against an expired contract.
624    ExpiredContractPurchase,
625    /// Overriding contracted price without authorization.
626    ContractPriceOverride,
627    /// Award given with only a single bid received.
628    SingleBidAward,
629    /// Bypassing supplier qualification requirements.
630    QualificationBypass,
631
632    // O2C Issues
633    /// Converting an expired quote to a sales order.
634    ExpiredQuoteConversion,
635}
636
637impl ProcessIssueType {
638    /// Returns severity level (1-5).
639    pub fn severity(&self) -> u8 {
640        match self {
641            ProcessIssueType::VagueDescription => 1,
642            ProcessIssueType::LatePosting => 2,
643            ProcessIssueType::AfterHoursPosting => 2,
644            ProcessIssueType::WeekendPosting => 2,
645            ProcessIssueType::PostClosePosting => 4,
646            ProcessIssueType::SkippedApproval => 4,
647            ProcessIssueType::ManualOverride => 4,
648            ProcessIssueType::SystemBypass => 5,
649            ProcessIssueType::IncompleteAuditTrail => 4,
650            _ => 3,
651        }
652    }
653}
654
655/// Statistical anomaly types.
656#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
657pub enum StatisticalAnomalyType {
658    // Amount Anomalies
659    /// Amount significantly above normal.
660    UnusuallyHighAmount,
661    /// Amount significantly below normal.
662    UnusuallyLowAmount,
663    /// Violates Benford's Law distribution.
664    BenfordViolation,
665    /// Exact duplicate amount (suspicious).
666    ExactDuplicateAmount,
667    /// Repeating pattern in amounts.
668    RepeatingAmount,
669
670    // Frequency Anomalies
671    /// Unusual transaction frequency.
672    UnusualFrequency,
673    /// Burst of transactions.
674    TransactionBurst,
675    /// Unusual time of day.
676    UnusualTiming,
677
678    // Trend Anomalies
679    /// Break in historical trend.
680    TrendBreak,
681    /// Sudden level shift.
682    LevelShift,
683    /// Seasonal pattern violation.
684    SeasonalAnomaly,
685
686    // Distribution Anomalies
687    /// Outlier in distribution.
688    StatisticalOutlier,
689    /// Change in variance.
690    VarianceChange,
691    /// Distribution shift.
692    DistributionShift,
693
694    // Sourcing/Contract Anomalies
695    /// Pattern of SLA breaches from a vendor.
696    SlaBreachPattern,
697    /// Contract with zero utilization.
698    UnusedContract,
699
700    // HR/Payroll Anomalies
701    /// Anomalous overtime patterns.
702    OvertimeAnomaly,
703
704    // Heavy-tail Anomalies (v5.30 B2 / #154)
705    /// Multi-100-line journal entry touching bridge accounts —
706    /// models real consolidation entries, period-end accruals, or
707    /// manual reclasses. Lifts the synthetic p99 / max
708    /// relational_score percentiles into the band the reference
709    /// shard exhibits (20× vs synth's normal-mode 12×). Opt-in via
710    /// `anomaly_injection.consolidation_outlier_rate` (default 0.0
711    /// — preserves v5.29 byte-identical output for configs that
712    /// don't opt in).
713    ConsolidationOutlier,
714}
715
716impl StatisticalAnomalyType {
717    /// Returns severity level (1-5).
718    pub fn severity(&self) -> u8 {
719        match self {
720            StatisticalAnomalyType::UnusualTiming => 1,
721            StatisticalAnomalyType::UnusualFrequency => 2,
722            StatisticalAnomalyType::BenfordViolation => 2,
723            StatisticalAnomalyType::UnusuallyHighAmount => 3,
724            StatisticalAnomalyType::TrendBreak => 3,
725            StatisticalAnomalyType::TransactionBurst => 4,
726            StatisticalAnomalyType::ExactDuplicateAmount => 3,
727            // v5.30 B2 — multi-100-line bridge-account postings are
728            // among the highest-magnitude single-event anomalies the
729            // engine emits; rate them at 4 alongside TransactionBurst.
730            StatisticalAnomalyType::ConsolidationOutlier => 4,
731            _ => 3,
732        }
733    }
734}
735
736/// Relational/graph anomaly types.
737#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
738pub enum RelationalAnomalyType {
739    // Transaction Pattern Anomalies
740    /// Circular transaction pattern.
741    CircularTransaction,
742    /// Unusual account combination.
743    UnusualAccountPair,
744    /// New trading partner.
745    NewCounterparty,
746    /// Dormant account suddenly active.
747    DormantAccountActivity,
748
749    // Network Anomalies
750    /// Unusual network centrality.
751    CentralityAnomaly,
752    /// Isolated transaction cluster.
753    IsolatedCluster,
754    /// Bridge node anomaly.
755    BridgeNodeAnomaly,
756    /// Community structure change.
757    CommunityAnomaly,
758
759    // Relationship Anomalies
760    /// Missing expected relationship.
761    MissingRelationship,
762    /// Unexpected relationship.
763    UnexpectedRelationship,
764    /// Relationship strength change.
765    RelationshipStrengthChange,
766
767    // Intercompany Anomalies
768    /// Unmatched intercompany transaction.
769    UnmatchedIntercompany,
770    /// Circular intercompany flow.
771    CircularIntercompany,
772    /// Transfer pricing anomaly.
773    TransferPricingAnomaly,
774
775    // Source-conditional anomalies (SOTA-12, #140)
776    /// JE uses a `(source, account-pair)` combination that is rare under the
777    /// per-source marginal P(account | source) — the single dominant explainer
778    /// for audit-packet top JEs (FINDINGS §13). Selected by the orchestrator's
779    /// anomaly-injection post-process from the generated JE set.
780    SourceConditionalRarity,
781}
782
783impl RelationalAnomalyType {
784    /// Returns severity level (1-5).
785    pub fn severity(&self) -> u8 {
786        match self {
787            RelationalAnomalyType::NewCounterparty => 1,
788            RelationalAnomalyType::DormantAccountActivity => 2,
789            RelationalAnomalyType::UnusualAccountPair => 2,
790            RelationalAnomalyType::CircularTransaction => 4,
791            RelationalAnomalyType::CircularIntercompany => 4,
792            RelationalAnomalyType::TransferPricingAnomaly => 4,
793            RelationalAnomalyType::UnmatchedIntercompany => 3,
794            RelationalAnomalyType::SourceConditionalRarity => 2,
795            _ => 3,
796        }
797    }
798}
799
800/// A labeled anomaly for supervised learning.
801#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LabeledAnomaly {
    /// Unique anomaly identifier.
    pub anomaly_id: String,
    /// Type of anomaly.
    pub anomaly_type: AnomalyType,
    /// Document or entity that contains the anomaly.
    pub document_id: String,
    /// Document type (JE, PO, Invoice, etc.).
    pub document_type: String,
    /// Company code.
    pub company_code: String,
    /// Date the anomaly occurred.
    pub anomaly_date: NaiveDate,
    /// Timestamp when detected/injected.
    ///
    /// `LabeledAnomaly::new` fills this with the local wall-clock time at
    /// construction. (De)serialized through `crate::serde_timestamp::naive`.
    #[serde(with = "crate::serde_timestamp::naive")]
    pub detection_timestamp: NaiveDateTime,
    /// Confidence score (0.0 - 1.0) for injected anomalies.
    ///
    /// `LabeledAnomaly::new` initializes this to `1.0`.
    pub confidence: f64,
    /// Severity (1-5).
    ///
    /// `LabeledAnomaly::new` takes this from `anomaly_type.severity()`.
    pub severity: u8,
    /// Description of the anomaly.
    ///
    /// `LabeledAnomaly::new` generates
    /// `"<category> - <type name> in document <document_id>"`.
    pub description: String,
    /// Related entities (user IDs, account codes, etc.).
    pub related_entities: Vec<String>,
    /// Monetary impact if applicable.
    pub monetary_impact: Option<Decimal>,
    /// Additional metadata as free-form key/value pairs.
    pub metadata: HashMap<String, String>,
    /// Whether this was injected (true) or naturally occurring (false).
    ///
    /// `LabeledAnomaly::new` initializes this to `true`.
    pub is_injected: bool,
    /// Injection strategy used (if injected) - legacy string field.
    ///
    /// `with_structured_strategy` also fills this from the structured
    /// strategy's `strategy_type()`.
    pub injection_strategy: Option<String>,
    /// Cluster ID if part of an anomaly cluster.
    pub cluster_id: Option<String>,

    // ========================================
    // PROVENANCE TRACKING FIELDS (Phase 1.2)
    // ========================================
    // Every field below uses `#[serde(default)]`, so it may be absent from
    // the input, and is left out of the output when it is `None` / empty.
    /// Hash of the original document before modification.
    /// Enables tracking what the document looked like pre-injection.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_document_hash: Option<String>,

    /// Causal reason explaining why this anomaly was injected.
    /// Provides "why" tracking for each anomaly.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub causal_reason: Option<AnomalyCausalReason>,

    /// Structured injection strategy with parameters.
    /// More detailed than the legacy string-based injection_strategy field.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub structured_strategy: Option<InjectionStrategy>,

    /// Parent anomaly ID if this was derived from another anomaly.
    /// Enables anomaly transformation chains.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parent_anomaly_id: Option<String>,

    /// Child anomaly IDs that were derived from this anomaly.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub child_anomaly_ids: Vec<String>,

    /// Scenario ID if this anomaly is part of a multi-step scenario.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scenario_id: Option<String>,

    /// Generation run ID that produced this anomaly.
    /// Enables tracing anomalies back to their generation run.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub run_id: Option<String>,

    /// Seed used for RNG during generation.
    /// Enables reproducibility.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub generation_seed: Option<u64>,
}
878
879impl LabeledAnomaly {
880    /// Creates a new labeled anomaly.
881    pub fn new(
882        anomaly_id: String,
883        anomaly_type: AnomalyType,
884        document_id: String,
885        document_type: String,
886        company_code: String,
887        anomaly_date: NaiveDate,
888    ) -> Self {
889        let severity = anomaly_type.severity();
890        let description = format!(
891            "{} - {} in document {}",
892            anomaly_type.category(),
893            anomaly_type.type_name(),
894            document_id
895        );
896
897        Self {
898            anomaly_id,
899            anomaly_type,
900            document_id,
901            document_type,
902            company_code,
903            anomaly_date,
904            detection_timestamp: chrono::Local::now().naive_local(),
905            confidence: 1.0,
906            severity,
907            description,
908            related_entities: Vec::new(),
909            monetary_impact: None,
910            metadata: HashMap::new(),
911            is_injected: true,
912            injection_strategy: None,
913            cluster_id: None,
914            // Provenance fields
915            original_document_hash: None,
916            causal_reason: None,
917            structured_strategy: None,
918            parent_anomaly_id: None,
919            child_anomaly_ids: Vec::new(),
920            scenario_id: None,
921            run_id: None,
922            generation_seed: None,
923        }
924    }
925
926    /// Sets the description.
927    pub fn with_description(mut self, description: &str) -> Self {
928        self.description = description.to_string();
929        self
930    }
931
932    /// Sets the monetary impact.
933    pub fn with_monetary_impact(mut self, impact: Decimal) -> Self {
934        self.monetary_impact = Some(impact);
935        self
936    }
937
938    /// Adds a related entity.
939    pub fn with_related_entity(mut self, entity: &str) -> Self {
940        self.related_entities.push(entity.to_string());
941        self
942    }
943
944    /// Adds metadata.
945    pub fn with_metadata(mut self, key: &str, value: &str) -> Self {
946        self.metadata.insert(key.to_string(), value.to_string());
947        self
948    }
949
950    /// Sets the injection strategy (legacy string).
951    pub fn with_injection_strategy(mut self, strategy: &str) -> Self {
952        self.injection_strategy = Some(strategy.to_string());
953        self
954    }
955
956    /// Sets the cluster ID.
957    pub fn with_cluster(mut self, cluster_id: &str) -> Self {
958        self.cluster_id = Some(cluster_id.to_string());
959        self
960    }
961
962    // ========================================
963    // PROVENANCE BUILDER METHODS (Phase 1.2)
964    // ========================================
965
966    /// Sets the original document hash for provenance tracking.
967    pub fn with_original_document_hash(mut self, hash: &str) -> Self {
968        self.original_document_hash = Some(hash.to_string());
969        self
970    }
971
972    /// Sets the causal reason for this anomaly.
973    pub fn with_causal_reason(mut self, reason: AnomalyCausalReason) -> Self {
974        self.causal_reason = Some(reason);
975        self
976    }
977
978    /// Sets the structured injection strategy.
979    pub fn with_structured_strategy(mut self, strategy: InjectionStrategy) -> Self {
980        // Also set the legacy string field for backward compatibility
981        self.injection_strategy = Some(strategy.strategy_type().to_string());
982        self.structured_strategy = Some(strategy);
983        self
984    }
985
986    /// Sets the parent anomaly ID (for anomaly derivation chains).
987    pub fn with_parent_anomaly(mut self, parent_id: &str) -> Self {
988        self.parent_anomaly_id = Some(parent_id.to_string());
989        self
990    }
991
992    /// Adds a child anomaly ID.
993    pub fn with_child_anomaly(mut self, child_id: &str) -> Self {
994        self.child_anomaly_ids.push(child_id.to_string());
995        self
996    }
997
998    /// Sets the scenario ID for multi-step scenario tracking.
999    pub fn with_scenario(mut self, scenario_id: &str) -> Self {
1000        self.scenario_id = Some(scenario_id.to_string());
1001        self
1002    }
1003
1004    /// Sets the generation run ID.
1005    pub fn with_run_id(mut self, run_id: &str) -> Self {
1006        self.run_id = Some(run_id.to_string());
1007        self
1008    }
1009
1010    /// Sets the generation seed for reproducibility.
1011    pub fn with_generation_seed(mut self, seed: u64) -> Self {
1012        self.generation_seed = Some(seed);
1013        self
1014    }
1015
1016    /// Sets multiple provenance fields at once for convenience.
1017    pub fn with_provenance(
1018        mut self,
1019        run_id: Option<&str>,
1020        seed: Option<u64>,
1021        causal_reason: Option<AnomalyCausalReason>,
1022    ) -> Self {
1023        if let Some(id) = run_id {
1024            self.run_id = Some(id.to_string());
1025        }
1026        self.generation_seed = seed;
1027        self.causal_reason = causal_reason;
1028        self
1029    }
1030
1031    /// Converts to a feature vector for ML.
1032    ///
1033    /// Returns a vector of 15 features:
1034    /// - 6 features: Category one-hot encoding (Fraud, Error, ProcessIssue, Statistical, Relational, Custom)
1035    /// - 1 feature: Severity (normalized 0-1)
1036    /// - 1 feature: Confidence
1037    /// - 1 feature: Has monetary impact (0/1)
1038    /// - 1 feature: Monetary impact (log-scaled)
1039    /// - 1 feature: Is intentional (0/1)
1040    /// - 1 feature: Number of related entities
1041    /// - 1 feature: Is part of cluster (0/1)
1042    /// - 1 feature: Is part of scenario (0/1)
1043    /// - 1 feature: Has parent anomaly (0/1) - indicates derivation
1044    pub fn to_features(&self) -> Vec<f64> {
1045        let mut features = Vec::new();
1046
1047        // Category one-hot encoding
1048        let categories = [
1049            "Fraud",
1050            "Error",
1051            "ProcessIssue",
1052            "Statistical",
1053            "Relational",
1054            "Custom",
1055        ];
1056        for cat in &categories {
1057            features.push(if self.anomaly_type.category() == *cat {
1058                1.0
1059            } else {
1060                0.0
1061            });
1062        }
1063
1064        // Severity (normalized)
1065        features.push(self.severity as f64 / 5.0);
1066
1067        // Confidence
1068        features.push(self.confidence);
1069
1070        // Has monetary impact
1071        features.push(if self.monetary_impact.is_some() {
1072            1.0
1073        } else {
1074            0.0
1075        });
1076
1077        // Monetary impact (log-scaled)
1078        if let Some(impact) = self.monetary_impact {
1079            let impact_f64: f64 = impact.try_into().unwrap_or(0.0);
1080            features.push((impact_f64.abs() + 1.0).ln());
1081        } else {
1082            features.push(0.0);
1083        }
1084
1085        // Is intentional
1086        features.push(if self.anomaly_type.is_intentional() {
1087            1.0
1088        } else {
1089            0.0
1090        });
1091
1092        // Number of related entities
1093        features.push(self.related_entities.len() as f64);
1094
1095        // Is part of cluster
1096        features.push(if self.cluster_id.is_some() { 1.0 } else { 0.0 });
1097
1098        // Provenance features
1099        // Is part of scenario
1100        features.push(if self.scenario_id.is_some() { 1.0 } else { 0.0 });
1101
1102        // Has parent anomaly (indicates this is a derived anomaly)
1103        features.push(if self.parent_anomaly_id.is_some() {
1104            1.0
1105        } else {
1106            0.0
1107        });
1108
1109        features
1110    }
1111
1112    /// Returns the number of features in the feature vector.
1113    pub fn feature_count() -> usize {
1114        15 // 6 category + 9 other features
1115    }
1116
1117    /// Returns feature names for documentation/ML metadata.
1118    pub fn feature_names() -> Vec<&'static str> {
1119        vec![
1120            "category_fraud",
1121            "category_error",
1122            "category_process_issue",
1123            "category_statistical",
1124            "category_relational",
1125            "category_custom",
1126            "severity_normalized",
1127            "confidence",
1128            "has_monetary_impact",
1129            "monetary_impact_log",
1130            "is_intentional",
1131            "related_entity_count",
1132            "is_clustered",
1133            "is_scenario_part",
1134            "is_derived",
1135        ]
1136    }
1137}
1138
/// Summary of anomalies for reporting.
///
/// Built by `AnomalySummary::from_anomalies`; the `Default` value is an
/// empty summary.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AnomalySummary {
    /// Total anomaly count.
    pub total_count: usize,
    /// Count by category, keyed by `anomaly_type.category()`.
    pub by_category: HashMap<String, usize>,
    /// Count by specific type, keyed by `anomaly_type.type_name()`.
    pub by_type: HashMap<String, usize>,
    /// Count by severity value.
    pub by_severity: HashMap<u8, usize>,
    /// Count by company code.
    pub by_company: HashMap<String, usize>,
    /// Total monetary impact, summed over anomalies that carry one.
    pub total_monetary_impact: Decimal,
    /// Date range as `(earliest, latest)` anomaly date; `None` when the
    /// summary covers no anomalies.
    pub date_range: Option<(NaiveDate, NaiveDate)>,
    /// Number of distinct cluster IDs seen.
    pub cluster_count: usize,
}
1159
1160impl AnomalySummary {
1161    /// Creates a summary from a list of anomalies.
1162    pub fn from_anomalies(anomalies: &[LabeledAnomaly]) -> Self {
1163        let mut summary = AnomalySummary {
1164            total_count: anomalies.len(),
1165            ..Default::default()
1166        };
1167
1168        let mut min_date: Option<NaiveDate> = None;
1169        let mut max_date: Option<NaiveDate> = None;
1170        let mut clusters = std::collections::HashSet::new();
1171
1172        for anomaly in anomalies {
1173            // By category
1174            *summary
1175                .by_category
1176                .entry(anomaly.anomaly_type.category().to_string())
1177                .or_insert(0) += 1;
1178
1179            // By type
1180            *summary
1181                .by_type
1182                .entry(anomaly.anomaly_type.type_name())
1183                .or_insert(0) += 1;
1184
1185            // By severity
1186            *summary.by_severity.entry(anomaly.severity).or_insert(0) += 1;
1187
1188            // By company
1189            *summary
1190                .by_company
1191                .entry(anomaly.company_code.clone())
1192                .or_insert(0) += 1;
1193
1194            // Monetary impact
1195            if let Some(impact) = anomaly.monetary_impact {
1196                summary.total_monetary_impact += impact;
1197            }
1198
1199            // Date range
1200            match min_date {
1201                None => min_date = Some(anomaly.anomaly_date),
1202                Some(d) if anomaly.anomaly_date < d => min_date = Some(anomaly.anomaly_date),
1203                _ => {}
1204            }
1205            match max_date {
1206                None => max_date = Some(anomaly.anomaly_date),
1207                Some(d) if anomaly.anomaly_date > d => max_date = Some(anomaly.anomaly_date),
1208                _ => {}
1209            }
1210
1211            // Clusters
1212            if let Some(cluster_id) = &anomaly.cluster_id {
1213                clusters.insert(cluster_id.clone());
1214            }
1215        }
1216
1217        summary.date_range = min_date.zip(max_date);
1218        summary.cluster_count = clusters.len();
1219
1220        summary
1221    }
1222}
1223
1224// ============================================================================
1225// ENHANCED ANOMALY TAXONOMY (FR-003)
1226// ============================================================================
1227
/// High-level anomaly category for multi-class classification.
///
/// These categories provide a more granular classification than the base
/// AnomalyType enum, enabling better ML model training and audit reporting.
///
/// `AnomalyCategory::from_anomaly_type` derives a category from an
/// `AnomalyType`; `name` and `ordinal` give each category a string and a
/// numeric encoding.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AnomalyCategory {
    // Vendor-related anomalies
    /// Fictitious or shell vendor.
    FictitiousVendor,
    /// Kickback or collusion with vendor.
    VendorKickback,
    /// Related party vendor transactions.
    RelatedPartyVendor,

    // Transaction-related anomalies
    /// Duplicate payment or invoice.
    DuplicatePayment,
    /// Unauthorized transaction.
    UnauthorizedTransaction,
    /// Structured transactions to avoid thresholds.
    StructuredTransaction,

    // Pattern-based anomalies
    /// Circular flow of funds.
    CircularFlow,
    /// Behavioral anomaly (deviation from normal patterns).
    BehavioralAnomaly,
    /// Timing-based anomaly.
    TimingAnomaly,

    // Journal entry anomalies
    /// Manual journal entry anomaly.
    JournalAnomaly,
    /// Manual override of controls.
    ManualOverride,
    /// Missing approval in chain.
    MissingApproval,

    // Statistical anomalies
    /// Statistical outlier.
    StatisticalOutlier,
    /// Distribution anomaly (Benford, etc.).
    DistributionAnomaly,

    // Custom category
    /// User-defined category; the wrapped string is what `name` returns.
    Custom(String),
}
1276
1277impl AnomalyCategory {
1278    /// Derives an AnomalyCategory from an AnomalyType.
1279    pub fn from_anomaly_type(anomaly_type: &AnomalyType) -> Self {
1280        match anomaly_type {
1281            AnomalyType::Fraud(fraud_type) => match fraud_type {
1282                FraudType::FictitiousVendor | FraudType::ShellCompanyPayment => {
1283                    AnomalyCategory::FictitiousVendor
1284                }
1285                FraudType::Kickback | FraudType::KickbackScheme => AnomalyCategory::VendorKickback,
1286                FraudType::DuplicatePayment => AnomalyCategory::DuplicatePayment,
1287                FraudType::SplitTransaction | FraudType::JustBelowThreshold => {
1288                    AnomalyCategory::StructuredTransaction
1289                }
1290                FraudType::SelfApproval
1291                | FraudType::UnauthorizedApproval
1292                | FraudType::CollusiveApproval => AnomalyCategory::UnauthorizedTransaction,
1293                FraudType::TimingAnomaly
1294                | FraudType::RoundDollarManipulation
1295                | FraudType::SuspenseAccountAbuse => AnomalyCategory::JournalAnomaly,
1296                _ => AnomalyCategory::BehavioralAnomaly,
1297            },
1298            AnomalyType::Error(error_type) => match error_type {
1299                ErrorType::DuplicateEntry => AnomalyCategory::DuplicatePayment,
1300                ErrorType::WrongPeriod
1301                | ErrorType::BackdatedEntry
1302                | ErrorType::FutureDatedEntry => AnomalyCategory::TimingAnomaly,
1303                _ => AnomalyCategory::JournalAnomaly,
1304            },
1305            AnomalyType::ProcessIssue(process_type) => match process_type {
1306                ProcessIssueType::SkippedApproval | ProcessIssueType::IncompleteApprovalChain => {
1307                    AnomalyCategory::MissingApproval
1308                }
1309                ProcessIssueType::ManualOverride | ProcessIssueType::SystemBypass => {
1310                    AnomalyCategory::ManualOverride
1311                }
1312                ProcessIssueType::AfterHoursPosting | ProcessIssueType::WeekendPosting => {
1313                    AnomalyCategory::TimingAnomaly
1314                }
1315                _ => AnomalyCategory::BehavioralAnomaly,
1316            },
1317            AnomalyType::Statistical(stat_type) => match stat_type {
1318                StatisticalAnomalyType::BenfordViolation
1319                | StatisticalAnomalyType::DistributionShift => AnomalyCategory::DistributionAnomaly,
1320                _ => AnomalyCategory::StatisticalOutlier,
1321            },
1322            AnomalyType::Relational(rel_type) => match rel_type {
1323                RelationalAnomalyType::CircularTransaction
1324                | RelationalAnomalyType::CircularIntercompany => AnomalyCategory::CircularFlow,
1325                _ => AnomalyCategory::BehavioralAnomaly,
1326            },
1327            AnomalyType::Custom(s) => AnomalyCategory::Custom(s.clone()),
1328        }
1329    }
1330
1331    /// Returns the category name as a string.
1332    pub fn name(&self) -> &str {
1333        match self {
1334            AnomalyCategory::FictitiousVendor => "fictitious_vendor",
1335            AnomalyCategory::VendorKickback => "vendor_kickback",
1336            AnomalyCategory::RelatedPartyVendor => "related_party_vendor",
1337            AnomalyCategory::DuplicatePayment => "duplicate_payment",
1338            AnomalyCategory::UnauthorizedTransaction => "unauthorized_transaction",
1339            AnomalyCategory::StructuredTransaction => "structured_transaction",
1340            AnomalyCategory::CircularFlow => "circular_flow",
1341            AnomalyCategory::BehavioralAnomaly => "behavioral_anomaly",
1342            AnomalyCategory::TimingAnomaly => "timing_anomaly",
1343            AnomalyCategory::JournalAnomaly => "journal_anomaly",
1344            AnomalyCategory::ManualOverride => "manual_override",
1345            AnomalyCategory::MissingApproval => "missing_approval",
1346            AnomalyCategory::StatisticalOutlier => "statistical_outlier",
1347            AnomalyCategory::DistributionAnomaly => "distribution_anomaly",
1348            AnomalyCategory::Custom(s) => s.as_str(),
1349        }
1350    }
1351
1352    /// Returns the ordinal value for ML encoding.
1353    pub fn ordinal(&self) -> u8 {
1354        match self {
1355            AnomalyCategory::FictitiousVendor => 0,
1356            AnomalyCategory::VendorKickback => 1,
1357            AnomalyCategory::RelatedPartyVendor => 2,
1358            AnomalyCategory::DuplicatePayment => 3,
1359            AnomalyCategory::UnauthorizedTransaction => 4,
1360            AnomalyCategory::StructuredTransaction => 5,
1361            AnomalyCategory::CircularFlow => 6,
1362            AnomalyCategory::BehavioralAnomaly => 7,
1363            AnomalyCategory::TimingAnomaly => 8,
1364            AnomalyCategory::JournalAnomaly => 9,
1365            AnomalyCategory::ManualOverride => 10,
1366            AnomalyCategory::MissingApproval => 11,
1367            AnomalyCategory::StatisticalOutlier => 12,
1368            AnomalyCategory::DistributionAnomaly => 13,
1369            AnomalyCategory::Custom(_) => 14,
1370        }
1371    }
1372
1373    /// Returns the total number of categories (excluding Custom).
1374    pub fn category_count() -> usize {
1375        15 // 14 fixed categories + Custom
1376    }
1377}
1378
/// Type of contributing factor for anomaly confidence/severity calculation.
///
/// `FactorType::name` gives the snake_case string name of each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FactorType {
    /// Amount deviation from expected value.
    AmountDeviation,
    /// Proximity to approval/reporting threshold.
    ThresholdProximity,
    /// Timing-related anomaly indicator.
    TimingAnomaly,
    /// Entity risk score contribution.
    EntityRisk,
    /// Pattern match confidence.
    PatternMatch,
    /// Frequency deviation from normal.
    FrequencyDeviation,
    /// Relationship-based anomaly indicator.
    RelationshipAnomaly,
    /// Control bypass indicator.
    ControlBypass,
    /// Benford's Law violation.
    BenfordViolation,
    /// Duplicate indicator.
    DuplicateIndicator,
    /// Approval chain issue.
    ApprovalChainIssue,
    /// Documentation gap.
    DocumentationGap,
    /// Custom factor type (carries no payload).
    Custom,
}
1409
1410impl FactorType {
1411    /// Returns the factor type name.
1412    pub fn name(&self) -> &'static str {
1413        match self {
1414            FactorType::AmountDeviation => "amount_deviation",
1415            FactorType::ThresholdProximity => "threshold_proximity",
1416            FactorType::TimingAnomaly => "timing_anomaly",
1417            FactorType::EntityRisk => "entity_risk",
1418            FactorType::PatternMatch => "pattern_match",
1419            FactorType::FrequencyDeviation => "frequency_deviation",
1420            FactorType::RelationshipAnomaly => "relationship_anomaly",
1421            FactorType::ControlBypass => "control_bypass",
1422            FactorType::BenfordViolation => "benford_violation",
1423            FactorType::DuplicateIndicator => "duplicate_indicator",
1424            FactorType::ApprovalChainIssue => "approval_chain_issue",
1425            FactorType::DocumentationGap => "documentation_gap",
1426            FactorType::Custom => "custom",
1427        }
1428    }
1429}
1430
/// Evidence supporting a contributing factor.
///
/// Attached to a `ContributingFactor` through
/// `ContributingFactor::with_evidence`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactorEvidence {
    /// Source of the evidence (e.g., "transaction_history", "entity_registry").
    pub source: String,
    /// Raw evidence data as free-form key/value pairs.
    pub data: HashMap<String, String>,
}
1439
/// A contributing factor to anomaly confidence/severity.
///
/// `ContributingFactor::contribution` turns `value`, `threshold`,
/// `direction_greater` and `weight` into a single score.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContributingFactor {
    /// Type of factor.
    pub factor_type: FactorType,
    /// Observed value.
    pub value: f64,
    /// Threshold or expected value the observed value is compared against.
    pub threshold: f64,
    /// Direction of comparison (true = value > threshold is anomalous;
    /// false = value < threshold is anomalous).
    pub direction_greater: bool,
    /// Weight of this factor in overall calculation (0.0 - 1.0).
    pub weight: f64,
    /// Human-readable description.
    pub description: String,
    /// Optional supporting evidence; `ContributingFactor::new` leaves it `None`.
    pub evidence: Option<FactorEvidence>,
}
1458
1459impl ContributingFactor {
1460    /// Creates a new contributing factor.
1461    pub fn new(
1462        factor_type: FactorType,
1463        value: f64,
1464        threshold: f64,
1465        direction_greater: bool,
1466        weight: f64,
1467        description: &str,
1468    ) -> Self {
1469        Self {
1470            factor_type,
1471            value,
1472            threshold,
1473            direction_greater,
1474            weight,
1475            description: description.to_string(),
1476            evidence: None,
1477        }
1478    }
1479
1480    /// Adds evidence to the factor.
1481    pub fn with_evidence(mut self, source: &str, data: HashMap<String, String>) -> Self {
1482        self.evidence = Some(FactorEvidence {
1483            source: source.to_string(),
1484            data,
1485        });
1486        self
1487    }
1488
1489    /// Calculates the factor's contribution to anomaly score.
1490    pub fn contribution(&self) -> f64 {
1491        let deviation = if self.direction_greater {
1492            (self.value - self.threshold).max(0.0)
1493        } else {
1494            (self.threshold - self.value).max(0.0)
1495        };
1496
1497        // Normalize by threshold to get relative deviation
1498        let relative_deviation = if self.threshold.abs() > 0.001 {
1499            deviation / self.threshold.abs()
1500        } else {
1501            deviation
1502        };
1503
1504        // Apply weight and cap at 1.0
1505        (relative_deviation * self.weight).min(1.0)
1506    }
1507}
1508
/// Enhanced anomaly label with dynamic confidence and severity.
///
/// Wraps a `LabeledAnomaly` and adds a finer-grained category, normalized
/// confidence/severity scores and the factors behind them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EnhancedAnomalyLabel {
    /// Base labeled anomaly (backward compatible).
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    ///
    /// `from_base` derives it from the base anomaly's type.
    pub category: AnomalyCategory,
    /// Dynamically calculated confidence (0.0 - 1.0).
    ///
    /// `from_base` copies the base confidence; `with_confidence` clamps to
    /// the 0.0 - 1.0 range.
    pub enhanced_confidence: f64,
    /// Contextually calculated severity (0.0 - 1.0).
    ///
    /// `from_base` uses the base severity divided by 5; `with_severity`
    /// clamps to the 0.0 - 1.0 range.
    pub enhanced_severity: f64,
    /// Factors contributing to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Secondary categories (for multi-label classification).
    ///
    /// `with_secondary_category` skips duplicates and the primary category.
    pub secondary_categories: Vec<AnomalyCategory>,
}
1525
1526impl EnhancedAnomalyLabel {
1527    /// Creates an enhanced label from a base labeled anomaly.
1528    pub fn from_base(base: LabeledAnomaly) -> Self {
1529        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1530        let enhanced_confidence = base.confidence;
1531        let enhanced_severity = base.severity as f64 / 5.0;
1532
1533        Self {
1534            base,
1535            category,
1536            enhanced_confidence,
1537            enhanced_severity,
1538            contributing_factors: Vec::new(),
1539            secondary_categories: Vec::new(),
1540        }
1541    }
1542
1543    /// Sets the enhanced confidence.
1544    pub fn with_confidence(mut self, confidence: f64) -> Self {
1545        self.enhanced_confidence = confidence.clamp(0.0, 1.0);
1546        self
1547    }
1548
1549    /// Sets the enhanced severity.
1550    pub fn with_severity(mut self, severity: f64) -> Self {
1551        self.enhanced_severity = severity.clamp(0.0, 1.0);
1552        self
1553    }
1554
1555    /// Adds a contributing factor.
1556    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
1557        self.contributing_factors.push(factor);
1558        self
1559    }
1560
1561    /// Adds a secondary category.
1562    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
1563        if !self.secondary_categories.contains(&category) && category != self.category {
1564            self.secondary_categories.push(category);
1565        }
1566        self
1567    }
1568
1569    /// Converts to an extended feature vector.
1570    ///
1571    /// Returns base features (15) + enhanced features (10) = 25 features.
1572    pub fn to_features(&self) -> Vec<f64> {
1573        let mut features = self.base.to_features();
1574
1575        // Enhanced features
1576        features.push(self.enhanced_confidence);
1577        features.push(self.enhanced_severity);
1578        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
1579        features.push(self.secondary_categories.len() as f64);
1580        features.push(self.contributing_factors.len() as f64);
1581
1582        // Max factor weight
1583        let max_weight = self
1584            .contributing_factors
1585            .iter()
1586            .map(|f| f.weight)
1587            .fold(0.0, f64::max);
1588        features.push(max_weight);
1589
1590        // Factor type indicators (binary flags for key factor types)
1591        let has_control_bypass = self
1592            .contributing_factors
1593            .iter()
1594            .any(|f| f.factor_type == FactorType::ControlBypass);
1595        features.push(if has_control_bypass { 1.0 } else { 0.0 });
1596
1597        let has_amount_deviation = self
1598            .contributing_factors
1599            .iter()
1600            .any(|f| f.factor_type == FactorType::AmountDeviation);
1601        features.push(if has_amount_deviation { 1.0 } else { 0.0 });
1602
1603        let has_timing = self
1604            .contributing_factors
1605            .iter()
1606            .any(|f| f.factor_type == FactorType::TimingAnomaly);
1607        features.push(if has_timing { 1.0 } else { 0.0 });
1608
1609        let has_pattern_match = self
1610            .contributing_factors
1611            .iter()
1612            .any(|f| f.factor_type == FactorType::PatternMatch);
1613        features.push(if has_pattern_match { 1.0 } else { 0.0 });
1614
1615        features
1616    }
1617
1618    /// Returns the number of features in the enhanced feature vector.
1619    pub fn feature_count() -> usize {
1620        25 // 15 base + 10 enhanced
1621    }
1622
1623    /// Returns feature names for the enhanced feature vector.
1624    pub fn feature_names() -> Vec<&'static str> {
1625        let mut names = LabeledAnomaly::feature_names();
1626        names.extend(vec![
1627            "enhanced_confidence",
1628            "enhanced_severity",
1629            "category_ordinal",
1630            "secondary_category_count",
1631            "contributing_factor_count",
1632            "max_factor_weight",
1633            "has_control_bypass",
1634            "has_amount_deviation",
1635            "has_timing_factor",
1636            "has_pattern_match",
1637        ]);
1638        names
1639    }
1640}
1641
1642// ============================================================================
1643// MULTI-DIMENSIONAL LABELING (Anomaly Pattern Enhancements)
1644// ============================================================================
1645
/// Severity level classification for anomalies.
///
/// Four ordered levels; `SeverityLevel::numeric` maps them to 1-4.
/// `Medium` is the `Default` value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SeverityLevel {
    /// Minor issue, low impact.
    Low,
    /// Moderate issue, noticeable impact.
    #[default]
    Medium,
    /// Significant issue, substantial impact.
    High,
    /// Critical issue, severe impact requiring immediate attention.
    Critical,
}
1659
1660impl SeverityLevel {
1661    /// Returns the numeric value (1-4) for the severity level.
1662    pub fn numeric(&self) -> u8 {
1663        match self {
1664            SeverityLevel::Low => 1,
1665            SeverityLevel::Medium => 2,
1666            SeverityLevel::High => 3,
1667            SeverityLevel::Critical => 4,
1668        }
1669    }
1670
1671    /// Creates a severity level from a numeric value.
1672    pub fn from_numeric(value: u8) -> Self {
1673        match value {
1674            1 => SeverityLevel::Low,
1675            2 => SeverityLevel::Medium,
1676            3 => SeverityLevel::High,
1677            _ => SeverityLevel::Critical,
1678        }
1679    }
1680
1681    /// Creates a severity level from a normalized score (0.0-1.0).
1682    pub fn from_score(score: f64) -> Self {
1683        match score {
1684            s if s < 0.25 => SeverityLevel::Low,
1685            s if s < 0.50 => SeverityLevel::Medium,
1686            s if s < 0.75 => SeverityLevel::High,
1687            _ => SeverityLevel::Critical,
1688        }
1689    }
1690
1691    /// Returns a normalized score (0.0-1.0) for this severity level.
1692    pub fn to_score(&self) -> f64 {
1693        match self {
1694            SeverityLevel::Low => 0.125,
1695            SeverityLevel::Medium => 0.375,
1696            SeverityLevel::High => 0.625,
1697            SeverityLevel::Critical => 0.875,
1698        }
1699    }
1700}
1701
/// Structured severity scoring for anomalies.
///
/// Combines a discrete [`SeverityLevel`] with a continuous score, the
/// financial impact, and an optional materiality assessment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnomalySeverity {
    /// Severity level classification.
    pub level: SeverityLevel,
    /// Continuous severity score (0.0-1.0).
    pub score: f64,
    /// Absolute financial impact amount.
    pub financial_impact: Decimal,
    /// Whether this exceeds materiality threshold.
    pub is_material: bool,
    /// Materiality threshold used for determination.
    ///
    /// Omitted from serialized output when `None`; defaults to `None` when
    /// missing on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub materiality_threshold: Option<Decimal>,
}
1717
1718impl AnomalySeverity {
1719    /// Creates a new severity assessment.
1720    pub fn new(level: SeverityLevel, financial_impact: Decimal) -> Self {
1721        Self {
1722            level,
1723            score: level.to_score(),
1724            financial_impact,
1725            is_material: false,
1726            materiality_threshold: None,
1727        }
1728    }
1729
1730    /// Creates severity from a score, auto-determining level.
1731    pub fn from_score(score: f64, financial_impact: Decimal) -> Self {
1732        Self {
1733            level: SeverityLevel::from_score(score),
1734            score: score.clamp(0.0, 1.0),
1735            financial_impact,
1736            is_material: false,
1737            materiality_threshold: None,
1738        }
1739    }
1740
1741    /// Sets the materiality assessment.
1742    pub fn with_materiality(mut self, threshold: Decimal) -> Self {
1743        self.materiality_threshold = Some(threshold);
1744        self.is_material = self.financial_impact.abs() >= threshold;
1745        self
1746    }
1747}
1748
1749impl Default for AnomalySeverity {
1750    fn default() -> Self {
1751        Self {
1752            level: SeverityLevel::Medium,
1753            score: 0.5,
1754            financial_impact: Decimal::ZERO,
1755            is_material: false,
1756            materiality_threshold: None,
1757        }
1758    }
1759}
1760
/// Detection difficulty classification for anomalies.
///
/// Categorizes how difficult an anomaly is to detect, which is useful
/// for ML model benchmarking and audit procedure selection.
///
/// Variants are declared from easiest to hardest; `Moderate` is the
/// `Default` variant.
///
/// Note: This is distinct from `drift_events::AnomalyDetectionDifficulty` which
/// is used for drift event classification and has different variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AnomalyDetectionDifficulty {
    /// Obvious anomaly, easily caught by basic rules (expected detection rate: 99%).
    Trivial,
    /// Relatively easy to detect with standard procedures (expected detection rate: 90%).
    Easy,
    /// Requires moderate effort or specialized analysis (expected detection rate: 70%).
    /// This is the default difficulty.
    #[default]
    Moderate,
    /// Difficult to detect, requires advanced techniques (expected detection rate: 40%).
    Hard,
    /// Expert-level difficulty, requires forensic analysis (expected detection rate: 15%).
    Expert,
}
1782
1783impl AnomalyDetectionDifficulty {
1784    /// Returns the expected detection rate for this difficulty level.
1785    pub fn expected_detection_rate(&self) -> f64 {
1786        match self {
1787            AnomalyDetectionDifficulty::Trivial => 0.99,
1788            AnomalyDetectionDifficulty::Easy => 0.90,
1789            AnomalyDetectionDifficulty::Moderate => 0.70,
1790            AnomalyDetectionDifficulty::Hard => 0.40,
1791            AnomalyDetectionDifficulty::Expert => 0.15,
1792        }
1793    }
1794
1795    /// Returns a numeric difficulty score (0.0-1.0).
1796    pub fn difficulty_score(&self) -> f64 {
1797        match self {
1798            AnomalyDetectionDifficulty::Trivial => 0.05,
1799            AnomalyDetectionDifficulty::Easy => 0.25,
1800            AnomalyDetectionDifficulty::Moderate => 0.50,
1801            AnomalyDetectionDifficulty::Hard => 0.75,
1802            AnomalyDetectionDifficulty::Expert => 0.95,
1803        }
1804    }
1805
1806    /// Creates a difficulty level from a score (0.0-1.0).
1807    pub fn from_score(score: f64) -> Self {
1808        match score {
1809            s if s < 0.15 => AnomalyDetectionDifficulty::Trivial,
1810            s if s < 0.35 => AnomalyDetectionDifficulty::Easy,
1811            s if s < 0.55 => AnomalyDetectionDifficulty::Moderate,
1812            s if s < 0.75 => AnomalyDetectionDifficulty::Hard,
1813            _ => AnomalyDetectionDifficulty::Expert,
1814        }
1815    }
1816
1817    /// Returns the name of this difficulty level.
1818    pub fn name(&self) -> &'static str {
1819        match self {
1820            AnomalyDetectionDifficulty::Trivial => "trivial",
1821            AnomalyDetectionDifficulty::Easy => "easy",
1822            AnomalyDetectionDifficulty::Moderate => "moderate",
1823            AnomalyDetectionDifficulty::Hard => "hard",
1824            AnomalyDetectionDifficulty::Expert => "expert",
1825        }
1826    }
1827}
1828
/// Ground truth certainty level for anomaly labels.
///
/// Indicates how certain we are that the label is correct. Variants are
/// declared from most to least certain; `Definite` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum GroundTruthCertainty {
    /// Definitively known (injected anomaly with full provenance).
    /// This is the default certainty.
    #[default]
    Definite,
    /// Highly probable based on strong evidence.
    Probable,
    /// Possibly an anomaly based on indirect evidence.
    Possible,
}
1842
1843impl GroundTruthCertainty {
1844    /// Returns a certainty score (0.0-1.0).
1845    pub fn certainty_score(&self) -> f64 {
1846        match self {
1847            GroundTruthCertainty::Definite => 1.0,
1848            GroundTruthCertainty::Probable => 0.8,
1849            GroundTruthCertainty::Possible => 0.5,
1850        }
1851    }
1852
1853    /// Returns the name of this certainty level.
1854    pub fn name(&self) -> &'static str {
1855        match self {
1856            GroundTruthCertainty::Definite => "definite",
1857            GroundTruthCertainty::Probable => "probable",
1858            GroundTruthCertainty::Possible => "possible",
1859        }
1860    }
1861}
1862
/// Detection method classification.
///
/// Indicates which detection methods are recommended or effective for an anomaly.
/// Unlike the other classification enums in this section, this type does not
/// derive `Default`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DetectionMethod {
    /// Simple rule-based detection (thresholds, filters).
    RuleBased,
    /// Statistical analysis (distributions, outlier detection).
    Statistical,
    /// Machine learning models (classification, anomaly detection).
    MachineLearning,
    /// Graph-based analysis (network patterns, relationships).
    GraphBased,
    /// Manual forensic audit procedures.
    ForensicAudit,
    /// Combination of multiple methods.
    Hybrid,
}
1881
1882impl DetectionMethod {
1883    /// Returns the name of this detection method.
1884    pub fn name(&self) -> &'static str {
1885        match self {
1886            DetectionMethod::RuleBased => "rule_based",
1887            DetectionMethod::Statistical => "statistical",
1888            DetectionMethod::MachineLearning => "machine_learning",
1889            DetectionMethod::GraphBased => "graph_based",
1890            DetectionMethod::ForensicAudit => "forensic_audit",
1891            DetectionMethod::Hybrid => "hybrid",
1892        }
1893    }
1894
1895    /// Returns a description of this detection method.
1896    pub fn description(&self) -> &'static str {
1897        match self {
1898            DetectionMethod::RuleBased => "Simple threshold and filter rules",
1899            DetectionMethod::Statistical => "Statistical distribution analysis",
1900            DetectionMethod::MachineLearning => "ML classification models",
1901            DetectionMethod::GraphBased => "Network and relationship analysis",
1902            DetectionMethod::ForensicAudit => "Manual forensic procedures",
1903            DetectionMethod::Hybrid => "Combined multi-method approach",
1904        }
1905    }
1906}
1907
/// Extended anomaly label with comprehensive multi-dimensional classification.
///
/// This extends the base `EnhancedAnomalyLabel` with additional fields for
/// severity scoring, detection difficulty, recommended methods, and ground truth.
///
/// NOTE(review): the summary above names `EnhancedAnomalyLabel`, but the
/// wrapped `base` field is a `LabeledAnomaly` — confirm which is intended.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExtendedAnomalyLabel {
    /// Base labeled anomaly.
    pub base: LabeledAnomaly,
    /// Enhanced category classification.
    pub category: AnomalyCategory,
    /// Structured severity assessment.
    pub severity: AnomalySeverity,
    /// Detection difficulty classification.
    pub detection_difficulty: AnomalyDetectionDifficulty,
    /// Recommended detection methods for this anomaly.
    pub recommended_methods: Vec<DetectionMethod>,
    /// Key indicators that should trigger detection.
    pub key_indicators: Vec<String>,
    /// Ground truth certainty level.
    pub ground_truth_certainty: GroundTruthCertainty,
    /// Contributing factors to confidence/severity.
    pub contributing_factors: Vec<ContributingFactor>,
    /// Related entity IDs (vendors, customers, employees, etc.).
    pub related_entity_ids: Vec<String>,
    /// Secondary categories for multi-label classification.
    pub secondary_categories: Vec<AnomalyCategory>,
    /// Scheme ID if part of a multi-stage fraud scheme.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_id: Option<String>,
    /// Stage number within a scheme (1-indexed).
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub scheme_stage: Option<u32>,
    /// Whether this is a near-miss (suspicious but legitimate).
    /// Defaults to `false` when missing on deserialization.
    #[serde(default)]
    pub is_near_miss: bool,
    /// Explanation if this is a near-miss.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub near_miss_explanation: Option<String>,
}
1947
1948impl ExtendedAnomalyLabel {
1949    /// Creates an extended label from a base labeled anomaly.
1950    pub fn from_base(base: LabeledAnomaly) -> Self {
1951        let category = AnomalyCategory::from_anomaly_type(&base.anomaly_type);
1952        let severity = AnomalySeverity {
1953            level: SeverityLevel::from_numeric(base.severity),
1954            score: base.severity as f64 / 5.0,
1955            financial_impact: base.monetary_impact.unwrap_or(Decimal::ZERO),
1956            is_material: false,
1957            materiality_threshold: None,
1958        };
1959
1960        Self {
1961            base,
1962            category,
1963            severity,
1964            detection_difficulty: AnomalyDetectionDifficulty::Moderate,
1965            recommended_methods: vec![DetectionMethod::RuleBased],
1966            key_indicators: Vec::new(),
1967            ground_truth_certainty: GroundTruthCertainty::Definite,
1968            contributing_factors: Vec::new(),
1969            related_entity_ids: Vec::new(),
1970            secondary_categories: Vec::new(),
1971            scheme_id: None,
1972            scheme_stage: None,
1973            is_near_miss: false,
1974            near_miss_explanation: None,
1975        }
1976    }
1977
1978    /// Sets the severity assessment.
1979    pub fn with_severity(mut self, severity: AnomalySeverity) -> Self {
1980        self.severity = severity;
1981        self
1982    }
1983
1984    /// Sets the detection difficulty.
1985    pub fn with_difficulty(mut self, difficulty: AnomalyDetectionDifficulty) -> Self {
1986        self.detection_difficulty = difficulty;
1987        self
1988    }
1989
1990    /// Adds a recommended detection method.
1991    pub fn with_method(mut self, method: DetectionMethod) -> Self {
1992        if !self.recommended_methods.contains(&method) {
1993            self.recommended_methods.push(method);
1994        }
1995        self
1996    }
1997
1998    /// Sets the recommended detection methods.
1999    pub fn with_methods(mut self, methods: Vec<DetectionMethod>) -> Self {
2000        self.recommended_methods = methods;
2001        self
2002    }
2003
2004    /// Adds a key indicator.
2005    pub fn with_indicator(mut self, indicator: impl Into<String>) -> Self {
2006        self.key_indicators.push(indicator.into());
2007        self
2008    }
2009
2010    /// Sets the ground truth certainty.
2011    pub fn with_certainty(mut self, certainty: GroundTruthCertainty) -> Self {
2012        self.ground_truth_certainty = certainty;
2013        self
2014    }
2015
2016    /// Adds a contributing factor.
2017    pub fn with_factor(mut self, factor: ContributingFactor) -> Self {
2018        self.contributing_factors.push(factor);
2019        self
2020    }
2021
2022    /// Adds a related entity ID.
2023    pub fn with_entity(mut self, entity_id: impl Into<String>) -> Self {
2024        self.related_entity_ids.push(entity_id.into());
2025        self
2026    }
2027
2028    /// Adds a secondary category.
2029    pub fn with_secondary_category(mut self, category: AnomalyCategory) -> Self {
2030        if category != self.category && !self.secondary_categories.contains(&category) {
2031            self.secondary_categories.push(category);
2032        }
2033        self
2034    }
2035
2036    /// Sets scheme information.
2037    pub fn with_scheme(mut self, scheme_id: impl Into<String>, stage: u32) -> Self {
2038        self.scheme_id = Some(scheme_id.into());
2039        self.scheme_stage = Some(stage);
2040        self
2041    }
2042
2043    /// Marks this as a near-miss with explanation.
2044    pub fn as_near_miss(mut self, explanation: impl Into<String>) -> Self {
2045        self.is_near_miss = true;
2046        self.near_miss_explanation = Some(explanation.into());
2047        self
2048    }
2049
2050    /// Converts to an extended feature vector for ML.
2051    ///
2052    /// Returns base features (15) + extended features (15) = 30 features.
2053    pub fn to_features(&self) -> Vec<f64> {
2054        let mut features = self.base.to_features();
2055
2056        // Extended features
2057        features.push(self.severity.score);
2058        features.push(self.severity.level.to_score());
2059        features.push(if self.severity.is_material { 1.0 } else { 0.0 });
2060        features.push(self.detection_difficulty.difficulty_score());
2061        features.push(self.detection_difficulty.expected_detection_rate());
2062        features.push(self.ground_truth_certainty.certainty_score());
2063        features.push(self.category.ordinal() as f64 / AnomalyCategory::category_count() as f64);
2064        features.push(self.secondary_categories.len() as f64);
2065        features.push(self.contributing_factors.len() as f64);
2066        features.push(self.key_indicators.len() as f64);
2067        features.push(self.recommended_methods.len() as f64);
2068        features.push(self.related_entity_ids.len() as f64);
2069        features.push(if self.scheme_id.is_some() { 1.0 } else { 0.0 });
2070        features.push(self.scheme_stage.unwrap_or(0) as f64);
2071        features.push(if self.is_near_miss { 1.0 } else { 0.0 });
2072
2073        features
2074    }
2075
2076    /// Returns the number of features in the extended feature vector.
2077    pub fn feature_count() -> usize {
2078        30 // 15 base + 15 extended
2079    }
2080
2081    /// Returns feature names for the extended feature vector.
2082    pub fn feature_names() -> Vec<&'static str> {
2083        let mut names = LabeledAnomaly::feature_names();
2084        names.extend(vec![
2085            "severity_score",
2086            "severity_level_score",
2087            "is_material",
2088            "difficulty_score",
2089            "expected_detection_rate",
2090            "ground_truth_certainty",
2091            "category_ordinal",
2092            "secondary_category_count",
2093            "contributing_factor_count",
2094            "key_indicator_count",
2095            "recommended_method_count",
2096            "related_entity_count",
2097            "is_part_of_scheme",
2098            "scheme_stage",
2099            "is_near_miss",
2100        ]);
2101        names
2102    }
2103}
2104
2105// ============================================================================
2106// MULTI-STAGE FRAUD SCHEME TYPES
2107// ============================================================================
2108
/// Type of multi-stage fraud scheme.
///
/// Each variant has a snake_case name and a typical stage count, both
/// provided by the inherent methods on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SchemeType {
    /// Gradual embezzlement over time.
    GradualEmbezzlement,
    /// Revenue manipulation across periods.
    RevenueManipulation,
    /// Vendor kickback scheme.
    VendorKickback,
    /// Round-tripping funds through multiple entities.
    RoundTripping,
    /// Ghost employee scheme.
    GhostEmployee,
    /// Expense reimbursement fraud.
    ExpenseReimbursement,
    /// Inventory theft scheme.
    InventoryTheft,
    /// Custom scheme type.
    Custom,
}
2129
2130impl SchemeType {
2131    /// Returns the name of this scheme type.
2132    pub fn name(&self) -> &'static str {
2133        match self {
2134            SchemeType::GradualEmbezzlement => "gradual_embezzlement",
2135            SchemeType::RevenueManipulation => "revenue_manipulation",
2136            SchemeType::VendorKickback => "vendor_kickback",
2137            SchemeType::RoundTripping => "round_tripping",
2138            SchemeType::GhostEmployee => "ghost_employee",
2139            SchemeType::ExpenseReimbursement => "expense_reimbursement",
2140            SchemeType::InventoryTheft => "inventory_theft",
2141            SchemeType::Custom => "custom",
2142        }
2143    }
2144
2145    /// Returns the typical number of stages for this scheme type.
2146    pub fn typical_stages(&self) -> u32 {
2147        match self {
2148            SchemeType::GradualEmbezzlement => 4, // testing, escalation, acceleration, desperation
2149            SchemeType::RevenueManipulation => 4, // Q4->Q1->Q2->Q4
2150            SchemeType::VendorKickback => 4,      // setup, inflation, kickback, concealment
2151            SchemeType::RoundTripping => 3,       // setup, execution, reversal
2152            SchemeType::GhostEmployee => 3,       // creation, payroll, concealment
2153            SchemeType::ExpenseReimbursement => 3, // submission, approval, payment
2154            SchemeType::InventoryTheft => 3,      // access, theft, cover-up
2155            SchemeType::Custom => 4,
2156        }
2157    }
2158}
2159
/// Status of detection for a fraud scheme.
///
/// `Undetected` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum SchemeDetectionStatus {
    /// Scheme is undetected. This is the default status.
    #[default]
    Undetected,
    /// Under investigation but not confirmed.
    UnderInvestigation,
    /// Partially detected (some transactions flagged).
    PartiallyDetected,
    /// Fully detected and confirmed.
    FullyDetected,
}
2173
/// Reference to a transaction within a scheme.
///
/// Identifies the transaction by document ID and records its date, amount,
/// scheme stage, and optionally the ID of the anomaly label attached to it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SchemeTransactionRef {
    /// Document ID of the transaction.
    pub document_id: String,
    /// Transaction date.
    pub date: chrono::NaiveDate,
    /// Transaction amount.
    pub amount: Decimal,
    /// Stage this transaction belongs to.
    pub stage: u32,
    /// Anomaly ID if labeled.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub anomaly_id: Option<String>,
}
2189
/// Concealment technique used in fraud.
///
/// Each technique carries a difficulty bonus, available through
/// `difficulty_bonus`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ConcealmentTechnique {
    /// Document manipulation or forgery.
    DocumentManipulation,
    /// Circumventing approval processes.
    ApprovalCircumvention,
    /// Exploiting timing (period-end, holidays).
    TimingExploitation,
    /// Transaction splitting to avoid thresholds.
    TransactionSplitting,
    /// Account misclassification.
    AccountMisclassification,
    /// Collusion with other employees.
    Collusion,
    /// Data alteration or deletion.
    DataAlteration,
    /// Creating false documentation.
    FalseDocumentation,
}
2210
2211impl ConcealmentTechnique {
2212    /// Returns the difficulty bonus this technique adds.
2213    pub fn difficulty_bonus(&self) -> f64 {
2214        match self {
2215            ConcealmentTechnique::DocumentManipulation => 0.20,
2216            ConcealmentTechnique::ApprovalCircumvention => 0.15,
2217            ConcealmentTechnique::TimingExploitation => 0.10,
2218            ConcealmentTechnique::TransactionSplitting => 0.15,
2219            ConcealmentTechnique::AccountMisclassification => 0.10,
2220            ConcealmentTechnique::Collusion => 0.25,
2221            ConcealmentTechnique::DataAlteration => 0.20,
2222            ConcealmentTechnique::FalseDocumentation => 0.15,
2223        }
2224    }
2225}
2226
2227// ============================================================================
2228// ACFE-ALIGNED FRAUD TAXONOMY
2229// ============================================================================
2230//
2231// Based on the Association of Certified Fraud Examiners (ACFE) Report to the
2232// Nations: Occupational Fraud Classification System. This taxonomy provides
2233// ACFE-aligned categories, schemes, and calibration data.
2234
/// ACFE-aligned fraud categories based on the Occupational Fraud Tree.
///
/// ACFE Report to the Nations statistics (typical):
/// - Asset Misappropriation: 86% of cases, $100k median loss
/// - Corruption: 33% of cases, $150k median loss
/// - Financial Statement Fraud: 10% of cases, $954k median loss
///
/// Note: Percentages sum to >100% because some schemes fall into multiple categories.
///
/// `AssetMisappropriation` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
pub enum AcfeFraudCategory {
    /// Theft of organizational assets (cash, inventory, equipment).
    /// Most common (86% of cases) but typically lowest median loss ($100k).
    /// This is the default category.
    #[default]
    AssetMisappropriation,
    /// Abuse of position for personal gain through bribery, kickbacks, conflicts of interest.
    /// Medium frequency (33% of cases), medium median loss ($150k).
    Corruption,
    /// Intentional misstatement of financial statements.
    /// Least common (10% of cases) but highest median loss ($954k).
    FinancialStatementFraud,
}
2256
2257impl AcfeFraudCategory {
2258    /// Returns the name of this category.
2259    pub fn name(&self) -> &'static str {
2260        match self {
2261            AcfeFraudCategory::AssetMisappropriation => "asset_misappropriation",
2262            AcfeFraudCategory::Corruption => "corruption",
2263            AcfeFraudCategory::FinancialStatementFraud => "financial_statement_fraud",
2264        }
2265    }
2266
2267    /// Returns the typical percentage of occupational fraud cases (from ACFE reports).
2268    pub fn typical_occurrence_rate(&self) -> f64 {
2269        match self {
2270            AcfeFraudCategory::AssetMisappropriation => 0.86,
2271            AcfeFraudCategory::Corruption => 0.33,
2272            AcfeFraudCategory::FinancialStatementFraud => 0.10,
2273        }
2274    }
2275
2276    /// Returns the typical median loss amount (from ACFE reports).
2277    pub fn typical_median_loss(&self) -> Decimal {
2278        match self {
2279            AcfeFraudCategory::AssetMisappropriation => Decimal::new(100_000, 0),
2280            AcfeFraudCategory::Corruption => Decimal::new(150_000, 0),
2281            AcfeFraudCategory::FinancialStatementFraud => Decimal::new(954_000, 0),
2282        }
2283    }
2284
2285    /// Returns the typical detection time in months (from ACFE reports).
2286    pub fn typical_detection_months(&self) -> u32 {
2287        match self {
2288            AcfeFraudCategory::AssetMisappropriation => 12,
2289            AcfeFraudCategory::Corruption => 18,
2290            AcfeFraudCategory::FinancialStatementFraud => 24,
2291        }
2292    }
2293}
2294
/// Cash-based fraud schemes under Asset Misappropriation.
///
/// Organized according to the ACFE Fraud Tree:
/// - Theft of Cash on Hand
/// - Theft of Cash Receipts
/// - Fraudulent Disbursements
///
/// The section headings below match the grouping returned by
/// `CashFraudScheme::subcategory`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum CashFraudScheme {
    // ========== Theft of Cash on Hand ==========
    /// Stealing cash from cash drawers or safes after it has been recorded.
    Larceny,
    /// Stealing cash before it is recorded in the books (intercepts receipts).
    Skimming,

    // ========== Theft of Cash Receipts ==========
    /// Skimming from sales transactions before recording.
    SalesSkimming,
    /// Intercepting customer payments on accounts receivable.
    ReceivablesSkimming,
    /// Creating false refunds to pocket the difference.
    RefundSchemes,

    // ========== Fraudulent Disbursements - Billing Schemes ==========
    /// Creating fictitious vendors to invoice and pay.
    ShellCompany,
    /// Manipulating payments to legitimate vendors for personal gain.
    NonAccompliceVendor,
    /// Using company funds for personal purchases.
    PersonalPurchases,

    // ========== Fraudulent Disbursements - Payroll Schemes ==========
    /// Creating fake employees to collect wages.
    GhostEmployee,
    /// Falsifying hours worked, sales commissions, or salary rates.
    FalsifiedWages,
    /// Manipulating commission calculations.
    CommissionSchemes,

    // ========== Fraudulent Disbursements - Expense Reimbursement ==========
    /// Claiming non-business expenses as business expenses.
    MischaracterizedExpenses,
    /// Inflating legitimate expense amounts.
    OverstatedExpenses,
    /// Creating completely fictitious expenses.
    FictitiousExpenses,

    // ========== Fraudulent Disbursements - Check/Payment Tampering ==========
    /// Forging the signature of an authorized check signer.
    ForgedMaker,
    /// Intercepting and altering the endorsement on legitimate checks.
    ForgedEndorsement,
    /// Altering the payee on a legitimate check.
    AlteredPayee,
    /// Authorized signer writing checks for personal benefit.
    AuthorizedMaker,

    // ========== Fraudulent Disbursements - Register/POS Schemes ==========
    /// Creating false voided transactions.
    FalseVoids,
    /// Processing fictitious refunds.
    FalseRefunds,
}
2357
2358impl CashFraudScheme {
2359    /// Returns the ACFE category this scheme belongs to.
2360    pub fn category(&self) -> AcfeFraudCategory {
2361        AcfeFraudCategory::AssetMisappropriation
2362    }
2363
2364    /// Returns the subcategory within the ACFE Fraud Tree.
2365    pub fn subcategory(&self) -> &'static str {
2366        match self {
2367            CashFraudScheme::Larceny | CashFraudScheme::Skimming => "theft_of_cash_on_hand",
2368            CashFraudScheme::SalesSkimming
2369            | CashFraudScheme::ReceivablesSkimming
2370            | CashFraudScheme::RefundSchemes => "theft_of_cash_receipts",
2371            CashFraudScheme::ShellCompany
2372            | CashFraudScheme::NonAccompliceVendor
2373            | CashFraudScheme::PersonalPurchases => "billing_schemes",
2374            CashFraudScheme::GhostEmployee
2375            | CashFraudScheme::FalsifiedWages
2376            | CashFraudScheme::CommissionSchemes => "payroll_schemes",
2377            CashFraudScheme::MischaracterizedExpenses
2378            | CashFraudScheme::OverstatedExpenses
2379            | CashFraudScheme::FictitiousExpenses => "expense_reimbursement",
2380            CashFraudScheme::ForgedMaker
2381            | CashFraudScheme::ForgedEndorsement
2382            | CashFraudScheme::AlteredPayee
2383            | CashFraudScheme::AuthorizedMaker => "check_tampering",
2384            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => "register_schemes",
2385        }
2386    }
2387
2388    /// Returns the typical severity (1-5) for this scheme.
2389    pub fn severity(&self) -> u8 {
2390        match self {
2391            // Lower severity - often small amounts, easier to detect
2392            CashFraudScheme::FalseVoids
2393            | CashFraudScheme::FalseRefunds
2394            | CashFraudScheme::MischaracterizedExpenses => 3,
2395            // Medium severity
2396            CashFraudScheme::OverstatedExpenses
2397            | CashFraudScheme::Skimming
2398            | CashFraudScheme::Larceny
2399            | CashFraudScheme::PersonalPurchases
2400            | CashFraudScheme::FalsifiedWages => 4,
2401            // Higher severity - larger amounts, harder to detect
2402            CashFraudScheme::ShellCompany
2403            | CashFraudScheme::GhostEmployee
2404            | CashFraudScheme::FictitiousExpenses
2405            | CashFraudScheme::ForgedMaker
2406            | CashFraudScheme::AuthorizedMaker => 5,
2407            _ => 4,
2408        }
2409    }
2410
2411    /// Returns the typical detection difficulty.
2412    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2413        match self {
2414            // Easy to detect with basic controls
2415            CashFraudScheme::FalseVoids | CashFraudScheme::FalseRefunds => {
2416                AnomalyDetectionDifficulty::Easy
2417            }
2418            // Moderate - requires reconciliation
2419            CashFraudScheme::Larceny | CashFraudScheme::OverstatedExpenses => {
2420                AnomalyDetectionDifficulty::Moderate
2421            }
2422            // Hard - requires sophisticated analysis
2423            CashFraudScheme::Skimming
2424            | CashFraudScheme::ShellCompany
2425            | CashFraudScheme::GhostEmployee => AnomalyDetectionDifficulty::Hard,
2426            // Expert level
2427            CashFraudScheme::SalesSkimming | CashFraudScheme::ReceivablesSkimming => {
2428                AnomalyDetectionDifficulty::Expert
2429            }
2430            _ => AnomalyDetectionDifficulty::Moderate,
2431        }
2432    }
2433
2434    /// Returns all variants for iteration.
2435    pub fn all_variants() -> &'static [CashFraudScheme] {
2436        &[
2437            CashFraudScheme::Larceny,
2438            CashFraudScheme::Skimming,
2439            CashFraudScheme::SalesSkimming,
2440            CashFraudScheme::ReceivablesSkimming,
2441            CashFraudScheme::RefundSchemes,
2442            CashFraudScheme::ShellCompany,
2443            CashFraudScheme::NonAccompliceVendor,
2444            CashFraudScheme::PersonalPurchases,
2445            CashFraudScheme::GhostEmployee,
2446            CashFraudScheme::FalsifiedWages,
2447            CashFraudScheme::CommissionSchemes,
2448            CashFraudScheme::MischaracterizedExpenses,
2449            CashFraudScheme::OverstatedExpenses,
2450            CashFraudScheme::FictitiousExpenses,
2451            CashFraudScheme::ForgedMaker,
2452            CashFraudScheme::ForgedEndorsement,
2453            CashFraudScheme::AlteredPayee,
2454            CashFraudScheme::AuthorizedMaker,
2455            CashFraudScheme::FalseVoids,
2456            CashFraudScheme::FalseRefunds,
2457        ]
2458    }
2459}
2460
2461/// Inventory and Other Asset fraud schemes under Asset Misappropriation.
2462#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2463pub enum AssetFraudScheme {
2464    // ========== Inventory Schemes ==========
2465    /// Misusing or converting inventory for personal benefit.
2466    InventoryMisuse,
2467    /// Stealing physical inventory items.
2468    InventoryTheft,
2469    /// Manipulating purchasing to facilitate theft.
2470    InventoryPurchasingScheme,
2471    /// Manipulating receiving/shipping to steal inventory.
2472    InventoryReceivingScheme,
2473
2474    // ========== Other Asset Schemes ==========
2475    /// Misusing company equipment or vehicles.
2476    EquipmentMisuse,
2477    /// Theft of company equipment, tools, or supplies.
2478    EquipmentTheft,
2479    /// Unauthorized access to or theft of intellectual property.
2480    IntellectualPropertyTheft,
2481    /// Using company time/resources for personal business.
2482    TimeTheft,
2483}
2484
2485impl AssetFraudScheme {
2486    /// Returns the ACFE category this scheme belongs to.
2487    pub fn category(&self) -> AcfeFraudCategory {
2488        AcfeFraudCategory::AssetMisappropriation
2489    }
2490
2491    /// Returns the subcategory within the ACFE Fraud Tree.
2492    pub fn subcategory(&self) -> &'static str {
2493        match self {
2494            AssetFraudScheme::InventoryMisuse
2495            | AssetFraudScheme::InventoryTheft
2496            | AssetFraudScheme::InventoryPurchasingScheme
2497            | AssetFraudScheme::InventoryReceivingScheme => "inventory",
2498            _ => "other_assets",
2499        }
2500    }
2501
2502    /// Returns the typical severity (1-5) for this scheme.
2503    pub fn severity(&self) -> u8 {
2504        match self {
2505            AssetFraudScheme::TimeTheft | AssetFraudScheme::EquipmentMisuse => 2,
2506            AssetFraudScheme::InventoryMisuse | AssetFraudScheme::EquipmentTheft => 3,
2507            AssetFraudScheme::InventoryTheft
2508            | AssetFraudScheme::InventoryPurchasingScheme
2509            | AssetFraudScheme::InventoryReceivingScheme => 4,
2510            AssetFraudScheme::IntellectualPropertyTheft => 5,
2511        }
2512    }
2513}
2514
2515/// Corruption schemes under the ACFE Fraud Tree.
2516///
2517/// Corruption schemes involve the wrongful use of influence in a business
2518/// transaction to procure personal benefit.
2519#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2520pub enum CorruptionScheme {
2521    // ========== Conflicts of Interest ==========
2522    /// Employee has undisclosed financial interest in purchasing decisions.
2523    PurchasingConflict,
2524    /// Employee has undisclosed relationship with customer/vendor.
2525    SalesConflict,
2526    /// Employee owns or has interest in competing business.
2527    OutsideBusinessInterest,
2528    /// Employee makes decisions benefiting family members.
2529    NepotismConflict,
2530
2531    // ========== Bribery ==========
2532    /// Kickback payments from vendors for favorable treatment.
2533    InvoiceKickback,
2534    /// Collusion among vendors to inflate prices.
2535    BidRigging,
2536    /// Other cash payments for favorable decisions.
2537    CashBribery,
2538    /// Bribery of government officials.
2539    PublicOfficial,
2540
2541    // ========== Illegal Gratuities ==========
2542    /// Gifts given after favorable decisions (not agreed in advance).
2543    IllegalGratuity,
2544
2545    // ========== Economic Extortion ==========
2546    /// Demanding payment under threat of adverse action.
2547    EconomicExtortion,
2548}
2549
2550impl CorruptionScheme {
2551    /// Returns the ACFE category this scheme belongs to.
2552    pub fn category(&self) -> AcfeFraudCategory {
2553        AcfeFraudCategory::Corruption
2554    }
2555
2556    /// Returns the subcategory within the ACFE Fraud Tree.
2557    pub fn subcategory(&self) -> &'static str {
2558        match self {
2559            CorruptionScheme::PurchasingConflict
2560            | CorruptionScheme::SalesConflict
2561            | CorruptionScheme::OutsideBusinessInterest
2562            | CorruptionScheme::NepotismConflict => "conflicts_of_interest",
2563            CorruptionScheme::InvoiceKickback
2564            | CorruptionScheme::BidRigging
2565            | CorruptionScheme::CashBribery
2566            | CorruptionScheme::PublicOfficial => "bribery",
2567            CorruptionScheme::IllegalGratuity => "illegal_gratuities",
2568            CorruptionScheme::EconomicExtortion => "economic_extortion",
2569        }
2570    }
2571
2572    /// Returns the typical severity (1-5) for this scheme.
2573    pub fn severity(&self) -> u8 {
2574        match self {
2575            // Lower severity conflicts of interest
2576            CorruptionScheme::NepotismConflict => 3,
2577            // Medium severity
2578            CorruptionScheme::PurchasingConflict
2579            | CorruptionScheme::SalesConflict
2580            | CorruptionScheme::OutsideBusinessInterest
2581            | CorruptionScheme::IllegalGratuity => 4,
2582            // High severity - active corruption
2583            CorruptionScheme::InvoiceKickback
2584            | CorruptionScheme::BidRigging
2585            | CorruptionScheme::CashBribery
2586            | CorruptionScheme::EconomicExtortion => 5,
2587            // Highest severity - involves public officials
2588            CorruptionScheme::PublicOfficial => 5,
2589        }
2590    }
2591
2592    /// Returns the typical detection difficulty.
2593    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2594        match self {
2595            // Easier to detect with proper disclosure requirements
2596            CorruptionScheme::NepotismConflict | CorruptionScheme::OutsideBusinessInterest => {
2597                AnomalyDetectionDifficulty::Moderate
2598            }
2599            // Hard - requires transaction pattern analysis
2600            CorruptionScheme::PurchasingConflict
2601            | CorruptionScheme::SalesConflict
2602            | CorruptionScheme::BidRigging => AnomalyDetectionDifficulty::Hard,
2603            // Expert level - deliberate concealment
2604            CorruptionScheme::InvoiceKickback
2605            | CorruptionScheme::CashBribery
2606            | CorruptionScheme::PublicOfficial
2607            | CorruptionScheme::IllegalGratuity
2608            | CorruptionScheme::EconomicExtortion => AnomalyDetectionDifficulty::Expert,
2609        }
2610    }
2611
2612    /// Returns all variants for iteration.
2613    pub fn all_variants() -> &'static [CorruptionScheme] {
2614        &[
2615            CorruptionScheme::PurchasingConflict,
2616            CorruptionScheme::SalesConflict,
2617            CorruptionScheme::OutsideBusinessInterest,
2618            CorruptionScheme::NepotismConflict,
2619            CorruptionScheme::InvoiceKickback,
2620            CorruptionScheme::BidRigging,
2621            CorruptionScheme::CashBribery,
2622            CorruptionScheme::PublicOfficial,
2623            CorruptionScheme::IllegalGratuity,
2624            CorruptionScheme::EconomicExtortion,
2625        ]
2626    }
2627}
2628
2629/// Financial Statement Fraud schemes under the ACFE Fraud Tree.
2630///
2631/// Financial statement fraud involves the intentional misstatement or omission
2632/// of material information in financial reports.
2633#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2634pub enum FinancialStatementScheme {
2635    // ========== Asset/Revenue Overstatement ==========
2636    /// Recording revenue before it is earned.
2637    PrematureRevenue,
2638    /// Deferring expenses to future periods.
2639    DelayedExpenses,
2640    /// Recording revenue for transactions that never occurred.
2641    FictitiousRevenues,
2642    /// Failing to record known liabilities.
2643    ConcealedLiabilities,
2644    /// Overstating the value of assets.
2645    ImproperAssetValuations,
2646    /// Omitting or misstating required disclosures.
2647    ImproperDisclosures,
2648    /// Manipulating timing of revenue recognition (channel stuffing).
2649    ChannelStuffing,
2650    /// Recognizing bill-and-hold revenue improperly.
2651    BillAndHold,
2652    /// Capitalizing expenses that should be expensed.
2653    ImproperCapitalization,
2654
2655    // ========== Asset/Revenue Understatement ==========
2656    /// Understating revenue (often for tax purposes).
2657    UnderstatedRevenues,
2658    /// Recording excessive expenses.
2659    OverstatedExpenses,
2660    /// Recording excessive liabilities or reserves.
2661    OverstatedLiabilities,
2662    /// Undervaluing assets for writedowns/reserves.
2663    ImproperAssetWritedowns,
2664}
2665
2666impl FinancialStatementScheme {
2667    /// Returns the ACFE category this scheme belongs to.
2668    pub fn category(&self) -> AcfeFraudCategory {
2669        AcfeFraudCategory::FinancialStatementFraud
2670    }
2671
2672    /// Returns the subcategory within the ACFE Fraud Tree.
2673    pub fn subcategory(&self) -> &'static str {
2674        match self {
2675            FinancialStatementScheme::UnderstatedRevenues
2676            | FinancialStatementScheme::OverstatedExpenses
2677            | FinancialStatementScheme::OverstatedLiabilities
2678            | FinancialStatementScheme::ImproperAssetWritedowns => "understatement",
2679            _ => "overstatement",
2680        }
2681    }
2682
2683    /// Returns the typical severity (1-5) for this scheme.
2684    pub fn severity(&self) -> u8 {
2685        // All financial statement fraud is high severity
2686        5
2687    }
2688
2689    /// Returns the typical detection difficulty.
2690    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2691        match self {
2692            // Easier to detect with good analytics
2693            FinancialStatementScheme::ChannelStuffing
2694            | FinancialStatementScheme::DelayedExpenses => AnomalyDetectionDifficulty::Moderate,
2695            // Hard - requires deep analysis
2696            FinancialStatementScheme::PrematureRevenue
2697            | FinancialStatementScheme::ImproperCapitalization
2698            | FinancialStatementScheme::ImproperAssetWritedowns => AnomalyDetectionDifficulty::Hard,
2699            // Expert level
2700            FinancialStatementScheme::FictitiousRevenues
2701            | FinancialStatementScheme::ConcealedLiabilities
2702            | FinancialStatementScheme::ImproperAssetValuations
2703            | FinancialStatementScheme::ImproperDisclosures
2704            | FinancialStatementScheme::BillAndHold => AnomalyDetectionDifficulty::Expert,
2705            _ => AnomalyDetectionDifficulty::Hard,
2706        }
2707    }
2708
2709    /// Returns all variants for iteration.
2710    pub fn all_variants() -> &'static [FinancialStatementScheme] {
2711        &[
2712            FinancialStatementScheme::PrematureRevenue,
2713            FinancialStatementScheme::DelayedExpenses,
2714            FinancialStatementScheme::FictitiousRevenues,
2715            FinancialStatementScheme::ConcealedLiabilities,
2716            FinancialStatementScheme::ImproperAssetValuations,
2717            FinancialStatementScheme::ImproperDisclosures,
2718            FinancialStatementScheme::ChannelStuffing,
2719            FinancialStatementScheme::BillAndHold,
2720            FinancialStatementScheme::ImproperCapitalization,
2721            FinancialStatementScheme::UnderstatedRevenues,
2722            FinancialStatementScheme::OverstatedExpenses,
2723            FinancialStatementScheme::OverstatedLiabilities,
2724            FinancialStatementScheme::ImproperAssetWritedowns,
2725        ]
2726    }
2727}
2728
2729/// Unified ACFE scheme type that encompasses all fraud schemes.
2730#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2731pub enum AcfeScheme {
2732    /// Cash-based fraud schemes.
2733    Cash(CashFraudScheme),
2734    /// Inventory and other asset fraud schemes.
2735    Asset(AssetFraudScheme),
2736    /// Corruption schemes.
2737    Corruption(CorruptionScheme),
2738    /// Financial statement fraud schemes.
2739    FinancialStatement(FinancialStatementScheme),
2740}
2741
2742impl AcfeScheme {
2743    /// Returns the ACFE category this scheme belongs to.
2744    pub fn category(&self) -> AcfeFraudCategory {
2745        match self {
2746            AcfeScheme::Cash(s) => s.category(),
2747            AcfeScheme::Asset(s) => s.category(),
2748            AcfeScheme::Corruption(s) => s.category(),
2749            AcfeScheme::FinancialStatement(s) => s.category(),
2750        }
2751    }
2752
2753    /// Returns the severity (1-5) for this scheme.
2754    pub fn severity(&self) -> u8 {
2755        match self {
2756            AcfeScheme::Cash(s) => s.severity(),
2757            AcfeScheme::Asset(s) => s.severity(),
2758            AcfeScheme::Corruption(s) => s.severity(),
2759            AcfeScheme::FinancialStatement(s) => s.severity(),
2760        }
2761    }
2762
2763    /// Returns the detection difficulty for this scheme.
2764    pub fn detection_difficulty(&self) -> AnomalyDetectionDifficulty {
2765        match self {
2766            AcfeScheme::Cash(s) => s.detection_difficulty(),
2767            AcfeScheme::Asset(_) => AnomalyDetectionDifficulty::Moderate,
2768            AcfeScheme::Corruption(s) => s.detection_difficulty(),
2769            AcfeScheme::FinancialStatement(s) => s.detection_difficulty(),
2770        }
2771    }
2772}
2773
2774/// How a fraud was detected (from ACFE statistics).
2775#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2776pub enum AcfeDetectionMethod {
2777    /// Tip from employee, customer, vendor, or anonymous source.
2778    Tip,
2779    /// Internal audit procedures.
2780    InternalAudit,
2781    /// Management review and oversight.
2782    ManagementReview,
2783    /// External audit procedures.
2784    ExternalAudit,
2785    /// Account reconciliation discrepancies.
2786    AccountReconciliation,
2787    /// Document examination.
2788    DocumentExamination,
2789    /// Discovered by accident.
2790    ByAccident,
2791    /// Automated monitoring/IT controls.
2792    ItControls,
2793    /// Surveillance or investigation.
2794    Surveillance,
2795    /// Confession by perpetrator.
2796    Confession,
2797    /// Law enforcement notification.
2798    LawEnforcement,
2799    /// Other detection method.
2800    Other,
2801}
2802
2803impl AcfeDetectionMethod {
2804    /// Returns the typical percentage of frauds detected by this method (from ACFE reports).
2805    pub fn typical_detection_rate(&self) -> f64 {
2806        match self {
2807            AcfeDetectionMethod::Tip => 0.42,
2808            AcfeDetectionMethod::InternalAudit => 0.16,
2809            AcfeDetectionMethod::ManagementReview => 0.12,
2810            AcfeDetectionMethod::ExternalAudit => 0.04,
2811            AcfeDetectionMethod::AccountReconciliation => 0.05,
2812            AcfeDetectionMethod::DocumentExamination => 0.04,
2813            AcfeDetectionMethod::ByAccident => 0.06,
2814            AcfeDetectionMethod::ItControls => 0.03,
2815            AcfeDetectionMethod::Surveillance => 0.02,
2816            AcfeDetectionMethod::Confession => 0.02,
2817            AcfeDetectionMethod::LawEnforcement => 0.01,
2818            AcfeDetectionMethod::Other => 0.03,
2819        }
2820    }
2821
2822    /// Returns all variants for iteration.
2823    pub fn all_variants() -> &'static [AcfeDetectionMethod] {
2824        &[
2825            AcfeDetectionMethod::Tip,
2826            AcfeDetectionMethod::InternalAudit,
2827            AcfeDetectionMethod::ManagementReview,
2828            AcfeDetectionMethod::ExternalAudit,
2829            AcfeDetectionMethod::AccountReconciliation,
2830            AcfeDetectionMethod::DocumentExamination,
2831            AcfeDetectionMethod::ByAccident,
2832            AcfeDetectionMethod::ItControls,
2833            AcfeDetectionMethod::Surveillance,
2834            AcfeDetectionMethod::Confession,
2835            AcfeDetectionMethod::LawEnforcement,
2836            AcfeDetectionMethod::Other,
2837        ]
2838    }
2839}
2840
2841/// Department/position of perpetrator (from ACFE statistics).
2842#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2843pub enum PerpetratorDepartment {
2844    /// Accounting, finance, or bookkeeping.
2845    Accounting,
2846    /// Operations or manufacturing.
2847    Operations,
2848    /// Executive/upper management.
2849    Executive,
2850    /// Sales.
2851    Sales,
2852    /// Customer service.
2853    CustomerService,
2854    /// Purchasing/procurement.
2855    Purchasing,
2856    /// Information technology.
2857    It,
2858    /// Human resources.
2859    HumanResources,
2860    /// Administrative/clerical.
2861    Administrative,
2862    /// Warehouse/inventory.
2863    Warehouse,
2864    /// Board of directors.
2865    BoardOfDirectors,
2866    /// Other department.
2867    Other,
2868}
2869
2870impl PerpetratorDepartment {
2871    /// Returns the typical percentage of frauds by department (from ACFE reports).
2872    pub fn typical_occurrence_rate(&self) -> f64 {
2873        match self {
2874            PerpetratorDepartment::Accounting => 0.21,
2875            PerpetratorDepartment::Operations => 0.17,
2876            PerpetratorDepartment::Executive => 0.12,
2877            PerpetratorDepartment::Sales => 0.11,
2878            PerpetratorDepartment::CustomerService => 0.07,
2879            PerpetratorDepartment::Purchasing => 0.06,
2880            PerpetratorDepartment::It => 0.05,
2881            PerpetratorDepartment::HumanResources => 0.04,
2882            PerpetratorDepartment::Administrative => 0.04,
2883            PerpetratorDepartment::Warehouse => 0.03,
2884            PerpetratorDepartment::BoardOfDirectors => 0.02,
2885            PerpetratorDepartment::Other => 0.08,
2886        }
2887    }
2888
2889    /// Returns the typical median loss by perpetrator department.
2890    pub fn typical_median_loss(&self) -> Decimal {
2891        match self {
2892            PerpetratorDepartment::Executive => Decimal::new(600_000, 0),
2893            PerpetratorDepartment::BoardOfDirectors => Decimal::new(500_000, 0),
2894            PerpetratorDepartment::Sales => Decimal::new(150_000, 0),
2895            PerpetratorDepartment::Accounting => Decimal::new(130_000, 0),
2896            PerpetratorDepartment::Purchasing => Decimal::new(120_000, 0),
2897            PerpetratorDepartment::Operations => Decimal::new(100_000, 0),
2898            PerpetratorDepartment::It => Decimal::new(100_000, 0),
2899            _ => Decimal::new(80_000, 0),
2900        }
2901    }
2902}
2903
2904/// Perpetrator position level (from ACFE statistics).
2905#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
2906pub enum PerpetratorLevel {
2907    /// Entry-level employee.
2908    Employee,
2909    /// Manager or supervisor.
2910    Manager,
2911    /// Owner, executive, or C-level.
2912    OwnerExecutive,
2913}
2914
2915impl PerpetratorLevel {
2916    /// Returns the typical percentage of frauds by position level.
2917    pub fn typical_occurrence_rate(&self) -> f64 {
2918        match self {
2919            PerpetratorLevel::Employee => 0.42,
2920            PerpetratorLevel::Manager => 0.36,
2921            PerpetratorLevel::OwnerExecutive => 0.22,
2922        }
2923    }
2924
2925    /// Returns the typical median loss by position level.
2926    pub fn typical_median_loss(&self) -> Decimal {
2927        match self {
2928            PerpetratorLevel::Employee => Decimal::new(50_000, 0),
2929            PerpetratorLevel::Manager => Decimal::new(125_000, 0),
2930            PerpetratorLevel::OwnerExecutive => Decimal::new(337_000, 0),
2931        }
2932    }
2933}
2934
2935/// ACFE Calibration data for fraud generation.
2936///
2937/// Contains statistical parameters based on ACFE Report to the Nations
2938/// for realistic fraud pattern generation.
2939#[derive(Debug, Clone, Serialize, Deserialize)]
2940pub struct AcfeCalibration {
2941    /// Overall median loss for occupational fraud ($117,000 typical).
2942    pub median_loss: Decimal,
2943    /// Median duration in months before detection (12 months typical).
2944    pub median_duration_months: u32,
2945    /// Distribution of fraud by category.
2946    pub category_distribution: HashMap<String, f64>,
2947    /// Distribution of detection methods.
2948    pub detection_method_distribution: HashMap<String, f64>,
2949    /// Distribution by perpetrator department.
2950    pub department_distribution: HashMap<String, f64>,
2951    /// Distribution by perpetrator level.
2952    pub level_distribution: HashMap<String, f64>,
2953    /// Average number of red flags per fraud case.
2954    pub avg_red_flags_per_case: f64,
2955    /// Percentage of frauds involving collusion.
2956    pub collusion_rate: f64,
2957}
2958
2959impl Default for AcfeCalibration {
2960    fn default() -> Self {
2961        let mut category_distribution = HashMap::new();
2962        category_distribution.insert("asset_misappropriation".to_string(), 0.86);
2963        category_distribution.insert("corruption".to_string(), 0.33);
2964        category_distribution.insert("financial_statement_fraud".to_string(), 0.10);
2965
2966        let mut detection_method_distribution = HashMap::new();
2967        for method in AcfeDetectionMethod::all_variants() {
2968            detection_method_distribution.insert(
2969                format!("{method:?}").to_lowercase(),
2970                method.typical_detection_rate(),
2971            );
2972        }
2973
2974        let mut department_distribution = HashMap::new();
2975        department_distribution.insert("accounting".to_string(), 0.21);
2976        department_distribution.insert("operations".to_string(), 0.17);
2977        department_distribution.insert("executive".to_string(), 0.12);
2978        department_distribution.insert("sales".to_string(), 0.11);
2979        department_distribution.insert("customer_service".to_string(), 0.07);
2980        department_distribution.insert("purchasing".to_string(), 0.06);
2981        department_distribution.insert("other".to_string(), 0.26);
2982
2983        let mut level_distribution = HashMap::new();
2984        level_distribution.insert("employee".to_string(), 0.42);
2985        level_distribution.insert("manager".to_string(), 0.36);
2986        level_distribution.insert("owner_executive".to_string(), 0.22);
2987
2988        Self {
2989            median_loss: Decimal::new(117_000, 0),
2990            median_duration_months: 12,
2991            category_distribution,
2992            detection_method_distribution,
2993            department_distribution,
2994            level_distribution,
2995            avg_red_flags_per_case: 2.8,
2996            collusion_rate: 0.50,
2997        }
2998    }
2999}
3000
3001impl AcfeCalibration {
3002    /// Creates a new ACFE calibration with the given parameters.
3003    pub fn new(median_loss: Decimal, median_duration_months: u32) -> Self {
3004        Self {
3005            median_loss,
3006            median_duration_months,
3007            ..Self::default()
3008        }
3009    }
3010
3011    /// Returns the median loss for a specific category.
3012    pub fn median_loss_for_category(&self, category: AcfeFraudCategory) -> Decimal {
3013        category.typical_median_loss()
3014    }
3015
3016    /// Returns the median duration for a specific category.
3017    pub fn median_duration_for_category(&self, category: AcfeFraudCategory) -> u32 {
3018        category.typical_detection_months()
3019    }
3020
3021    /// Validates the calibration data.
3022    pub fn validate(&self) -> Result<(), String> {
3023        if self.median_loss <= Decimal::ZERO {
3024            return Err("Median loss must be positive".to_string());
3025        }
3026        if self.median_duration_months == 0 {
3027            return Err("Median duration must be at least 1 month".to_string());
3028        }
3029        if self.collusion_rate < 0.0 || self.collusion_rate > 1.0 {
3030            return Err("Collusion rate must be between 0.0 and 1.0".to_string());
3031        }
3032        Ok(())
3033    }
3034}
3035
3036/// Fraud Triangle components (Pressure, Opportunity, Rationalization).
3037///
3038/// The fraud triangle is a model for explaining the factors that cause
3039/// someone to commit occupational fraud.
3040#[derive(Debug, Clone, Serialize, Deserialize)]
3041pub struct FraudTriangle {
3042    /// Pressure or incentive to commit fraud.
3043    pub pressure: PressureType,
3044    /// Opportunity factors that enable fraud.
3045    pub opportunities: Vec<OpportunityFactor>,
3046    /// Rationalization used to justify the fraud.
3047    pub rationalization: Rationalization,
3048}
3049
3050impl FraudTriangle {
3051    /// Creates a new fraud triangle.
3052    pub fn new(
3053        pressure: PressureType,
3054        opportunities: Vec<OpportunityFactor>,
3055        rationalization: Rationalization,
3056    ) -> Self {
3057        Self {
3058            pressure,
3059            opportunities,
3060            rationalization,
3061        }
3062    }
3063
3064    /// Returns a risk score based on the fraud triangle components.
3065    pub fn risk_score(&self) -> f64 {
3066        let pressure_score = self.pressure.risk_weight();
3067        let opportunity_score: f64 = self
3068            .opportunities
3069            .iter()
3070            .map(OpportunityFactor::risk_weight)
3071            .sum::<f64>()
3072            / self.opportunities.len().max(1) as f64;
3073        let rationalization_score = self.rationalization.risk_weight();
3074
3075        (pressure_score + opportunity_score + rationalization_score) / 3.0
3076    }
3077}
3078
3079/// Types of pressure/incentive that can lead to fraud.
3080#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3081pub enum PressureType {
3082    // Financial Pressures
3083    /// Personal financial difficulties (debt, lifestyle beyond means).
3084    PersonalFinancialDifficulties,
3085    /// Pressure to meet financial targets/earnings expectations.
3086    FinancialTargets,
3087    /// Market or analyst expectations.
3088    MarketExpectations,
3089    /// Debt covenant compliance requirements.
3090    CovenantCompliance,
3091    /// Credit rating maintenance.
3092    CreditRatingMaintenance,
3093    /// Acquisition/merger valuation pressure.
3094    AcquisitionValuation,
3095
3096    // Non-Financial Pressures
3097    /// Fear of job loss.
3098    JobSecurity,
3099    /// Pressure to maintain status or image.
3100    StatusMaintenance,
3101    /// Gambling addiction.
3102    GamblingAddiction,
3103    /// Substance abuse issues.
3104    SubstanceAbuse,
3105    /// Family pressure or obligations.
3106    FamilyPressure,
3107    /// Greed or desire for more.
3108    Greed,
3109}
3110
3111impl PressureType {
3112    /// Returns the risk weight (0.0-1.0) for this pressure type.
3113    pub fn risk_weight(&self) -> f64 {
3114        match self {
3115            PressureType::PersonalFinancialDifficulties => 0.80,
3116            PressureType::FinancialTargets => 0.75,
3117            PressureType::MarketExpectations => 0.70,
3118            PressureType::CovenantCompliance => 0.85,
3119            PressureType::CreditRatingMaintenance => 0.70,
3120            PressureType::AcquisitionValuation => 0.75,
3121            PressureType::JobSecurity => 0.65,
3122            PressureType::StatusMaintenance => 0.55,
3123            PressureType::GamblingAddiction => 0.90,
3124            PressureType::SubstanceAbuse => 0.85,
3125            PressureType::FamilyPressure => 0.60,
3126            PressureType::Greed => 0.70,
3127        }
3128    }
3129}
3130
3131/// Opportunity factors that enable fraud.
3132#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3133pub enum OpportunityFactor {
3134    /// Weak internal controls.
3135    WeakInternalControls,
3136    /// Lack of segregation of duties.
3137    LackOfSegregation,
3138    /// Override capability.
3139    ManagementOverride,
3140    /// Complex or unusual transactions.
3141    ComplexTransactions,
3142    /// Related party transactions.
3143    RelatedPartyTransactions,
3144    /// Poor tone at the top.
3145    PoorToneAtTop,
3146    /// Inadequate supervision.
3147    InadequateSupervision,
3148    /// Access to assets without accountability.
3149    AssetAccess,
3150    /// Inadequate record keeping.
3151    PoorRecordKeeping,
3152    /// Failure to discipline fraud perpetrators.
3153    LackOfDiscipline,
3154    /// Lack of independent checks.
3155    LackOfIndependentChecks,
3156}
3157
3158impl OpportunityFactor {
3159    /// Returns the risk weight (0.0-1.0) for this opportunity factor.
3160    pub fn risk_weight(&self) -> f64 {
3161        match self {
3162            OpportunityFactor::WeakInternalControls => 0.85,
3163            OpportunityFactor::LackOfSegregation => 0.80,
3164            OpportunityFactor::ManagementOverride => 0.90,
3165            OpportunityFactor::ComplexTransactions => 0.70,
3166            OpportunityFactor::RelatedPartyTransactions => 0.75,
3167            OpportunityFactor::PoorToneAtTop => 0.85,
3168            OpportunityFactor::InadequateSupervision => 0.75,
3169            OpportunityFactor::AssetAccess => 0.70,
3170            OpportunityFactor::PoorRecordKeeping => 0.65,
3171            OpportunityFactor::LackOfDiscipline => 0.60,
3172            OpportunityFactor::LackOfIndependentChecks => 0.75,
3173        }
3174    }
3175}
3176
3177/// Rationalizations used by fraud perpetrators.
3178#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3179pub enum Rationalization {
3180    /// "I'm just borrowing; I'll pay it back."
3181    TemporaryBorrowing,
3182    /// "Everyone does it."
3183    EveryoneDoesIt,
3184    /// "It's for the good of the company."
3185    ForTheCompanyGood,
3186    /// "I deserve this; the company owes me."
3187    Entitlement,
3188    /// "I was just following orders."
3189    FollowingOrders,
3190    /// "They won't miss it; they have plenty."
3191    TheyWontMissIt,
3192    /// "I need it more than they do."
3193    NeedItMore,
3194    /// "It's not really stealing."
3195    NotReallyStealing,
3196    /// "I'm underpaid for what I do."
3197    Underpaid,
3198    /// "It's a victimless crime."
3199    VictimlessCrime,
3200}
3201
3202impl Rationalization {
3203    /// Returns the risk weight (0.0-1.0) for this rationalization.
3204    pub fn risk_weight(&self) -> f64 {
3205        match self {
3206            // More dangerous rationalizations
3207            Rationalization::Entitlement => 0.85,
3208            Rationalization::EveryoneDoesIt => 0.80,
3209            Rationalization::NotReallyStealing => 0.80,
3210            Rationalization::TheyWontMissIt => 0.75,
3211            // Medium risk
3212            Rationalization::Underpaid => 0.70,
3213            Rationalization::ForTheCompanyGood => 0.65,
3214            Rationalization::NeedItMore => 0.65,
3215            // Lower risk (still indicates fraud)
3216            Rationalization::TemporaryBorrowing => 0.60,
3217            Rationalization::FollowingOrders => 0.55,
3218            Rationalization::VictimlessCrime => 0.60,
3219        }
3220    }
3221}
3222
3223// ============================================================================
3224// NEAR-MISS TYPES
3225// ============================================================================
3226
3227/// Type of near-miss pattern (suspicious but legitimate).
3228#[derive(Debug, Clone, Serialize, Deserialize)]
3229pub enum NearMissPattern {
3230    /// Transaction very similar to another (possible duplicate but legitimate).
3231    NearDuplicate {
3232        /// Date difference from similar transaction.
3233        date_difference_days: u32,
3234        /// Original transaction ID.
3235        similar_transaction_id: String,
3236    },
3237    /// Amount just below approval threshold (but legitimate).
3238    ThresholdProximity {
3239        /// The threshold being approached.
3240        threshold: Decimal,
3241        /// Percentage of threshold (0.0-1.0).
3242        proximity: f64,
3243    },
3244    /// Unusual but legitimate business pattern.
3245    UnusualLegitimate {
3246        /// Type of legitimate pattern.
3247        pattern_type: LegitimatePatternType,
3248        /// Business justification.
3249        justification: String,
3250    },
3251    /// Error that was caught and corrected.
3252    CorrectedError {
3253        /// Days until correction.
3254        correction_lag_days: u32,
3255        /// Correction document ID.
3256        correction_document_id: String,
3257    },
3258}
3259
3260/// Types of unusual but legitimate business patterns.
3261#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3262pub enum LegitimatePatternType {
3263    /// Year-end bonus payment.
3264    YearEndBonus,
3265    /// Contract prepayment.
3266    ContractPrepayment,
3267    /// Settlement payment.
3268    SettlementPayment,
3269    /// Insurance claim.
3270    InsuranceClaim,
3271    /// One-time vendor payment.
3272    OneTimePayment,
3273    /// Asset disposal.
3274    AssetDisposal,
3275    /// Seasonal inventory buildup.
3276    SeasonalInventory,
3277    /// Promotional spending.
3278    PromotionalSpending,
3279}
3280
3281impl LegitimatePatternType {
3282    /// Returns a description of this pattern type.
3283    pub fn description(&self) -> &'static str {
3284        match self {
3285            LegitimatePatternType::YearEndBonus => "Year-end bonus payment",
3286            LegitimatePatternType::ContractPrepayment => "Contract prepayment per terms",
3287            LegitimatePatternType::SettlementPayment => "Legal settlement payment",
3288            LegitimatePatternType::InsuranceClaim => "Insurance claim reimbursement",
3289            LegitimatePatternType::OneTimePayment => "One-time vendor payment",
3290            LegitimatePatternType::AssetDisposal => "Fixed asset disposal",
3291            LegitimatePatternType::SeasonalInventory => "Seasonal inventory buildup",
3292            LegitimatePatternType::PromotionalSpending => "Promotional campaign spending",
3293        }
3294    }
3295}
3296
3297/// What might trigger a false positive for this near-miss.
3298#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
3299pub enum FalsePositiveTrigger {
3300    /// Amount is near threshold.
3301    AmountNearThreshold,
3302    /// Timing is unusual.
3303    UnusualTiming,
3304    /// Similar to existing transaction.
3305    SimilarTransaction,
3306    /// New counterparty.
3307    NewCounterparty,
3308    /// Account combination unusual.
3309    UnusualAccountCombination,
3310    /// Volume spike.
3311    VolumeSpike,
3312    /// Round amount.
3313    RoundAmount,
3314}
3315
3316/// Label for a near-miss case.
3317#[derive(Debug, Clone, Serialize, Deserialize)]
3318pub struct NearMissLabel {
3319    /// Document ID.
3320    pub document_id: String,
3321    /// The near-miss pattern.
3322    pub pattern: NearMissPattern,
3323    /// How suspicious it appears (0.0-1.0).
3324    pub suspicion_score: f64,
3325    /// What would trigger a false positive.
3326    pub false_positive_trigger: FalsePositiveTrigger,
3327    /// Why this is actually legitimate.
3328    pub explanation: String,
3329}
3330
3331impl NearMissLabel {
3332    /// Creates a new near-miss label.
3333    pub fn new(
3334        document_id: impl Into<String>,
3335        pattern: NearMissPattern,
3336        suspicion_score: f64,
3337        trigger: FalsePositiveTrigger,
3338        explanation: impl Into<String>,
3339    ) -> Self {
3340        Self {
3341            document_id: document_id.into(),
3342            pattern,
3343            suspicion_score: suspicion_score.clamp(0.0, 1.0),
3344            false_positive_trigger: trigger,
3345            explanation: explanation.into(),
3346        }
3347    }
3348}
3349
3350/// Configuration for anomaly rates.
3351#[derive(Debug, Clone, Serialize, Deserialize)]
3352pub struct AnomalyRateConfig {
3353    /// Overall anomaly rate (0.0 - 1.0).
3354    pub total_rate: f64,
3355    /// Fraud rate as proportion of anomalies.
3356    pub fraud_rate: f64,
3357    /// Error rate as proportion of anomalies.
3358    pub error_rate: f64,
3359    /// Process issue rate as proportion of anomalies.
3360    pub process_issue_rate: f64,
3361    /// Statistical anomaly rate as proportion of anomalies.
3362    pub statistical_rate: f64,
3363    /// Relational anomaly rate as proportion of anomalies.
3364    pub relational_rate: f64,
3365}
3366
3367impl Default for AnomalyRateConfig {
3368    fn default() -> Self {
3369        Self {
3370            total_rate: 0.02,         // 2% of transactions are anomalous
3371            fraud_rate: 0.25,         // 25% of anomalies are fraud
3372            error_rate: 0.35,         // 35% of anomalies are errors
3373            process_issue_rate: 0.20, // 20% are process issues
3374            statistical_rate: 0.15,   // 15% are statistical
3375            relational_rate: 0.05,    // 5% are relational
3376        }
3377    }
3378}
3379
3380impl AnomalyRateConfig {
3381    /// Validates that rates sum to approximately 1.0.
3382    pub fn validate(&self) -> Result<(), String> {
3383        let sum = self.fraud_rate
3384            + self.error_rate
3385            + self.process_issue_rate
3386            + self.statistical_rate
3387            + self.relational_rate;
3388
3389        if (sum - 1.0).abs() > 0.01 {
3390            return Err(format!("Anomaly category rates must sum to 1.0, got {sum}"));
3391        }
3392
3393        if self.total_rate < 0.0 || self.total_rate > 1.0 {
3394            return Err(format!(
3395                "Total rate must be between 0.0 and 1.0, got {}",
3396                self.total_rate
3397            ));
3398        }
3399
3400        Ok(())
3401    }
3402}
3403
3404#[cfg(test)]
3405mod tests {
3406    use super::*;
3407    use rust_decimal_macros::dec;
3408
3409    #[test]
3410    fn test_anomaly_type_category() {
3411        let fraud = AnomalyType::Fraud(FraudType::SelfApproval);
3412        assert_eq!(fraud.category(), "Fraud");
3413        assert!(fraud.is_intentional());
3414
3415        let error = AnomalyType::Error(ErrorType::DuplicateEntry);
3416        assert_eq!(error.category(), "Error");
3417        assert!(!error.is_intentional());
3418    }
3419
3420    #[test]
3421    fn test_labeled_anomaly() {
3422        let anomaly = LabeledAnomaly::new(
3423            "ANO001".to_string(),
3424            AnomalyType::Fraud(FraudType::SelfApproval),
3425            "JE001".to_string(),
3426            "JE".to_string(),
3427            "1000".to_string(),
3428            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3429        )
3430        .with_description("User approved their own expense report")
3431        .with_related_entity("USER001");
3432
3433        assert_eq!(anomaly.severity, 3);
3434        assert!(anomaly.is_injected);
3435        assert_eq!(anomaly.related_entities.len(), 1);
3436    }
3437
3438    #[test]
3439    fn test_labeled_anomaly_with_provenance() {
3440        let anomaly = LabeledAnomaly::new(
3441            "ANO001".to_string(),
3442            AnomalyType::Fraud(FraudType::SelfApproval),
3443            "JE001".to_string(),
3444            "JE".to_string(),
3445            "1000".to_string(),
3446            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3447        )
3448        .with_run_id("run-123")
3449        .with_generation_seed(42)
3450        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 })
3451        .with_structured_strategy(InjectionStrategy::SelfApproval {
3452            user_id: "USER001".to_string(),
3453        })
3454        .with_scenario("scenario-001")
3455        .with_original_document_hash("abc123");
3456
3457        assert_eq!(anomaly.run_id, Some("run-123".to_string()));
3458        assert_eq!(anomaly.generation_seed, Some(42));
3459        assert!(anomaly.causal_reason.is_some());
3460        assert!(anomaly.structured_strategy.is_some());
3461        assert_eq!(anomaly.scenario_id, Some("scenario-001".to_string()));
3462        assert_eq!(anomaly.original_document_hash, Some("abc123".to_string()));
3463
3464        // Check that legacy injection_strategy is also set
3465        assert_eq!(anomaly.injection_strategy, Some("SelfApproval".to_string()));
3466    }
3467
3468    #[test]
3469    fn test_labeled_anomaly_derivation_chain() {
3470        let parent = LabeledAnomaly::new(
3471            "ANO001".to_string(),
3472            AnomalyType::Fraud(FraudType::DuplicatePayment),
3473            "JE001".to_string(),
3474            "JE".to_string(),
3475            "1000".to_string(),
3476            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3477        );
3478
3479        let child = LabeledAnomaly::new(
3480            "ANO002".to_string(),
3481            AnomalyType::Error(ErrorType::DuplicateEntry),
3482            "JE002".to_string(),
3483            "JE".to_string(),
3484            "1000".to_string(),
3485            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3486        )
3487        .with_parent_anomaly(&parent.anomaly_id);
3488
3489        assert_eq!(child.parent_anomaly_id, Some("ANO001".to_string()));
3490    }
3491
3492    #[test]
3493    fn test_injection_strategy_description() {
3494        let strategy = InjectionStrategy::AmountManipulation {
3495            original: dec!(1000),
3496            factor: 2.5,
3497        };
3498        assert_eq!(strategy.description(), "Amount multiplied by 2.50");
3499        assert_eq!(strategy.strategy_type(), "AmountManipulation");
3500
3501        let strategy = InjectionStrategy::ThresholdAvoidance {
3502            threshold: dec!(10000),
3503            adjusted_amount: dec!(9999),
3504        };
3505        assert_eq!(
3506            strategy.description(),
3507            "Amount adjusted to avoid 10000 threshold"
3508        );
3509
3510        let strategy = InjectionStrategy::DateShift {
3511            days_shifted: -5,
3512            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3513        };
3514        assert_eq!(strategy.description(), "Date backdated by 5 days");
3515
3516        let strategy = InjectionStrategy::DateShift {
3517            days_shifted: 3,
3518            original_date: NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3519        };
3520        assert_eq!(strategy.description(), "Date forward-dated by 3 days");
3521    }
3522
3523    #[test]
3524    fn test_causal_reason_variants() {
3525        let reason = AnomalyCausalReason::RandomRate { base_rate: 0.02 };
3526        if let AnomalyCausalReason::RandomRate { base_rate } = reason {
3527            assert!((base_rate - 0.02).abs() < 0.001);
3528        }
3529
3530        let reason = AnomalyCausalReason::TemporalPattern {
3531            pattern_name: "year_end_spike".to_string(),
3532        };
3533        if let AnomalyCausalReason::TemporalPattern { pattern_name } = reason {
3534            assert_eq!(pattern_name, "year_end_spike");
3535        }
3536
3537        let reason = AnomalyCausalReason::ScenarioStep {
3538            scenario_type: "kickback".to_string(),
3539            step_number: 3,
3540        };
3541        if let AnomalyCausalReason::ScenarioStep {
3542            scenario_type,
3543            step_number,
3544        } = reason
3545        {
3546            assert_eq!(scenario_type, "kickback");
3547            assert_eq!(step_number, 3);
3548        }
3549    }
3550
3551    #[test]
3552    fn test_feature_vector_length() {
3553        let anomaly = LabeledAnomaly::new(
3554            "ANO001".to_string(),
3555            AnomalyType::Fraud(FraudType::SelfApproval),
3556            "JE001".to_string(),
3557            "JE".to_string(),
3558            "1000".to_string(),
3559            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3560        );
3561
3562        let features = anomaly.to_features();
3563        assert_eq!(features.len(), LabeledAnomaly::feature_count());
3564        assert_eq!(features.len(), LabeledAnomaly::feature_names().len());
3565    }
3566
3567    #[test]
3568    fn test_feature_vector_with_provenance() {
3569        let anomaly = LabeledAnomaly::new(
3570            "ANO001".to_string(),
3571            AnomalyType::Fraud(FraudType::SelfApproval),
3572            "JE001".to_string(),
3573            "JE".to_string(),
3574            "1000".to_string(),
3575            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3576        )
3577        .with_scenario("scenario-001")
3578        .with_parent_anomaly("ANO000");
3579
3580        let features = anomaly.to_features();
3581
3582        // Last two features should be 1.0 (has scenario, has parent)
3583        assert_eq!(features[features.len() - 2], 1.0); // is_scenario_part
3584        assert_eq!(features[features.len() - 1], 1.0); // is_derived
3585    }
3586
3587    #[test]
3588    fn test_anomaly_summary() {
3589        let anomalies = vec![
3590            LabeledAnomaly::new(
3591                "ANO001".to_string(),
3592                AnomalyType::Fraud(FraudType::SelfApproval),
3593                "JE001".to_string(),
3594                "JE".to_string(),
3595                "1000".to_string(),
3596                NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3597            ),
3598            LabeledAnomaly::new(
3599                "ANO002".to_string(),
3600                AnomalyType::Error(ErrorType::DuplicateEntry),
3601                "JE002".to_string(),
3602                "JE".to_string(),
3603                "1000".to_string(),
3604                NaiveDate::from_ymd_opt(2024, 1, 16).unwrap(),
3605            ),
3606        ];
3607
3608        let summary = AnomalySummary::from_anomalies(&anomalies);
3609
3610        assert_eq!(summary.total_count, 2);
3611        assert_eq!(summary.by_category.get("Fraud"), Some(&1));
3612        assert_eq!(summary.by_category.get("Error"), Some(&1));
3613    }
3614
3615    #[test]
3616    fn test_rate_config_validation() {
3617        let config = AnomalyRateConfig::default();
3618        assert!(config.validate().is_ok());
3619
3620        let bad_config = AnomalyRateConfig {
3621            fraud_rate: 0.5,
3622            error_rate: 0.5,
3623            process_issue_rate: 0.5, // Sum > 1.0
3624            ..Default::default()
3625        };
3626        assert!(bad_config.validate().is_err());
3627    }
3628
3629    #[test]
3630    fn test_injection_strategy_serialization() {
3631        let strategy = InjectionStrategy::SoDViolation {
3632            duty1: "CreatePO".to_string(),
3633            duty2: "ApprovePO".to_string(),
3634            violating_user: "USER001".to_string(),
3635        };
3636
3637        let json = serde_json::to_string(&strategy).unwrap();
3638        let deserialized: InjectionStrategy = serde_json::from_str(&json).unwrap();
3639
3640        assert_eq!(strategy, deserialized);
3641    }
3642
3643    #[test]
3644    fn test_labeled_anomaly_serialization_with_provenance() {
3645        let anomaly = LabeledAnomaly::new(
3646            "ANO001".to_string(),
3647            AnomalyType::Fraud(FraudType::SelfApproval),
3648            "JE001".to_string(),
3649            "JE".to_string(),
3650            "1000".to_string(),
3651            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3652        )
3653        .with_run_id("run-123")
3654        .with_generation_seed(42)
3655        .with_causal_reason(AnomalyCausalReason::RandomRate { base_rate: 0.02 });
3656
3657        let json = serde_json::to_string(&anomaly).unwrap();
3658        let deserialized: LabeledAnomaly = serde_json::from_str(&json).unwrap();
3659
3660        assert_eq!(anomaly.run_id, deserialized.run_id);
3661        assert_eq!(anomaly.generation_seed, deserialized.generation_seed);
3662    }
3663
3664    // ========================================
3665    // FR-003 ENHANCED TAXONOMY TESTS
3666    // ========================================
3667
3668    #[test]
3669    fn test_anomaly_category_from_anomaly_type() {
3670        // Fraud mappings
3671        let fraud_vendor = AnomalyType::Fraud(FraudType::FictitiousVendor);
3672        assert_eq!(
3673            AnomalyCategory::from_anomaly_type(&fraud_vendor),
3674            AnomalyCategory::FictitiousVendor
3675        );
3676
3677        let fraud_kickback = AnomalyType::Fraud(FraudType::KickbackScheme);
3678        assert_eq!(
3679            AnomalyCategory::from_anomaly_type(&fraud_kickback),
3680            AnomalyCategory::VendorKickback
3681        );
3682
3683        let fraud_structured = AnomalyType::Fraud(FraudType::SplitTransaction);
3684        assert_eq!(
3685            AnomalyCategory::from_anomaly_type(&fraud_structured),
3686            AnomalyCategory::StructuredTransaction
3687        );
3688
3689        // Error mappings
3690        let error_duplicate = AnomalyType::Error(ErrorType::DuplicateEntry);
3691        assert_eq!(
3692            AnomalyCategory::from_anomaly_type(&error_duplicate),
3693            AnomalyCategory::DuplicatePayment
3694        );
3695
3696        // Process issue mappings
3697        let process_skip = AnomalyType::ProcessIssue(ProcessIssueType::SkippedApproval);
3698        assert_eq!(
3699            AnomalyCategory::from_anomaly_type(&process_skip),
3700            AnomalyCategory::MissingApproval
3701        );
3702
3703        // Relational mappings
3704        let relational_circular =
3705            AnomalyType::Relational(RelationalAnomalyType::CircularTransaction);
3706        assert_eq!(
3707            AnomalyCategory::from_anomaly_type(&relational_circular),
3708            AnomalyCategory::CircularFlow
3709        );
3710    }
3711
3712    #[test]
3713    fn test_anomaly_category_ordinal() {
3714        assert_eq!(AnomalyCategory::FictitiousVendor.ordinal(), 0);
3715        assert_eq!(AnomalyCategory::VendorKickback.ordinal(), 1);
3716        assert_eq!(AnomalyCategory::Custom("test".to_string()).ordinal(), 14);
3717    }
3718
3719    #[test]
3720    fn test_contributing_factor() {
3721        let factor = ContributingFactor::new(
3722            FactorType::AmountDeviation,
3723            15000.0,
3724            10000.0,
3725            true,
3726            0.5,
3727            "Amount exceeds threshold",
3728        );
3729
3730        assert_eq!(factor.factor_type, FactorType::AmountDeviation);
3731        assert_eq!(factor.value, 15000.0);
3732        assert_eq!(factor.threshold, 10000.0);
3733        assert!(factor.direction_greater);
3734
3735        // Contribution: (15000 - 10000) / 10000 * 0.5 = 0.25
3736        let contribution = factor.contribution();
3737        assert!((contribution - 0.25).abs() < 0.01);
3738    }
3739
3740    #[test]
3741    fn test_contributing_factor_with_evidence() {
3742        let mut data = HashMap::new();
3743        data.insert("expected".to_string(), "10000".to_string());
3744        data.insert("actual".to_string(), "15000".to_string());
3745
3746        let factor = ContributingFactor::new(
3747            FactorType::AmountDeviation,
3748            15000.0,
3749            10000.0,
3750            true,
3751            0.5,
3752            "Amount deviation detected",
3753        )
3754        .with_evidence("transaction_history", data);
3755
3756        assert!(factor.evidence.is_some());
3757        let evidence = factor.evidence.unwrap();
3758        assert_eq!(evidence.source, "transaction_history");
3759        assert_eq!(evidence.data.get("expected"), Some(&"10000".to_string()));
3760    }
3761
3762    #[test]
3763    fn test_enhanced_anomaly_label() {
3764        let base = LabeledAnomaly::new(
3765            "ANO001".to_string(),
3766            AnomalyType::Fraud(FraudType::DuplicatePayment),
3767            "JE001".to_string(),
3768            "JE".to_string(),
3769            "1000".to_string(),
3770            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3771        );
3772
3773        let enhanced = EnhancedAnomalyLabel::from_base(base)
3774            .with_confidence(0.85)
3775            .with_severity(0.7)
3776            .with_factor(ContributingFactor::new(
3777                FactorType::DuplicateIndicator,
3778                1.0,
3779                0.5,
3780                true,
3781                0.4,
3782                "Duplicate payment detected",
3783            ))
3784            .with_secondary_category(AnomalyCategory::StructuredTransaction);
3785
3786        assert_eq!(enhanced.category, AnomalyCategory::DuplicatePayment);
3787        assert_eq!(enhanced.enhanced_confidence, 0.85);
3788        assert_eq!(enhanced.enhanced_severity, 0.7);
3789        assert_eq!(enhanced.contributing_factors.len(), 1);
3790        assert_eq!(enhanced.secondary_categories.len(), 1);
3791    }
3792
3793    #[test]
3794    fn test_enhanced_anomaly_label_features() {
3795        let base = LabeledAnomaly::new(
3796            "ANO001".to_string(),
3797            AnomalyType::Fraud(FraudType::SelfApproval),
3798            "JE001".to_string(),
3799            "JE".to_string(),
3800            "1000".to_string(),
3801            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3802        );
3803
3804        let enhanced = EnhancedAnomalyLabel::from_base(base)
3805            .with_confidence(0.9)
3806            .with_severity(0.8)
3807            .with_factor(ContributingFactor::new(
3808                FactorType::ControlBypass,
3809                1.0,
3810                0.0,
3811                true,
3812                0.5,
3813                "Control bypass detected",
3814            ));
3815
3816        let features = enhanced.to_features();
3817
3818        // Should have 25 features (15 base + 10 enhanced)
3819        assert_eq!(features.len(), EnhancedAnomalyLabel::feature_count());
3820        assert_eq!(features.len(), 25);
3821
3822        // Check enhanced confidence is in features
3823        assert_eq!(features[15], 0.9); // enhanced_confidence
3824
3825        // Check has_control_bypass flag
3826        assert_eq!(features[21], 1.0); // has_control_bypass
3827    }
3828
3829    #[test]
3830    fn test_enhanced_anomaly_label_feature_names() {
3831        let names = EnhancedAnomalyLabel::feature_names();
3832        assert_eq!(names.len(), 25);
3833        assert!(names.contains(&"enhanced_confidence"));
3834        assert!(names.contains(&"enhanced_severity"));
3835        assert!(names.contains(&"has_control_bypass"));
3836    }
3837
3838    #[test]
3839    fn test_factor_type_names() {
3840        assert_eq!(FactorType::AmountDeviation.name(), "amount_deviation");
3841        assert_eq!(FactorType::ThresholdProximity.name(), "threshold_proximity");
3842        assert_eq!(FactorType::ControlBypass.name(), "control_bypass");
3843    }
3844
3845    #[test]
3846    fn test_anomaly_category_serialization() {
3847        let category = AnomalyCategory::CircularFlow;
3848        let json = serde_json::to_string(&category).unwrap();
3849        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3850        assert_eq!(category, deserialized);
3851
3852        let custom = AnomalyCategory::Custom("custom_type".to_string());
3853        let json = serde_json::to_string(&custom).unwrap();
3854        let deserialized: AnomalyCategory = serde_json::from_str(&json).unwrap();
3855        assert_eq!(custom, deserialized);
3856    }
3857
3858    #[test]
3859    fn test_enhanced_label_secondary_category_dedup() {
3860        let base = LabeledAnomaly::new(
3861            "ANO001".to_string(),
3862            AnomalyType::Fraud(FraudType::DuplicatePayment),
3863            "JE001".to_string(),
3864            "JE".to_string(),
3865            "1000".to_string(),
3866            NaiveDate::from_ymd_opt(2024, 1, 15).unwrap(),
3867        );
3868
3869        let enhanced = EnhancedAnomalyLabel::from_base(base)
3870            // Try to add the primary category as secondary (should be ignored)
3871            .with_secondary_category(AnomalyCategory::DuplicatePayment)
3872            // Add a valid secondary
3873            .with_secondary_category(AnomalyCategory::TimingAnomaly)
3874            // Try to add duplicate secondary (should be ignored)
3875            .with_secondary_category(AnomalyCategory::TimingAnomaly);
3876
3877        // Should only have 1 secondary category (TimingAnomaly)
3878        assert_eq!(enhanced.secondary_categories.len(), 1);
3879        assert_eq!(
3880            enhanced.secondary_categories[0],
3881            AnomalyCategory::TimingAnomaly
3882        );
3883    }
3884
3885    // ==========================================================================
3886    // Accounting Standards Fraud Type Tests
3887    // ==========================================================================
3888
3889    #[test]
3890    fn test_revenue_recognition_fraud_types() {
3891        // Test ASC 606/IFRS 15 related fraud types
3892        let fraud_types = [
3893            FraudType::ImproperRevenueRecognition,
3894            FraudType::ImproperPoAllocation,
3895            FraudType::VariableConsiderationManipulation,
3896            FraudType::ContractModificationMisstatement,
3897        ];
3898
3899        for fraud_type in fraud_types {
3900            let anomaly_type = AnomalyType::Fraud(fraud_type);
3901            assert_eq!(anomaly_type.category(), "Fraud");
3902            assert!(anomaly_type.is_intentional());
3903            assert!(anomaly_type.severity() >= 3);
3904        }
3905    }
3906
3907    #[test]
3908    fn test_lease_accounting_fraud_types() {
3909        // Test ASC 842/IFRS 16 related fraud types
3910        let fraud_types = [
3911            FraudType::LeaseClassificationManipulation,
3912            FraudType::OffBalanceSheetLease,
3913            FraudType::LeaseLiabilityUnderstatement,
3914            FraudType::RouAssetMisstatement,
3915        ];
3916
3917        for fraud_type in fraud_types {
3918            let anomaly_type = AnomalyType::Fraud(fraud_type);
3919            assert_eq!(anomaly_type.category(), "Fraud");
3920            assert!(anomaly_type.is_intentional());
3921            assert!(anomaly_type.severity() >= 3);
3922        }
3923
3924        // Off-balance sheet lease fraud should be high severity
3925        assert_eq!(FraudType::OffBalanceSheetLease.severity(), 5);
3926    }
3927
3928    #[test]
3929    fn test_fair_value_fraud_types() {
3930        // Test ASC 820/IFRS 13 related fraud types
3931        let fraud_types = [
3932            FraudType::FairValueHierarchyManipulation,
3933            FraudType::Level3InputManipulation,
3934            FraudType::ValuationTechniqueManipulation,
3935        ];
3936
3937        for fraud_type in fraud_types {
3938            let anomaly_type = AnomalyType::Fraud(fraud_type);
3939            assert_eq!(anomaly_type.category(), "Fraud");
3940            assert!(anomaly_type.is_intentional());
3941            assert!(anomaly_type.severity() >= 4);
3942        }
3943
3944        // Level 3 manipulation is highest severity (unobservable inputs)
3945        assert_eq!(FraudType::Level3InputManipulation.severity(), 5);
3946    }
3947
3948    #[test]
3949    fn test_impairment_fraud_types() {
3950        // Test ASC 360/IAS 36 related fraud types
3951        let fraud_types = [
3952            FraudType::DelayedImpairment,
3953            FraudType::ImpairmentTestAvoidance,
3954            FraudType::CashFlowProjectionManipulation,
3955            FraudType::ImproperImpairmentReversal,
3956        ];
3957
3958        for fraud_type in fraud_types {
3959            let anomaly_type = AnomalyType::Fraud(fraud_type);
3960            assert_eq!(anomaly_type.category(), "Fraud");
3961            assert!(anomaly_type.is_intentional());
3962            assert!(anomaly_type.severity() >= 3);
3963        }
3964
3965        // Cash flow manipulation has highest severity
3966        assert_eq!(FraudType::CashFlowProjectionManipulation.severity(), 5);
3967    }
3968
3969    // ==========================================================================
3970    // Accounting Standards Error Type Tests
3971    // ==========================================================================
3972
3973    #[test]
3974    fn test_standards_error_types() {
3975        // Test non-fraudulent accounting standards errors
3976        let error_types = [
3977            ErrorType::RevenueTimingError,
3978            ErrorType::PoAllocationError,
3979            ErrorType::LeaseClassificationError,
3980            ErrorType::LeaseCalculationError,
3981            ErrorType::FairValueError,
3982            ErrorType::ImpairmentCalculationError,
3983            ErrorType::DiscountRateError,
3984            ErrorType::FrameworkApplicationError,
3985        ];
3986
3987        for error_type in error_types {
3988            let anomaly_type = AnomalyType::Error(error_type);
3989            assert_eq!(anomaly_type.category(), "Error");
3990            assert!(!anomaly_type.is_intentional());
3991            assert!(anomaly_type.severity() >= 3);
3992        }
3993    }
3994
3995    #[test]
3996    fn test_framework_application_error() {
3997        // Test IFRS vs GAAP confusion errors
3998        let error_type = ErrorType::FrameworkApplicationError;
3999        assert_eq!(error_type.severity(), 4);
4000
4001        let anomaly = LabeledAnomaly::new(
4002            "ERR001".to_string(),
4003            AnomalyType::Error(error_type),
4004            "JE100".to_string(),
4005            "JE".to_string(),
4006            "1000".to_string(),
4007            NaiveDate::from_ymd_opt(2024, 6, 30).unwrap(),
4008        )
4009        .with_description("LIFO inventory method used under IFRS (not permitted)")
4010        .with_metadata("framework", "IFRS")
4011        .with_metadata("standard_violated", "IAS 2");
4012
4013        assert_eq!(anomaly.anomaly_type.category(), "Error");
4014        assert_eq!(
4015            anomaly.metadata.get("standard_violated"),
4016            Some(&"IAS 2".to_string())
4017        );
4018    }
4019
4020    #[test]
4021    fn test_standards_anomaly_serialization() {
4022        // Test that new fraud types serialize/deserialize correctly
4023        let fraud_types = [
4024            FraudType::ImproperRevenueRecognition,
4025            FraudType::LeaseClassificationManipulation,
4026            FraudType::FairValueHierarchyManipulation,
4027            FraudType::DelayedImpairment,
4028        ];
4029
4030        for fraud_type in fraud_types {
4031            let json = serde_json::to_string(&fraud_type).expect("Failed to serialize");
4032            let deserialized: FraudType =
4033                serde_json::from_str(&json).expect("Failed to deserialize");
4034            assert_eq!(fraud_type, deserialized);
4035        }
4036
4037        // Test error types
4038        let error_types = [
4039            ErrorType::RevenueTimingError,
4040            ErrorType::LeaseCalculationError,
4041            ErrorType::FairValueError,
4042            ErrorType::FrameworkApplicationError,
4043        ];
4044
4045        for error_type in error_types {
4046            let json = serde_json::to_string(&error_type).expect("Failed to serialize");
4047            let deserialized: ErrorType =
4048                serde_json::from_str(&json).expect("Failed to deserialize");
4049            assert_eq!(error_type, deserialized);
4050        }
4051    }
4052
4053    #[test]
4054    fn test_standards_labeled_anomaly() {
4055        // Test creating a labeled anomaly for a standards violation
4056        let anomaly = LabeledAnomaly::new(
4057            "STD001".to_string(),
4058            AnomalyType::Fraud(FraudType::ImproperRevenueRecognition),
4059            "CONTRACT-2024-001".to_string(),
4060            "Revenue".to_string(),
4061            "1000".to_string(),
4062            NaiveDate::from_ymd_opt(2024, 12, 31).unwrap(),
4063        )
4064        .with_description("Revenue recognized before performance obligation satisfied (ASC 606)")
4065        .with_monetary_impact(dec!(500000))
4066        .with_metadata("standard", "ASC 606")
4067        .with_metadata("paragraph", "606-10-25-1")
4068        .with_metadata("contract_id", "C-2024-001")
4069        .with_related_entity("CONTRACT-2024-001")
4070        .with_related_entity("CUSTOMER-500");
4071
4072        assert_eq!(anomaly.severity, 5); // ImproperRevenueRecognition has severity 5
4073        assert!(anomaly.is_injected);
4074        assert_eq!(anomaly.monetary_impact, Some(dec!(500000)));
4075        assert_eq!(anomaly.related_entities.len(), 2);
4076        assert_eq!(
4077            anomaly.metadata.get("standard"),
4078            Some(&"ASC 606".to_string())
4079        );
4080    }
4081
4082    // ==========================================================================
4083    // Multi-Dimensional Labeling Tests
4084    // ==========================================================================
4085
4086    #[test]
4087    fn test_severity_level() {
4088        assert_eq!(SeverityLevel::Low.numeric(), 1);
4089        assert_eq!(SeverityLevel::Critical.numeric(), 4);
4090
4091        assert_eq!(SeverityLevel::from_numeric(1), SeverityLevel::Low);
4092        assert_eq!(SeverityLevel::from_numeric(4), SeverityLevel::Critical);
4093
4094        assert_eq!(SeverityLevel::from_score(0.1), SeverityLevel::Low);
4095        assert_eq!(SeverityLevel::from_score(0.9), SeverityLevel::Critical);
4096
4097        assert!((SeverityLevel::Medium.to_score() - 0.375).abs() < 0.01);
4098    }
4099
4100    #[test]
4101    fn test_anomaly_severity() {
4102        let severity =
4103            AnomalySeverity::new(SeverityLevel::High, dec!(50000)).with_materiality(dec!(10000));
4104
4105        assert_eq!(severity.level, SeverityLevel::High);
4106        assert!(severity.is_material);
4107        assert_eq!(severity.materiality_threshold, Some(dec!(10000)));
4108
4109        // Not material
4110        let low_severity =
4111            AnomalySeverity::new(SeverityLevel::Low, dec!(5000)).with_materiality(dec!(10000));
4112        assert!(!low_severity.is_material);
4113    }
4114
4115    #[test]
4116    fn test_detection_difficulty() {
4117        assert!(
4118            (AnomalyDetectionDifficulty::Trivial.expected_detection_rate() - 0.99).abs() < 0.01
4119        );
4120        assert!((AnomalyDetectionDifficulty::Expert.expected_detection_rate() - 0.15).abs() < 0.01);
4121
4122        assert_eq!(
4123            AnomalyDetectionDifficulty::from_score(0.05),
4124            AnomalyDetectionDifficulty::Trivial
4125        );
4126        assert_eq!(
4127            AnomalyDetectionDifficulty::from_score(0.90),
4128            AnomalyDetectionDifficulty::Expert
4129        );
4130
4131        assert_eq!(AnomalyDetectionDifficulty::Moderate.name(), "moderate");
4132    }
4133
4134    #[test]
4135    fn test_ground_truth_certainty() {
4136        assert_eq!(GroundTruthCertainty::Definite.certainty_score(), 1.0);
4137        assert_eq!(GroundTruthCertainty::Probable.certainty_score(), 0.8);
4138        assert_eq!(GroundTruthCertainty::Possible.certainty_score(), 0.5);
4139    }
4140
4141    #[test]
4142    fn test_detection_method() {
4143        assert_eq!(DetectionMethod::RuleBased.name(), "rule_based");
4144        assert_eq!(DetectionMethod::MachineLearning.name(), "machine_learning");
4145    }
4146
4147    #[test]
4148    fn test_extended_anomaly_label() {
4149        let base = LabeledAnomaly::new(
4150            "ANO001".to_string(),
4151            AnomalyType::Fraud(FraudType::FictitiousVendor),
4152            "JE001".to_string(),
4153            "JE".to_string(),
4154            "1000".to_string(),
4155            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4156        )
4157        .with_monetary_impact(dec!(100000));
4158
4159        let extended = ExtendedAnomalyLabel::from_base(base)
4160            .with_severity(AnomalySeverity::new(SeverityLevel::Critical, dec!(100000)))
4161            .with_difficulty(AnomalyDetectionDifficulty::Hard)
4162            .with_method(DetectionMethod::GraphBased)
4163            .with_method(DetectionMethod::ForensicAudit)
4164            .with_indicator("New vendor with no history")
4165            .with_indicator("Large first transaction")
4166            .with_certainty(GroundTruthCertainty::Definite)
4167            .with_entity("V001")
4168            .with_secondary_category(AnomalyCategory::BehavioralAnomaly)
4169            .with_scheme("SCHEME001", 2);
4170
4171        assert_eq!(extended.severity.level, SeverityLevel::Critical);
4172        assert_eq!(
4173            extended.detection_difficulty,
4174            AnomalyDetectionDifficulty::Hard
4175        );
4176        // from_base adds RuleBased, then we add 2 more (GraphBased, ForensicAudit)
4177        assert_eq!(extended.recommended_methods.len(), 3);
4178        assert_eq!(extended.key_indicators.len(), 2);
4179        assert_eq!(extended.scheme_id, Some("SCHEME001".to_string()));
4180        assert_eq!(extended.scheme_stage, Some(2));
4181    }
4182
4183    #[test]
4184    fn test_extended_anomaly_label_features() {
4185        let base = LabeledAnomaly::new(
4186            "ANO001".to_string(),
4187            AnomalyType::Fraud(FraudType::SelfApproval),
4188            "JE001".to_string(),
4189            "JE".to_string(),
4190            "1000".to_string(),
4191            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4192        );
4193
4194        let extended =
4195            ExtendedAnomalyLabel::from_base(base).with_difficulty(AnomalyDetectionDifficulty::Hard);
4196
4197        let features = extended.to_features();
4198        assert_eq!(features.len(), ExtendedAnomalyLabel::feature_count());
4199        assert_eq!(features.len(), 30);
4200
4201        // Check difficulty score is in features
4202        let difficulty_idx = 18; // Position of difficulty_score
4203        assert!((features[difficulty_idx] - 0.75).abs() < 0.01);
4204    }
4205
4206    #[test]
4207    fn test_extended_label_near_miss() {
4208        let base = LabeledAnomaly::new(
4209            "ANO001".to_string(),
4210            AnomalyType::Statistical(StatisticalAnomalyType::UnusuallyHighAmount),
4211            "JE001".to_string(),
4212            "JE".to_string(),
4213            "1000".to_string(),
4214            NaiveDate::from_ymd_opt(2024, 6, 15).unwrap(),
4215        );
4216
4217        let extended = ExtendedAnomalyLabel::from_base(base)
4218            .as_near_miss("Year-end bonus payment, legitimately high");
4219
4220        assert!(extended.is_near_miss);
4221        assert!(extended.near_miss_explanation.is_some());
4222    }
4223
4224    #[test]
4225    fn test_scheme_type() {
4226        assert_eq!(
4227            SchemeType::GradualEmbezzlement.name(),
4228            "gradual_embezzlement"
4229        );
4230        assert_eq!(SchemeType::GradualEmbezzlement.typical_stages(), 4);
4231        assert_eq!(SchemeType::VendorKickback.typical_stages(), 4);
4232    }
4233
4234    #[test]
4235    fn test_concealment_technique() {
4236        assert!(ConcealmentTechnique::Collusion.difficulty_bonus() > 0.0);
4237        assert!(
4238            ConcealmentTechnique::Collusion.difficulty_bonus()
4239                > ConcealmentTechnique::TimingExploitation.difficulty_bonus()
4240        );
4241    }
4242
4243    #[test]
4244    fn test_near_miss_label() {
4245        let near_miss = NearMissLabel::new(
4246            "JE001",
4247            NearMissPattern::ThresholdProximity {
4248                threshold: dec!(10000),
4249                proximity: 0.95,
4250            },
4251            0.7,
4252            FalsePositiveTrigger::AmountNearThreshold,
4253            "Transaction is 95% of threshold but business justified",
4254        );
4255
4256        assert_eq!(near_miss.document_id, "JE001");
4257        assert_eq!(near_miss.suspicion_score, 0.7);
4258        assert_eq!(
4259            near_miss.false_positive_trigger,
4260            FalsePositiveTrigger::AmountNearThreshold
4261        );
4262    }
4263
4264    #[test]
4265    fn test_legitimate_pattern_type() {
4266        assert_eq!(
4267            LegitimatePatternType::YearEndBonus.description(),
4268            "Year-end bonus payment"
4269        );
4270        assert_eq!(
4271            LegitimatePatternType::InsuranceClaim.description(),
4272            "Insurance claim reimbursement"
4273        );
4274    }
4275
4276    #[test]
4277    fn test_severity_detection_difficulty_serialization() {
4278        let severity = AnomalySeverity::new(SeverityLevel::High, dec!(50000));
4279        let json = serde_json::to_string(&severity).expect("Failed to serialize");
4280        let deserialized: AnomalySeverity =
4281            serde_json::from_str(&json).expect("Failed to deserialize");
4282        assert_eq!(severity.level, deserialized.level);
4283
4284        let difficulty = AnomalyDetectionDifficulty::Hard;
4285        let json = serde_json::to_string(&difficulty).expect("Failed to serialize");
4286        let deserialized: AnomalyDetectionDifficulty =
4287            serde_json::from_str(&json).expect("Failed to deserialize");
4288        assert_eq!(difficulty, deserialized);
4289    }
4290
4291    // ========================================
4292    // ACFE Taxonomy Tests
4293    // ========================================
4294
4295    #[test]
4296    fn test_acfe_fraud_category() {
4297        let asset = AcfeFraudCategory::AssetMisappropriation;
4298        assert_eq!(asset.name(), "asset_misappropriation");
4299        assert!((asset.typical_occurrence_rate() - 0.86).abs() < 0.01);
4300        assert_eq!(asset.typical_median_loss(), Decimal::new(100_000, 0));
4301        assert_eq!(asset.typical_detection_months(), 12);
4302
4303        let corruption = AcfeFraudCategory::Corruption;
4304        assert_eq!(corruption.name(), "corruption");
4305        assert!((corruption.typical_occurrence_rate() - 0.33).abs() < 0.01);
4306
4307        let fs_fraud = AcfeFraudCategory::FinancialStatementFraud;
4308        assert_eq!(fs_fraud.typical_median_loss(), Decimal::new(954_000, 0));
4309        assert_eq!(fs_fraud.typical_detection_months(), 24);
4310    }
4311
4312    #[test]
4313    fn test_cash_fraud_scheme() {
4314        let shell = CashFraudScheme::ShellCompany;
4315        assert_eq!(shell.category(), AcfeFraudCategory::AssetMisappropriation);
4316        assert_eq!(shell.subcategory(), "billing_schemes");
4317        assert_eq!(shell.severity(), 5);
4318        assert_eq!(
4319            shell.detection_difficulty(),
4320            AnomalyDetectionDifficulty::Hard
4321        );
4322
4323        let ghost = CashFraudScheme::GhostEmployee;
4324        assert_eq!(ghost.subcategory(), "payroll_schemes");
4325        assert_eq!(ghost.severity(), 5);
4326
4327        // Test all variants exist
4328        assert_eq!(CashFraudScheme::all_variants().len(), 20);
4329    }
4330
4331    #[test]
4332    fn test_asset_fraud_scheme() {
4333        let ip_theft = AssetFraudScheme::IntellectualPropertyTheft;
4334        assert_eq!(
4335            ip_theft.category(),
4336            AcfeFraudCategory::AssetMisappropriation
4337        );
4338        assert_eq!(ip_theft.subcategory(), "other_assets");
4339        assert_eq!(ip_theft.severity(), 5);
4340
4341        let inv_theft = AssetFraudScheme::InventoryTheft;
4342        assert_eq!(inv_theft.subcategory(), "inventory");
4343        assert_eq!(inv_theft.severity(), 4);
4344    }
4345
4346    #[test]
4347    fn test_corruption_scheme() {
4348        let kickback = CorruptionScheme::InvoiceKickback;
4349        assert_eq!(kickback.category(), AcfeFraudCategory::Corruption);
4350        assert_eq!(kickback.subcategory(), "bribery");
4351        assert_eq!(kickback.severity(), 5);
4352        assert_eq!(
4353            kickback.detection_difficulty(),
4354            AnomalyDetectionDifficulty::Expert
4355        );
4356
4357        let bid_rigging = CorruptionScheme::BidRigging;
4358        assert_eq!(bid_rigging.subcategory(), "bribery");
4359        assert_eq!(
4360            bid_rigging.detection_difficulty(),
4361            AnomalyDetectionDifficulty::Hard
4362        );
4363
4364        let purchasing = CorruptionScheme::PurchasingConflict;
4365        assert_eq!(purchasing.subcategory(), "conflicts_of_interest");
4366
4367        // Test all variants exist
4368        assert_eq!(CorruptionScheme::all_variants().len(), 10);
4369    }
4370
4371    #[test]
4372    fn test_financial_statement_scheme() {
4373        let fictitious = FinancialStatementScheme::FictitiousRevenues;
4374        assert_eq!(
4375            fictitious.category(),
4376            AcfeFraudCategory::FinancialStatementFraud
4377        );
4378        assert_eq!(fictitious.subcategory(), "overstatement");
4379        assert_eq!(fictitious.severity(), 5);
4380        assert_eq!(
4381            fictitious.detection_difficulty(),
4382            AnomalyDetectionDifficulty::Expert
4383        );
4384
4385        let understated = FinancialStatementScheme::UnderstatedRevenues;
4386        assert_eq!(understated.subcategory(), "understatement");
4387
4388        // Test all variants exist
4389        assert_eq!(FinancialStatementScheme::all_variants().len(), 13);
4390    }
4391
4392    #[test]
4393    fn test_acfe_scheme_unified() {
4394        let cash_scheme = AcfeScheme::Cash(CashFraudScheme::ShellCompany);
4395        assert_eq!(
4396            cash_scheme.category(),
4397            AcfeFraudCategory::AssetMisappropriation
4398        );
4399        assert_eq!(cash_scheme.severity(), 5);
4400
4401        let corruption_scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4402        assert_eq!(corruption_scheme.category(), AcfeFraudCategory::Corruption);
4403
4404        let fs_scheme = AcfeScheme::FinancialStatement(FinancialStatementScheme::PrematureRevenue);
4405        assert_eq!(
4406            fs_scheme.category(),
4407            AcfeFraudCategory::FinancialStatementFraud
4408        );
4409    }
4410
4411    #[test]
4412    fn test_acfe_detection_method() {
4413        let tip = AcfeDetectionMethod::Tip;
4414        assert!((tip.typical_detection_rate() - 0.42).abs() < 0.01);
4415
4416        let internal_audit = AcfeDetectionMethod::InternalAudit;
4417        assert!((internal_audit.typical_detection_rate() - 0.16).abs() < 0.01);
4418
4419        let external_audit = AcfeDetectionMethod::ExternalAudit;
4420        assert!((external_audit.typical_detection_rate() - 0.04).abs() < 0.01);
4421
4422        // Test all variants exist
4423        assert_eq!(AcfeDetectionMethod::all_variants().len(), 12);
4424    }
4425
4426    #[test]
4427    fn test_perpetrator_department() {
4428        let accounting = PerpetratorDepartment::Accounting;
4429        assert!((accounting.typical_occurrence_rate() - 0.21).abs() < 0.01);
4430        assert_eq!(accounting.typical_median_loss(), Decimal::new(130_000, 0));
4431
4432        let executive = PerpetratorDepartment::Executive;
4433        assert_eq!(executive.typical_median_loss(), Decimal::new(600_000, 0));
4434    }
4435
4436    #[test]
4437    fn test_perpetrator_level() {
4438        let employee = PerpetratorLevel::Employee;
4439        assert!((employee.typical_occurrence_rate() - 0.42).abs() < 0.01);
4440        assert_eq!(employee.typical_median_loss(), Decimal::new(50_000, 0));
4441
4442        let exec = PerpetratorLevel::OwnerExecutive;
4443        assert_eq!(exec.typical_median_loss(), Decimal::new(337_000, 0));
4444    }
4445
4446    #[test]
4447    fn test_acfe_calibration() {
4448        let cal = AcfeCalibration::default();
4449        assert_eq!(cal.median_loss, Decimal::new(117_000, 0));
4450        assert_eq!(cal.median_duration_months, 12);
4451        assert!((cal.collusion_rate - 0.50).abs() < 0.01);
4452        assert!(cal.validate().is_ok());
4453
4454        // Test custom calibration
4455        let custom_cal = AcfeCalibration::new(Decimal::new(200_000, 0), 18);
4456        assert_eq!(custom_cal.median_loss, Decimal::new(200_000, 0));
4457        assert_eq!(custom_cal.median_duration_months, 18);
4458
4459        // Test validation failure
4460        let bad_cal = AcfeCalibration {
4461            collusion_rate: 1.5,
4462            ..Default::default()
4463        };
4464        assert!(bad_cal.validate().is_err());
4465    }
4466
4467    #[test]
4468    fn test_fraud_triangle() {
4469        let triangle = FraudTriangle::new(
4470            PressureType::FinancialTargets,
4471            vec![
4472                OpportunityFactor::WeakInternalControls,
4473                OpportunityFactor::ManagementOverride,
4474            ],
4475            Rationalization::ForTheCompanyGood,
4476        );
4477
4478        // Risk score should be between 0 and 1
4479        let risk = triangle.risk_score();
4480        assert!((0.0..=1.0).contains(&risk));
4481        // Should be relatively high given the components
4482        assert!(risk > 0.5);
4483    }
4484
4485    #[test]
4486    fn test_pressure_types() {
4487        let financial = PressureType::FinancialTargets;
4488        assert!(financial.risk_weight() > 0.5);
4489
4490        let gambling = PressureType::GamblingAddiction;
4491        assert_eq!(gambling.risk_weight(), 0.90);
4492    }
4493
4494    #[test]
4495    fn test_opportunity_factors() {
4496        let override_factor = OpportunityFactor::ManagementOverride;
4497        assert_eq!(override_factor.risk_weight(), 0.90);
4498
4499        let weak_controls = OpportunityFactor::WeakInternalControls;
4500        assert!(weak_controls.risk_weight() > 0.8);
4501    }
4502
4503    #[test]
4504    fn test_rationalizations() {
4505        let entitlement = Rationalization::Entitlement;
4506        assert!(entitlement.risk_weight() > 0.8);
4507
4508        let borrowing = Rationalization::TemporaryBorrowing;
4509        assert!(borrowing.risk_weight() < entitlement.risk_weight());
4510    }
4511
4512    #[test]
4513    fn test_acfe_scheme_serialization() {
4514        let scheme = AcfeScheme::Corruption(CorruptionScheme::BidRigging);
4515        let json = serde_json::to_string(&scheme).expect("Failed to serialize");
4516        let deserialized: AcfeScheme = serde_json::from_str(&json).expect("Failed to deserialize");
4517        assert_eq!(scheme, deserialized);
4518
4519        let calibration = AcfeCalibration::default();
4520        let json = serde_json::to_string(&calibration).expect("Failed to serialize");
4521        let deserialized: AcfeCalibration =
4522            serde_json::from_str(&json).expect("Failed to deserialize");
4523        assert_eq!(calibration.median_loss, deserialized.median_loss);
4524    }
4525}
datasynth_core/models/anomaly.rs

datasynth_core/models/
anomaly.rs